diff --git a/.gitignore b/.gitignore
index 7810562acd61a05ef0d9e39e4e69cf9e8b2a1fde..8de22b3bb0085a0848762c7d073404b89461a636 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,3 +10,8 @@ deploy
 *.suo
 .vs/
 .vscode/
+*.blg
+*.bbl
+*.pyc
+/out/
+/doc/_minted-software-manual/
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
new file mode 100644
index 0000000000000000000000000000000000000000..e578adc74d6e0b60ad709004c5a87a64ac98b960
--- /dev/null
+++ b/.gitlab-ci.yml
@@ -0,0 +1,59 @@
+stages:
+   - build
+
+.build_template:
+   stage: build
+   script:
+      - make realclean
+      - make all
+   only:
+      refs:
+         - master
+         - merge_requests
+      variables:
+         - $CI_PROJECT_URL == 'https://vcgit.hhi.fraunhofer.de/jvet/VVCSoftware_VTM'
+         - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME == 'master'
+
+.build_template_linux:
+   extends: .build_template
+   script:
+      - make realclean
+      - make all
+      - make realclean
+      - make linuxbuild enable-tracing=true
+
+#build_macos:
+#   extends: .build_template
+#   tags:
+#      - macos
+
+build_ubuntu1604:
+   extends: .build_template_linux
+   tags:
+      - ubuntu1604
+
+build_ubuntu1804:
+   extends: .build_template_linux
+   tags:
+      - ubuntu1804
+
+build_ubuntu1804-gcc8:
+   extends: .build_template_linux
+   script:
+      - make realclean
+      - make all toolset=gcc-8
+      - make realclean
+      - make linuxbuild enable-tracing=true toolset=gcc-8
+   tags:
+      - ubuntu1804-gcc8
+
+build_vc191x:
+   extends: .build_template
+   tags:
+      - vc191x
+
+build_vc192x:
+   extends: .build_template
+   tags:
+      - vc192x
+
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2b9737cc9885c4bda03c5cd1bced0182d9d3d0d3..574707d86a77b06cee8e523cf691a8ca388bcb3e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -28,6 +28,7 @@ if( CMAKE_SYSTEM_NAME STREQUAL "Linux" )
 endif()
 
 set( EXTENSION_360_VIDEO OFF CACHE BOOL "If EXTENSION_360_VIDEO is on, 360Lib will be added" )
+set( EXTENSION_HDRTOOLS OFF CACHE BOOL "If EXTENSION_HDRTOOLS is on, HDRLib will be added" )
 set( SET_ENABLE_TRACING OFF CACHE BOOL "Set ENABLE_TRACING as a compiler flag" )
 set( ENABLE_TRACING OFF CACHE BOOL "If SET_ENABLE_TRACING is on, it will be set to this value" )
 
@@ -112,6 +113,7 @@ if( CMAKE_CXX_COMPILER_ID STREQUAL "GNU" )
     if( USE_ADDRESS_SANITIZER )
       # add compile options
       add_compile_options( "-fsanitize=address" )
+      add_link_options( "-fsanitize=address" )
     endif()
   endif()
 
@@ -136,6 +138,9 @@ if( EXTENSION_360_VIDEO )
   add_subdirectory( "source/Lib/Lib360" )
   add_subdirectory( "source/Lib/AppEncHelper360" )
 endif()
+if ( EXTENSION_HDRTOOLS )
+  add_subdirectory( "source/Lib/HDRLib")
+endif()
 add_subdirectory( "source/Lib/DecoderAnalyserLib" )
 add_subdirectory( "source/Lib/DecoderLib" )
 add_subdirectory( "source/Lib/EncoderLib" )
@@ -146,6 +151,7 @@ add_subdirectory( "source/App/DecoderApp" )
 add_subdirectory( "source/App/EncoderApp" )
 add_subdirectory( "source/App/SEIRemovalApp" )
 add_subdirectory( "source/App/Parcat" )
+add_subdirectory( "source/App/StreamMergeApp" )
 if( EXTENSION_360_VIDEO )
   add_subdirectory( "source/App/utils/360ConvertApp" )
 endif()
diff --git a/COPYING b/COPYING
index 0227899c94523ffb16b26e6a85ff55add99123e4..a328b7da34cb542dd0137b58b0380080de67f97a 100644
--- a/COPYING
+++ b/COPYING
@@ -3,7 +3,7 @@ License, included below. This software may be subject to other third party
 and contributor rights, including patent rights, and no such rights are
 granted under this license.  Â 
 
-Copyright (c) 2010-2019, ITU/ISO/IEC
+Copyright (c) 2010-2020, ITU/ISO/IEC
 All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
diff --git a/Makefile b/Makefile
index d61744bed9a7926118b9107c9e59384d12a3126d..c48915825cb53879040adfad525f555ed0770dbf 100644
--- a/Makefile
+++ b/Makefile
@@ -17,7 +17,7 @@ BUILD_SCRIPT := $(CURDIR)/cmake/CMakeBuild/bin/cmake.py
 #
 
 TARGETS := CommonLib DecoderAnalyserApp DecoderAnalyserLib DecoderApp DecoderLib 
-TARGETS += EncoderApp EncoderLib Utilities SEIRemovalApp
+TARGETS += EncoderApp EncoderLib Utilities SEIRemovalApp StreamMergeApp
 
 ifeq ($(OS),Windows_NT)
   ifneq ($(MSYSTEM),)
diff --git a/README.md b/README.md
index b695964769c896c17c0b5f6690e79da19d7c0ca3..d2853b793a91c0f9554b6f081da57508b242888e 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,21 @@
-How to build VTM
-================
+VTM reference software for VVC
+==============================
+
+This software package is the reference software for Versatile Video Coding (VVC). The reference software includes both encoder and decoder functionality.
+
+Reference software is useful in aiding users of a video coding standard to establish and test conformance and interoperability, and to educate users and demonstrate the capabilities of the standard. For these purposes, this software is provided as an aid for the study and implementation of Versatile Video Coding.
+
+The software has been jointly developed by the ITU-T Video Coding Experts Group (VCEG, Question 6 of ITU-T Study Group 16) and the ISO/IEC Moving Picture Experts Group (MPEG, Working Group 11 of Subcommittee 29 of ISO/IEC Joint Technical Committee 1).
+
+A software manual, which contains usage instructions, can be found in the "doc" subdirectory of this software package.
+
+Build instructions
+==================
+
+The CMake tool is used to create platform-specific build files. 
+
+Although CMake may be able to generate 32-bit binaries, **it is generally suggested to build 64-bit binaries**. 32-bit binaries are not able to access more than 2GB of RAM, which will not be sufficient for coding larger image formats. Building in 32-bit environments is not tested and will not be supported.
 
-The software uses CMake to create platform-specific build files. 
 
 Build instructions for plain CMake (suggested)
 ----------------------------------------------
@@ -21,20 +35,38 @@ mkdir build
 Use one of the following CMake commands, based on your platform. Feel free to change the commands to satisfy
 your needs.
 
-**Windows Visual Studio 2015 64 Bit:**
+**Windows Visual Studio 2015/17/19 64 Bit:**
+
+Use the proper generator string for generating Visual Studio files, e.g. for VS 2015:
+
 ```bash
 cd build
 cmake .. -G "Visual Studio 14 2015 Win64"
 ```
+
 Then open the generated solution file in MS Visual Studio.
 
+For VS 2017 use "Visual Studio 15 2017 Win64", for VS 2019 use "Visual Studio 16 2019".
+
+Visual Studio 2019 also allows you to open the CMake directory directly. Choose "File->Open->CMake" for this option.
+
 **macOS Xcode:**
+
+To generate an Xcode workspace, type:
 ```bash
 cd build
 cmake .. -G "Xcode"
 ```
 Then open the generated work space in Xcode.
 
+For generating Makefiles with optional non-default compilers, use the following commands:
+
+```bash
+cd build
+cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=gcc-9 -DCMAKE_CXX_COMPILER=g++-9
+```
+In this example, the GCC 9 compiler installed via Homebrew is used for a release build.
+
 **Linux**
 
 For generating Linux Release Makefile:
@@ -68,16 +100,21 @@ To use the default system compiler simply call:
 ```bash
 make all
 ```
-For MSYS2 and MinGW: Open an MSYS MinGW 64-Bit terminal and change into the root directory of this project.
+
+
+**MSYS2 and MinGW (Windows)**
+
+**Note:** Build files for MSYS MinGW were added on request. The build platform is not regularly tested and can't be supported. 
+
+Open an MSYS MinGW 64-Bit terminal and change into the root directory of this project.
 
 Call:
 ```bash
 make all toolset=gcc
 ```
 
+The following tools need to be installed for MSYS2 and MinGW:
 
-Tool Installation on Windows
-----------------------------
 Download CMake: http://www.cmake.org/ and install it.
 
 Python and GnuWin32 are not mandatory, but they simplify the build process for the user.
diff --git a/cfg/444/yuv444.cfg b/cfg/444/yuv444.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..c64afd759ff18af972a5a350252607a7b61815d6
--- /dev/null
+++ b/cfg/444/yuv444.cfg
@@ -0,0 +1 @@
+BDPCM: 2
diff --git a/cfg/encoder_intra_vtm.cfg b/cfg/encoder_intra_vtm.cfg
index 80c2ee8475c742931beeb0d4d6cdec678d865318..fc8224aa6b470a2436a133437c31a55080b73ca8 100644
--- a/cfg/encoder_intra_vtm.cfg
+++ b/cfg/encoder_intra_vtm.cfg
@@ -3,7 +3,7 @@ BitstreamFile                 : str.bin
 ReconFile                     : rec.yuv
 
 #======== Profile ================
-Profile                       : next
+Profile                       : auto
 
 #======== Unit definition ================
 MaxCUWidth                    : 64          # Maximum coding unit width in pixel
@@ -48,37 +48,20 @@ TransformSkipFast             : 1           # Fast Transform skipping (0: OFF, 1
 TransformSkipLog2MaxSize      : 5
 SAOLcuBoundary                : 0           # SAOLcuBoundary using non-deblocked pixels (0: OFF, 1: ON)
 
-#============ Slices ================
-SliceMode                : 0                # 0: Disable all slice options.
-                                            # 1: Enforce maximum number of LCU in an slice,
-                                            # 2: Enforce maximum number of bytes in an 'slice'
-                                            # 3: Enforce maximum number of tiles in a slice
-SliceArgument            : 1500             # Argument for 'SliceMode'.
-                                            # If SliceMode==1 it represents max. SliceGranularity-sized blocks per slice.
-                                            # If SliceMode==2 it represents max. bytes per slice.
-                                            # If SliceMode==3 it represents max. tiles per slice.
-
-LFCrossSliceBoundaryFlag : 1                # In-loop filtering, including ALF and DB, is across or not across slice boundary.
-                                            # 0:not across, 1: across
-
-#============ PCM ================
-PCMEnabledFlag                      : 0                # 0: No PCM mode
-PCMLog2MaxSize                      : 5                # Log2 of maximum PCM block size.
-PCMLog2MinSize                      : 3                # Log2 of minimum PCM block size.
-PCMInputBitDepthFlag                : 1                # 0: PCM bit-depth is internal bit-depth. 1: PCM bit-depth is input bit-depth.
-PCMFilterDisableFlag                : 0                # 0: Enable loop filtering on I_PCM samples. 1: Disable loop filtering on I_PCM samples.
-
-#============ Lossless ================
-TransquantBypassEnable     : 0                         # Value of PPS flag.
-CUTransquantBypassFlagForce: 0                         # Force transquant bypass mode, when transquant_bypass_enable_flag is enabled
+#============ Tiles / Slices ================
+EnablePicPartitioning         : 0           # Enable picture partitioning (0: single tile, single slice, 1: multiple tiles/slices can be used)
 
 #============ VTM settings ======================
 LoopFilterTcOffset_div2             : 0
 SEIDecodedPictureHash               : 0
-CbQpOffset                          : 1
-CrQpOffset                          : 1
+CbQpOffset                          : 0
+CrQpOffset                          : 0
+SameCQPTablesForAllChroma           : 1
+QpInValCb                           : 1 31 43
+QpOutValCb                          : 1 32 41
 TemporalSubsampleRatio              : 8
 
+ReWriteParamSets                    : 1
 #============ NEXT ====================
 
 # General
@@ -89,14 +72,16 @@ DualITree                    : 1      # separate partitioning of luma and chroma
 MinQTLumaISlice              : 8
 MinQTChromaISlice            : 4
 MinQTNonISlice               : 8
-MaxBTDepth                   : 3
-MaxBTDepthISliceL            : 3
-MaxBTDepthISliceC            : 3
+MaxMTTHierarchyDepth         : 3
+MaxMTTHierarchyDepthISliceL  : 3
+MaxMTTHierarchyDepthISliceC  : 3
 
 MTS                          : 1
-MTSIntraMaxCand              : 3
+MTSIntraMaxCand              : 4
 MTSInterMaxCand              : 4
 SBT                          : 1
+LFNST                        : 1
+ISP                          : 1
 Affine                       : 1
 SubPuMvp                     : 1
 MaxNumMergeCand              : 6
@@ -104,16 +89,25 @@ LMChroma                     : 1      # use CCLM only
 DepQuant                     : 1
 IMV                          : 1
 ALF                          : 1
-IBC                          : 0      # turned off in CTC 
+IBC                          : 0      # turned off in CTC
 AllowDisFracMMVD             : 1
 AffineAmvr                   : 0
-LumaReshapeEnable            : 1      # luma reshaping. 0: disable 1:enable 
+LMCSEnable                   : 1      # LMCS: 0: disable, 1:enable
+LMCSSignalType               : 0      # Input signal type: 0:SDR, 1:HDR-PQ, 2:HDR-HLG
+LMCSUpdateCtrl               : 1      # LMCS model update control: 0:RA, 1:AI, 2:LDB/LDP
+LMCSOffset                   : 2      # chroma residual scaling offset
+MRL                          : 1
+MIP                          : 1
+JointCbCr                    : 1      # joint coding of chroma residuals (if available): 0: disable, 1: enable
+ChromaTS                     : 1
 
 # Fast tools
 PBIntraFast                  : 1
 ISPFast                      : 1
 FastMrg                      : 1
 AMaxBT                       : 1
+FastMIP                      : 1
+FastLFNST                    : 1
 
 # Encoder optimization tools
 AffineAmvrEncOpt             : 0
diff --git a/cfg/encoder_lowdelay_P_vtm.cfg b/cfg/encoder_lowdelay_P_vtm.cfg
index 6d345e4c713025c432e061cee682f32d7c7eb320..e0a5088970fe37e903e67447a2c09bc95910ffdb 100644
--- a/cfg/encoder_lowdelay_P_vtm.cfg
+++ b/cfg/encoder_lowdelay_P_vtm.cfg
@@ -3,7 +3,7 @@ BitstreamFile                 : str.bin
 ReconFile                     : rec.yuv
 
 #======== Profile ================
-Profile                       : next
+Profile                       : auto
 
 #======== Unit definition ================
 MaxCUWidth                    : 64          # Maximum coding unit width in pixel
@@ -13,15 +13,19 @@ MaxPartitionDepth             : 4           # Maximum coding unit depth
 #======== Coding Structure =============
 IntraPeriod                   : -1          # Period of I-Frame ( -1 = only first)
 DecodingRefreshType           : 0           # Random Accesss 0:none, 1:CRA, 2:IDR, 3:Recovery Point SEI
-GOPSize                       : 4           # GOP Size (number of B slice = GOPSize-1)
+GOPSize                       : 8           # GOP Size (number of B slice = GOPSize-1)
 
 IntraQPOffset                 : -1
 LambdaFromQpEnable            : 1           # see JCTVC-X0038 for suitable parameters for IntraQPOffset, QPoffset, QPOffsetModelOff, QPOffsetModelScale when enabled
-#        Type POC QPoffset QPOffsetModelOff QPOffsetModelScale CbQPoffset CrQPoffset QPfactor tcOffsetDiv2 betaOffsetDiv2 temporal_id #ref_pics_active #ref_pics reference pictures     predict deltaRPS #ref_idcs reference idcs
-Frame1:  P    1   5       -6.5                      0.2590         0          0          1.0   0            0               0           4                4         -1 -5 -9 -13       0
-Frame2:  P    2   4       -6.5                      0.2590         0          0          1.0   0            0               0           4                4         -1 -2 -6 -10       1      -1       5         1 1 1 0 1
-Frame3:  P    3   5       -6.5                      0.2590         0          0          1.0   0            0               0           4                4         -1 -3 -7 -11       1      -1       5         0 1 1 1 1
-Frame4:  P    4   1        0.0                      0.0            0          0          1.0   0            0               0           4                4         -1 -4 -8 -12       1      -1       5         0 1 1 1 1
+#        Type POC QPoffset QPOffsetModelOff QPOffsetModelScale CbQPoffset CrQPoffset QPfactor tcOffsetDiv2 betaOffsetDiv2 temporal_id #ref_pics_active_L0 #ref_pics_L0   reference_pictures_L0 #ref_pics_active_L1 #ref_pics_L1   reference_pictures_L1
+Frame1:    P   1   5       -6.5                      0.2590         0          0          1.0      0            0               0             4                4         1 9 17 25                      0                   0
+Frame2:    P   2   4       -6.5                      0.2590         0          0          1.0      0            0               0             4                4         1 2 10 18                      0                   0
+Frame3:    P   3   5       -6.5                      0.2590         0          0          1.0      0            0               0             4                4         1 3 11 19                      0                   0
+Frame4:    P   4   4       -6.5                      0.2590         0          0          1.0      0            0               0             4                4         1 4 12 20                      0                   0
+Frame5:    P   5   5       -6.5                      0.2590         0          0          1.0      0            0               0             4                4         1 5 13 21                      0                   0
+Frame6:    P   6   4       -6.5                      0.2590         0          0          1.0      0            0               0             4                4         1 6 14 22                      0                   0
+Frame7:    P   7   5       -6.5                      0.2590         0          0          1.0      0            0               0             4                4         1 7 15 23                      0                   0
+Frame8:    P   8   1        0.0                      0.0            0          0          1.0      0            0               0             4                4         1 8 16 24                      0                   0
 
 #=========== Motion Search =============
 FastSearch                    : 1           # 0:Full search  1:TZ search
@@ -56,29 +60,13 @@ TransformSkipFast             : 1           # Fast Transform skipping (0: OFF, 1
 TransformSkipLog2MaxSize      : 5
 SAOLcuBoundary                : 0           # SAOLcuBoundary using non-deblocked pixels (0: OFF, 1: ON)
 
-#============ Slices ================
-SliceMode                : 0                # 0: Disable all slice options.
-                                            # 1: Enforce maximum number of LCU in an slice,
-                                            # 2: Enforce maximum number of bytes in an 'slice'
-                                            # 3: Enforce maximum number of tiles in a slice
-SliceArgument            : 1500             # Argument for 'SliceMode'.
-                                            # If SliceMode==1 it represents max. SliceGranularity-sized blocks per slice.
-                                            # If SliceMode==2 it represents max. bytes per slice.
-                                            # If SliceMode==3 it represents max. tiles per slice.
-
-LFCrossSliceBoundaryFlag : 1                # In-loop filtering, including ALF and DB, is across or not across slice boundary.
-                                            # 0:not across, 1: across
-
-#============ PCM ================
-PCMEnabledFlag                      : 0                # 0: No PCM mode
-PCMLog2MaxSize                      : 5                # Log2 of maximum PCM block size.
-PCMLog2MinSize                      : 3                # Log2 of minimum PCM block size.
-PCMInputBitDepthFlag                : 1                # 0: PCM bit-depth is internal bit-depth. 1: PCM bit-depth is input bit-depth.
-PCMFilterDisableFlag                : 0                # 0: Enable loop filtering on I_PCM samples. 1: Disable loop filtering on I_PCM samples.
-
-#============ Lossless ================
-TransquantBypassEnable     : 0                         # Value of PPS flag.
-CUTransquantBypassFlagForce: 0                         # Force transquant bypass mode, when transquant_bypass_enable_flag is enabled
+#=========== TemporalFilter =================
+TemporalFilter                : 0           # Enable/disable GOP Based Temporal Filter
+TemporalFilterFutureReference : 0           # Enable/disable reading future frames
+TemporalFilterStrengthFrame4  : 0.4         # Enable filter at every 4th frame with the given strength
+
+#============ Tiles / Slices ================
+EnablePicPartitioning         : 0           # Enable picture partitioning (0: single tile, single slice, 1: multiple tiles/slices can be used)
 
 #============ Rate Control ======================
 RateControl                         : 0                # Rate control: enable rate control
@@ -92,9 +80,12 @@ RCForceIntraQP                      : 0                # Rate control: force int
 #============ VTM settings ======================
 LoopFilterTcOffset_div2             : 0
 SEIDecodedPictureHash               : 0
-CbQpOffset                          : 1
-CrQpOffset                          : 1
-
+CbQpOffset                          : 0
+CrQpOffset                          : 0
+SameCQPTablesForAllChroma           : 1
+QpInValCb                           : 32 44
+QpOutValCb                          : 32 41
+ReWriteParamSets                    : 1
 #============ NEXT ====================
 
 # General
@@ -105,14 +96,15 @@ DualITree                    : 1      # separate partitioning of luma and chroma
 MinQTLumaISlice              : 8
 MinQTChromaISlice            : 4
 MinQTNonISlice               : 8
-MaxBTDepth                   : 3
-MaxBTDepthISliceL            : 3
-MaxBTDepthISliceC            : 3
+MaxMTTHierarchyDepth         : 3
+MaxMTTHierarchyDepthISliceL  : 3
+MaxMTTHierarchyDepthISliceC  : 3
 
 MTS                          : 1
 MTSIntraMaxCand              : 3
 MTSInterMaxCand              : 4
 SBT                          : 1
+ISP                          : 1
 Affine                       : 1
 SubPuMvp                     : 1
 MaxNumMergeCand              : 6
@@ -120,22 +112,34 @@ LMChroma                     : 1      # use CCLM only
 DepQuant                     : 1
 IMV                          : 1
 ALF                          : 1
-MHIntra                      : 1
+CIIP                         : 1
 IBC                          : 0      # turned off in CTC
 AllowDisFracMMVD             : 1
 AffineAmvr                   : 0
-LumaReshapeEnable            : 1      # luma reshaping. 0: disable 1:enable 
+LMCSEnable                   : 1      # LMCS: 0: disable, 1:enable
+LMCSSignalType               : 0      # Input signal type: 0:SDR, 1:HDR-PQ, 2:HDR-HLG
+LMCSUpdateCtrl               : 2      # LMCS model update control: 0:RA, 1:AI, 2:LDB/LDP
+LMCSOffset                   : 1      # chroma residual scaling offset
+MRL                          : 1
+MIP                          : 0
+JointCbCr                    : 1      # joint coding of chroma residuals (if available): 0: disable, 1: enable
+PROF                         : 1
+PPSorSliceMode               : 3
+ChromaTS                     : 1
 
 # Fast tools
 PBIntraFast                  : 1
-ISPFast                      : 1
+ISPFast                      : 0
 FastMrg                      : 1
 AMaxBT                       : 1
+FastMIP                      : 0
+FastLocalDualTreeMode        : 2
 
 # Encoder optimization tools
 AffineAmvrEncOpt             : 0
-
+MmvdDisNum                   : 6
 ### DO NOT ADD ANYTHING BELOW THIS LINE ###
 ### DO NOT DELETE THE EMPTY LINE BELOW ###
 
 
+
diff --git a/cfg/encoder_lowdelay_vtm.cfg b/cfg/encoder_lowdelay_vtm.cfg
index 07233c18980035d3eb60cdb6a464c17e2b115899..2b449a99efbcb37256ed42fe5b7420dbf146e14a 100644
--- a/cfg/encoder_lowdelay_vtm.cfg
+++ b/cfg/encoder_lowdelay_vtm.cfg
@@ -3,7 +3,7 @@ BitstreamFile                 : str.bin
 ReconFile                     : rec.yuv
 
 #======== Profile ================
-Profile                       : next
+Profile                       : auto
 
 #======== Unit definition ================
 MaxCUWidth                    : 64          # Maximum coding unit width in pixel
@@ -13,15 +13,19 @@ MaxPartitionDepth             : 4           # Maximum coding unit depth
 #======== Coding Structure =============
 IntraPeriod                   : -1          # Period of I-Frame ( -1 = only first)
 DecodingRefreshType           : 0           # Random Accesss 0:none, 1:CRA, 2:IDR, 3:Recovery Point SEI
-GOPSize                       : 4           # GOP Size (number of B slice = GOPSize-1)
+GOPSize                       : 8           # GOP Size (number of B slice = GOPSize-1)
 
 IntraQPOffset                 : -1
 LambdaFromQpEnable            : 1           # see JCTVC-X0038 for suitable parameters for IntraQPOffset, QPoffset, QPOffsetModelOff, QPOffsetModelScale when enabled
-#        Type POC QPoffset QPOffsetModelOff QPOffsetModelScale CbQPoffset CrQPoffset QPfactor tcOffsetDiv2 betaOffsetDiv2 temporal_id #ref_pics_active #ref_pics reference pictures     predict deltaRPS #ref_idcs reference idcs
-Frame1:  B    1   5       -6.5                      0.2590         0          0          1.0      0            0               0           4                4         -1 -5 -9 -13       0
-Frame2:  B    2   4       -6.5                      0.2590         0          0          1.0      0            0               0           4                4         -1 -2 -6 -10       1      -1       5         1 1 1 0 1
-Frame3:  B    3   5       -6.5                      0.2590         0          0          1.0      0            0               0           4                4         -1 -3 -7 -11       1      -1       5         0 1 1 1 1
-Frame4:  B    4   1        0.0                      0.0            0          0          1.0      0            0               0           4                4         -1 -4 -8 -12       1      -1       5         0 1 1 1 1
+#        Type POC QPoffset QPOffsetModelOff QPOffsetModelScale CbQPoffset CrQPoffset QPfactor tcOffsetDiv2 betaOffsetDiv2 temporal_id #ref_pics_active_L0 #ref_pics_L0   reference_pictures_L0 #ref_pics_active_L1 #ref_pics_L1   reference_pictures_L1
+Frame1:    B   1   5       -6.5                      0.2590         0          0          1.0      0            0               0             4                4         1 9 17 25                     4                   4      1 9 17 25
+Frame2:    B   2   4       -6.5                      0.2590         0          0          1.0      0            0               0             4                4         1 2 10 18                     4                   4      1 2 10 18
+Frame3:    B   3   5       -6.5                      0.2590         0          0          1.0      0            0               0             4                4         1 3 11 19                     4                   4      1 3 11 19
+Frame4:    B   4   4       -6.5                      0.2590         0          0          1.0      0            0               0             4                4         1 4 12 20                     4                   4      1 4 12 20
+Frame5:    B   5   5       -6.5                      0.2590         0          0          1.0      0            0               0             4                4         1 5 13 21                     4                   4      1 5 13 21
+Frame6:    B   6   4       -6.5                      0.2590         0          0          1.0      0            0               0             4                4         1 6 14 22                     4                   4      1 6 14 22
+Frame7:    B   7   5       -6.5                      0.2590         0          0          1.0      0            0               0             4                4         1 7 15 23                     4                   4      1 7 15 23
+Frame8:    B   8   1        0.0                      0.0            0          0          1.0      0            0               0             4                4         1 8 16 24                     4                   4      1 8 16 24
 
 #=========== Motion Search =============
 FastSearch                    : 1           # 0:Full search  1:TZ search
@@ -56,29 +60,13 @@ TransformSkipFast             : 1           # Fast Transform skipping (0: OFF, 1
 TransformSkipLog2MaxSize      : 5
 SAOLcuBoundary                : 0           # SAOLcuBoundary using non-deblocked pixels (0: OFF, 1: ON)
 
-#============ Slices ================
-SliceMode                : 0                # 0: Disable all slice options.
-                                            # 1: Enforce maximum number of LCU in an slice,
-                                            # 2: Enforce maximum number of bytes in an 'slice'
-                                            # 3: Enforce maximum number of tiles in a slice
-SliceArgument            : 1500             # Argument for 'SliceMode'.
-                                            # If SliceMode==1 it represents max. SliceGranularity-sized blocks per slice.
-                                            # If SliceMode==2 it represents max. bytes per slice.
-                                            # If SliceMode==3 it represents max. tiles per slice.
-
-LFCrossSliceBoundaryFlag : 1                # In-loop filtering, including ALF and DB, is across or not across slice boundary.
-                                            # 0:not across, 1: across
-
-#============ PCM ================
-PCMEnabledFlag                      : 0                # 0: No PCM mode
-PCMLog2MaxSize                      : 5                # Log2 of maximum PCM block size.
-PCMLog2MinSize                      : 3                # Log2 of minimum PCM block size.
-PCMInputBitDepthFlag                : 1                # 0: PCM bit-depth is internal bit-depth. 1: PCM bit-depth is input bit-depth.
-PCMFilterDisableFlag                : 0                # 0: Enable loop filtering on I_PCM samples. 1: Disable loop filtering on I_PCM samples.
-
-#============ Lossless ================
-TransquantBypassEnable     : 0                         # Value of PPS flag.
-CUTransquantBypassFlagForce: 0                         # Force transquant bypass mode, when transquant_bypass_enable_flag is enabled
+#=========== TemporalFilter =================
+TemporalFilter                : 0           # Enable/disable GOP Based Temporal Filter
+TemporalFilterFutureReference : 0           # Enable/disable reading future frames
+TemporalFilterStrengthFrame4  : 0.4         # Enable filter at every 4th frame with the given strength
+
+#============ Tiles / Slices ================
+EnablePicPartitioning         : 0           # Enable picture partitioning (0: single tile, single slice, 1: multiple tiles/slices can be used)
 
 #============ Rate Control ======================
 RateControl                         : 0                # Rate control: enable rate control
@@ -92,9 +80,12 @@ RCForceIntraQP                      : 0                # Rate control: force int
 #============ VTM settings ======================
 LoopFilterTcOffset_div2             : 0
 SEIDecodedPictureHash               : 0
-CbQpOffset                          : 1
-CrQpOffset                          : 1
-
+CbQpOffset                          : 0
+CrQpOffset                          : 0
+SameCQPTablesForAllChroma           : 1
+QpInValCb                           : 32 44
+QpOutValCb                          : 32 41
+ReWriteParamSets                    : 1
 #============ NEXT ====================
 
 # General
@@ -105,14 +96,16 @@ DualITree                    : 1      # separate partitioning of luma and chroma
 MinQTLumaISlice              : 8
 MinQTChromaISlice            : 4
 MinQTNonISlice               : 8
-MaxBTDepth                   : 3
-MaxBTDepthISliceL            : 3
-MaxBTDepthISliceC            : 3
+MaxMTTHierarchyDepth         : 3
+MaxMTTHierarchyDepthISliceL  : 3
+MaxMTTHierarchyDepthISliceC  : 3
 
 MTS                          : 1
 MTSIntraMaxCand              : 3
 MTSInterMaxCand              : 4
 SBT                          : 1
+ISP                          : 1
+MMVD                         : 1
 Affine                       : 1
 SubPuMvp                     : 1
 MaxNumMergeCand              : 6
@@ -120,25 +113,37 @@ LMChroma                     : 1      # use CCLM only
 DepQuant                     : 1
 IMV                          : 1
 ALF                          : 1
-GBi                          : 1 
-GBiFast                      : 1 
-MHIntra                      : 1
+BCW                          : 1
+BcwFast                      : 1
+CIIP                         : 1
 Triangle                     : 1
 IBC                          : 0      # turned off in CTC
 AllowDisFracMMVD             : 1
 AffineAmvr                   : 0
-LumaReshapeEnable            : 1      # luma reshaping. 0: disable 1:enable 
+LMCSEnable                   : 1      # LMCS: 0: disable, 1:enable
+LMCSSignalType               : 0      # Input signal type: 0:SDR, 1:HDR-PQ, 2:HDR-HLG
+LMCSUpdateCtrl               : 2      # LMCS model update control: 0:RA, 1:AI, 2:LDB/LDP
+LMCSOffset                   : 1      # chroma residual scaling offset
+MRL                          : 1
+MIP                          : 0
+JointCbCr                    : 1      # joint coding of chroma residuals (if available): 0: disable, 1: enable
+PROF                         : 1
+PPSorSliceMode               : 2
+ChromaTS                     : 1
 
 # Fast tools
 PBIntraFast                  : 1
-ISPFast                      : 1
+ISPFast                      : 0
 FastMrg                      : 1
 AMaxBT                       : 1
+FastMIP                      : 0
+FastLocalDualTreeMode        : 2
 
 # Encoder optimization tools
 AffineAmvrEncOpt             : 0
-
+MmvdDisNum                   : 6
 ### DO NOT ADD ANYTHING BELOW THIS LINE ###
 ### DO NOT DELETE THE EMPTY LINE BELOW ###
 
 
+
diff --git a/cfg/encoder_randomaccess_vtm.cfg b/cfg/encoder_randomaccess_vtm.cfg
index 389209a4cec35122dd6b916c5d4e515afc526c32..47f57f957765ea9bc213667a1bd188cad7134c0d 100644
--- a/cfg/encoder_randomaccess_vtm.cfg
+++ b/cfg/encoder_randomaccess_vtm.cfg
@@ -3,7 +3,7 @@ BitstreamFile                 : str.bin
 ReconFile                     : rec.yuv
 
 #======== Profile ================
-Profile                       : next
+Profile                       : auto
 
 #======== Unit definition ================
 MaxCUWidth                    : 64          # Maximum coding unit width in pixel
@@ -17,23 +17,23 @@ GOPSize                       : 16          # GOP Size (number of B slice = GOPS
 
 IntraQPOffset                 : -3
 LambdaFromQpEnable            : 1           # see JCTVC-X0038 for suitable parameters for IntraQPOffset, QPoffset, QPOffsetModelOff, QPOffsetModelScale when enabled
-#        Type POC QPoffset QPOffsetModelOff QPOffsetModelScale CbQPoffset CrQPoffset QPfactor tcOffsetDiv2 betaOffsetDiv2 temporal_id #ref_pics_active #ref_pics reference pictures     predict deltaRPS #ref_idcs reference idcs
-Frame1:  B   16   1        0.0                      0.0            0          0          1.0      0            0              0           2                3         -16 -24 -32            0
-Frame2:  B    8   1       -4.8848                   0.2061         0          0          1.0      0            0              1           2                3         -8  -16   8            1       8        4         1 1 0 1
-Frame3:  B    4   4       -5.7476                   0.2286         0          0          1.0      0            0              2           2                4         -4  -12   4  12        1       4        4         1 1 1 1
-Frame4:  B    2   5       -5.90                     0.2333         0          0          1.0      0            0              3           2                5         -2  -10   2   6  14    1       2        5         1 1 1 1 1
-Frame5:  B    1   6       -7.1444                   0.3            0          0          1.0      0            0              4           2                5         -1    1  3    7  15    1       1        6         1 0 1 1 1 1
-Frame6:  B    3   6       -7.1444                   0.3            0          0          1.0      0            0              4           2                5         -1   -3   1   5  13    1      -2        6         1 1 1 1 1 0
-Frame7:  B    6   5       -5.90                     0.2333         0          0          1.0      0            0              3           2                4         -2   -6   2  10        1      -3        6         0 1 1 1 1 0
-Frame8:  B    5   6       -7.1444                   0.3            0          0          1.0      0            0              4           2                5         -1   -5   1   3  11    1       1        5         1 1 1 1 1
-Frame9:  B    7   6       -7.1444                   0.3            0          0          1.0      0            0              4           2                5         -1   -3  -7   1   9    1      -2        6         1 1 1 1 1 0
-Frame10: B   12   4       -5.7476                   0.2286         0          0          1.0      0            0              2           2                3         -4  -12   4            1      -5        6         0 0 1 1 1 0
-Frame11: B   10   5       -5.90                     0.2333         0          0          1.0      0            0              3           2                4         -2  -10   2   6        1       2        4         1 1 1 1
-Frame12: B    9   6       -7.1444                   0.3            0          0          1.0      0            0              4           2                5         -1   -9   1   3   7    1       1        5         1 1 1 1 1
-Frame13: B   11   6       -7.1444                   0.3            0          0          1.0      0            0              4           2                5         -1   -3 -11   1   5    1      -2        6         1 1 1 1 1 0
-Frame14: B   14   5       -5.90                     0.2333         0          0          1.0      0            0              3           2                4         -2   -6 -14   2        1      -3        6         0 1 1 1 1 0
-Frame15: B   13   6       -7.1444                   0.3            0          0          1.0      0            0              4           2                5         -1   -5 -13   1   3    1       1        5         1 1 1 1 1
-Frame16: B   15   6       -7.1444                   0.3            0          0          1.0      0            0              4           2                5         -1   -3 -7  -15   1    1      -2        6         1 1 1 1 1 0
+#        Type POC QPoffset QPOffsetModelOff QPOffsetModelScale CbQPoffset CrQPoffset QPfactor tcOffsetDiv2 betaOffsetDiv2 temporal_id #ref_pics_active_L0 #ref_pics_L0   reference_pictures_L0 #ref_pics_active_L1 #ref_pics_L1   reference_pictures_L1
+Frame1:   B   16   1        0.0                      0.0            0          0          1.0      0            0              0             2                3          16 32 24                    2                2           16 32
+Frame2:   B    8   1       -4.8848                   0.2061         0          0          1.0      0            0              1             2                2          8 16                        2                2           -8 8
+Frame3:   B    4   4       -5.7476                   0.2286         0          0          1.0      0            0              2             2                2          4 12                        2                2           -4 -12
+Frame4:   B    2   5       -5.90                     0.2333         0          0          1.0      0            0              3             2                2          2 10                        2                3           -2 -6 -14
+Frame5:   B    1   6       -7.1444                   0.3            0          0          1.0      0            0              4             2                2          1 -1                        2                4           -1 -3 -7 -15
+Frame6:   B    3   6       -7.1444                   0.3            0          0          1.0      0            0              4             2                2          1 3                         2                3           -1 -5 -13
+Frame7:   B    6   5       -5.90                     0.2333         0          0          1.0      0            0              3             2                2          2 6                         2                2           -2 -10
+Frame8:   B    5   6       -7.1444                   0.3            0          0          1.0      0            0              4             2                2          1 5                         2                3           -1 -3 -11
+Frame9:   B    7   6       -7.1444                   0.3            0          0          1.0      0            0              4             2                3          1 3 7                       2                2           -1 -9
+Frame10:  B   12   4       -5.7476                   0.2286         0          0          1.0      0            0              2             2                2          4 12                        2                2           -4 4
+Frame11:  B   10   5       -5.90                     0.2333         0          0          1.0      0            0              3             2                2          2 10                        2                2           -2 -6
+Frame12:  B    9   6       -7.1444                   0.3            0          0          1.0      0            0              4             2                2          1 9                         2                3           -1 -3 -7
+Frame13:  B   11   6       -7.1444                   0.3            0          0          1.0      0            0              4             2                3          1 3 11                      2                2           -1 -5
+Frame14:  B   14   5       -5.90                     0.2333         0          0          1.0      0            0              3             2                3          2 6 14                      2                2           -2 2
+Frame15:  B   13   6       -7.1444                   0.3            0          0          1.0      0            0              4             2                3          1 5 13                      2                2           -1 -3
+Frame16:  B   15   6       -7.1444                   0.3            0          0          1.0      0            0              4             2                4          1 3 7 15                    2                2           -1 1
 
 #=========== Motion Search =============
 FastSearch                    : 1           # 0:Full search  1:TZ search
@@ -70,29 +70,14 @@ TransformSkipFast             : 1           # Fast Transform skipping (0: OFF, 1
 TransformSkipLog2MaxSize      : 5
 SAOLcuBoundary                : 0           # SAOLcuBoundary using non-deblocked pixels (0: OFF, 1: ON)
 
-#============ Slices ================
-SliceMode                : 0                # 0: Disable all slice options.
-                                            # 1: Enforce maximum number of LCU in an slice,
-                                            # 2: Enforce maximum number of bytes in an 'slice'
-                                            # 3: Enforce maximum number of tiles in a slice
-SliceArgument            : 1500             # Argument for 'SliceMode'.
-                                            # If SliceMode==1 it represents max. SliceGranularity-sized blocks per slice.
-                                            # If SliceMode==2 it represents max. bytes per slice.
-                                            # If SliceMode==3 it represents max. tiles per slice.
-
-LFCrossSliceBoundaryFlag : 1                # In-loop filtering, including ALF and DB, is across or not across slice boundary.
-                                            # 0:not across, 1: across
-
-#============ PCM ================
-PCMEnabledFlag                      : 0                # 0: No PCM mode
-PCMLog2MaxSize                      : 5                # Log2 of maximum PCM block size.
-PCMLog2MinSize                      : 3                # Log2 of minimum PCM block size.
-PCMInputBitDepthFlag                : 1                # 0: PCM bit-depth is internal bit-depth. 1: PCM bit-depth is input bit-depth.
-PCMFilterDisableFlag                : 0                # 0: Enable loop filtering on I_PCM samples. 1: Disable loop filtering on I_PCM samples.
-
-#============ Lossless ================
-TransquantBypassEnable     : 0                         # Value of PPS flag.
-CUTransquantBypassFlagForce: 0                         # Force transquant bypass mode, when transquant_bypass_enable_flag is enabled
+#=========== TemporalFilter =================
+TemporalFilter                : 0           # Enable/disable GOP Based Temporal Filter
+TemporalFilterFutureReference : 1           # Enable/disable reading future frames
+TemporalFilterStrengthFrame8  : 0.95        # Enable filter at every 8th frame with given strength
+TemporalFilterStrengthFrame16 : 1.5         # Enable filter at every 16th frame with given strength, longer intervals have higher priority
+
+#============ Tiles / Slices ================
+EnablePicPartitioning         : 0           # Enable picture partitioning (0: single tile, single slice, 1: multiple tiles/slices can be used)
 
 #============ Rate Control ======================
 RateControl                         : 0                # Rate control: enable rate control
@@ -106,9 +91,12 @@ RCForceIntraQP                      : 0                # Rate control: force int
 #============ VTM settings ======================
 LoopFilterTcOffset_div2             : 0
 SEIDecodedPictureHash               : 0
-CbQpOffset                          : 1
-CrQpOffset                          : 1
-
+CbQpOffset                          : 0
+CrQpOffset                          : 0
+SameCQPTablesForAllChroma           : 1
+QpInValCb                           : 32 44
+QpOutValCb                          : 32 41
+ReWriteParamSets                    : 1
 #============ NEXT ====================
 
 # General
@@ -119,14 +107,17 @@ DualITree                    : 1      # separate partitioning of luma and chroma
 MinQTLumaISlice              : 8
 MinQTChromaISlice            : 4
 MinQTNonISlice               : 8
-MaxBTDepth                   : 3
-MaxBTDepthISliceL            : 3
-MaxBTDepthISliceC            : 3
+MaxMTTHierarchyDepth         : 3
+MaxMTTHierarchyDepthISliceL  : 3
+MaxMTTHierarchyDepthISliceC  : 3
 
 MTS                          : 1
-MTSIntraMaxCand              : 3
+MTSIntraMaxCand              : 4
 MTSInterMaxCand              : 4
 SBT                          : 1
+LFNST                        : 1
+ISP                          : 1
+MMVD                         : 1
 Affine                       : 1
 SubPuMvp                     : 1
 MaxNumMergeCand              : 6
@@ -134,27 +125,41 @@ LMChroma                     : 1      # use CCLM only
 DepQuant                     : 1
 IMV                          : 1
 ALF                          : 1
-GBi                          : 1 
-GBiFast                      : 1
-BIO                          : 1 
-MHIntra                      : 1
+BCW                          : 1
+BcwFast                      : 1
+BIO                          : 1
+CIIP                         : 1
 Triangle                     : 1
 IBC                          : 0      # turned off in CTC
 AllowDisFracMMVD             : 1
 AffineAmvr                   : 1
-LumaReshapeEnable            : 1      # luma reshaping. 0: disable 1:enable 
+LMCSEnable                   : 1      # LMCS: 0: disable, 1:enable
+LMCSSignalType               : 0      # Input signal type: 0:SDR, 1:HDR-PQ, 2:HDR-HLG
+LMCSUpdateCtrl               : 0      # LMCS model update control: 0:RA, 1:AI, 2:LDB/LDP
+LMCSOffset                   : 6      # chroma residual scaling offset
+MRL                          : 1
+MIP                          : 1
 DMVR                         : 1
+SMVD                         : 1
+JointCbCr                    : 1      # joint coding of chroma residuals (if available): 0: disable, 1: enable
+PROF                         : 1
+PPSorSliceMode               : 1
 
 # Fast tools
 PBIntraFast                  : 1
-ISPFast                      : 1
+ISPFast                      : 0
 FastMrg                      : 1
 AMaxBT                       : 1
+FastMIP                      : 0
+FastLFNST                    : 0
+FastLocalDualTreeMode        : 1
+ChromaTS                     : 1
 
 # Encoder optimization tools
 AffineAmvrEncOpt             : 1
-
+MmvdDisNum                   : 6
 ### DO NOT ADD ANYTHING BELOW THIS LINE ###
 ### DO NOT DELETE THE EMPTY LINE BELOW ###
 
 
+
diff --git a/cfg/field/VTM_encoder_lowdelay_field_GOP16.cfg b/cfg/field/VTM_encoder_lowdelay_field_GOP16.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..596ee9fd0a681a032abc9932ce509ccda1eeca49
--- /dev/null
+++ b/cfg/field/VTM_encoder_lowdelay_field_GOP16.cfg
@@ -0,0 +1,26 @@
+FieldCoding                   : 1           # (0: Frame based coding, 1: Field based coding)
+TopFieldFirst                 : 1           # Field parity order (1: Top field first, 0: Bottom field first)
+ConformanceMode               : 1
+VuiParametersPresent          : 1
+SEIPictureTiming              : 1
+SEIFrameFieldInfo             : 1
+IntraPeriod                   : -1          # Period of I-Frame ( -1 = only first)
+DecodingRefreshType           : 0           # Random Access 0:none, 1:CRA, 2:IDR, 3:Recovery Point SEI
+GOPSize                       : 16          # GOP Size (number of B slice = GOPSize-1)
+#        Type POC QPoffset QPOffsetModelOff QPOffsetModelScale CbQPoffset CrQPoffset QPfactor tcOffsetDiv2 betaOffsetDiv2 temporal_id #ref_pics_active_L0 #ref_pics_L0   reference_pictures_L0     #ref_pics_active_L1 #ref_pics_L1   reference_pictures_L1
+Frame1:    B    2   5       -6.5                      0.2590         0          0          1.0      0            0               0             8                8         1 2 9 10 17 18 25 26            8                   8         1 2 9 10 17 18 25 26
+Frame2:    B    3   5       -6.5                      0.2590         0          0          1.0      0            0               0             8                9         1 2 3 10 11 18 19 26 27         8                   9         1 2 3 10 11 18 19 26 27
+Frame3:    B    4   4       -6.5                      0.2590         0          0          1.0      0            0               0             8                8         1 2 3 4 11 12 19 20             8                   8         1 2 3 4 11 12 19 20
+Frame4:    B    5   4       -6.5                      0.2590         0          0          1.0      0            0               0             8                9         1 2 3 4 5 12 13 20 21           8                   9         1 2 3 4 5 12 13 20 21
+Frame5:    B    6   5       -6.5                      0.2590         0          0          1.0      0            0               0             8                8         1 2 5 6 13 14 21 22             8                   8         1 2 5 6 13 14 21 22
+Frame6:    B    7   5       -6.5                      0.2590         0          0          1.0      0            0               0             8                9         1 2 3 6 7 14 15 22 23           8                   9         1 2 3 6 7 14 15 22 23
+Frame7:    B    8   4       -6.5                      0.2590         0          0          1.0      0            0               0             8                8         1 2 7 8 15 16 23 24             8                   8         1 2 7 8 15 16 23 24
+Frame8:    B    9   4       -6.5                      0.2590         0          0          1.0      0            0               0             8                9         1 2 3 8 9 16 17 24 25           8                   9         1 2 3 8 9 16 17 24 25
+Frame9:    B   10   5       -6.5                      0.2590         0          0          1.0      0            0               0             8                8         1 2 9 10 17 18 25 26            8                   8         1 2 9 10 17 18 25 26
+Frame10:   B   11   5       -6.5                      0.2590         0          0          1.0      0            0               0             8                9         1 2 3 10 11 18 19 26 27         8                   9         1 2 3 10 11 18 19 26 27
+Frame11:   B   12   4       -6.5                      0.2590         0          0          1.0      0            0               0             8                8         1 2 3 4 11 12 19 20             8                   8         1 2 3 4 11 12 19 20
+Frame12:   B   13   4       -6.5                      0.2590         0          0          1.0      0            0               0             8                9         1 2 3 4 5 12 13 20 21           8                   9         1 2 3 4 5 12 13 20 21
+Frame13:   B   14   5       -6.5                      0.2590         0          0          1.0      0            0               0             8                8         1 2 5 6 13 14 21 22             8                   8         1 2 5 6 13 14 21 22
+Frame14:   B   15   5       -6.5                      0.2590         0          0          1.0      0            0               0             8                9         1 2 3 6 7 14 15 22 23           8                   9         1 2 3 6 7 14 15 22 23
+Frame15:   B   16   1        0.0                      0.0            0          0          1.0      0            0               0             8                8         1 2 7 8 15 16 23 24             8                   8         1 2 7 8 15 16 23 24
+Frame16:   B   17   1        0.0                      0.0            0          0          1.0      0            0               0             8                9         1 2 3 8 9 16 17 24 25           8                   9         1 2 3 8 9 16 17 24 25
diff --git a/cfg/field/VTM_encoder_lowdelay_field_GOP8.cfg b/cfg/field/VTM_encoder_lowdelay_field_GOP8.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..dc23e0a35af85ff1022c69bdd50f90743050e486
--- /dev/null
+++ b/cfg/field/VTM_encoder_lowdelay_field_GOP8.cfg
@@ -0,0 +1,18 @@
+FieldCoding                   : 1           # (0: Frame based coding, 1: Field based coding)
+TopFieldFirst                 : 1           # Field parity order (1: Top field first, 0: Bottom field first)
+ConformanceMode               : 1
+VuiParametersPresent          : 1
+SEIPictureTiming              : 1
+SEIFrameFieldInfo             : 1
+IntraPeriod                   : -1          # Period of I-Frame ( -1 = only first)
+DecodingRefreshType           : 0           # Random Access 0:none, 1:CRA, 2:IDR, 3:Recovery Point SEI
+GOPSize                       : 8          # GOP Size (number of B slice = GOPSize-1)
+#        Type POC QPoffset QPOffsetModelOff QPOffsetModelScale CbQPoffset CrQPoffset QPfactor tcOffsetDiv2 betaOffsetDiv2 temporal_id #ref_pics_active_L0 #ref_pics_L0   reference_pictures_L0     #ref_pics_active_L1 #ref_pics_L1   reference_pictures_L1
+Frame1:    B    2   5        -6.5                      0.2590         0          0          1.0      0            0               0             5                5         1 2 9 10 17                  5                   5         1 2 9 10 17
+Frame2:    B    3   5        -6.5                      0.2590         0          0          1.0      0            0               0             6                6         1 2 3 10 11 18               6                   6         1 2 3 10 11 18
+Frame3:    B    4   4        -6.5                      0.2590         0          0          1.0      0            0               0             7                7         1 2 3 4 11 12 19             7                   7         1 2 3 4 11 12 19
+Frame4:    B    5   4        -6.5                      0.2590         0          0          1.0      0            0               0             8                8         1 2 3 4 5 12 13 20           8                   8         1 2 3 4 5 12 13 20
+Frame5:    B    6   5        -6.5                      0.2590         0          0          1.0      0            0               0             7                7         1 2 5 6 13 14 21             7                   7         1 2 5 6 13 14 21
+Frame6:    B    7   5        -6.5                      0.2590         0          0          1.0      0            0               0             8                8         1 2 3 6 7 14 15 22           8                   8         1 2 3 6 7 14 15 22
+Frame7:    B    8   1         0.0                      0.0            0          0          1.0      0            0               0             7                7         1 2 7 8 15 16 23             7                   7         1 2 7 8 15 16 23
+Frame8:    B    9   1         0.0                      0.0            0          0          1.0      0            0               0             8                8         1 2 3 8 9 16 17 24           8                   8         1 2 3 8 9 16 17 24
diff --git a/cfg/field/VTM_encoder_randomaccess_field_GOP16.cfg b/cfg/field/VTM_encoder_randomaccess_field_GOP16.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..040922b18643bb2a18a3c6cf964f68980f923363
--- /dev/null
+++ b/cfg/field/VTM_encoder_randomaccess_field_GOP16.cfg
@@ -0,0 +1,26 @@
+FieldCoding                   : 1           # (0: Frame based coding, 1: Field based coding)
+TopFieldFirst                 : 1           # Field parity order (1: Top field first, 0: Bottom field first)
+ConformanceMode               : 1
+VuiParametersPresent          : 1
+SEIPictureTiming              : 1
+SEIFrameFieldInfo             : 1
+IntraPeriod                   : 32          # Period of I-Frame ( -1 = only first)
+DecodingRefreshType           : 1           # Random Access 0:none, 1:CRA, 2:IDR, 3:Recovery Point SEI
+GOPSize                       : 16          # GOP Size (number of B slice = GOPSize-1)
+#        Type POC QPoffset QPOffsetModelOff QPOffsetModelScale CbQPoffset CrQPoffset QPfactor tcOffsetDiv2 betaOffsetDiv2 temporal_id #ref_pics_active_L0 #ref_pics_L0   reference_pictures_L0 #ref_pics_active_L1 #ref_pics_L1   reference_pictures_L1
+Frame1:  B    17  1        0.0                      0.0        0          0          0.442    0            0              0             4                4          16 17 20 21                 4                4           16 17 20 21
+Frame2:  B    16  1        0.0                      0.0        0          0          0.442    0            0              0             3                3          15 16 -1                    3                3           -1 15 16
+Frame3:  B    8   2        0.0                      0.0        0          0          0.3536   0            0              1             2                4          7 8 -8 -9                   2                4           -8 -9 7 8
+Frame4:  B    9   2        0.0                      0.0        0          0          0.3536   0            0              1             2                5          1 8 9 -7 -8                 2                5           -7 -8 1 8 9
+Frame5:  B    4   3        0.0                      0.0        0          0          0.3536   0            0              2             2                4          3 4 -4 -5                   2                4           -4 -5 -12 -13
+Frame6:  B    5   3        0.0                      0.0        0          0          0.3536   0            0              2             2                5          1 4 5 -3 -4                 2                4           -3 -4 -11 -12
+Frame7:  B    2   4        0.0                      0.0        0          0          0.68     0            0              3             2                4          1 2 -2 -3                   2                6           -2 -3 -6 -7 -14 -15
+Frame8:  B    3   4        0.0                      0.0        0          0          0.68     0            0              3             2                4          1 2 -1 -2                   2                6           -1 -2 -5 -6 -13 -14
+Frame9:  B    6   4        0.0                      0.0        0          0          0.68     0            0              3             2                4          1 2 -2 -3                   2                4           -2 -3 -10 -11
+Frame10: B    7   4        0.0                      0.0        0          0          0.68     0            0              3             2                5          1 2 3 -1 -2                 2                4           -1 -2 -9 -10
+Frame11: B    12  3        0.0                      0.0        0          0          0.3536   0            0              2             2                4          3 4 -4 -5                   2                4           -4 -5 3 4
+Frame12: B    13  3        0.0                      0.0        0          0          0.3536   0            0              2             2                5          1 4 5 -3 -4                 2                5           -3 -4 1 4 5
+Frame13: B    10  4        0.0                      0.0        0          0          0.68     0            0              3             2                4          1 2 -2 -3                   2                4           -2 -3 -6 -7
+Frame14: B    11  4        0.0                      0.0        0          0          0.68     0            0              3             2                5          1 2 3 -1 -2                 2                4           -1 -2 -5 -6
+Frame15: B    14  4        0.0                      0.0        0          0          0.68     0            0              3             2                5          1 2 5 -2 -3                 2                4           -2 -3 1 2
+Frame16: B    15  4        0.0                      0.0        0          0          0.68     0            0              3             2                4          1 2 3 6                     2                4           -1 -2 1 2
diff --git a/cfg/field/VTM_encoder_randomaccess_field_GOP32.cfg b/cfg/field/VTM_encoder_randomaccess_field_GOP32.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..34435952483b8187452b7b4602392b34516d24dc
--- /dev/null
+++ b/cfg/field/VTM_encoder_randomaccess_field_GOP32.cfg
@@ -0,0 +1,42 @@
+FieldCoding                   : 1           # (0: Frame based coding, 1: Field based coding)
+TopFieldFirst                 : 1           # Field parity order (1: Top field first, 0: Bottom field first)
+ConformanceMode               : 1
+VuiParametersPresent          : 1
+SEIPictureTiming              : 1
+SEIFrameFieldInfo             : 1
+IntraPeriod                   : 64          # Period of I-Frame ( -1 = only first)
+DecodingRefreshType           : 1           # Random Access 0:none, 1:CRA, 2:IDR, 3:Recovery Point SEI
+GOPSize                       : 32          # GOP Size (number of B slice = GOPSize-1)
+#        Type POC QPoffset QPOffsetModelOff QPOffsetModelScale CbQPoffset CrQPoffset QPfactor tcOffsetDiv2 betaOffsetDiv2 temporal_id #ref_pics_active_L0 #ref_pics_L0   reference_pictures_L0 #ref_pics_active_L1 #ref_pics_L1   reference_pictures_L1
+Frame1:   B   33   1        0.0                   0.0            0          0          1.0      0            0              0             4                4          32 33 48 49                 4                4           32 33 48 49
+Frame2:   B   32   1        0.0                   0.0            0          0          1.0      0            0              0             4                5          31 32 47 48 -1              4                5           -1 31 32 47 48
+Frame3:   B   16   1       -4.8848                0.2061         0          0          1.0      0            0              1             4                4          15 16 31 32                 4                4           -16 -17 15 16
+Frame4:   B   17   1       -4.8848                0.2061         0          0          1.0      0            0              1             4                5          1 16 17 32 33               4                4           -15 -16 16 17
+Frame5:   B    8   4       -5.7476                0.2286         0          0          1.0      0            0              2             4                4          7 8 23 24                   4                4           -8 -9 -24 -25
+Frame6:   B    9   4       -5.7476                0.2286         0          0          1.0      0            0              2             4                5          1 8 9 24 25                 4                4           -7 -8 -23 -24
+Frame7:   B    4   5       -5.90                  0.2333         0          0          1.0      0            0              3             4                4          3 4 19 20                   4                6           -4 -5 -12 -13 -28 -29
+Frame8:   B    5   5       -5.90                  0.2333         0          0          1.0      0            0              3             4                5          1 4 5 20 21                 4                6           -3 -4 -11 -12 -27 -28
+Frame9:   B    2   6       -7.1444                0.3            0          0          1.0      0            0              4             4                4          1 2 -2 -3                   4                8           -2 -3 -6 -7 -14 -15 -30 -31
+Frame10:  B    3   6       -7.1444                0.3            0          0          1.0      0            0              4             4                5          1 2 3 -1 -2                 4                8           -1 -2 -5 -6 -13 -14 -29 -30
+Frame11:  B    6   6       -7.1444                0.3            0          0          1.0      0            0              4             4                4          1 2 5 6                     4                6           -2 -3 -10 -11 -26 -27
+Frame12:  B    7   6       -7.1444                0.3            0          0          1.0      0            0              4             4                5          1 2 3 6 7                   4                6           -1 -2 -9 -10 -25 -26
+Frame13:  B   12   5       -5.90                  0.2333         0          0          1.0      0            0              3             4                4          3 4 11 12                   4                4           -4 -5 -20 -21
+Frame14:  B   13   5       -5.90                  0.2333         0          0          1.0      0            0              3             4                5          1 4 5 12 13                 4                4           -3 -4 -19 -20
+Frame15:  B   10   6       -7.1444                0.3            0          0          1.0      0            0              4             4                4          1 2 9 10                    4                6           -2 -3 -6 -7 -22 -23
+Frame16:  B   11   6       -7.1444                0.3            0          0          1.0      0            0              4             4                5          1 2 3 10 11                 4                6           -1 -2 -5 -6 -21 -22
+Frame17:  B   14   6       -7.1444                0.3            0          0          1.0      0            0              4             4                6          1 2 5 6 13 14               4                4           -2 -3 -18 -19
+Frame18:  B   15   6       -7.1444                0.3            0          0          1.0      0            0              4             4                7          1 2 3 6 7 14 15             4                4           -1 -2 -17 -18
+Frame19:  B   24   4       -5.7476                0.2286         0          0          1.0      0            0              2             4                4          7 8 23 24                   4                4           -8 -9 7 8
+Frame20:  B   25   4       -5.7476                0.2286         0          0          1.0      0            0              2             4                5          1 8 9 24 25                 4                4           -7 -8 8 9
+Frame21:  B   20   5       -5.90                  0.2333         0          0          1.0      0            0              3             4                4          3 4 19 20                   4                4           -4 -5 -12 -13
+Frame22:  B   21   5       -5.90                  0.2333         0          0          1.0      0            0              3             4                5          1 4 5 20 21                 4                4           -3 -4 -11 -12
+Frame23:  B   18   6       -7.1444                0.3            0          0          1.0      0            0              4             4                4          1 2 17 18                   4                6           -2 -3 -6 -7 -14 -15
+Frame24:  B   19   6       -7.1444                0.3            0          0          1.0      0            0              4             4                5          1 2 3 18 19                 4                6           -1 -2 -5 -6 -13 -14
+Frame25:  B   22   6       -7.1444                0.3            0          0          1.0      0            0              4             4                6          1 2 5 6 21 22               4                4           -2 -3 -10 -11
+Frame26:  B   23   6       -7.1444                0.3            0          0          1.0      0            0              4             4                7          1 2 3 6 7 22 23             4                4           -1 -2 -9 -10
+Frame27:  B   28   5       -5.90                  0.2333         0          0          1.0      0            0              3             4                6          3 4 11 12 27 28             4                4           -4 -5 3 4
+Frame28:  B   29   5       -5.90                  0.2333         0          0          1.0      0            0              3             4                7          1 4 5 12 13 28 29           4                4           -3 -4 4 5
+Frame29:  B   26   6       -7.1444                0.3            0          0          1.0      0            0              4             4                6          1 2 9 10 25 26              4                4           -2 -3 -6 -7
+Frame30:  B   27   6       -7.1444                0.3            0          0          1.0      0            0              4             4                7          1 2 3 10 11 26 27           4                4           -1 -2 -5 -6
+Frame31:  B   30   6       -7.1444                0.3            0          0          1.0      0            0              4             4                8          1 2 5 6 13 14 29 30         4                4           -2 -3 1 2
+Frame32:  B   31   6       -7.1444                0.3            0          0          1.0      0            0              4             4                9          1 2 3 6 7 14 15 30 31       4                4           -1 -2 2 3
diff --git a/cfg/layers.cfg b/cfg/layers.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..f083583f89767ae10f384c6dab19ebcc4dc2e28a
--- /dev/null
+++ b/cfg/layers.cfg
@@ -0,0 +1,19 @@
+#======== Layers ===============
+MaxLayers                     : 2
+MaxSublayers                  : 1
+AllLayersSameNumSublayersFlag : 0
+AllIndependentLayersFlag      : 0
+#======== OLSs ===============
+EachLayerIsAnOlsFlag          : 0
+OlsModeIdc                    : 2
+NumOutputLayerSets            : 2
+OlsOutputLayer1               : 1 0
+#======== Layer-0 ===============
+LayerId0                      : 0
+#======== Layer-1 ===============
+LayerId1                      : 1
+NumRefLayers1                 : 1
+RefLayerIdx1                  : 0
+
+
+
diff --git a/cfg/lossless/lossless.cfg b/cfg/lossless/lossless.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..a0a00a933635eb13a64910aead5ba7c280832343
--- /dev/null
+++ b/cfg/lossless/lossless.cfg
@@ -0,0 +1,19 @@
+CostMode                     : lossless
+ChromaTS                     : 1
+DepQuant                     : 0
+RDOQ                         : 0
+RDOQTS                       : 0
+SBT                          : 0
+LMCSEnable                   : 0
+ISP                          : 0
+MTS                          : 0
+LFNST                        : 0
+JointCbCr                    : 0
+LoopFilterDisable            : 1
+SAO                          : 0
+ALF                          : 0
+DMVR                         : 0
+BIO                          : 0
+PROF                         : 0
+Log2MaxTbSize                : 5
+InternalBitDepth             : 0
\ No newline at end of file
diff --git a/cfg/nonCTC-SliceConfigExamples/encoder_randomaccess_vtm_RasterScanSlice.cfg b/cfg/nonCTC-SliceConfigExamples/encoder_randomaccess_vtm_RasterScanSlice.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..7487dc814d1464b5616ade8693b73286b6daee37
--- /dev/null
+++ b/cfg/nonCTC-SliceConfigExamples/encoder_randomaccess_vtm_RasterScanSlice.cfg
@@ -0,0 +1,159 @@
+#======== File I/O =====================
+BitstreamFile                 : str.bin
+ReconFile                     : rec.yuv
+
+#======== Profile ================
+Profile                       : auto
+
+#======== Unit definition ================
+MaxCUWidth                    : 64          # Maximum coding unit width in pixel
+MaxCUHeight                   : 64          # Maximum coding unit height in pixel
+MaxPartitionDepth             : 4           # Maximum coding unit depth
+
+#======== Coding Structure =============
+IntraPeriod                   : 32          # Period of I-Frame ( -1 = only first)
+DecodingRefreshType           : 1           # Random Access 0:none, 1:CRA, 2:IDR, 3:Recovery Point SEI
+GOPSize                       : 16          # GOP Size (number of B slice = GOPSize-1)
+
+IntraQPOffset                 : -3
+LambdaFromQpEnable            : 1           # see JCTVC-X0038 for suitable parameters for IntraQPOffset, QPoffset, QPOffsetModelOff, QPOffsetModelScale when enabled
+#        Type POC QPoffset QPOffsetModelOff QPOffsetModelScale CbQPoffset CrQPoffset QPfactor tcOffsetDiv2 betaOffsetDiv2 temporal_id #ref_pics_active_L0 #ref_pics_L0   reference_pictures_L0 #ref_pics_active_L1 #ref_pics_L1   reference_pictures_L1
+Frame1:   B   16   1        0.0                      0.0            0          0          1.0      0            0              0             2                3          16 32 24                    2                2           16 32
+Frame2:   B    8   1       -4.8848                   0.2061         0          0          1.0      0            0              1             2                2          8 16                        2                2           -8 8
+Frame3:   B    4   4       -5.7476                   0.2286         0          0          1.0      0            0              2             2                2          4 12                        2                2           -4 -12
+Frame4:   B    2   5       -5.90                     0.2333         0          0          1.0      0            0              3             2                2          2 10                        2                3           -2 -6 -14
+Frame5:   B    1   6       -7.1444                   0.3            0          0          1.0      0            0              4             2                2          1 -1                        2                4           -1 -3 -7 -15
+Frame6:   B    3   6       -7.1444                   0.3            0          0          1.0      0            0              4             2                2          1 3                         2                3           -1 -5 -13
+Frame7:   B    6   5       -5.90                     0.2333         0          0          1.0      0            0              3             2                2          2 6                         2                2           -2 -10
+Frame8:   B    5   6       -7.1444                   0.3            0          0          1.0      0            0              4             2                2          1 5                         2                3           -1 -3 -11
+Frame9:   B    7   6       -7.1444                   0.3            0          0          1.0      0            0              4             2                3          1 3 7                       2                2           -1 -9
+Frame10:  B   12   4       -5.7476                   0.2286         0          0          1.0      0            0              2             2                2          4 12                        2                2           -4 4
+Frame11:  B   10   5       -5.90                     0.2333         0          0          1.0      0            0              3             2                2          2 10                        2                2           -2 -6
+Frame12:  B    9   6       -7.1444                   0.3            0          0          1.0      0            0              4             2                2          1 9                         2                3           -1 -3 -7
+Frame13:  B   11   6       -7.1444                   0.3            0          0          1.0      0            0              4             2                3          1 3 11                      2                2           -1 -5
+Frame14:  B   14   5       -5.90                     0.2333         0          0          1.0      0            0              3             2                3          2 6 14                      2                2           -2 2
+Frame15:  B   13   6       -7.1444                   0.3            0          0          1.0      0            0              4             2                3          1 5 13                      2                2           -1 -3
+Frame16:  B   15   6       -7.1444                   0.3            0          0          1.0      0            0              4             2                4          1 3 7 15                    2                2           -1 1
+
+#=========== Motion Search =============
+FastSearch                    : 1           # 0:Full search  1:TZ search
+SearchRange                   : 384         # (0: Search range is a Full frame)
+ASR                           : 1           # Adaptive motion search range
+MinSearchWindow               : 96          # Minimum motion search window size for the adaptive window ME
+BipredSearchRange             : 4           # Search range for bi-prediction refinement
+HadamardME                    : 1           # Use of hadamard measure for fractional ME
+FEN                           : 1           # Fast encoder decision
+FDM                           : 1           # Fast Decision for Merge RD cost
+
+#======== Quantization =============
+QP                            : 32          # Quantization parameter(0-51)
+MaxDeltaQP                    : 0           # CU-based multi-QP optimization
+MaxCuDQPSubdiv                : 0           # Maximum subdiv for CU luma Qp adjustment
+DeltaQpRD                     : 0           # Slice-based multi-QP optimization
+RDOQ                          : 1           # RDOQ
+RDOQTS                        : 1           # RDOQ for transform skip
+
+#=========== Deblock Filter ============
+LoopFilterOffsetInPPS         : 1           # Dbl params: 0 = varying params in SliceHeader (param = base_param + GOP_offset_param); 1 (default) = constant params in PPS (param = base_param)
+LoopFilterDisable             : 0           # Disable deblocking filter (0=Filter, 1=No Filter)
+LoopFilterBetaOffset_div2     : 0           # base_param: -6 ~ 6
+LoopFilterTcOffset_div2       : 0           # base_param: -6 ~ 6
+DeblockingFilterMetric        : 0           # blockiness metric (automatically configures deblocking parameters in bitstream). Applies slice-level loop filter offsets (LoopFilterOffsetInPPS and LoopFilterDisable must be 0)
+
+#=========== Misc. ============
+InternalBitDepth              : 10          # codec operating bit-depth
+
+#=========== Coding Tools =================
+SAO                           : 1           # Sample adaptive offset  (0: OFF, 1: ON)
+TransformSkip                 : 1           # Transform skipping (0: OFF, 1: ON)
+TransformSkipFast             : 1           # Fast Transform skipping (0: OFF, 1: ON)
+TransformSkipLog2MaxSize      : 5
+SAOLcuBoundary                : 0           # SAOLcuBoundary using non-deblocked pixels (0: OFF, 1: ON)
+
+#============ Tiles / Slices ================
+EnablePicPartitioning         : 1                      # Enable picture partitioning (0: single tile, single slice, 1: multiple tiles/slices can be used)
+
+# Figure 4 - Section 6.3.1 - 12 tiles and 3 raster-scan slices
+TileColumnWidthArray          : 6 6 6                  # Tile column widths in units of CTUs. Last column width will be repeated uniformly to cover any remaining picture width
+TileRowHeightArray            : 3 3 3 3                # Tile row heights in units of CTUs. Last row height will be repeated uniformly to cover any remaining picture height  
+RasterScanSlices              : 1                      # Raster-scan or rectangular slices (0: rectangular, 1: raster-scan)
+RasterSliceSizes              : 2 5 5                  # Raster-scan slice sizes in units of tiles. Last slice size will be repeated uniformly to cover any remaining tiles in the picture
+DisableLoopFilterAcrossTiles  : 0                      # Loop filtering (DBLK/SAO/ALF) applied across tile boundaries or not (0: filter across tile boundaries  1: do not filter across tile boundaries)
+DisableLoopFilterAcrossSlices : 0                      # Loop filtering (DBLK/SAO/ALF) applied across slice boundaries or not (0: filter across slice boundaries 1: do not filter across slice boundaries)
+
+#============ Rate Control ======================
+RateControl                         : 0                # Rate control: enable rate control
+TargetBitrate                       : 1000000          # Rate control: target bitrate, in bps
+KeepHierarchicalBit                 : 2                # Rate control: 0: equal bit allocation; 1: fixed ratio bit allocation; 2: adaptive ratio bit allocation
+LCULevelRateControl                 : 1                # Rate control: 1: LCU level RC; 0: picture level RC
+RCLCUSeparateModel                  : 1                # Rate control: use LCU level separate R-lambda model
+InitialQP                           : 0                # Rate control: initial QP
+RCForceIntraQP                      : 0                # Rate control: force intra QP to be equal to initial QP
+
+#============ VTM settings ======================
+LoopFilterTcOffset_div2             : 0
+SEIDecodedPictureHash               : 0
+CbQpOffset                          : 1
+CrQpOffset                          : 1
+
+ReWriteParamSets                    : 1
+#============ NEXT ====================
+
+# General
+CTUSize                      : 128
+LCTUFast                     : 1
+
+DualITree                    : 1      # separate partitioning of luma and chroma channels for I-slices
+MinQTLumaISlice              : 8
+MinQTChromaISlice            : 4
+MinQTNonISlice               : 8
+MaxMTTHierarchyDepth         : 3
+MaxMTTHierarchyDepthISliceL  : 3
+MaxMTTHierarchyDepthISliceC  : 3
+
+MTS                          : 1
+MTSIntraMaxCand              : 3
+MTSInterMaxCand              : 4
+SBT                          : 1
+LFNST			     : 1
+ISP                          : 1
+MMVD                         : 1
+Affine                       : 1
+SubPuMvp                     : 1
+MaxNumMergeCand              : 6
+LMChroma                     : 1      # use CCLM only
+DepQuant                     : 1
+IMV                          : 1
+ALF                          : 1
+BCW                          : 1 
+BcwFast                      : 1
+BIO                          : 1 
+CIIP                         : 1
+Triangle                     : 1
+IBC                          : 0      # turned off in CTC
+AllowDisFracMMVD             : 1
+AffineAmvr                   : 1
+LMCSEnable                   : 1      # LMCS: 0: disable, 1:enable
+LMCSSignalType               : 0      # Input signal type: 0:SDR, 1:HDR-PQ, 2:HDR-HLG
+LMCSUpdateCtrl               : 0      # LMCS model update control: 0:RA, 1:AI, 2:LDB/LDP
+MRL                          : 1
+MIP                          : 1
+DMVR                         : 1
+SMVD                         : 1
+
+# Fast tools
+PBIntraFast                  : 1
+ISPFast                      : 1
+FastMrg                      : 1
+AMaxBT                       : 1
+FastMIP                      : 0
+FastLFNST		     : 0
+
+# Encoder optimization tools
+AffineAmvrEncOpt             : 1
+MmvdDisNum		     : 6
+### DO NOT ADD ANYTHING BELOW THIS LINE ###
+### DO NOT DELETE THE EMPTY LINE BELOW ###
+
+
+
diff --git a/cfg/nonCTC-SliceConfigExamples/encoder_randomaccess_vtm_RectangularSlice.cfg b/cfg/nonCTC-SliceConfigExamples/encoder_randomaccess_vtm_RectangularSlice.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..c0d3ecbbc396d50a6895e9a1b9e17e672e943071
--- /dev/null
+++ b/cfg/nonCTC-SliceConfigExamples/encoder_randomaccess_vtm_RectangularSlice.cfg
@@ -0,0 +1,159 @@
+#======== File I/O =====================
+BitstreamFile                 : str.bin
+ReconFile                     : rec.yuv
+
+#======== Profile ================
+Profile                       : auto
+
+#======== Unit definition ================
+MaxCUWidth                    : 64          # Maximum coding unit width in pixel
+MaxCUHeight                   : 64          # Maximum coding unit height in pixel
+MaxPartitionDepth             : 4           # Maximum coding unit depth
+
+#======== Coding Structure =============
+IntraPeriod                   : 32          # Period of I-Frame ( -1 = only first)
+DecodingRefreshType           : 1           # Random Access 0:none, 1:CRA, 2:IDR, 3:Recovery Point SEI
+GOPSize                       : 16          # GOP Size (number of B slice = GOPSize-1)
+
+IntraQPOffset                 : -3
+LambdaFromQpEnable            : 1           # see JCTVC-X0038 for suitable parameters for IntraQPOffset, QPoffset, QPOffsetModelOff, QPOffsetModelScale when enabled
+#        Type POC QPoffset QPOffsetModelOff QPOffsetModelScale CbQPoffset CrQPoffset QPfactor tcOffsetDiv2 betaOffsetDiv2 temporal_id #ref_pics_active_L0 #ref_pics_L0   reference_pictures_L0 #ref_pics_active_L1 #ref_pics_L1   reference_pictures_L1
+Frame1:   B   16   1        0.0                      0.0            0          0          1.0      0            0              0             2                3          16 32 24                    2                2           16 32
+Frame2:   B    8   1       -4.8848                   0.2061         0          0          1.0      0            0              1             2                2          8 16                        2                2           -8 8
+Frame3:   B    4   4       -5.7476                   0.2286         0          0          1.0      0            0              2             2                2          4 12                        2                2           -4 -12
+Frame4:   B    2   5       -5.90                     0.2333         0          0          1.0      0            0              3             2                2          2 10                        2                3           -2 -6 -14
+Frame5:   B    1   6       -7.1444                   0.3            0          0          1.0      0            0              4             2                2          1 -1                        2                4           -1 -3 -7 -15
+Frame6:   B    3   6       -7.1444                   0.3            0          0          1.0      0            0              4             2                2          1 3                         2                3           -1 -5 -13
+Frame7:   B    6   5       -5.90                     0.2333         0          0          1.0      0            0              3             2                2          2 6                         2                2           -2 -10
+Frame8:   B    5   6       -7.1444                   0.3            0          0          1.0      0            0              4             2                2          1 5                         2                3           -1 -3 -11
+Frame9:   B    7   6       -7.1444                   0.3            0          0          1.0      0            0              4             2                3          1 3 7                       2                2           -1 -9
+Frame10:  B   12   4       -5.7476                   0.2286         0          0          1.0      0            0              2             2                2          4 12                        2                2           -4 4
+Frame11:  B   10   5       -5.90                     0.2333         0          0          1.0      0            0              3             2                2          2 10                        2                2           -2 -6
+Frame12:  B    9   6       -7.1444                   0.3            0          0          1.0      0            0              4             2                2          1 9                         2                3           -1 -3 -7
+Frame13:  B   11   6       -7.1444                   0.3            0          0          1.0      0            0              4             2                3          1 3 11                      2                2           -1 -5
+Frame14:  B   14   5       -5.90                     0.2333         0          0          1.0      0            0              3             2                3          2 6 14                      2                2           -2 2
+Frame15:  B   13   6       -7.1444                   0.3            0          0          1.0      0            0              4             2                3          1 5 13                      2                2           -1 -3
+Frame16:  B   15   6       -7.1444                   0.3            0          0          1.0      0            0              4             2                4          1 3 7 15                    2                2           -1 1
+
+#=========== Motion Search =============
+FastSearch                    : 1           # 0:Full search  1:TZ search
+SearchRange                   : 384         # (0: Search range is a Full frame)
+ASR                           : 1           # Adaptive motion search range
+MinSearchWindow               : 96          # Minimum motion search window size for the adaptive window ME
+BipredSearchRange             : 4           # Search range for bi-prediction refinement
+HadamardME                    : 1           # Use of hadamard measure for fractional ME
+FEN                           : 1           # Fast encoder decision
+FDM                           : 1           # Fast Decision for Merge RD cost
+
+#======== Quantization =============
+QP                            : 32          # Quantization parameter(0-51)
+MaxDeltaQP                    : 0           # CU-based multi-QP optimization
+MaxCuDQPSubdiv                : 0           # Maximum subdiv for CU luma Qp adjustment
+DeltaQpRD                     : 0           # Slice-based multi-QP optimization
+RDOQ                          : 1           # RDOQ
+RDOQTS                        : 1           # RDOQ for transform skip
+
+#=========== Deblock Filter ============
+LoopFilterOffsetInPPS         : 1           # Dbl params: 0 = varying params in SliceHeader (param = base_param + GOP_offset_param); 1 (default) = constant params in PPS (param = base_param)
+LoopFilterDisable             : 0           # Disable deblocking filter (0=Filter, 1=No Filter)
+LoopFilterBetaOffset_div2     : 0           # base_param: -6 ~ 6
+LoopFilterTcOffset_div2       : 0           # base_param: -6 ~ 6
+DeblockingFilterMetric        : 0           # blockiness metric (automatically configures deblocking parameters in bitstream). Applies slice-level loop filter offsets (LoopFilterOffsetInPPS and LoopFilterDisable must be 0)
+
+#=========== Misc. ============
+InternalBitDepth              : 10          # codec operating bit-depth
+
+#=========== Coding Tools =================
+SAO                           : 1           # Sample adaptive offset  (0: OFF, 1: ON)
+TransformSkip                 : 1           # Transform skipping (0: OFF, 1: ON)
+TransformSkipFast             : 1           # Fast Transform skipping (0: OFF, 1: ON)
+TransformSkipLog2MaxSize      : 5
+SAOLcuBoundary                : 0           # SAOLcuBoundary using non-deblocked pixels (0: OFF, 1: ON)
+
+#============ Tiles / Slices ================
+EnablePicPartitioning         : 1                         # Enable picture partitioning (0: single tile, single slice, 1: multiple tiles/slices can be used)
+
+# Figure 6 - Section 6.3.1 - 4 tiles and 4 rectangular slices
+TileColumnWidthArray          : 9 9                       # Tile column widths in units of CTUs. Last column width will be repeated uniformly to cover any remaining picture width
+TileRowHeightArray            : 6 6                       # Tile row heights in units of CTUs. Last row height will be repeated uniformly to cover any remaining picture height  
+RasterScanSlices              : 0                         # Raster-scan or rectangular slices (0: rectangular, 1: raster-scan)
+RectSlicePositions            : 0 206 9 35 45 107 117 215 # Rectangular slice positions. List containing pairs of top-left CTU RS address followed by bottom-right CTU RS address
+DisableLoopFilterAcrossTiles  : 0                         # Loop filtering (DBLK/SAO/ALF) applied across tile boundaries or not (0: filter across tile boundaries  1: do not filter across tile boundaries)
+DisableLoopFilterAcrossSlices : 0                         # Loop filtering (DBLK/SAO/ALF) applied across slice boundaries or not (0: filter across slice boundaries 1: do not filter across slice boundaries)
+
+#============ Rate Control ======================
+RateControl                         : 0                # Rate control: enable rate control
+TargetBitrate                       : 1000000          # Rate control: target bitrate, in bps
+KeepHierarchicalBit                 : 2                # Rate control: 0: equal bit allocation; 1: fixed ratio bit allocation; 2: adaptive ratio bit allocation
+LCULevelRateControl                 : 1                # Rate control: 1: LCU level RC; 0: picture level RC
+RCLCUSeparateModel                  : 1                # Rate control: use LCU level separate R-lambda model
+InitialQP                           : 0                # Rate control: initial QP
+RCForceIntraQP                      : 0                # Rate control: force intra QP to be equal to initial QP
+
+#============ VTM settings ======================
+LoopFilterTcOffset_div2             : 0
+SEIDecodedPictureHash               : 0
+CbQpOffset                          : 1
+CrQpOffset                          : 1
+
+ReWriteParamSets                    : 1
+#============ NEXT ====================
+
+# General
+CTUSize                      : 128
+LCTUFast                     : 1
+
+DualITree                    : 1      # separate partitioning of luma and chroma channels for I-slices
+MinQTLumaISlice              : 8
+MinQTChromaISlice            : 4
+MinQTNonISlice               : 8
+MaxMTTHierarchyDepth         : 3
+MaxMTTHierarchyDepthISliceL  : 3
+MaxMTTHierarchyDepthISliceC  : 3
+
+MTS                          : 1
+MTSIntraMaxCand              : 3
+MTSInterMaxCand              : 4
+SBT                          : 1
+LFNST			     : 1
+ISP                          : 1
+MMVD                         : 1
+Affine                       : 1
+SubPuMvp                     : 1
+MaxNumMergeCand              : 6
+LMChroma                     : 1      # use CCLM only
+DepQuant                     : 1
+IMV                          : 1
+ALF                          : 1
+BCW                          : 1 
+BcwFast                      : 1
+BIO                          : 1 
+CIIP                         : 1
+Triangle                     : 1
+IBC                          : 0      # turned off in CTC
+AllowDisFracMMVD             : 1
+AffineAmvr                   : 1
+LMCSEnable                   : 1      # LMCS: 0: disable, 1:enable
+LMCSSignalType               : 0      # Input signal type: 0:SDR, 1:HDR-PQ, 2:HDR-HLG
+LMCSUpdateCtrl               : 0      # LMCS model update control: 0:RA, 1:AI, 2:LDB/LDP
+MRL                          : 1
+MIP                          : 1
+DMVR                         : 1
+SMVD                         : 1
+
+# Fast tools
+PBIntraFast                  : 1
+ISPFast                      : 1
+FastMrg                      : 1
+AMaxBT                       : 1
+FastMIP                      : 0
+FastLFNST		     : 0
+
+# Encoder optimization tools
+AffineAmvrEncOpt             : 1
+MmvdDisNum		     : 6
+### DO NOT ADD ANYTHING BELOW THIS LINE ###
+### DO NOT DELETE THE EMPTY LINE BELOW ###
+
+
+
diff --git a/cfg/nonCTC-SliceConfigExamples/encoder_randomaccess_vtm_RectangularSliceFixedSize.cfg b/cfg/nonCTC-SliceConfigExamples/encoder_randomaccess_vtm_RectangularSliceFixedSize.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..9f29eb4d055d13f850e6e6b47573ca8e5d51250b
--- /dev/null
+++ b/cfg/nonCTC-SliceConfigExamples/encoder_randomaccess_vtm_RectangularSliceFixedSize.cfg
@@ -0,0 +1,160 @@
+#======== File I/O =====================
+BitstreamFile                 : str.bin
+ReconFile                     : rec.yuv
+
+#======== Profile ================
+Profile                       : auto
+
+#======== Unit definition ================
+MaxCUWidth                    : 64          # Maximum coding unit width in pixel
+MaxCUHeight                   : 64          # Maximum coding unit height in pixel
+MaxPartitionDepth             : 4           # Maximum coding unit depth
+
+#======== Coding Structure =============
+IntraPeriod                   : 32          # Period of I-Frame ( -1 = only first)
+DecodingRefreshType           : 1           # Random Access 0:none, 1:CRA, 2:IDR, 3:Recovery Point SEI
+GOPSize                       : 16          # GOP Size (number of B slice = GOPSize-1)
+
+IntraQPOffset                 : -3
+LambdaFromQpEnable            : 1           # see JCTVC-X0038 for suitable parameters for IntraQPOffset, QPoffset, QPOffsetModelOff, QPOffsetModelScale when enabled
+#        Type POC QPoffset QPOffsetModelOff QPOffsetModelScale CbQPoffset CrQPoffset QPfactor tcOffsetDiv2 betaOffsetDiv2 temporal_id #ref_pics_active_L0 #ref_pics_L0   reference_pictures_L0 #ref_pics_active_L1 #ref_pics_L1   reference_pictures_L1
+Frame1:   B   16   1        0.0                      0.0            0          0          1.0      0            0              0             2                3          16 32 24                    2                2           16 32
+Frame2:   B    8   1       -4.8848                   0.2061         0          0          1.0      0            0              1             2                2          8 16                        2                2           -8 8
+Frame3:   B    4   4       -5.7476                   0.2286         0          0          1.0      0            0              2             2                2          4 12                        2                2           -4 -12
+Frame4:   B    2   5       -5.90                     0.2333         0          0          1.0      0            0              3             2                2          2 10                        2                3           -2 -6 -14
+Frame5:   B    1   6       -7.1444                   0.3            0          0          1.0      0            0              4             2                2          1 -1                        2                4           -1 -3 -7 -15
+Frame6:   B    3   6       -7.1444                   0.3            0          0          1.0      0            0              4             2                2          1 3                         2                3           -1 -5 -13
+Frame7:   B    6   5       -5.90                     0.2333         0          0          1.0      0            0              3             2                2          2 6                         2                2           -2 -10
+Frame8:   B    5   6       -7.1444                   0.3            0          0          1.0      0            0              4             2                2          1 5                         2                3           -1 -3 -11
+Frame9:   B    7   6       -7.1444                   0.3            0          0          1.0      0            0              4             2                3          1 3 7                       2                2           -1 -9
+Frame10:  B   12   4       -5.7476                   0.2286         0          0          1.0      0            0              2             2                2          4 12                        2                2           -4 4
+Frame11:  B   10   5       -5.90                     0.2333         0          0          1.0      0            0              3             2                2          2 10                        2                2           -2 -6
+Frame12:  B    9   6       -7.1444                   0.3            0          0          1.0      0            0              4             2                2          1 9                         2                3           -1 -3 -7
+Frame13:  B   11   6       -7.1444                   0.3            0          0          1.0      0            0              4             2                3          1 3 11                      2                2           -1 -5
+Frame14:  B   14   5       -5.90                     0.2333         0          0          1.0      0            0              3             2                3          2 6 14                      2                2           -2 2
+Frame15:  B   13   6       -7.1444                   0.3            0          0          1.0      0            0              4             2                3          1 5 13                      2                2           -1 -3
+Frame16:  B   15   6       -7.1444                   0.3            0          0          1.0      0            0              4             2                4          1 3 7 15                    2                2           -1 1
+
+#=========== Motion Search =============
+FastSearch                    : 1           # 0:Full search  1:TZ search
+SearchRange                   : 384         # (0: Search range is a Full frame)
+ASR                           : 1           # Adaptive motion search range
+MinSearchWindow               : 96          # Minimum motion search window size for the adaptive window ME
+BipredSearchRange             : 4           # Search range for bi-prediction refinement
+HadamardME                    : 1           # Use of hadamard measure for fractional ME
+FEN                           : 1           # Fast encoder decision
+FDM                           : 1           # Fast Decision for Merge RD cost
+
+#======== Quantization =============
+QP                            : 32          # Quantization parameter(0-51)
+MaxDeltaQP                    : 0           # CU-based multi-QP optimization
+MaxCuDQPSubdiv                : 0           # Maximum subdiv for CU luma Qp adjustment
+DeltaQpRD                     : 0           # Slice-based multi-QP optimization
+RDOQ                          : 1           # RDOQ
+RDOQTS                        : 1           # RDOQ for transform skip
+
+#=========== Deblock Filter ============
+LoopFilterOffsetInPPS         : 1           # Dbl params: 0=varying params in SliceHeader (param = base_param + GOP_offset_param); 1 (default)=constant params in PPS (param = base_param)
+LoopFilterDisable             : 0           # Disable deblocking filter (0=Filter, 1=No Filter)
+LoopFilterBetaOffset_div2     : 0           # base_param: -6 ~ 6
+LoopFilterTcOffset_div2       : 0           # base_param: -6 ~ 6
+DeblockingFilterMetric        : 0           # blockiness metric (automatically configures deblocking parameters in bitstream). Applies slice-level loop filter offsets (LoopFilterOffsetInPPS and LoopFilterDisable must be 0)
+
+#=========== Misc. ============
+InternalBitDepth              : 10          # codec operating bit-depth
+
+#=========== Coding Tools =================
+SAO                           : 1           # Sample adaptive offset  (0: OFF, 1: ON)
+TransformSkip                 : 1           # Transform skipping (0: OFF, 1: ON)
+TransformSkipFast             : 1           # Fast Transform skipping (0: OFF, 1: ON)
+TransformSkipLog2MaxSize      : 5
+SAOLcuBoundary                : 0           # SAOLcuBoundary using non-deblocked pixels (0: OFF, 1: ON)
+
+#============ Tiles / Slices ================
+EnablePicPartitioning         : 1                         # Enable picture partitioning (0: single tile, single slice, 1: multiple tiles/slices can be used)
+
+# 24 tiles and 6 rectangular slices
+TileColumnWidthArray          : 3 3 3 3 3 3               # Tile column widths in units of CTUs. Last column width will be repeated uniformly to cover any remaining picture width
+TileRowHeightArray            : 3 3 3 3                   # Tile row heights in units of CTUs. Last row height will be repeated uniformly to cover any remaining picture height  
+RasterScanSlices              : 0                         # Raster-scan or rectangular slices (0: rectangular, 1: raster-scan)
+RectSliceFixedWidth           : 2                         # Fixed rectangular slice width in units of tiles (0: disable this feature and use RectSlicePositions instead)
+RectSliceFixedHeight          : 2                         # Fixed rectangular slice height in units of tiles (0: disable this feature and use RectSlicePositions instead)
+DisableLoopFilterAcrossTiles  : 0                         # Loop filtering (DBLK/SAO/ALF) applied across tile boundaries or not (0: filter across tile boundaries  1: do not filter across tile boundaries)
+DisableLoopFilterAcrossSlices : 0                         # Loop filtering (DBLK/SAO/ALF) applied across slice boundaries or not (0: filter across slice boundaries 1: do not filter across slice boundaries)
+
+#============ Rate Control ======================
+RateControl                         : 0                # Rate control: enable rate control
+TargetBitrate                       : 1000000          # Rate control: target bitrate, in bps
+KeepHierarchicalBit                 : 2                # Rate control: 0: equal bit allocation; 1: fixed ratio bit allocation; 2: adaptive ratio bit allocation
+LCULevelRateControl                 : 1                # Rate control: 1: LCU level RC; 0: picture level RC
+RCLCUSeparateModel                  : 1                # Rate control: use LCU level separate R-lambda model
+InitialQP                           : 0                # Rate control: initial QP
+RCForceIntraQP                      : 0                # Rate control: force intra QP to be equal to initial QP
+
+#============ VTM settings ======================
+LoopFilterTcOffset_div2             : 0
+SEIDecodedPictureHash               : 0
+CbQpOffset                          : 1
+CrQpOffset                          : 1
+
+ReWriteParamSets                    : 1
+#============ NEXT ====================
+
+# General
+CTUSize                      : 128
+LCTUFast                     : 1
+
+DualITree                    : 1      # separate partitioning of luma and chroma channels for I-slices
+MinQTLumaISlice              : 8
+MinQTChromaISlice            : 4
+MinQTNonISlice               : 8
+MaxMTTHierarchyDepth         : 3
+MaxMTTHierarchyDepthISliceL  : 3
+MaxMTTHierarchyDepthISliceC  : 3
+
+MTS                          : 1
+MTSIntraMaxCand              : 3
+MTSInterMaxCand              : 4
+SBT                          : 1
+LFNST                        : 1
+ISP                          : 1
+MMVD                         : 1
+Affine                       : 1
+SubPuMvp                     : 1
+MaxNumMergeCand              : 6
+LMChroma                     : 1      # use CCLM only
+DepQuant                     : 1
+IMV                          : 1
+ALF                          : 1
+BCW                          : 1 
+BcwFast                      : 1
+BIO                          : 1 
+CIIP                         : 1
+Triangle                     : 1
+IBC                          : 0      # turned off in CTC
+AllowDisFracMMVD             : 1
+AffineAmvr                   : 1
+LMCSEnable                   : 1      # LMCS: 0: disable, 1:enable
+LMCSSignalType               : 0      # Input signal type: 0:SDR, 1:HDR-PQ, 2:HDR-HLG
+LMCSUpdateCtrl               : 0      # LMCS model update control: 0:RA, 1:AI, 2:LDB/LDP
+MRL                          : 1
+MIP                          : 1
+DMVR                         : 1
+SMVD                         : 1
+
+# Fast tools
+PBIntraFast                  : 1
+ISPFast                      : 1
+FastMrg                      : 1
+AMaxBT                       : 1
+FastMIP                      : 0
+FastLFNST                    : 0
+
+# Encoder optimization tools
+AffineAmvrEncOpt             : 1
+MmvdDisNum                   : 6
+### DO NOT ADD ANYTHING BELOW THIS LINE ###
+### DO NOT DELETE THE EMPTY LINE BELOW ###
+
+
+
diff --git a/cfg/nonCTC-SliceConfigExamples/encoder_randomaccess_vtm_SingleTilePerSlice.cfg b/cfg/nonCTC-SliceConfigExamples/encoder_randomaccess_vtm_SingleTilePerSlice.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..4f9a7857f0daa71a9d60e94f5848555e39cd23c8
--- /dev/null
+++ b/cfg/nonCTC-SliceConfigExamples/encoder_randomaccess_vtm_SingleTilePerSlice.cfg
@@ -0,0 +1,157 @@
+#======== File I/O =====================
+BitstreamFile                 : str.bin
+ReconFile                     : rec.yuv
+
+#======== Profile ================
+Profile                       : auto
+
+#======== Unit definition ================
+MaxCUWidth                    : 64          # Maximum coding unit width in pixel
+MaxCUHeight                   : 64          # Maximum coding unit height in pixel
+MaxPartitionDepth             : 4           # Maximum coding unit depth
+
+#======== Coding Structure =============
+IntraPeriod                   : 32          # Period of I-Frame ( -1 = only first)
+DecodingRefreshType           : 1           # Random Access 0:none, 1:CRA, 2:IDR, 3:Recovery Point SEI
+GOPSize                       : 16          # GOP Size (number of B slice = GOPSize-1)
+
+IntraQPOffset                 : -3
+LambdaFromQpEnable            : 1           # see JCTVC-X0038 for suitable parameters for IntraQPOffset, QPoffset, QPOffsetModelOff, QPOffsetModelScale when enabled
+#        Type POC QPoffset QPOffsetModelOff QPOffsetModelScale CbQPoffset CrQPoffset QPfactor tcOffsetDiv2 betaOffsetDiv2 temporal_id #ref_pics_active_L0 #ref_pics_L0   reference_pictures_L0 #ref_pics_active_L1 #ref_pics_L1   reference_pictures_L1
+Frame1:   B   16   1        0.0                      0.0            0          0          1.0      0            0              0             2                3          16 32 24                    2                2           16 32
+Frame2:   B    8   1       -4.8848                   0.2061         0          0          1.0      0            0              1             2                2          8 16                        2                2           -8 8
+Frame3:   B    4   4       -5.7476                   0.2286         0          0          1.0      0            0              2             2                2          4 12                        2                2           -4 -12
+Frame4:   B    2   5       -5.90                     0.2333         0          0          1.0      0            0              3             2                2          2 10                        2                3           -2 -6 -14
+Frame5:   B    1   6       -7.1444                   0.3            0          0          1.0      0            0              4             2                2          1 -1                        2                4           -1 -3 -7 -15
+Frame6:   B    3   6       -7.1444                   0.3            0          0          1.0      0            0              4             2                2          1 3                         2                3           -1 -5 -13
+Frame7:   B    6   5       -5.90                     0.2333         0          0          1.0      0            0              3             2                2          2 6                         2                2           -2 -10
+Frame8:   B    5   6       -7.1444                   0.3            0          0          1.0      0            0              4             2                2          1 5                         2                3           -1 -3 -11
+Frame9:   B    7   6       -7.1444                   0.3            0          0          1.0      0            0              4             2                3          1 3 7                       2                2           -1 -9
+Frame10:  B   12   4       -5.7476                   0.2286         0          0          1.0      0            0              2             2                2          4 12                        2                2           -4 4
+Frame11:  B   10   5       -5.90                     0.2333         0          0          1.0      0            0              3             2                2          2 10                        2                2           -2 -6
+Frame12:  B    9   6       -7.1444                   0.3            0          0          1.0      0            0              4             2                2          1 9                         2                3           -1 -3 -7
+Frame13:  B   11   6       -7.1444                   0.3            0          0          1.0      0            0              4             2                3          1 3 11                      2                2           -1 -5
+Frame14:  B   14   5       -5.90                     0.2333         0          0          1.0      0            0              3             2                3          2 6 14                      2                2           -2 2
+Frame15:  B   13   6       -7.1444                   0.3            0          0          1.0      0            0              4             2                3          1 5 13                      2                2           -1 -3
+Frame16:  B   15   6       -7.1444                   0.3            0          0          1.0      0            0              4             2                4          1 3 7 15                    2                2           -1 1
+
+#=========== Motion Search =============
+FastSearch                    : 1           # 0:Full search  1:TZ search
+SearchRange                   : 384         # (0: Search range is a Full frame)
+ASR                           : 1           # Adaptive motion search range
+MinSearchWindow               : 96          # Minimum motion search window size for the adaptive window ME
+BipredSearchRange             : 4           # Search range for bi-prediction refinement
+HadamardME                    : 1           # Use of hadamard measure for fractional ME
+FEN                           : 1           # Fast encoder decision
+FDM                           : 1           # Fast Decision for Merge RD cost
+
+#======== Quantization =============
+QP                            : 32          # Quantization parameter(0-51)
+MaxDeltaQP                    : 0           # CU-based multi-QP optimization
+MaxCuDQPSubdiv                : 0           # Maximum subdiv for CU luma Qp adjustment
+DeltaQpRD                     : 0           # Slice-based multi-QP optimization
+RDOQ                          : 1           # RDOQ
+RDOQTS                        : 1           # RDOQ for transform skip
+
+#=========== Deblock Filter ============
+LoopFilterOffsetInPPS         : 1           # Dbl params: 0=varying params in SliceHeader (param = base_param + GOP_offset_param); 1 (default)=constant params in PPS (param = base_param)
+LoopFilterDisable             : 0           # Disable deblocking filter (0=Filter, 1=No Filter)
+LoopFilterBetaOffset_div2     : 0           # base_param: -6 ~ 6
+LoopFilterTcOffset_div2       : 0           # base_param: -6 ~ 6
+DeblockingFilterMetric        : 0           # blockiness metric (automatically configures deblocking parameters in bitstream). Applies slice-level loop filter offsets (LoopFilterOffsetInPPS and LoopFilterDisable must be 0)
+
+#=========== Misc. ============
+InternalBitDepth              : 10          # codec operating bit-depth
+
+#=========== Coding Tools =================
+SAO                           : 1           # Sample adaptive offset  (0: OFF, 1: ON)
+TransformSkip                 : 1           # Transform skipping (0: OFF, 1: ON)
+TransformSkipFast             : 1           # Fast Transform skipping (0: OFF, 1: ON)
+TransformSkipLog2MaxSize      : 5
+SAOLcuBoundary                : 0           # SAOLcuBoundary using non-deblocked pixels (0: OFF, 1: ON)
+
+#============ Tiles / Slices ================
+EnablePicPartitioning         : 1                      # Enable picture partitioning (0: single tile, single slice, 1: multiple tiles/slices can be used)
+TileColumnWidthArray          : 1 2 3 4                # Tile column widths in units of CTUs. Last column width will be repeated uniformly to cover any remaining picture width
+TileRowHeightArray            : 1 2 3 4                # Tile row heights in units of CTUs. Last row height will be repeated uniformly to cover any remaining picture height  
+RasterScanSlices              : 1                      # Raster-scan or rectangular slices (0: rectangular, 1: raster-scan)
+RasterSliceSizes              : 1                      # Raster-scan slice sizes in units of tiles. Last slice size will be repeated uniformly to cover any remaining tiles in the picture
+DisableLoopFilterAcrossTiles  : 0                      # Loop filtering (DBLK/SAO/ALF) applied across tile boundaries or not (0: filter across tile boundaries  1: do not filter across tile boundaries)
+DisableLoopFilterAcrossSlices : 0                      # Loop filtering (DBLK/SAO/ALF) applied across slice boundaries or not (0: filter across slice boundaries 1: do not filter across slice boundaries)
+
+#============ Rate Control ======================
+RateControl                         : 0                # Rate control: enable rate control
+TargetBitrate                       : 1000000          # Rate control: target bitrate, in bps
+KeepHierarchicalBit                 : 2                # Rate control: 0: equal bit allocation; 1: fixed ratio bit allocation; 2: adaptive ratio bit allocation
+LCULevelRateControl                 : 1                # Rate control: 1: LCU level RC; 0: picture level RC
+RCLCUSeparateModel                  : 1                # Rate control: use LCU level separate R-lambda model
+InitialQP                           : 0                # Rate control: initial QP
+RCForceIntraQP                      : 0                # Rate control: force intra QP to be equal to initial QP
+
+#============ VTM settings ======================
+LoopFilterTcOffset_div2             : 0
+SEIDecodedPictureHash               : 0
+CbQpOffset                          : 1
+CrQpOffset                          : 1
+
+ReWriteParamSets                    : 1
+#============ NEXT ====================
+
+# General
+CTUSize                      : 128
+LCTUFast                     : 1
+
+DualITree                    : 1      # separate partitioning of luma and chroma channels for I-slices
+MinQTLumaISlice              : 8
+MinQTChromaISlice            : 4
+MinQTNonISlice               : 8
+MaxMTTHierarchyDepth         : 3
+MaxMTTHierarchyDepthISliceL  : 3
+MaxMTTHierarchyDepthISliceC  : 3
+
+MTS                          : 1
+MTSIntraMaxCand              : 3
+MTSInterMaxCand              : 4
+SBT                          : 1
+LFNST                        : 1
+ISP                          : 1
+MMVD                         : 1
+Affine                       : 1
+SubPuMvp                     : 1
+MaxNumMergeCand              : 6
+LMChroma                     : 1      # use CCLM only
+DepQuant                     : 1
+IMV                          : 1
+ALF                          : 1
+BCW                          : 1 
+BcwFast                      : 1
+BIO                          : 1 
+CIIP                         : 1
+Triangle                     : 1
+IBC                          : 0      # turned off in CTC
+AllowDisFracMMVD             : 1
+AffineAmvr                   : 1
+LMCSEnable                   : 1      # LMCS: 0: disable, 1:enable
+LMCSSignalType               : 0      # Input signal type: 0:SDR, 1:HDR-PQ, 2:HDR-HLG
+LMCSUpdateCtrl               : 0      # LMCS model update control: 0:RA, 1:AI, 2:LDB/LDP
+MRL                          : 1
+MIP                          : 1
+DMVR                         : 1
+SMVD                         : 1
+
+# Fast tools
+PBIntraFast                  : 1
+ISPFast                      : 1
+FastMrg                      : 1
+AMaxBT                       : 1
+FastMIP                      : 0
+FastLFNST                    : 0
+
+# Encoder optimization tools
+AffineAmvrEncOpt             : 1
+MmvdDisNum                   : 6
+### DO NOT ADD ANYTHING BELOW THIS LINE ###
+### DO NOT DELETE THE EMPTY LINE BELOW ###
+
+
+
diff --git a/cfg/per-class/classF.cfg b/cfg/per-class/classF.cfg
index 52ae3dfa2c608bae4325b0c19695db04bd0ce589..0edc6f8c15bcee5cdd03a22df2d25406613a7c33 100644
--- a/cfg/per-class/classF.cfg
+++ b/cfg/per-class/classF.cfg
@@ -1,3 +1,3 @@
 IBC : 1
 HashME : 1
-
+BDPCM : 1
diff --git a/cfg/per-class/classH1.cfg b/cfg/per-class/classH1.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..a6b09ffa1f9acffa0b742fe305e391b30c4a5283
--- /dev/null
+++ b/cfg/per-class/classH1.cfg
@@ -0,0 +1,27 @@
+# ======== Luma adaptive QP ==========
+LumaLevelToDeltaQPMode        : 0           # Change luma delta QP based on average luma
+isSDR                         : 0           # 1: SDR in PQ container,   0: HDR
+
+# ======= LMCS =======================
+LMCSEnable                    : 1           # turned on in HDR CTC 
+LMCSSignalType                : 1           # Input signal type: 0:SDR, 1:HDR-PQ, 2:HDR-HLG
+LMCSOffset                    : 1           # chroma residual scaling offset
+
+#======== Chroma QP scale ============
+WCGPPSEnable                  : 0           # enable WCG Chroma scale
+
+CbQpOffset                    : 0
+CrQpOffset                    : 0
+
+SameCQPTablesForAllChroma     : 0
+QpInValCb                     : 13 20 36 38 43 54
+QpOutValCb                    : 13 21 29 29 32 37
+QpInValCr                     : 13 20 37 41 44 54
+QpOutValCr                    : 13 21 27 29 32 37
+QpInValCbCr                   : 12 21 41 43 54
+QpOutValCbCr                  : 12 22 30 32 37
+
+VerCollocatedChroma           : 1
+
+#======== HDR Metrics ============
+CalculateHdrMetrics           : 1           # Calculate HDR metrics for Class H1 (PQ) content
diff --git a/cfg/per-class/classH2.cfg b/cfg/per-class/classH2.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..1792d27b2a577d359a67badd4e571c5cd507a3be
--- /dev/null
+++ b/cfg/per-class/classH2.cfg
@@ -0,0 +1,16 @@
+# ======== Luma adaptive QP ========
+LumaLevelToDeltaQPMode        : 0           # Change luma delta QP based on average luma
+
+# ======= LMCS =======================
+LMCSEnable                    : 1           # turned on in HLG CTC 
+LMCSSignalType                : 2           # Input signal type: 0:SDR, 1:HDR-PQ, 2:HDR-HLG
+LMCSOffset                    : 0           # chroma residual scaling offset
+
+#======== Chroma QP scale ============
+WCGPPSEnable                  : 0           # enable WCG Chroma scale
+
+CbQpOffset                    : 0
+CrQpOffset                    : 0
+SameCQPTablesForAllChroma     : 1
+QpInValCb                     : 9 23 33 42
+QpOutValCb                    : 9 24 33 37
diff --git a/cfg/per-sequence-HDR/H1_BalloonFestival.cfg b/cfg/per-sequence-HDR/H1_BalloonFestival.cfg
index 150521a880c2e700e4a085dbf1c7944eecef4044..ad5aa2663a0a8ca924b56e382db9bd04ebee9abc 100644
--- a/cfg/per-sequence-HDR/H1_BalloonFestival.cfg
+++ b/cfg/per-sequence-HDR/H1_BalloonFestival.cfg
@@ -9,15 +9,3 @@ SourceHeight                  : 1080        # Input  frame height
 FramesToBeEncoded             : 240         # Number of frames to be coded
 
 Level                         : 4.1
-
-#======== Chroma QP scale =============
-WCGPPSEnable                  : 1           # enable WCG Chroma scale
-WCGPPSEnable                  : 1           # enable WCG Chroma scale
-WCGPPSChromaQpScale           : -0.46       # Linear chroma QP offset mapping (scale) based on QP
-WCGPPSChromaQpOffset          : 9.26        # Linear chroma QP offset mapping (offset) based on QP
-WCGPPSCbQpScale               : 1.14        # Scale factor depending on capture and representation color space
-WCGPPSCrQpScale               : 1.79        # Scale factor depending on capture and representation color space 
-
-# ======== Luma adaptive QP ========
-LumaLevelToDeltaQPMode        : 1           # Change luma delta QP based on average luma
-isSDR                         : 0           # 1: SDR in PQ container,   0: HDR
diff --git a/cfg/per-sequence-HDR/H1_Cosmos1_TreeTrunk.cfg b/cfg/per-sequence-HDR/H1_Cosmos1_TreeTrunk.cfg
index 540461fa8e9a2dc715e746f3616f3bef47b5e7ef..47162c8d732f10c609199d172b1a0cb2fa0657f8 100644
--- a/cfg/per-sequence-HDR/H1_Cosmos1_TreeTrunk.cfg
+++ b/cfg/per-sequence-HDR/H1_Cosmos1_TreeTrunk.cfg
@@ -11,12 +11,5 @@ FramesToBeEncoded             : 240         # Number of frames to be coded
 Level                         : 4.1
 
 #======== Chroma QP scale =============
-WCGPPSEnable                  : 1           # enable WCG Chroma scale
-WCGPPSChromaQpScale           : -0.46       # Linear chroma QP offset mapping (scale) based on QP
-WCGPPSChromaQpOffset          : 9.26        # Linear chroma QP offset mapping (offset) based on QP
 WCGPPSCbQpScale               : 1.04        # Scale factor depending on capture and representation color space
 WCGPPSCrQpScale               : 1.39        # Scale factor depending on capture and representation color space 
-
-# ======== Luma adaptive QP ========
-LumaLevelToDeltaQPMode        : 1           # Change luma delta QP based on average luma
-isSDR                         : 0           # 1: SDR in PQ container,   0: HDR
diff --git a/cfg/per-sequence-HDR/H1_EBU_Hurdles.cfg b/cfg/per-sequence-HDR/H1_EBU_Hurdles.cfg
index 4d1f44ef8932ffdc459d96074fa26b882a39db8b..0f0b8e4f14b575199c0e83de4055af98a4511889 100644
--- a/cfg/per-sequence-HDR/H1_EBU_Hurdles.cfg
+++ b/cfg/per-sequence-HDR/H1_EBU_Hurdles.cfg
@@ -9,14 +9,3 @@ SourceHeight                  : 1080        # Input  frame height
 FramesToBeEncoded             : 500         # Number of frames to be coded
 
 Level                         : 4.1
-
-#======== Chroma QP scale =============
-WCGPPSEnable                  : 1           # enable WCG Chroma scale
-WCGPPSChromaQpScale           : -0.46       # Linear chroma QP offset mapping (scale) based on QP
-WCGPPSChromaQpOffset          : 9.26        # Linear chroma QP offset mapping (offset) based on QP
-WCGPPSCbQpScale               : 1.14        # Scale factor depending on capture and representation color space
-WCGPPSCrQpScale               : 1.79        # Scale factor depending on capture and representation color space 
-
-# ======== Luma adaptive QP ========
-LumaLevelToDeltaQPMode        : 1           # Change luma delta QP based on average luma
-isSDR                         : 0           # 1: SDR in PQ container,   0: HDR
diff --git a/cfg/per-sequence-HDR/H1_EBU_Starting.cfg b/cfg/per-sequence-HDR/H1_EBU_Starting.cfg
index 34505213bf9a2b06f1136744183452662b8e5e60..701df4b7e6b522d0862f9de5b8a4f5b8a9f0c33d 100644
--- a/cfg/per-sequence-HDR/H1_EBU_Starting.cfg
+++ b/cfg/per-sequence-HDR/H1_EBU_Starting.cfg
@@ -9,14 +9,3 @@ SourceHeight                  : 1080        # Input  frame height
 FramesToBeEncoded             : 500         # Number of frames to be coded
 
 Level                         : 4.1
-
-#======== Chroma QP scale =============
-WCGPPSEnable                  : 1           # enable WCG Chroma scale
-WCGPPSChromaQpScale           : -0.46       # Linear chroma QP offset mapping (scale) based on QP
-WCGPPSChromaQpOffset          : 9.26        # Linear chroma QP offset mapping (offset) based on QP
-WCGPPSCbQpScale               : 1.14        # Scale factor depending on capture and representation color space
-WCGPPSCrQpScale               : 1.79        # Scale factor depending on capture and representation color space 
-
-# ======== Luma adaptive QP ========
-LumaLevelToDeltaQPMode        : 1           # Change luma delta QP based on average luma
-isSDR                         : 0           # 1: SDR in PQ container,   0: HDR
diff --git a/cfg/per-sequence-HDR/H1_Market.cfg b/cfg/per-sequence-HDR/H1_Market.cfg
index 56b955537bc3ca755beb5abfde571a49e846196e..c4675e6fd4af0ff5f20705ffcbfb34dccf324800 100644
--- a/cfg/per-sequence-HDR/H1_Market.cfg
+++ b/cfg/per-sequence-HDR/H1_Market.cfg
@@ -9,14 +9,3 @@ SourceHeight                  : 1080        # Input  frame height
 FramesToBeEncoded             : 400         # Number of frames to be coded
 
 Level                         : 4.1
-
-#======== Chroma QP scale =============
-WCGPPSEnable                  : 1           # enable WCG Chroma scale
-WCGPPSChromaQpScale           : -0.46       # Linear chroma QP offset mapping (scale) based on QP
-WCGPPSChromaQpOffset          : 9.26        # Linear chroma QP offset mapping (offset) based on QP
-WCGPPSCbQpScale               : 1.14        # Scale factor depending on capture and representation color space
-WCGPPSCrQpScale               : 1.79        # Scale factor depending on capture and representation color space 
-
-# ======== Luma adaptive QP ========
-LumaLevelToDeltaQPMode        : 1           # Change luma delta QP based on average luma
-isSDR                         : 0           # 1: SDR in PQ container,   0: HDR
diff --git a/cfg/per-sequence-HDR/H1_ShowGirl.cfg b/cfg/per-sequence-HDR/H1_ShowGirl.cfg
index 2a6ff3d3d22de6c6cfdac7ad8b8d545baa3d185f..cdda6c4c10d5f80c7a26e3c8d425a7d3cb0aeb79 100644
--- a/cfg/per-sequence-HDR/H1_ShowGirl.cfg
+++ b/cfg/per-sequence-HDR/H1_ShowGirl.cfg
@@ -11,12 +11,11 @@ FramesToBeEncoded             : 339         # Number of frames to be coded
 Level                         : 4.1
 
 #======== Chroma QP scale =============
-WCGPPSEnable                  : 1           # enable WCG Chroma scale
-WCGPPSChromaQpScale           : -0.46       # Linear chroma QP offset mapping (scale) based on QP
-WCGPPSChromaQpOffset          : 9.26        # Linear chroma QP offset mapping (offset) based on QP
 WCGPPSCbQpScale               : 1.04        # Scale factor depending on capture and representation color space
-WCGPPSCrQpScale               : 1.39        # Scale factor depending on capture and representation color space 
+WCGPPSCrQpScale               : 1.39        # Scale factor depending on capture and representation color space
 
-# ======== Luma adaptive QP ========
-LumaLevelToDeltaQPMode        : 1           # Change luma delta QP based on average luma
-isSDR                         : 0           # 1: SDR in PQ container,   0: HDR
+#======== HDR Metrics ============
+CropOffsetLeft                : 10
+CropOffsetTop                 : 10
+CropOffsetRight               : -10
+CropOffsetBottom              : -10
diff --git a/cfg/per-sequence-HDR/H1_SunRise.cfg b/cfg/per-sequence-HDR/H1_SunRise.cfg
index 31c70dfb7a9a7992b0e74b01e63257ba7f99d344..7f6bb5b28882d05a59c8595a44ece14da03bc735 100644
--- a/cfg/per-sequence-HDR/H1_SunRise.cfg
+++ b/cfg/per-sequence-HDR/H1_SunRise.cfg
@@ -9,14 +9,3 @@ SourceHeight                  : 1080        # Input  frame height
 FramesToBeEncoded             : 200         # Number of frames to be coded
 
 Level                         : 4.1
-
-#======== Chroma QP scale =============
-WCGPPSEnable                  : 1           # enable WCG Chroma scale
-WCGPPSChromaQpScale           : -0.46       # Linear chroma QP offset mapping (scale) based on QP
-WCGPPSChromaQpOffset          : 9.26        # Linear chroma QP offset mapping (offset) based on QP
-WCGPPSCbQpScale               : 1.14        # Scale factor depending on capture and representation color space
-WCGPPSCrQpScale               : 1.79        # Scale factor depending on capture and representation color space 
-
-# ======== Luma adaptive QP ========
-LumaLevelToDeltaQPMode        : 1           # Change luma delta QP based on average luma
-isSDR                         : 0           # 1: SDR in PQ container,   0: HDR
diff --git a/cfg/per-sequence-HDR/H2_DayStreet.cfg b/cfg/per-sequence-HDR/H2_DayStreet.cfg
index 58a92baa6581a3dbffad0f3fefc31d584c2d74af..99e77a0fe91e46fe518ce78c3cbeff3b64c8db3c 100644
--- a/cfg/per-sequence-HDR/H2_DayStreet.cfg
+++ b/cfg/per-sequence-HDR/H2_DayStreet.cfg
@@ -9,10 +9,3 @@ SourceHeight                  : 2160        # Input  frame height
 FramesToBeEncoded             : 300         # Number of frames to be coded
 
 Level                         : 5.1
-
-#======== Chroma QP scale =============
-WCGPPSEnable                  : 0           # enable WCG Chroma scale
-
-# ======== Luma adaptive QP ========
-LumaLevelToDeltaQPMode        : 0           # Change luma delta QP based on average luma
-
diff --git a/cfg/per-sequence-HDR/H2_DayStreet_C2.cfg b/cfg/per-sequence-HDR/H2_DayStreet_C2.cfg
deleted file mode 100644
index 1ee6bf149bdcec1310f78583e87f916d484443c8..0000000000000000000000000000000000000000
--- a/cfg/per-sequence-HDR/H2_DayStreet_C2.cfg
+++ /dev/null
@@ -1,18 +0,0 @@
-#======== File I/O ===============
-InputFile                     : DayStreet_3840x2160_60p_10bit_420_hlg.yuv
-InputBitDepth                 : 10          # Input bitdepth
-InputChromaFormat             : 420         # Ratio of luminance to chrominance samples
-FrameRate                     : 60          # Frame Rate per second
-FrameSkip                     : 0           # Number of frames to be skipped in input
-SourceWidth                   : 3840        # Input  frame width
-SourceHeight                  : 2160        # Input  frame height
-FramesToBeEncoded             : 600         # Number of frames to be coded
-
-Level                         : 5.1
-
-#======== Chroma QP scale =============
-WCGPPSEnable                  : 0           # enable WCG Chroma scale
-
-# ======== Luma adaptive QP ========
-LumaLevelToDeltaQPMode        : 0           # Change luma delta QP based on average luma
-
diff --git a/cfg/per-sequence-HDR/H2_FlyingBirds2_C2.cfg b/cfg/per-sequence-HDR/H2_FlyingBirds2.cfg
similarity index 73%
rename from cfg/per-sequence-HDR/H2_FlyingBirds2_C2.cfg
rename to cfg/per-sequence-HDR/H2_FlyingBirds2.cfg
index f029fea88c93367b4fbf3dd38bc811e79c58cdba..18f8fb078878b7a313402f03ff5226e16d3f79c9 100644
--- a/cfg/per-sequence-HDR/H2_FlyingBirds2_C2.cfg
+++ b/cfg/per-sequence-HDR/H2_FlyingBirds2.cfg
@@ -9,10 +9,3 @@ SourceHeight                  : 2160        # Input  frame height
 FramesToBeEncoded             : 300         # Number of frames to be coded
 
 Level                         : 5.1
-
-#======== Chroma QP scale =============
-WCGPPSEnable                  : 0           # enable WCG Chroma scale
-
-# ======== Luma adaptive QP ========
-LumaLevelToDeltaQPMode        : 0           # Change luma delta QP based on average luma
-
diff --git a/cfg/per-sequence-HDR/H2_PeopleInShoppingCenter.cfg b/cfg/per-sequence-HDR/H2_PeopleInShoppingCenter.cfg
index 67db9625ae551783cac173ccd3b542ab288ac4c2..9412ad6837f8765d4bfc722e1ad9d1fefa5b74fa 100644
--- a/cfg/per-sequence-HDR/H2_PeopleInShoppingCenter.cfg
+++ b/cfg/per-sequence-HDR/H2_PeopleInShoppingCenter.cfg
@@ -9,10 +9,3 @@ SourceHeight                  : 2160        # Input  frame height
 FramesToBeEncoded             : 300         # Number of frames to be coded
 
 Level                         : 5.1
-
-#======== Chroma QP scale =============
-WCGPPSEnable                  : 0           # enable WCG Chroma scale
-
-# ======== Luma adaptive QP ========
-LumaLevelToDeltaQPMode        : 0           # Change luma delta QP based on average luma
-
diff --git a/cfg/per-sequence-HDR/H2_PeopleInShoppingCenter_C2.cfg b/cfg/per-sequence-HDR/H2_PeopleInShoppingCenter_C2.cfg
deleted file mode 100644
index 18f585160328263f3a6ea32188d5e8465490f70c..0000000000000000000000000000000000000000
--- a/cfg/per-sequence-HDR/H2_PeopleInShoppingCenter_C2.cfg
+++ /dev/null
@@ -1,18 +0,0 @@
-#======== File I/O ===============
-InputFile                     : PeopleInShoppingCenter_3840x2160_60p_10bit_420_hlg.yuv
-InputBitDepth                 : 10          # Input bitdepth
-InputChromaFormat             : 420         # Ratio of luminance to chrominance samples
-FrameRate                     : 60          # Frame Rate per second
-FrameSkip                     : 0           # Number of frames to be skipped in input
-SourceWidth                   : 3840        # Input  frame width
-SourceHeight                  : 2160        # Input  frame height
-FramesToBeEncoded             : 600         # Number of frames to be coded
-
-Level                         : 5.1
-
-#======== Chroma QP scale =============
-WCGPPSEnable                  : 0           # enable WCG Chroma scale
-
-# ======== Luma adaptive QP ========
-LumaLevelToDeltaQPMode        : 0           # Change luma delta QP based on average luma
-
diff --git a/cfg/per-sequence-HDR/H2_SunsetBeach2.cfg b/cfg/per-sequence-HDR/H2_SunsetBeach2.cfg
index 50fa5c60946351a767b47dba980c6cb9987b86b3..c673c0e9e890295fb5af84cb8f0f08c69bab1dc0 100644
--- a/cfg/per-sequence-HDR/H2_SunsetBeach2.cfg
+++ b/cfg/per-sequence-HDR/H2_SunsetBeach2.cfg
@@ -9,9 +9,3 @@ SourceHeight                  : 2160        # Input  frame height
 FramesToBeEncoded             : 300         # Number of frames to be coded
 
 Level                         : 5.1
-
-#======== Chroma QP scale =============
-WCGPPSEnable                  : 0           # enable WCG Chroma scale
-
-# ======== Luma adaptive QP ========
-LumaLevelToDeltaQPMode        : 0           # Change luma delta QP based on average luma
\ No newline at end of file
diff --git a/cfg/per-sequence-HDR/H2_SunsetBeach2_C2.cfg b/cfg/per-sequence-HDR/H2_SunsetBeach2_C2.cfg
deleted file mode 100644
index d85a664678cd714c9acf9e1f54312492228a6fc8..0000000000000000000000000000000000000000
--- a/cfg/per-sequence-HDR/H2_SunsetBeach2_C2.cfg
+++ /dev/null
@@ -1,18 +0,0 @@
-#======== File I/O ===============
-InputFile                     : SunsetBeach2_3840x2160p_60_10b_HLG_420.yuv
-InputBitDepth                 : 10          # Input bitdepth
-InputChromaFormat             : 420         # Ratio of luminance to chrominance samples
-FrameRate                     : 60          # Frame Rate per second
-FrameSkip                     : 0           # Number of frames to be skipped in input
-SourceWidth                   : 3840        # Input  frame width
-SourceHeight                  : 2160        # Input  frame height
-FramesToBeEncoded             : 600         # Number of frames to be coded
-
-Level                         : 5.1
-
-#======== Chroma QP scale =============
-WCGPPSEnable                  : 0           # enable WCG Chroma scale
-
-# ======== Luma adaptive QP ========
-LumaLevelToDeltaQPMode        : 0           # Change luma delta QP based on average luma
-
diff --git a/cfg/per-sequence/Robot_444.cfg b/cfg/per-sequence/Robot_444.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..e8495c94387ce92c117befb66b91275c3a60d939
--- /dev/null
+++ b/cfg/per-sequence/Robot_444.cfg
@@ -0,0 +1,11 @@
+#======== File I/O ===============
+InputFile                     : sc_robot_1280x720_30_8bit_300_444.yuv
+InputBitDepth                 : 8           # Input bitdepth
+InputChromaFormat             : 444         # Ratio of luminance to chrominance samples
+FrameRate                     : 30          # Frame Rate per second
+FrameSkip                     : 0           # Number of frames to be skipped in input
+SourceWidth                   : 1280        # Input  frame width
+SourceHeight                  : 720         # Input  frame height
+FramesToBeEncoded             : 300         # Number of frames to be coded
+
+Level                         : 6.2
diff --git a/cfg/per-sequence/Robot_RGB.cfg b/cfg/per-sequence/Robot_RGB.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..6fc4be981268717be82746b1119b1431680b034a
--- /dev/null
+++ b/cfg/per-sequence/Robot_RGB.cfg
@@ -0,0 +1,14 @@
+#======== File I/O ===============
+InputFile                     : sc_robot_1280x720_30_8bit_300.rgb
+InputBitDepth                 : 8           # Input bitdepth
+InputChromaFormat             : 444         # Ratio of luminance to chrominance samples
+FrameRate                     : 30          # Frame Rate per second
+FrameSkip                     : 0           # Number of frames to be skipped in input
+SourceWidth                   : 1280        # Input  frame width
+SourceHeight                  : 720         # Input  frame height
+FramesToBeEncoded             : 300         # Number of frames to be coded
+InputColourSpaceConvert       : RGBtoGBR    # Non-normative colour space conversion to apply to input video
+SNRInternalColourSpace        : 1           # Evaluate SNRs in GBR order
+OutputInternalColourSpace     : 0           # Convert recon output back to RGB order. Use --OutputColourSpaceConvert GBRtoRGB on decoder to produce a matching output file.
+
+Level                         : 6.2
diff --git a/cfg/rpr/scale1.5x.cfg b/cfg/rpr/scale1.5x.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..f9733d0dbc11f436fcc50799d8abc7f91b2238d5
--- /dev/null
+++ b/cfg/rpr/scale1.5x.cfg
@@ -0,0 +1,6 @@
+# Reference picture resampling CE settings for scaling ratio and number of encoded frames
+
+ScalingRatioHor                     : 1.5
+ScalingRatioVer                     : 1.5
+FractionNumFrames                   : 0.5
+UpscaledOutput                      : 1
\ No newline at end of file
diff --git a/cfg/rpr/scale2.0x.cfg b/cfg/rpr/scale2.0x.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..f0b9a6e92b924879d7fc5dfbe3be46c409c30f5e
--- /dev/null
+++ b/cfg/rpr/scale2.0x.cfg
@@ -0,0 +1,6 @@
+# Reference picture resampling CE settings for scaling ratio and number of encoded frames
+
+ScalingRatioHor                     : 2.0
+ScalingRatioVer                     : 2.0
+FractionNumFrames                   : 0.5
+UpscaledOutput                      : 1
diff --git a/cfg/sei_vui/alternative_transfer_characteristics.cfg b/cfg/sei_vui/alternative_transfer_characteristics.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..8b72d975f659d2bb076ccbe34d300528e7d8c8df
--- /dev/null
+++ b/cfg/sei_vui/alternative_transfer_characteristics.cfg
@@ -0,0 +1,2 @@
+#======== Alternative transfer characteristics SEI message =====================
+SEIPreferredTransferCharacterisics      : 18
diff --git a/cfg/sei_vui/ambient_viewing_environment.cfg b/cfg/sei_vui/ambient_viewing_environment.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..9ac5c6a24a30a4bd90f5f000ccc88344ed842c84
--- /dev/null
+++ b/cfg/sei_vui/ambient_viewing_environment.cfg
@@ -0,0 +1,5 @@
+#======== Ambient viewing environment SEI message =====================
+SEIAVEEnabled                           : 1
+SEIAVEAmbientIlluminance                : 100000
+SEIAVEAmbientLightX                     : 15635
+SEIAVEAmbientLightY                     : 16450
diff --git a/cfg/sei_vui/content_colour_volume.cfg b/cfg/sei_vui/content_colour_volume.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..b4ea22a809c0d2dc7f22c53a99b49abf86b55e80
--- /dev/null
+++ b/cfg/sei_vui/content_colour_volume.cfg
@@ -0,0 +1,17 @@
+#======== Content Colour Volume SEI message =====================
+SEICCVEnabled                           : 1
+SEICCVCancelFlag                        : 0
+SEICCVPersistenceFlag                   : 1
+SEICCVPrimariesPresent                  : 1
+m_ccvSEIPrimariesX0                     : 0.300
+m_ccvSEIPrimariesY0                     : 0.600
+m_ccvSEIPrimariesX1                     : 0.150
+m_ccvSEIPrimariesY1                     : 0.060
+m_ccvSEIPrimariesX2                     : 0.640
+m_ccvSEIPrimariesY2                     : 0.330
+SEICCVMinLuminanceValuePresent          : 1
+SEICCVMinLuminanceValue                 : 0.0
+SEICCVMaxLuminanceValuePresent          : 1
+SEICCVMaxLuminanceValue                 : 0.1
+SEICCVAvgLuminanceValuePresent          : 1
+SEICCVAvgLuminanceValue                 : 0.01
diff --git a/cfg/sei_vui/content_light_level.cfg b/cfg/sei_vui/content_light_level.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..97129b5d56eedd7b44dab7cdf50c5c5b7dc01a5d
--- /dev/null
+++ b/cfg/sei_vui/content_light_level.cfg
@@ -0,0 +1,4 @@
+#======== Content Light Level SEI message =====================
+SEICLLEnabled                           : 1
+SEICLLMaxContentLightLevel              : 4000
+SEICLLMaxPicAvgLightLevel               : 0
diff --git a/cfg/sei_vui/equirectangular.cfg b/cfg/sei_vui/equirectangular.cfg
new file mode 100755
index 0000000000000000000000000000000000000000..c448477dded8aed2ed548d9d912225f73bc1adce
--- /dev/null
+++ b/cfg/sei_vui/equirectangular.cfg
@@ -0,0 +1,9 @@
+#======== Equirectangular Projection SEI message =====================
+SEIErpEnabled                           : 1
+SEIErpCancelFlag                        : 0
+SEIErpPersistenceFlag                   : 1
+SEIErpGuardBandFlag                     : 1
+SEIErpGuardBandType                     : 0
+SEIErpLeftGuardBandWidth                : 254
+SEIErpRightGuardBandWidth               : 254
+
diff --git a/cfg/sei_vui/film_grain_characterstics.cfg b/cfg/sei_vui/film_grain_characterstics.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..d8c9c739fc3c7d9597fad4246fe61c879d25a823
--- /dev/null
+++ b/cfg/sei_vui/film_grain_characterstics.cfg
@@ -0,0 +1,11 @@
+#======== Film grain characteristics SEI message =====================
+SEIFGCEnabled                           : 1
+SEIFGCCancelFlag                        : 0
+SEIFGCPersistenceFlag                   : 1
+SEIFGCModelID                           : 0  # 0: frequency filtering; 1: auto-regression; 2-3 are reserved
+SEIFGCSepColourDescPresentFlag          : 0  # if not 0, need to specify separate colour description (not implemented in current encoder cmd line)
+SEIFGCBlendingModeID                    : 0  # 0: additive; 1: multiplicative
+SEIFGCLog2ScaleFactor                   : 0
+SEIFGCCompModelPresentComp0             : 0  # if not 0, need to specify model for comp 0 (not implemented in current encoder cmd line)
+SEIFGCCompModelPresentComp1             : 0  # if not 0, need to specify model for comp 1 (not implemented in current encoder cmd line)
+SEIFGCCompModelPresentComp2             : 0  # if not 0, need to specify model for comp 2 (not implemented in current encoder cmd line)
diff --git a/cfg/sei_vui/frame_packing.cfg b/cfg/sei_vui/frame_packing.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..0a8406dcfcb507d7099067121717f2dc6a25a860
--- /dev/null
+++ b/cfg/sei_vui/frame_packing.cfg
@@ -0,0 +1,6 @@
+#======== Frame Packing SEI message =====================
+SEIFramePacking                         : 1
+SEIFramePackingId                       : 0
+SEIFramePackingType                     : 3
+SEIFramePackingQuincunx                 : 1
+SEIFramePackingInterpretation           : 0
diff --git a/cfg/sei_vui/generalized_cubemap_projection.cfg b/cfg/sei_vui/generalized_cubemap_projection.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..82cd6d8a491d7df90875bf3cd40bd1b74cce9a83
--- /dev/null
+++ b/cfg/sei_vui/generalized_cubemap_projection.cfg
@@ -0,0 +1,16 @@
+#======== Generalized Cubemap Projection SEI message =====================
+SEIGcmpEnabled                           : 1
+SEIGcmpCancelFlag                        : 0
+SEIGcmpPersistenceFlag                   : 1
+SEIGcmpPackingType                       : 2
+SEIGcmpMappingFunctionType               : 2
+SEIGcmpFaceIndex                         : 5 0 4 3 1 2
+SEIGcmpFaceRotation                      : 2 2 2 2 2 2
+SEIGcmpFunctionCoeffU                    : 0.28 0.28 0.28 0.28 0.28 0.28
+SEIGcmpFunctionUAffectedByVFlag          : 0 0 0 0 0 0
+SEIGcmpFunctionCoeffV                    : 0.4 0.4 0.4 0.28 0.4 0.28
+SEIGcmpFunctionVAffectedByUFlag          : 1 1 1 0 1 0
+SEIGcmpGuardBandFlag                     : 1
+SEIGcmpGuardBandBoundaryType             : 1
+SEIGcmpGuardBandSamplesMinus1            : 15
+
diff --git a/cfg/sei_vui/mastering_display_colour_volume.cfg b/cfg/sei_vui/mastering_display_colour_volume.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..4566d40061a53fd4f8cbaee4eabe44176f0f2584
--- /dev/null
+++ b/cfg/sei_vui/mastering_display_colour_volume.cfg
@@ -0,0 +1,6 @@
+#======== Mastering Display Colour Volume SEI message =====================
+SEIMasteringDisplayColourVolume         : 1
+SEIMasteringDisplayMaxLuminance         : 10000
+SEIMasteringDisplayMinLuminance         : 0
+SEIMasteringDisplayPrimaries            : 0 50000 0 0 50000 0
+SEIMasteringDisplayWhitePoint           : 16667 16667
diff --git a/cfg/sei_vui/omni_viewport.cfg b/cfg/sei_vui/omni_viewport.cfg
new file mode 100755
index 0000000000000000000000000000000000000000..8a61f4db61a7b44ff918c522c0571fc8387d7814
--- /dev/null
+++ b/cfg/sei_vui/omni_viewport.cfg
@@ -0,0 +1,12 @@
+#======== Omni Viewport SEI message =====================
+SEIOmniViewportEnabled                  : 1
+SEIOmniViewportId                       : 0
+SEIOmniViewportCancelFlag               : 0
+SEIOmniViewportPersistenceFlag          : 1
+SEIOmniViewportCntMinus1                : 2
+SEIOmniViewportAzimuthCentre            : -5898240 5898240 0
+SEIOmniViewportElevationCentre          : -5898240 5898240 0
+SEIOmniViewportTiltCentre               : -11796480 5898240 0
+SEIOmniViewportHorRange                 : 2949120 2949120 2949120
+SEIOmniViewportVerRange                 : 2949120 2949120 2949120
+
diff --git a/cfg/sei_vui/region_wise_packing.cfg b/cfg/sei_vui/region_wise_packing.cfg
new file mode 100755
index 0000000000000000000000000000000000000000..7f04516dfccb58497fa687ce0b28483eefa0d110
--- /dev/null
+++ b/cfg/sei_vui/region_wise_packing.cfg
@@ -0,0 +1,27 @@
+#======== Region-wise packing SEI message =====================
+SEIRwpEnabled                           : 1
+SEIRwpCancelFlag                        : 0
+SEIRwpPersistenceFlag                   : 1
+SEIRwpConstituentPictureMatchingFlag    : 1   
+SEIRwpNumPackedRegions                  : 6
+SEIRwpProjPictureWidth                  : 2048
+SEIRwpProjPictureHeight                 : 1024
+SEIRwpPackedPictureWidth                : 1536
+SEIRwpPackedPictureHeight               : 768
+SEIRwpTransformType                     : 0 1 2 3 4 5
+SEIRwpGuardBandFlag                     : 1 1 1 1 1 1
+SEIRwpProjRegionWidth                   : 512 512 512 512 512 512
+SEIRwpProjRegionHeight                  : 128 128 128 128 128 128
+SEIRwpProjRegionTop                     : 256 256 256 256 256 256
+SEIRwpProjRegionLeft                    : 0 256 512 768 1024 1280
+SEIRwpPackedRegionWidth                 : 512 512 512 384 384 384
+SEIRwpPackedRegionHeight                : 128 128 128 192 192 192
+SEIRwpPackedRegionTop                   : 64 64 64 64 64 64
+SEIRwpPackedRegionLeft                  : 32 32 32 32 32 32
+SEIRwpLeftGuardBandWidth                : 64 64 64 64 64 64
+SEIRwpRightGuardBandWidth               : 128 128 128 128 128 128
+SEIRwpTopGuardBandHeight                : 192 192 192 192 192 192
+SEIRwpBottomGuardBandHeight             : 255 255 255 255 255 255
+SEIRwpGuardBandNotUsedForPredFlag       : 1 1 1 1 1 1
+SEIRwpGuardBandType                     : 0 0 0 0 1 1 1 1 2 2 2 2 3 3 3 3 0 0 0 0 1 1 1 1
+
diff --git a/cfg/sei_vui/sample_aspect_ratio.cfg b/cfg/sei_vui/sample_aspect_ratio.cfg
new file mode 100755
index 0000000000000000000000000000000000000000..b33547473cf6062f619b684f5061293c9cc1b15f
--- /dev/null
+++ b/cfg/sei_vui/sample_aspect_ratio.cfg
@@ -0,0 +1,6 @@
+SEISampleAspectRatioInfo:  1
+SEISARICancelFlag:         0
+SEISARIPersistenceFlag:    1
+SEISARIAspectRatioIdc:     255
+SEISARISarWidth:           1
+SEISARISarHeight:          1
diff --git a/cfg/sei_vui/sphere_rotation.cfg b/cfg/sei_vui/sphere_rotation.cfg
new file mode 100755
index 0000000000000000000000000000000000000000..be144e63b058a18465ad0c6a469ab8f0a2074d73
--- /dev/null
+++ b/cfg/sei_vui/sphere_rotation.cfg
@@ -0,0 +1,8 @@
+#======== Sphere Rotation SEI message =====================
+SEISphereRotationEnabled                : 1
+SEISphereRotationCancelFlag             : 0
+SEISphereRotationPersistenceFlag        : 1
+SEISphereRotationYaw                    : -5898240
+SEISphereRotationPitch                  : -5898240
+SEISphereRotationRoll                   : -11796480
+
diff --git a/cfg/sei_vui/subpicture_level.cfg b/cfg/sei_vui/subpicture_level.cfg
new file mode 100755
index 0000000000000000000000000000000000000000..46a3dce7aa0fcf354ec5a6a973acdfe47d6c2161
--- /dev/null
+++ b/cfg/sei_vui/subpicture_level.cfg
@@ -0,0 +1,2 @@
+SEISubpicureLevelInfo: 1
+
diff --git a/cfg/sei_vui/timing.cfg b/cfg/sei_vui/timing.cfg
new file mode 100755
index 0000000000000000000000000000000000000000..63f37a73919737867258e173f4c611176003d05d
--- /dev/null
+++ b/cfg/sei_vui/timing.cfg
@@ -0,0 +1,4 @@
+SEIBufferingPeriod:  1
+SEIPictureTiming:  1
+VuiParametersPresent: 1
+RCCpbSize: 2000
diff --git a/cfg/sei_vui/vui_HD.cfg b/cfg/sei_vui/vui_HD.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..6705b1da5f934e0d1d41b4d9bf627a1af0b97073
--- /dev/null
+++ b/cfg/sei_vui/vui_HD.cfg
@@ -0,0 +1,17 @@
+VuiParametersPresent:            1      // enable VUI
+AspectRatioInfoPresent:          1      // enable presence of sample aspect ratio information
+AspectRatioIdc:                  1      // sample aspect ratio pre-defined types according to Rec. ITU-T H.273 | ISO/IEC 23091-2
+SarWidth:                        1      // sample aspect ratio width, if AspectRatioIdc is equal to 255
+SarHeight:                       1      // sample aspect ratio height, if AspectRatioIdc is equal to 255
+ColourDescriptionPresent:        1      // enable presence of colour description information
+ColourPrimaries:                 1      // the source colour primaries according to Rec. ITU-T H.273 | ISO/IEC 23091-2
+TransferCharacteristics:         1      // transfer characteristics function according to Rec. ITU-T H.273 | ISO/IEC 23091-2
+MatrixCoefficients:              1      // the formulae used in deriving luma and chroma signals acc. to Rec. ITU-T H.273 | ISO/IEC 23091-2
+VideoFullRange:                  0      // scaling and offset values applied according to Rec. ITU-T H.273 | ISO/IEC 23091-2
+ChromaLocInfoPresent:            1      // enable presence of chroma location information
+ChromaSampleLocTypeTopField:     0      // the location of chroma sample top field
+ChromaSampleLocTypeBottomField:  0      // the location of chroma sample bottom field
+ChromaSampleLocType:             0      // the location of chroma sample frame
+OverscanInfoPresent:             1      // enable presence of overscan information
+OverscanAppropriate:             0      // indicates if the cropped decoded pictures output are suitable for display using overscan
+
diff --git a/cfg/sei_vui/vui_UHD_PQ.cfg b/cfg/sei_vui/vui_UHD_PQ.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..717815029e845436e2f792e9b0d6015c9014e610
--- /dev/null
+++ b/cfg/sei_vui/vui_UHD_PQ.cfg
@@ -0,0 +1,17 @@
+VuiParametersPresent:            1      // enable VUI
+AspectRatioInfoPresent:          1      // enable presence of sample aspect ratio information
+AspectRatioIdc:                  1      // sample aspect ratio pre-defined types according to Rec. ITU-T H.273 | ISO/IEC 23091-2
+SarWidth:                        1      // sample aspect ratio width, if AspectRatioIdc is equal to 255
+SarHeight:                       1      // sample aspect ratio height, if AspectRatioIdc is equal to 255
+ColourDescriptionPresent:        1      // enable presence of colour description information
+ColourPrimaries:                 9      // the source colour primaries according to Rec. ITU-T H.273 | ISO/IEC 23091-2
+TransferCharacteristics:         16     // transfer characteristics function according to Rec. ITU-T H.273 | ISO/IEC 23091-2
+MatrixCoefficients:              9      // the formulae used in deriving luma and chroma signals acc. to Rec. ITU-T H.273 | ISO/IEC 23091-2
+VideoFullRange:                  0      // scaling and offset values applied according to Rec. ITU-T H.273 | ISO/IEC 23091-2
+ChromaLocInfoPresent:            1      // enable presence of chroma location information
+ChromaSampleLocTypeTopField:     2      // the location of chroma sample top field
+ChromaSampleLocTypeBottomField:  2      // the location of chroma sample bottom field
+ChromaSampleLocType:             2      // the location of chroma sample frame
+OverscanInfoPresent:             1      // enable presence of overscan information
+OverscanAppropriate:             0      // indicates if the cropped decoded pictures output are suitable for display using overscan
+
diff --git a/cmake/CMakeBuild/bin/pyhhi/build/app/cmk.py b/cmake/CMakeBuild/bin/pyhhi/build/app/cmk.py
index eb351881c9571586e19549ccacfbbf37fb571abc..f231c70c4f0eb392f86cc0daab632ea994a3dc5a 100755
--- a/cmake/CMakeBuild/bin/pyhhi/build/app/cmk.py
+++ b/cmake/CMakeBuild/bin/pyhhi/build/app/cmk.py
@@ -11,6 +11,7 @@ import sys
 import pyhhi.build.common.system as system
 import pyhhi.build.common.util as util
 import pyhhi.build.common.ver as ver
+import pyhhi.build.common.bldtools as bldtools
 import pyhhi.build.cmksupp as cmksupp
 from pyhhi.build.common.bldtools import BuildScriptInstaller
 from pyhhi.build.common.error import InvalidCommandLineArgumentError
@@ -24,7 +25,7 @@ class CMakeLauncherApp(object):
         self._cmake_launcher = None
         self._dict_generator_choice = {'linux': ['umake', 'ninja'],
                                        'macosx': ['xcode', 'umake', 'ninja'],
-                                       'windows': ['vs15', 'vs14', 'vs12', 'vs11', 'vs10', 'umake', 'mgwmake', 'ninja']}
+                                       'windows': ['vs16', 'vs15', 'vs14', 'vs12', 'vs11', 'vs10', 'umake', 'mgwmake', 'ninja']}
         self._top_dir = None
         self._cmake_mod_list = ['pyhhi.build.app.cmk',
                                 'pyhhi.build.cmkfnd',
@@ -93,14 +94,14 @@ class CMakeLauncherApp(object):
 %(prog)s [options] [variant=debug,release,relwithdebinfo,minsizerel] [link=static,shared] [toolset=<toolset_spec>] [address-model=32]
 
 %(prog)s is a script front end to cmake to simplify its usage on Linux,
-Windows, MacOSX using cmake's generators "Unix Makefiles", "Xcode" and
-"Visual Studio 15 - Visual Studio 10" and its compilers.
+Windows, MacOSX using cmake's generators "Unix Makefiles", "Ninja", "Xcode" and
+"Visual Studio 16 - Visual Studio 10" and its compilers.
 
 arguments:
   variant:          debug if not specified
   link:             static if not specified
   toolset:          default c++ compiler if not specified
-                    examples/windows: msvc-19.13, msvc-19.0, msvc-18.0, msvc-17.0, msvc-16.0, intel, gcc
+                    examples/windows: msvc-19.1x, msvc-19.0, msvc-18.0, msvc-17.0, msvc-16.0, intel, gcc
                     examples/linux:   gcc-4.9, gcc-5, gcc-6, clang, intel
   address-model=32: windows: builds 32 bit binaries instead of 64 bit binaries
 
@@ -131,7 +132,7 @@ usage examples:
 
         parser.add_argument("-g", "-G", action="store", dest="generator", choices=self._dict_generator_choice[self._sys_info.get_platform()],
                             help="""specify a cmake generator the script has special support for.
-                                    Supported generators: ninja, umake, mgwmake, vs15, vs14, vs12, vs11, vs10, xcode.
+                                    Supported generators: ninja, umake, mgwmake, vs16, vs15, vs14, vs12, vs11, vs10, xcode.
                                     The choices accepted are platform and installation dependent. The environment variable
                                     DEFAULT_CMAKE_GENERATOR may be used to override the default value.""")
 
@@ -155,13 +156,14 @@ usage examples:
         parser.add_argument("--clean-first", action="store_true", dest="clean_first", default=False,
                             help="build target clean first, then build the active target.")
 
-        parser.add_argument("--verbosity", action="store", dest="build_verbosity", choices=['quiet', 'minimal', 'normal', 'detailed', 'diagnostic'], default='minimal',
-                            help="specify msbuild verbosity level [default: %(default)s].")
+        parser.add_argument("--verbosity", action="store", dest="build_verbosity", choices=['cmake', 'quiet', 'minimal', 'normal', 'detailed', 'diagnostic'], default='minimal',
+                            help="""specify (ms)build verbosity level [default: %(default)s]. 
+                                 The choice 'cmake' requires cmake 3.14.x or higher to increase build verbosity for Visual Studio and other generators.""")
 
         util.app_args_add_log_level(parser)
 
         g = parser.add_argument_group("advanced options")
-        g.add_argument("-i", action="store", dest="install_dir", nargs='?', const=os.path.join(self._sys_info.get_home_dir(), 'bin'),
+        g.add_argument("-i", action="store", dest="install_dir", nargs='?', const=os.path.join(self._sys_info.get_home_dir(native=True), 'bin'),
                        help="install this script and exit. The default destination directory is %(const)s.")
 
         g.add_argument("--py-cache-clean", action="store", dest="py_cache_dirs", nargs='+',
@@ -282,11 +284,26 @@ usage examples:
                 # looks like a cross compiler specification which requires a toolchain file matching the toolset spec and the linux system.
                 toolset_spec_norm = self._find_toolchain_file(toolset_spec_norm)
         elif self._sys_info.is_windows():
-            # msvc-19.00 -> normalized to 19.0
-            re_match = re.match(r'msvc-(\d+)\.(\d+)', toolset_spec)
-            if re_match:
-                minor_version = int(re_match.group(2))
-                toolset_spec_norm = "msvc-{0}.{1:d}".format(re_match.group(1), minor_version)
+            if toolset_spec.startswith('msvc-'):
+                msvc_registry = bldtools.MsvcRegistry()
+                if toolset_spec == 'msvc-19.2x':
+                    if msvc_registry.is_version_installed((14, 2)):
+                        cl_version = msvc_registry.get_compiler_version((14, 2))
+                        toolset_spec_norm = "msvc-{0:d}.{1:d}".format(cl_version[0], cl_version[1])
+                    else:
+                        raise InvalidCommandLineArgumentError("toolset={} not available.".format(toolset_spec))
+                elif toolset_spec == 'msvc-19.1x':
+                    if msvc_registry.is_version_installed((14, 1)):
+                        cl_version = msvc_registry.get_compiler_version((14, 1))
+                        toolset_spec_norm = "msvc-{0:d}.{1:d}".format(cl_version[0], cl_version[1])
+                    else:
+                        raise InvalidCommandLineArgumentError("toolset={} not available.".format(toolset_spec))
+                else:
+                    # msvc-19.00 -> normalized to 19.0
+                    re_match = re.match(r'msvc-(\d+)\.(\d+)', toolset_spec)
+                    if re_match:
+                        minor_version = int(re_match.group(2))
+                        toolset_spec_norm = "msvc-{0}.{1:d}".format(re_match.group(1), minor_version)
         elif self._sys_info.is_macosx():
             pass
         else:
diff --git a/cmake/CMakeBuild/bin/pyhhi/build/cmksupp.py b/cmake/CMakeBuild/bin/pyhhi/build/cmksupp.py
index 078586a78187fdc7b183989ee300e7ba80b86c3b..07754e6e03143caab41b35e0fe4987dce3b5a1c2 100755
--- a/cmake/CMakeBuild/bin/pyhhi/build/cmksupp.py
+++ b/cmake/CMakeBuild/bin/pyhhi/build/cmksupp.py
@@ -155,6 +155,7 @@ class CMakeLauncher(object):
                                          'mgwmake': 'MinGW Makefiles',
                                          'ninja': 'Ninja',
                                          'xcode': 'Xcode',
+                                         'vs16': 'Visual Studio 16 2019',
                                          'vs15': 'Visual Studio 15 2017',
                                          'vs14': 'Visual Studio 14 2015',
                                          'vs12': 'Visual Studio 12 2013',
@@ -178,13 +179,30 @@ class CMakeLauncher(object):
                                                            'vs12': ['msvc-18.0', 'msvc-17.0', 'msvc-16.0'],
                                                            'vs11': ['msvc-17.0', 'msvc-16.0'],
                                                            'vs10': ['msvc-16.0']}
+
             # vs15 has not a fixed compiler version and therefore the mapping is generated dynamically.
             if self._msvc_registry.is_version_installed((14, 1)):
                 cl_version = self._msvc_registry.get_compiler_version((14, 1))
                 msvc_str = 'msvc-' + ver.version_tuple_to_str(cl_version[:2])
                 if cl_version[1] < 20:
                     self._dict_to_vs_platform_toolset[msvc_str] = 'v141'
-                    self._dict_generator_alias_to_msvc_toolsets['vs15'] = [msvc_str, 'msvc-19.0', 'msvc-18.0', 'msvc-17.0', 'msvc-16.0']
+                    if not self._msvc_registry.is_vs2019_toolset((14, 1)):
+                        self._dict_generator_alias_to_msvc_toolsets['vs15'] = [msvc_str, 'msvc-19.0', 'msvc-18.0', 'msvc-17.0', 'msvc-16.0']
+                else:
+                    assert False
+
+            # vs16 does not have a fixed compiler version; therefore the mapping is generated dynamically.
+            if self._msvc_registry.is_version_installed((14, 2)):
+                cl_version = self._msvc_registry.get_compiler_version((14, 2))
+                msvc_str = 'msvc-' + ver.version_tuple_to_str(cl_version[:2])
+                if cl_version[1] < 30:
+                    self._dict_to_vs_platform_toolset[msvc_str] = 'v142'
+                    msvc_version_list = [msvc_str]
+                    if self._msvc_registry.is_version_installed((14, 1)):
+                        cl_version = self._msvc_registry.get_compiler_version((14, 1))
+                        msvc_version_list.append("msvc-{0:d}.{1:d}".format(cl_version[0], cl_version[1]))
+                    msvc_version_list.extend(['msvc-19.0', 'msvc-18.0', 'msvc-17.0', 'msvc-16.0'])
+                    self._dict_generator_alias_to_msvc_toolsets['vs16'] = msvc_version_list
                 else:
                     assert False
 
@@ -259,38 +277,43 @@ class CMakeLauncher(object):
                     vs_toolset = "Intel C++ Compiler %d.%d" % (compiler_info.version_major_minor[0], compiler_info.version_major_minor[1])
                 else:
                     assert False
-                cmake_argv = ['-G', self._dict_to_cmake_generator[generator_alias],
-                              '-T', vs_toolset,
-                              '-A', self._dict_to_vs_platform_name[compiler_info.target_arch]]
+                cmake_argv = ['-G', self._dict_to_cmake_generator[generator_alias]]
+                if generator_alias == 'vs16':
+                    if ver.version_compare(compiler_info.version_major_minor, (19, 20)) < 0:
+                        cmake_argv.extend(['-T', self._dict_to_vs_platform_toolset['msvc-' + ver.version_tuple_to_str(compiler_info.version_major_minor)]])
+                    if compiler_info.target_arch != 'x86_64':
+                        cmake_argv.extend(['-A', self._dict_to_vs_platform_name[compiler_info.target_arch]])
+                else:
+                    cmake_argv.extend(['-T', vs_toolset, '-A', self._dict_to_vs_platform_name[compiler_info.target_arch]])
+
             elif generator_alias == 'xcode':
                 cmake_argv = ['-G', self._dict_to_cmake_generator[generator_alias]]
             elif generator_alias in ['umake', 'mgwmake', 'ninja']:
                 cmake_argv = ['-G', self._dict_to_cmake_generator[generator_alias],
-                              '-DCMAKE_BUILD_TYPE=' + self._dict_to_cmake_config[cfg]]
+                              '-DCMAKE_BUILD_TYPE:STRING=' + self._dict_to_cmake_config[cfg]]
                 if compiler_info.is_cross_compiler():
-                    cmake_argv.append('-DCMAKE_TOOLCHAIN_FILE=' + compiler_info.cmake_toolchain_file)
+                    cmake_argv.append('-DCMAKE_TOOLCHAIN_FILE:FILEPATH=' + compiler_info.cmake_toolchain_file)
                 else:
                     if compiler_info.cmake_cxx_compiler:
-                        cmake_argv.append('-DCMAKE_CXX_COMPILER=' + compiler_info.cmake_cxx_compiler)
+                        cmake_argv.append('-DCMAKE_CXX_COMPILER:FILEPATH=' + compiler_info.cmake_cxx_compiler)
                     if compiler_info.cmake_c_compiler:
-                        cmake_argv.append('-DCMAKE_C_COMPILER=' + compiler_info.cmake_c_compiler)
+                        cmake_argv.append('-DCMAKE_C_COMPILER:FILEPATH=' + compiler_info.cmake_c_compiler)
             if cmake_argv_optional:
                 # Add any additional arguments to the cmake command line.
                 cmake_argv.extend(cmake_argv_optional)
             if lnk_variant == 'shared':
-                cmake_argv.append('-DBUILD_SHARED_LIBS=1')
+                cmake_argv.append('-DBUILD_SHARED_LIBS:BOOL=ON')
             if self._is_multi_configuration_generator():
                 cmake_config_types = [self._dict_to_cmake_config[x] for x in self._default_config_types]
                 for b_cfg in build_configs:
                     if b_cfg not in self._default_config_types:
                         cmake_config_types.append(self._dict_to_cmake_config[b_cfg])
-                cmake_argv.append('-DCMAKE_CONFIGURATION_TYPES=' + ';'.join(cmake_config_types))
+                cmake_argv.append('-DCMAKE_CONFIGURATION_TYPES:STRING=' + ';'.join(cmake_config_types))
             # cmake_argv.append(self._top_dir)
             # print("launch_config(): cmake_args", cmake_argv)
             # print("build dir:", b_dir)
             # print("top dir:", self._top_dir)
-            if (not self._sys_info.is_windows()) and (ver.version_compare(self._cmake_finder.get_cmake_version(), (3, 13, 0)) >= 0):
-                # Not done for windows yet avoiding potential issues with command line length limits.
+            if ver.version_compare(self._cmake_finder.get_cmake_version(), (3, 13, 0)) >= 0:
                 cmake_argv.extend(['-S', self._top_dir, '-B', b_dir])
                 retv = self.launch_cmake(cmake_argv)
             else:
@@ -410,9 +433,11 @@ class CMakeLauncher(object):
         elif self._sys_info.get_platform() == 'macosx':
             generator_alias = 'xcode'
         elif self._sys_info.get_platform() == 'windows':
-            # e.g. 14.1, 14.0, 12.0 etc.
+            # e.g. 14.2, 14.1, 14.0, 12.0 etc.
             bb_vs_latest_version = self._msvc_registry.get_latest_version()
-            if ver.version_compare(bb_vs_latest_version, (14,1)) == 0:
+            if ver.version_compare(bb_vs_latest_version, (14, 2)) == 0:
+                generator_alias = 'vs16'
+            elif ver.version_compare(bb_vs_latest_version, (14, 1)) == 0:
                 generator_alias = 'vs15'
             else:
                 generator_alias = 'vs' + str(bb_vs_latest_version[0])
@@ -565,8 +590,20 @@ class CMakeLauncher(object):
                 self._add_cmake_build_tool_options(cmake_argv, ['-parallelizeTargets', '-jobs', str(build_jobs)])
 
     def _add_cmake_build_verbosity_option(self, cmake_argv, generator_alias, verbosity_level):
-        if generator_alias.startswith('vs'):
-            self._add_cmake_build_tool_options(cmake_argv, ['/verbosity:' + verbosity_level])
+        if verbosity_level == 'cmake':  # use cmake's own '-v' flag instead of a build tool verbosity switch
+            cmake_version = self._cmake_finder.get_cmake_version()
+            if ver.version_compare(cmake_version, (3, 14)) >= 0:  # for older cmake versions cmake_argv is left unchanged
+                # Not added via self._add_cmake_build_tool_options(cmake_argv, ['-v']):
+                # -v is a cmake option and not a build tool option and therefore
+                # it has to be inserted left of the '--' separator when one is present.
+                if '--' in cmake_argv:
+                    index = cmake_argv.index('--')  # position of the first '--' in the argument list
+                    cmake_argv.insert(index, '-v')
+                else:
+                    cmake_argv.append('-v')  # no '--' present, so '-v' simply goes to the end
+        else:
+            if generator_alias.startswith('vs'):  # '/verbosity:<level>' is only passed for 'vs*' generator aliases
+                self._add_cmake_build_tool_options(cmake_argv, ['/verbosity:' + verbosity_level])
 
     def _add_cmake_build_tool_options(self, cmake_argv, build_tool_options):
         if not build_tool_options:
diff --git a/cmake/CMakeBuild/bin/pyhhi/build/common/bldtools.py b/cmake/CMakeBuild/bin/pyhhi/build/common/bldtools.py
index 7db8ae7f1e1f4497ae2203e7114d1ca0578d5c02..bbb6f70b925c50470b04c637fae4faa5f16371b0 100755
--- a/cmake/CMakeBuild/bin/pyhhi/build/common/bldtools.py
+++ b/cmake/CMakeBuild/bin/pyhhi/build/common/bldtools.py
@@ -23,33 +23,51 @@ class MsvcRegistry(object):
         def __init__(self):
             self._logger = logging.getLogger(__name__)
             self._sys_info = system.SystemInfo()
-            self._supported_msvc_versions = ['14.1', '14.0', '12.0', '11.0', '10.0']
+            self._supported_msvc_versions = ['14.2', '14.1', '14.0', '12.0', '11.0', '10.0']
             program_dir = self._sys_info.get_program_dir('x86')
-            self._msvc_install_dir_dict = {'14.1': [os.path.join(program_dir, "Microsoft Visual Studio", '2017', 'Enterprise', 'VC'),
-                                                    os.path.join(program_dir, "Microsoft Visual Studio", '2017', 'Professional', 'VC'),
-                                                    os.path.join(program_dir, "Microsoft Visual Studio", '2017', 'Community', 'VC')],
-                                           '14.0': [os.path.join(program_dir, "Microsoft Visual Studio 14.0", 'VC')],
+            # VS2019, VS2017 come with a locator tool vswhere to search for the installation directory.
+            # The dictionary _msvc_install_dir_dict will be augmented with keys 14.2 and 14.1 by method _do_inventory_vc14x().
+            self._msvc_install_dir_dict = {'14.0': [os.path.join(program_dir, "Microsoft Visual Studio 14.0", 'VC')],
                                            '12.0': [os.path.join(program_dir, "Microsoft Visual Studio 12.0", 'VC')],
                                            '11.0': [os.path.join(program_dir, "Microsoft Visual Studio 11.0", 'VC')],
                                            '10.0': [os.path.join(program_dir, "Microsoft Visual Studio 10.0", 'VC')]}
-
             # a list of sorted version tuples identifying the installed MSVC products
             self._installed_msvc_versions = []
             # key = msvc_version, value = full path of vcvarsall.bat
             self._compiler_command_dict = {}
-            # key = msvc_version, value = options to be passed to the setup command; e.g. -vcvars_ver=14.0
+            # key = msvc_version, value = options to be passed to the setup command; e.g. -vcvars_ver=14.0, -vcvars_ver=14.1x
             self._compiler_command_option_dict = {}
             # key = msvc_version, value = vc version
             self._compiler_version_dict = {}
             # key = msvc_version, value = True/False
             self._is_vs2017_toolset_dict = {}
+            # key = msvc_version, value = True/False; e.g. '14.1' -> True indicates 14.1 is an alternative toolset installed with vs2019.
+            self._is_vs2019_toolset_dict = {}
+            # clear information on alternative toolset upfront
+            for version in self._supported_msvc_versions:
+                self._is_vs2017_toolset_dict[version] = False
+                self._is_vs2019_toolset_dict[version] = False
 
             if self._logger.isEnabledFor(logging.DEBUG):
                 self._logger.debug("performing in-depth VS inventory for debugging.")
                 self._do_inventory_winreg()
-            # Update VS2017 installation paths via vswhere.exe
-            self._do_inventory_vc141()
+            vswhere = self._find_vswhere()
+            if vswhere:
+                # Update VS2019 installation paths via vswhere.exe
+                self._do_inventory_vc14x('14.2', vswhere)
+                # Update VS2017 installation paths via vswhere.exe
+                self._do_inventory_vc14x('14.1', vswhere)
+            else:
+                pass
             self._do_inventory()
+            self._dump_inventory()
+
+        def _dump_inventory(self):
+            """Log each installed MSVC version with its CL version and setup command (debug level only)."""
+            if not self._logger.isEnabledFor(logging.DEBUG):
+                return
+            for msvc_key in (ver.version_tuple_to_str(v) for v in self._installed_msvc_versions):
+                self._logger.debug("found MSVC version {}, CL version {}, setup={}".format(msvc_key, ver.version_tuple_to_str(self._compiler_version_dict[msvc_key]), self._compiler_command_dict[msvc_key]))
 
         def get_compiler_command(self, version=None):
             if version is None:
@@ -87,6 +105,14 @@ class MsvcRegistry(object):
                 return self._is_vs2017_toolset_dict[version_str]
             return False
 
+        def is_vs2019_toolset(self, version):
+            """Return True if the installed MSVC version is flagged as an alternative VS 2019 toolset."""
+            # A version that is not installed is never reported as a VS 2019 toolset.
+            if not self.is_version_installed(version):
+                return False
+            # Versions without an entry in the dictionary are reported as False.
+            return self._is_vs2019_toolset_dict.get(ver.version_tuple_to_str(version), False)
+
         def _do_inventory(self):
             for version in self._supported_msvc_versions:
                 if version not in self._msvc_install_dir_dict:
@@ -98,13 +124,18 @@ class MsvcRegistry(object):
                     cl_cmd = self._find_cl_cmd(vc_dir, version)
                     if cl_cmd:
                         self._logger.debug("found VC compiler %s", cl_cmd)
-                        if version in ['14.1']:
+                        if version in ['14.2', '14.1']:
                             setup_cmd = os.path.normpath(os.path.join(os.path.dirname(cl_cmd), '..', '..', '..', '..', '..', '..', 'Auxiliary', 'Build', 'vcvarsall.bat'))
                         elif version in ['14.0']:
                             if os.path.exists(os.path.join(vc_dir, '..', 'Common7', 'IDE', 'devenv.exe')):
                                 self._logger.debug("found VS 2015 IDE installed.")
                                 setup_cmd = os.path.join(vc_dir, 'vcvarsall.bat')
-                                self._is_vs2017_toolset_dict[version] = False
+                            elif '14.2' in self._compiler_command_dict:
+                                # We've got 14.0 as an alternative VS 2019 toolset.
+                                self._logger.debug("found msvc-14.0 installed as an alternative VS 2019 toolset.")
+                                setup_cmd = self._compiler_command_dict['14.2']
+                                self._is_vs2019_toolset_dict[version] = True
+                                self._compiler_command_option_dict[version] = '-vcvars_ver=14.0'
                             elif '14.1' in self._compiler_command_dict:
                                 # We've got 14.0 as an alternative VS 2017 toolset.
                                 self._logger.debug("found msvc-14.0 installed as an alternative VS 2017 toolset.")
@@ -119,6 +150,19 @@ class MsvcRegistry(object):
                     cl_version = self._query_msvc_compiler_version(cl_cmd)
                     self._compiler_command_dict[version] = setup_cmd
                     self._compiler_version_dict[version] = cl_version
+                    if (version == '14.2') and ('14.1' not in self._msvc_install_dir_dict):
+                        # No separate 14.1 install directory is registered: search the 14.2 VC directory for a side-by-side vc141 toolset.
+                        self._logger.debug("searching for alternative VS2019 toolset vc141.")
+                        vc_dir = self._msvc_install_dir_dict[version][0]  # assignment; the former 'in' expression was a discarded membership test
+                        setup_cmd = self._compiler_command_dict['14.2']  # the alternative toolset reuses the 14.2 setup command
+                        cl_cmd = self._find_cl_cmd(vc_dir, '14.1')
+                        if cl_cmd:
+                            self._logger.debug("found alternative VC compiler {}".format(cl_cmd))
+                            cl_version = self._query_msvc_compiler_version(cl_cmd)
+                            self._compiler_command_dict['14.1'] = setup_cmd
+                            self._compiler_version_dict['14.1'] = cl_version
+                            self._compiler_command_option_dict['14.1'] = '-vcvars_ver=14.1x'  # option to be passed to the setup command
+                            self._is_vs2019_toolset_dict['14.1'] = True  # marks 14.1 as an alternative toolset installed with VS 2019
 
             msvc_version_list = []
             for version in self._compiler_version_dict:
@@ -127,32 +171,36 @@ class MsvcRegistry(object):
                 self._installed_msvc_versions = ver.version_list_sort(msvc_version_list)
                 self._installed_msvc_versions.reverse()
                 # print("sorted msvc versions: ", self._installed_msvc_versions)
-                for version in self._installed_msvc_versions:
-                    version_str = ver.version_tuple_to_str(version)
-                    if version_str in ['14.1']:
-                        self._is_vs2017_toolset_dict[version_str] = True
-                    elif version_str not in self._is_vs2017_toolset_dict:
-                        self._is_vs2017_toolset_dict[version_str] = False
 
         def _find_cl_cmd(self, vc_inst_dir, version_str):
             cl_cmd = None
-            if version_str in ['14.1']:
+            if version_str in ['14.2', '14.1']:  # these versions keep cl.exe below <vc_inst_dir>/Tools/MSVC/<version>/
                 msvc_dir = os.path.join(vc_inst_dir, 'Tools', 'MSVC')
                 if os.path.exists(msvc_dir):
                     version_dir_list = [ver.version_tuple_from_str(x) for x in os.listdir(msvc_dir) if re.match(r'[0-9.]+$', x)]
                     if version_dir_list:
                         version_dir_list = ver.version_list_sort(version_dir_list)
                         version_dir_list.reverse()
+                        # VS2019 installs toolset v141 side-by-side in a folder named '14.16.27023', toolset v142 is
+                        # installed in a folder named '14.20.27508'; the minor version selects the matching folders.
                         for version in version_dir_list:
+                            if (version_str == '14.2') and (version[1] >= 30):  # skip folders with minor version >= 30 when searching for 14.2
+                                self._logger.debug("ignoring cl installation folder: {}".format(os.path.join(msvc_dir, ver.version_tuple_to_str(version))))
+                                continue
+                            if (version_str == '14.1') and (version[1] >= 20):  # skip folders with minor version >= 20 when searching for 14.1
+                                self._logger.debug("ignoring cl installation folder: {}".format(os.path.join(msvc_dir, ver.version_tuple_to_str(version))))
+                                continue
                             cl_cmd = os.path.join(msvc_dir, ver.version_tuple_to_str(version), 'bin', 'HostX64', 'x64', 'cl.exe')
                             if os.path.exists(cl_cmd):
-                                return cl_cmd
+                                break  # first existing cl.exe in the reversed list wins; leave the loop so the result is logged below
                             else:
                                 cl_cmd = None
             else:
                 cl_cmd = os.path.join(vc_inst_dir, 'bin', 'amd64', 'cl.exe')
                 if not os.path.exists(cl_cmd):
                     cl_cmd = None
+            if cl_cmd:  # log the compiler path only when one was found
+                self._logger.debug("found cl: {}".format(cl_cmd))
             return cl_cmd
 
         def _query_msvc_compiler_version(self, cl_cmd):
@@ -208,39 +256,46 @@ class MsvcRegistry(object):
                         continue
             return vc_install_dir_dict
 
-        def _do_inventory_vc141(self):
-            vswhere = self._find_vswhere()
-            vc_dir_fnd = False
-            if vswhere:
-                self._logger.debug("found VS2017 locator: %s", vswhere)
-                try:
-                    vswhere_argv = [vswhere, '-latest']
-                    # vswhere_argv.extend(['-products', 'Enterprise'])
-                    # vswhere_argv.extend(['-products', 'Professional'])
-                    # vswhere_argv.extend(['-products', 'Community'])
-                    vswhere_argv.extend(['-products', '*'])
-                    vswhere_argv.extend(['-requires', 'Microsoft.VisualStudio.Component.VC.Tools.x86.x64'])
-                    vswhere_argv.extend(['-property', 'installationPath'])
-                    vswhere_argv.extend(['-version', '[15.0,16.0)'])
-                    retv = subprocess.check_output(vswhere_argv, universal_newlines=True).rstrip()
-                    if retv != '':
-                        self._logger.debug("VS2017 install path: %s", retv)
-                        vc_dir = os.path.join(retv, 'VC')
-                        if os.path.exists(vc_dir):
-                            self._logger.debug("VS2017 VC install path: %s", vc_dir)
-                            self._msvc_install_dir_dict['14.1'] = [vc_dir]
-                            vc_dir_fnd = True
-                    else:
-                        self._logger.debug("VS2017 install path: <none>")
-                except subprocess.CalledProcessError:
-                    self._logger.debug("VS2017 locator call failed for some reason.")
+        def _do_inventory_vc14x(self, msvc_version_str, vswhere=None):  # registers the VC directory of VS2019 ('14.2') or VS2017 ('14.1') found via vswhere
+            if msvc_version_str == '14.2':
+                vswhere_version_expr = '[16.0,17.0)'  # value passed to the vswhere option -version
+                vs_alias_str = 'VS2019'  # used in the log messages of this method
+            elif msvc_version_str == '14.1':
+                vswhere_version_expr = '[15.0,16.0)'
+                vs_alias_str = 'VS2017'
+            else:
+                assert False  # only '14.2' and '14.1' are handled by this method
+            if vswhere is None:
+                vswhere = self._find_vswhere()  # no locator passed by the caller, search for it here
+            if vswhere is None:
+                self._logger.debug("{0} locator vswhere.exe not found, {0} detection disabled.".format(vs_alias_str))
+                return
             else:
-                self._logger.debug("VS2017 locator vswhere.exe not found, VS2017 detection disabled.")
+                self._logger.debug("found {} locator: {}".format(vs_alias_str, vswhere))
+            vc_dir_fnd = False  # set to True once an existing VC directory has been registered
+            try:
+                vswhere_argv = [vswhere, '-latest']
+                # vswhere_argv.extend(['-products', 'Enterprise'])
+                # vswhere_argv.extend(['-products', 'Professional'])
+                # vswhere_argv.extend(['-products', 'Community'])
+                vswhere_argv.extend(['-products', '*'])
+                vswhere_argv.extend(['-requires', 'Microsoft.VisualStudio.Component.VC.Tools.x86.x64'])
+                vswhere_argv.extend(['-property', 'installationPath'])
+                vswhere_argv.extend(['-version', vswhere_version_expr])
+                retv = subprocess.check_output(vswhere_argv, universal_newlines=True).rstrip()  # vswhere output with trailing whitespace removed
+                if retv != '':
+                    self._logger.debug("{} install path: {}".format(vs_alias_str, retv))
+                    vc_dir = os.path.join(retv, 'VC')
+                    if os.path.exists(vc_dir):
+                        self._logger.debug("{} VC install path: {}".format(vs_alias_str, vc_dir))
+                        self._msvc_install_dir_dict[msvc_version_str] = [vc_dir]  # single-element list keyed by the MSVC version string
+                        vc_dir_fnd = True
+                else:
+                    self._logger.debug("{} install path: <none>".format(vs_alias_str))
+            except subprocess.CalledProcessError:  # only this exception type is handled here; the failure is logged and not re-raised
+                self._logger.debug("{} vswhere locator call failed for some reason.".format(vs_alias_str))
             if not vc_dir_fnd:
-                self._logger.debug("VS2017 VC not found, VS2017 detection disabled.")
-                # Disable VS2017 detection by path?
-                if '14.1' in self._msvc_install_dir_dict:
-                    self._msvc_install_dir_dict.pop('14.1')
+                self._logger.debug("{0} VC not found, {0} detection disabled.".format(vs_alias_str))
 
         def _find_vswhere(self):
             vswhere_prog = None
@@ -264,68 +319,6 @@ class MsvcRegistry(object):
         return getattr(MsvcRegistry.instance, item)
 
 
-class BjamToolset(object):
-    def __init__(self, sys_info, bb_version=None):
-        self._logger = logging.getLogger(__name__)
-        self._bjam_toolset_build_script = None
-
-        if sys_info.is_linux():
-            self._bjam_toolset = 'gcc'
-        elif sys_info.is_macosx():
-            self._bjam_toolset = 'darwin'
-        elif sys_info.is_windows():
-            # On windows the Boost.Build version is required as vsXXXX is only supported by Boost.Build x.y.z or higher.
-            assert bb_version is not None
-            self._msvc_registry = MsvcRegistry()
-
-            # We have to constrain the search for the latest msvc because the bjam build scripts must have support for it.
-            # e.g. vc14 works only for 1.59.0 or higher.
-            if ver.version_compare(bb_version, (1, 50, 0)) < 0:
-                max_msvc_version = (10, 0)
-            elif ver.version_compare(bb_version, (1, 55, 0)) < 0:
-                max_msvc_version = (11, 0)
-            elif ver.version_compare(bb_version, (1, 59, 0)) < 0:
-                max_msvc_version = (12, 0)
-            elif ver.version_compare(bb_version, (1, 64, 0)) < 0:
-                max_msvc_version = (14, 0)
-            elif ver.version_compare(bb_version, (1, 64, 0)) == 0:
-                # This is Boost.Build 1.64.0 or higher.
-                # Since msvc-14.1 is supported by 1.64.0 but building bjam requires a suitable VC command prompt or patched
-                # bjam build scripts, a previous msvc-x.y is preferred.
-                max_msvc_version = None
-                for v in ['14.0', '12.0', '11.0', '10.0']:
-                    if self._msvc_registry.is_version_installed(ver.version_tuple_from_str(v)):
-                        max_msvc_version = ver.version_tuple_from_str(v)
-                        break
-            else:
-                # Allow vc141 for 1.65.0 and higher by default, the toolset detection vswhere is now available.
-                max_msvc_version = None
-
-            # mingw does not work the same way as msvc when launching helper scripts or programs and
-            # is not used to build bjam/b2.
-            msvc_version = self._msvc_registry.get_latest_version(max_msvc_version)
-            msvc_version_str = ver.version_tuple_to_str(msvc_version)
-            self._bjam_toolset = 'msvc-' + msvc_version_str
-
-            # map the msvc toolset spec into a string supported by the bjam build script build.bat
-            if msvc_version[1] > 0:
-                # vc141
-                self._bjam_toolset_build_script = 'vc' + str(msvc_version[0])+ str(msvc_version[1])
-            else:
-                # vc14, vc12, vc11, vc10
-                self._bjam_toolset_build_script = 'vc' + str(msvc_version[0])
-        else:
-            raise Exception('Unknown platform detected, please contact technical support.')
-        if self._bjam_toolset_build_script is None:
-            self._bjam_toolset_build_script = self._bjam_toolset
-
-    def get_bjam_toolset(self, build_script_format=False):
-        if build_script_format:
-            return self._bjam_toolset_build_script
-        else:
-            return self._bjam_toolset
-
-
 class Toolset(object):
 
     class PlatformInfo(object):
@@ -463,6 +456,8 @@ class Toolset(object):
         if self._toolset.startswith('msvc'):
             if self._msvc_registry.is_vs2017_toolset(self._version):
                 s += "VS 2017 toolset!\n"
+            if self._msvc_registry.is_vs2019_toolset(self._version):
+                s += "VS 2019 toolset!\n"
 
         s += "platform(s):\n"
         for platform_info in self._platform_info:
diff --git a/cmake/CMakeBuild/bin/pyhhi/build/common/cmbldver.py b/cmake/CMakeBuild/bin/pyhhi/build/common/cmbldver.py
index d065c13a0878ef863fceb52428fd41791ee74447..ea821ef1046d573193c3c6ef4e0843924c998651 100755
--- a/cmake/CMakeBuild/bin/pyhhi/build/common/cmbldver.py
+++ b/cmake/CMakeBuild/bin/pyhhi/build/common/cmbldver.py
@@ -4,4 +4,4 @@
 # Any manual changes here will be overridden by the next build.
 #-------------
 
-CMAKE_BUILD_VERSION_STR = "3.13.0.2"
+CMAKE_BUILD_VERSION_STR = "3.14.4.4"
diff --git a/cmake/CMakeBuild/bin/pyhhi/build/common/system.py b/cmake/CMakeBuild/bin/pyhhi/build/common/system.py
index 69b02ea583eccf03b5874e87fad9041468c27a9e..b51f0fc6c2d2826e1f2d988592c130d36013d6db 100755
--- a/cmake/CMakeBuild/bin/pyhhi/build/common/system.py
+++ b/cmake/CMakeBuild/bin/pyhhi/build/common/system.py
@@ -98,18 +98,18 @@ class SystemInfo(object):
                     
                 if self._os_arch == 'x86_64':
                     if self._python_arch == 'x86':
-                        self._program_dir = os.getenv('PROGRAMW6432')
-                        self._programx86_dir = os.getenv('PROGRAMFILES')
+                        self._program_dir = os.path.normpath(os.getenv('PROGRAMW6432'))
+                        self._programx86_dir = os.path.normpath(os.getenv('PROGRAMFILES'))
                     else:
-                        self._program_dir = os.getenv('PROGRAMFILES')
-                        self._programx86_dir = os.getenv('PROGRAMFILES(X86)')
+                        self._program_dir = os.path.normpath(os.getenv('PROGRAMFILES'))
+                        self._programx86_dir = os.path.normpath(os.getenv('PROGRAMFILES(X86)'))
                     assert self._programx86_dir is not None
                 elif self._os_arch == 'x86':
-                    self._program_dir = os.getenv('PROGRAMFILES')
+                    self._program_dir = os.path.normpath(os.getenv('PROGRAMFILES'))
                 else:
                     assert False
                 assert self._program_dir is not None
-                self._program_data_dir = os.getenv('PROGRAMDATA')
+                self._program_data_dir = os.path.normpath(os.getenv('PROGRAMDATA'))
 
                 if self._windows_msys:
                     pass
@@ -315,8 +315,12 @@ class SystemInfo(object):
         def get_path(self):
             return self._search_path
 
-        def get_home_dir(self):
-            return self._home_dir
+        def get_home_dir(self, native=False):
+            if self.is_windows_msys() and native:
+                home_dir = os.path.normpath(os.path.expandvars('$USERPROFILE'))
+            else:
+                home_dir = self._home_dir
+            return home_dir
 
         def get_default_proj_home_dir(self):
             return self._default_proj_home_dir
@@ -339,7 +343,7 @@ class SystemInfo(object):
 
         def get_short_path(self, fpath):
             if self.is_windows():
-                fpath = self.get_short_path_win(fpath)
+                fpath = os.path.normpath(self.get_short_path_win(fpath))
             return fpath
 
         def get_short_path_win(self, fpath):
@@ -440,13 +444,13 @@ class SystemInfo(object):
             # make sure the user's home directory exists
             if not os.path.exists(home_dir):
                 raise Exception('home directory "' + home_dir + '" does not exist.')
-            self._home_dir = home_dir
+            self._home_dir = os.path.normpath(home_dir)
 
         def _query_default_proj_home_dir(self):
             if 'PROJ_HOME' in os.environ:
-                proj_home_dir = os.path.expandvars('$PROJ_HOME')
+                proj_home_dir = os.path.normpath(os.path.expandvars('$PROJ_HOME'))
             else:
-                proj_home_dir = os.path.join(self.get_home_dir(), 'projects')
+                proj_home_dir = os.path.join(self.get_home_dir(native=True), 'projects')
             if os.path.exists(proj_home_dir):
                 self._default_proj_home_dir = proj_home_dir
             else:
@@ -459,7 +463,8 @@ class SystemInfo(object):
                 self._search_path.append(util.normalize_path(dir))
 
         def _query_desktop_dir(self):
-            home_dir = self.get_home_dir()
+            # MSYS has its own environment but Desktop comes from the native windows home.
+            home_dir = self.get_home_dir(native=True)
             desktop_dir = os.path.join(home_dir, 'Desktop')
             if os.path.exists(desktop_dir):
                 self._desktop_dir = desktop_dir
diff --git a/cmake/CMakeBuild/cmake/modules/BBuildEnv.cmake b/cmake/CMakeBuild/cmake/modules/BBuildEnv.cmake
index 202c7e309cea573fcd40af14889ffbbff742eff5..07532ba3ef188edb7a1f52d08434ff82341b0eab 100644
--- a/cmake/CMakeBuild/cmake/modules/BBuildEnv.cmake
+++ b/cmake/CMakeBuild/cmake/modules/BBuildEnv.cmake
@@ -16,34 +16,30 @@ Unless explicitly disabled by configuration option ``BBuildEnv_EXCLUDE_MODULES``
 module ``BBuildEnv`` loads the following submodules to provide additional support 
 for Boost, Qt5, OpenCV, file downloads, MinGW and CPack:
 
-- :module:`BBuildEnvAddProject` provides macros and functions to add standard
-  subproject like console applications, libraries, samples, UTF tests and Qt
-  applications to a standard workspace. 
-- :module:`BBuildEnvBoost` adds a few utility functions and macros helping to 
-  use locally built Boost libraries. 
-- :module:`BBuildEnvOpenCV`
-- :module:`BBuildEnvQt5`
-- :module:`BBuildEnvDownload`
-- :module:`BBuildEnvVersionUtil`
-- :module:`BBuildEnvCPack` adds a few utility functions helping to create binary
-  distribution packages.   
-- :module:`BBuildEnvMingw` adds helper functions to copy MinGW runtime DLLs.
-
-
-Reserved Identifiers
-^^^^^^^^^^^^^^^^^^^^
-
-Avoiding name clashes in CMakeLists.txt or project specific CMake files all 
-projects including module ``BBuildEnv``, or any of its submodules, are advised 
-not to use CMake variables, functions or macros starting with::
-
-  BBuildEnv, _BBuildEnv, _bb_, bb_, BB_, _BB_
-
-Users may use variables starting with ``BBuildEnv_<var>`` only to configure the 
-behavior of ``BuildEnv`` modules or submodules or evaluate properties of loaded 
-``BuildEnv`` modules or submodules exposed through documented variables 
-``BBuildEnv_<var>``. 
-
+==============================   ===========================================================
+Module                           Description
+==============================   ===========================================================
+:module:`BBuildEnvAddProject`    Macros and functions to add standard
+                                 subprojects like console applications, libraries, samples,
+                                 UTF tests and Qt applications to a standard workspace
+:module:`BBuildEnvGitSvn`        Utility functions to support Git to SVN interoperability.                                   
+:module:`BBuildEnvDownload`      Supports HTTPS downloads of single files at build time
+:module:`BBuildEnvVersionUtil`   Functions to parse version header files
+:module:`BBuildEnvCPack`         Functions helping to create binary distribution packages
+:module:`BBuildEnvBoost`         Macros and functions helping to use locally built Boost 
+                                 libraries. 
+:module:`BBuildEnvOpenCV`        Helper functions to copy OpenCV runtime DLLs
+:module:`BBuildEnvQt5`           Helper functions to copy Qt5 runtime DLLs
+:module:`BBuildEnvMingw`         Helper functions to copy MinGW runtime DLLs on Ubuntu
+==============================   ===========================================================
+
+The following modules are not loaded by default as they provide functionality not needed
+by all main projects.
+
+- :module:`BBuildEnvGit` provides macros and functions to checkout Git repositories at 
+  configuration time to aggregate them into a single build tree.  Similar functionality 
+  is provided by module :module:`FetchContent` with slightly different Git clone and
+  update behavior.
 
 Configuration Options
 ^^^^^^^^^^^^^^^^^^^^^
@@ -51,12 +47,19 @@ Configuration Options
 This module evaluates the following variables at load time allowing users to 
 customize its behavior:
 
-``BBuildEnv_DEBUG``
-  Enable debugging messages.
 ``BBuildEnv_EXCLUDE_MODULES``
   List of submodules to be excluded from loading.  Use ``ALL`` to disable
   loading any submodule.
 
+``BBuildEnv_DEBUG``
+  Enable debugging messages.
+
+``BBuildEnv_USE_LIBRARY_NAME_POSTFIX``
+  A boolean variable enabling a configuration-specific library name postfix, which
+  allows all library or executable variants to be installed in the same directory. It is unset/off by
+  default to provide backward compatibility with earlier releases. If enabled, executable
+  targets will need a configuration postfix as well.
+
 
 How to Use
 ^^^^^^^^^^
@@ -99,12 +102,48 @@ as an svn:external or as a versioned Git subtree:
 Provided Variables
 ^^^^^^^^^^^^^^^^^^
 
+Module ``BBuildEnv`` provides the following output variables 
+which are supposed to be treated readonly:
+
 ``BBuildEnv_VERSION``
   Module's version in decimal dotted format with a maximum of four components.
-
+  
 ``BBuildEnv_MSYS``
   Set to true when using MSYS.
 
+``BBuildEnv_GENERATOR_ALIAS``
+  CMake generator specific build directory. It's a plain name without any path separators.
+  
+  ``umake`` 
+    Unix Makefiles
+ 
+  ``vs16``
+    Microsoft Visual Studio 2019
+    
+  ``vs15``
+    Microsoft Visual Studio 2017
+    
+  ``vs14``
+    Microsoft Visual Studio 2015
+    
+  ``xcode``
+    Xcode generator, switching between different Xcode versions is currently not supported within a single build tree.
+    
+  ``ninja``
+    Ninja generator
+
+``BBuildEnv_<CONFIG>_POSTFIX``
+  Configuration specific postfix strings to support side-by-side installation in the same 
+  directory. 
+
+``BBuildEnv_SHARED_DIR_POSTFIX``
+  A string specific to the shared library configuration to allow for single 
+  output directories or installation directories.  
+
+``BBuildEnv_OUTPUT_DIR_SUFFIX``
+  A generator specific relative path to be used in installation rules to support multiple 
+  generators or compiler versions in combination with the same installation prefix. 
+
 ``BBuildEnv_ROOT_DIR``
   Optional root directory of CMakeBuild customization files. 
 
@@ -151,6 +190,40 @@ Provided Functions and Macros
       Compiler specific flag to enable or disable a warning.
 
 
+.. command:: bb_add_subdirectory
+
+  The ``bb_add_subdirectory()`` macro adds an external in-tree Git subproject 
+  provided variable ``USE_GIT_SUBPROJECTS`` is ON. 
+  The macro silently assumes the subproject is checked out to
+  ``${CMAKE_SOURCE_DIR}/ext/<subproject>``.  If variable ``USE_GIT_SUBPROJECTS`` 
+  is OFF, the macro will invoke :command:`add_subdirectory` for backward compatibility 
+  with SVN repositories and subproject aggregation via SVN externals::
+  
+    bb_add_subdirectory(<subproject>)
+    
+  **Parameters:**
+  
+    ``subproject``
+      A relative path to an in-tree subproject; e.g. ``BoostAddon/src/lib/LoggerLib``
+
+
+.. command:: bb_set_target_output_name
+
+  The ``bb_set_target_output_name`` macro appends a configuration specific postfix to 
+  the output name of executable targets if variable ``BBuildEnv_USE_LIBRARY_NAME_POSTFIX`` 
+  is ON. If applied to library targets, it will change :prop_tgt:`COMPILE_PDB_NAME_<CONFIG>` 
+  for static libraries to align the PDB filename with the library filename. 
+  CMake's postfix machinery does it for linker generated PDB files but not for compiler
+  generated PDB files::
+
+    bb_set_target_output_name( <target> )
+
+  **Parameters:**
+  
+    ``target``
+      An existing target to be modified.
+
+
 .. command:: bb_set_external_dir
 
   The ``bb_set_external_dir()`` function searches for a directory given a 
@@ -158,15 +231,15 @@ Provided Functions and Macros
   a shared folder holding an external project
   without searching any system paths or cross compiler specific paths::
   
-    bb_set_external_dir(<abs_path> <relative_path> [<OPTIONAL>])
+    bb_set_external_dir(<abs_path> <dir> [<OPTIONAL>])
 
   **Parameters:**
 
     ``abs_path``
       Absolute path to ``relative_path`` found in one of the default locations.
 
-    ``relative_path``
-      Path to search for in one of the default locations.  An absolute path 
+    ``dir``
+      Directory to search for in one of the default locations.  An absolute path 
       will be returned as-is.
           
     ``OPTIONAL``
@@ -184,25 +257,26 @@ Provided Functions and Macros
     ``${CMAKE_SOURCE_DIR}/../..``
      
     ``$ENV{HOME}/projects``
-      Ignored on windows host systems.
+      Ignored on native Windows host systems. It is searched when MSYS has been detected
+      or on any non-Windows platform.
      
     ``$ENV{USERPROFILE}/projects``
       Ignored on non-windows host systems.
 
 
-.. command:: bb_add_subdirectory
+Reserved Identifiers
+^^^^^^^^^^^^^^^^^^^^
 
-  The ``bb_add_subdirectory()`` macro adds an external in-tree Git subproject. 
-  The macro silently assumes the subproject is checked out to
-  ``${CMAKE_SOURCE_DIR}/ext/<subproject>``::
-  
-    bb_add_subdirectory(<subproject>)
-    
-  **Parameters:**
-  
-    ``subproject``
-      A relative path to an in-tree subproject; e.g. ``BoostAddon/src/lib/LoggerLib``
+Avoiding name clashes in CMakeLists.txt or project specific CMake files all 
+projects including module ``BBuildEnv``, or any of its submodules, are advised 
+not to use CMake variables, functions or macros starting with::
+
+  BBuildEnv, _BBuildEnv, _bb_, bb_, BB_, _BB_
 
+Users may use variables starting with ``BBuildEnv_<var>`` only to configure the 
+behavior of ``BuildEnv`` modules or submodules or evaluate properties of loaded 
+``BuildEnv`` modules or submodules exposed through documented variables 
+``BBuildEnv_<var>``. 
 
 #]===]
 
@@ -224,6 +298,7 @@ set( _BBuildEnvSubmoduleList
      BBuildEnvBoost 
      BBuildEnvQt5
      BBuildEnvOpenCV 
+     BBuildEnvGitSvn
    )
 
 foreach( _cmod IN LISTS _BBuildEnvSubmoduleList )
@@ -251,6 +326,21 @@ macro( bb_add_subdirectory subdirectory_ )
 endmacro()
 
 
+macro( bb_set_target_output_name target_ )
+  if( BBuildEnv_USE_LIBRARY_NAME_POSTFIX ) 
+    get_target_property( _bb_tmp_target_type ${target_} TYPE )
+    if( _bb_tmp_target_type STREQUAL "EXECUTABLE" )
+      set_target_properties( ${target_} PROPERTIES OUTPUT_NAME_DEBUG ${target_}${CMAKE_DEBUG_POSTFIX} 
+                                        OUTPUT_NAME_RELWITHDEBINFO   ${target_}${CMAKE_RELWITHDEBINFO_POSTFIX} 
+                                        OUTPUT_NAME_MINSIZEREL       ${target_}${CMAKE_MINSIZEREL_POSTFIX} )
+    elseif( MSVC AND (_bb_tmp_target_type STREQUAL "STATIC_LIBRARY" ) )
+      # message( STATUS "${target_} is static, setting COMPILE_PDB_NAME_DEBUG ..." )
+      set_target_properties( ${target_} PROPERTIES COMPILE_PDB_NAME_DEBUG ${target_}${CMAKE_DEBUG_POSTFIX} COMPILE_PDB_NAME_RELWITHDEBINFO ${target_}${CMAKE_RELWITHDEBINFO_POSTFIX} )
+    endif()  
+  endif()
+endmacro()
+
+
 macro( bb_save_find_context fnd_ctx )
   if( CMAKE_CROSSCOMPILING )
     # find_package must be told not to expect the BOOST libraries inside "CMAKE_FIND_ROOT_PATH".
@@ -291,14 +381,38 @@ macro( _bb_get_cxx_compiler_version_major_minor version_major_minor_ )
   string( REGEX REPLACE "([0-9]+)\\.([0-9]+)([0-9.]+)?" "\\1.\\2" ${version_major_minor_} ${CMAKE_CXX_COMPILER_VERSION} )
 endmacro()
 
-macro( bb_get_home_dir home_dir_ )
+
+function( bb_get_home_dir home_dir_ )
+  set( _native FALSE )
+  
+  if( ARGC EQUAL 2 )
+    if( ${ARGV1} STREQUAL "NATIVE" )
+      set( _native TRUE )
+    else()
+      message( FATAL_ERROR "bb_get_home_dir: argument ${ARGV1} not understood." )
+    endif()
+  elseif( ARGC GREATER 2 )
+    message( FATAL_ERROR "bb_get_home_dir: too many arguments specified, expected <home_dir> [NATIVE]." )
+  endif()
+    
   if( CMAKE_HOST_WIN32 )
     # Force forward slashes on Windows
-    file( TO_CMAKE_PATH "$ENV{USERPROFILE}" ${home_dir_} )
+    if( BBuildEnv_MSYS )
+      if( _native )
+        file( TO_CMAKE_PATH "$ENV{USERPROFILE}" _home_dir )
+      else()
+        file( TO_CMAKE_PATH "$ENV{HOME}" _home_dir )
+      endif()
+    else()
+      file( TO_CMAKE_PATH "$ENV{USERPROFILE}" _home_dir )
+    endif()
   else()
-    set( ${home_dir_} "$ENV{HOME}" )
+    set( _home_dir "$ENV{HOME}" )
   endif()
-endmacro()
+  
+  set( ${home_dir_} "${_home_dir}" PARENT_SCOPE )
+  
+endfunction()
 
 
 macro( bb_set_home_dir home_dir_ )
@@ -601,10 +715,14 @@ function( _bb_find_proj_home proj_home_ home_dir_ )
   points to a non-existing directory.
       " )
     endif()
-  elseif( IS_DIRECTORY "${home_dir_}/projects" )
+  elseif( EXISTS "${home_dir_}/projects" )
     set( _proj_home "${home_dir_}/projects" )
-  #else()
-  #  get_filename_component( _proj_home ${CMAKE_SOURCE_DIR}/.. REALPATH )
+  elseif( BBuildEnv_MSYS )
+    # Check for %USERPROFILE%/projects as a fallback when using MSYS.
+    bb_get_home_dir( _home_dir NATIVE )
+    if( EXISTS "${_home_dir}/projects" )
+      set( _proj_home "${_home_dir}/projects" )
+    endif()
   endif()
   if( DEFINED _proj_home )
     set( ${proj_home_} "${_proj_home}" PARENT_SCOPE )
@@ -647,8 +765,15 @@ function( bb_set_external_dir dir_var_ dir_ )
         list( APPEND _search_path "${_dir_norm}" )
       endif()
     endforeach()
-    if( EXISTS "${bb_home_dir}/projects" )
-      list( APPEND _search_path "${bb_home_dir}/projects" )
+    bb_get_home_dir( _home_dir )
+    if( EXISTS "${_home_dir}/projects" )
+      list( APPEND _search_path "${_home_dir}/projects" )
+    endif()
+    if( BBuildEnv_MSYS )
+      bb_get_home_dir( _home_dir NATIVE )
+      if( EXISTS "${_home_dir}/projects" )
+        list( APPEND _search_path "${_home_dir}/projects" )
+      endif()
     endif()
     list( REMOVE_DUPLICATES _search_path )
     foreach( _path IN LISTS _search_path )
@@ -1132,18 +1257,6 @@ macro( bb_build_env_setup )
   _bb_find_proj_home( bb_proj_home "${bb_home_dir}" )
 
   # Add a cmake generator alias
-  # --
-  # Visual Studio 15 2017
-  # Visual Studio 14 2015
-  # Visual Studio 12 2013
-  # Visual Studio 11 2012
-  # Visual Studio 10 2010
-  # --
-  # Xcode
-  # Unix Makefiles
-  # Ninja
-  # MinGW Makefiles
-  # --
   unset( bb_generator_alias )
   if( CMAKE_GENERATOR STREQUAL "Unix Makefiles" )
     set( bb_generator_alias "umake" )
@@ -1169,11 +1282,15 @@ macro( bb_build_env_setup )
   
   # set standard output directories: gcc-5.4/x86_64
   if( DEFINED bb_generator_alias )
+    set( BBuildEnv_GENERATOR_ALIAS "${bb_generator_alias}" )
     set( bb_default_output_dir "${bb_generator_alias}/${bb_toolset_subdir}/${bb_platform_dir}" )
   else()
     set( bb_default_output_dir "${bb_toolset_subdir}/${bb_platform_dir}" )
   endif()
   
+  # BBuildEnv_OUTPUT_DIR_SUFFIX could be a cache variable to make it customizable. 
+  set( BBuildEnv_OUTPUT_DIR_SUFFIX           "${bb_default_output_dir}" )
+    
   # the deploy folder may be used to save installer packages.
   set( bb_deploy_dir "${CMAKE_SOURCE_DIR}/deploy" )
     
@@ -1182,23 +1299,63 @@ macro( bb_build_env_setup )
   endif()
   
   if( BUILD_SHARED_LIBS )
-    set( _bb_shared_suffix "-shared" )
+    set( BBuildEnv_SHARED_DIR_POSTFIX "-shared" )
+  else()
+    unset( BBuildEnv_SHARED_DIR_POSTFIX )
+  endif()
+
+  if( NOT DEFINED BBuildEnv_USE_LIBRARY_NAME_POSTFIX )
+    set( BBuildEnv_USE_LIBRARY_NAME_POSTFIX OFF CACHE BOOL "Enable library name postfix" )
   endif()
+    
+  #set( BBuildEnv_RELEASE_POSTFIX "" )
+  set( BBuildEnv_DEBUG_POSTFIX          "-d" )
+  set( BBuildEnv_RELWITHDEBINFO_POSTFIX "-rd" )
+  set( BBuildEnv_MINSIZEREL_POSTFIX     "-mr" )
   
-  set( CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG          "${CMAKE_SOURCE_DIR}/bin/${bb_default_output_dir}/debug${_bb_shared_suffix}" )
-  set( CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE        "${CMAKE_SOURCE_DIR}/bin/${bb_default_output_dir}/release${_bb_shared_suffix}" )
-  set( CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELWITHDEBINFO "${CMAKE_SOURCE_DIR}/bin/${bb_default_output_dir}/relwithdebinfo${_bb_shared_suffix}" )
-  set( CMAKE_RUNTIME_OUTPUT_DIRECTORY_MINSIZEREL     "${CMAKE_SOURCE_DIR}/bin/${bb_default_output_dir}/minsizerel${_bb_shared_suffix}" )
   
-  set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY_DEBUG          "${CMAKE_SOURCE_DIR}/lib/${bb_default_output_dir}/debug${_bb_shared_suffix}" )
-  set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELEASE        "${CMAKE_SOURCE_DIR}/lib/${bb_default_output_dir}/release${_bb_shared_suffix}" )
-  set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELWITHDEBINFO "${CMAKE_SOURCE_DIR}/lib/${bb_default_output_dir}/relwithdebinfo${_bb_shared_suffix}" )
-  set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY_MINSIZEREL     "${CMAKE_SOURCE_DIR}/lib/${bb_default_output_dir}/minsizerel${_bb_shared_suffix}" )
+  if( BBuildEnv_USE_LIBRARY_NAME_POSTFIX ) 
+    set( CMAKE_DEBUG_POSTFIX                           ${BBuildEnv_DEBUG_POSTFIX} )
+    set( CMAKE_RELWITHDEBINFO_POSTFIX                  ${BBuildEnv_RELWITHDEBINFO_POSTFIX} )
+    set( CMAKE_MINSIZEREL_POSTFIX                      ${BBuildEnv_MINSIZEREL_POSTFIX} )
+
+    set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY                "${CMAKE_SOURCE_DIR}/lib${BBuildEnv_SHARED_DIR_POSTFIX}/${BBuildEnv_OUTPUT_DIR_SUFFIX}" )
+    set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY_DEBUG          "${CMAKE_ARCHIVE_OUTPUT_DIRECTORY}" )
+    set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELEASE        "${CMAKE_ARCHIVE_OUTPUT_DIRECTORY}" )
+    set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELWITHDEBINFO "${CMAKE_ARCHIVE_OUTPUT_DIRECTORY}" )
+    set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY_MINSIZEREL     "${CMAKE_ARCHIVE_OUTPUT_DIRECTORY}" )
+    
+    set( CMAKE_LIBRARY_OUTPUT_DIRECTORY_DEBUG          "${CMAKE_ARCHIVE_OUTPUT_DIRECTORY_DEBUG}" )
+    set( CMAKE_LIBRARY_OUTPUT_DIRECTORY_RELEASE        "${CMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELEASE}" )
+    set( CMAKE_LIBRARY_OUTPUT_DIRECTORY_RELWITHDEBINFO "${CMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELWITHDEBINFO}" )
+    set( CMAKE_LIBRARY_OUTPUT_DIRECTORY_MINSIZEREL     "${CMAKE_ARCHIVE_OUTPUT_DIRECTORY_MINSIZEREL}" )    
+    
+    set( CMAKE_RUNTIME_OUTPUT_DIRECTORY                "${CMAKE_SOURCE_DIR}/bin${BBuildEnv_SHARED_DIR_POSTFIX}/${BBuildEnv_OUTPUT_DIR_SUFFIX}" )
+    set( CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG          "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}" )
+    set( CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE        "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}" )
+    set( CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELWITHDEBINFO "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}" )
+    set( CMAKE_RUNTIME_OUTPUT_DIRECTORY_MINSIZEREL     "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}" )
+    
+  else()
+    # Using CMake's default library name convention which is the same for all configurations.
+    set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY_DEBUG          "${CMAKE_SOURCE_DIR}/lib/${BBuildEnv_OUTPUT_DIR_SUFFIX}/debug${BBuildEnv_SHARED_DIR_POSTFIX}" )
+    set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELEASE        "${CMAKE_SOURCE_DIR}/lib/${BBuildEnv_OUTPUT_DIR_SUFFIX}/release${BBuildEnv_SHARED_DIR_POSTFIX}" )
+    set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELWITHDEBINFO "${CMAKE_SOURCE_DIR}/lib/${BBuildEnv_OUTPUT_DIR_SUFFIX}/relwithdebinfo${BBuildEnv_SHARED_DIR_POSTFIX}" )
+    set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY_MINSIZEREL     "${CMAKE_SOURCE_DIR}/lib/${BBuildEnv_OUTPUT_DIR_SUFFIX}/minsizerel${BBuildEnv_SHARED_DIR_POSTFIX}" )
+    
+    
+    set( CMAKE_LIBRARY_OUTPUT_DIRECTORY_DEBUG          "${CMAKE_ARCHIVE_OUTPUT_DIRECTORY_DEBUG}" )
+    set( CMAKE_LIBRARY_OUTPUT_DIRECTORY_RELEASE        "${CMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELEASE}" )
+    set( CMAKE_LIBRARY_OUTPUT_DIRECTORY_RELWITHDEBINFO "${CMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELWITHDEBINFO}" )
+    set( CMAKE_LIBRARY_OUTPUT_DIRECTORY_MINSIZEREL     "${CMAKE_ARCHIVE_OUTPUT_DIRECTORY_MINSIZEREL}" )    
+    
+    set( CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG          "${CMAKE_SOURCE_DIR}/bin/${BBuildEnv_OUTPUT_DIR_SUFFIX}/debug${BBuildEnv_SHARED_DIR_POSTFIX}" )
+    set( CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE        "${CMAKE_SOURCE_DIR}/bin/${BBuildEnv_OUTPUT_DIR_SUFFIX}/release${BBuildEnv_SHARED_DIR_POSTFIX}" )
+    set( CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELWITHDEBINFO "${CMAKE_SOURCE_DIR}/bin/${BBuildEnv_OUTPUT_DIR_SUFFIX}/relwithdebinfo${BBuildEnv_SHARED_DIR_POSTFIX}" )
+    set( CMAKE_RUNTIME_OUTPUT_DIRECTORY_MINSIZEREL     "${CMAKE_SOURCE_DIR}/bin/${BBuildEnv_OUTPUT_DIR_SUFFIX}/minsizerel${BBuildEnv_SHARED_DIR_POSTFIX}" )    
+  endif()
+    
   
-  set( CMAKE_LIBRARY_OUTPUT_DIRECTORY_DEBUG          "${CMAKE_SOURCE_DIR}/lib/${bb_default_output_dir}/debug${_bb_shared_suffix}" )
-  set( CMAKE_LIBRARY_OUTPUT_DIRECTORY_RELEASE        "${CMAKE_SOURCE_DIR}/lib/${bb_default_output_dir}/release${_bb_shared_suffix}" )
-  set( CMAKE_LIBRARY_OUTPUT_DIRECTORY_RELWITHDEBINFO "${CMAKE_SOURCE_DIR}/lib/${bb_default_output_dir}/relwithdebinfo${_bb_shared_suffix}" )
-  set( CMAKE_LIBRARY_OUTPUT_DIRECTORY_MINSIZEREL     "${CMAKE_SOURCE_DIR}/lib/${bb_default_output_dir}/minsizerel${_bb_shared_suffix}" )
 endmacro( bb_build_env_setup )
 
 #message( STATUS "BBuildEnv.cmake: starting: ${CMAKE_GENERATOR}" )
diff --git a/cmake/CMakeBuild/cmake/modules/BBuildEnvDebug.cmake b/cmake/CMakeBuild/cmake/modules/BBuildEnvDebug.cmake
index 40de4dad5071bdb71e889d7408a789e43bd07585..3f195eb40ab0e4899bb1b9dbef49f4d4e5b6eaef 100644
--- a/cmake/CMakeBuild/cmake/modules/BBuildEnvDebug.cmake
+++ b/cmake/CMakeBuild/cmake/modules/BBuildEnvDebug.cmake
@@ -60,6 +60,8 @@ function( bb_dump_cmake_system_info )
   message( STATUS "CMAKE_PROGRAM_PATH:          ${CMAKE_PROGRAM_PATH}" )  
   message( STATUS "CMAKE_SYSTEM_PROGRAM_PATH:   ${CMAKE_SYSTEM_PROGRAM_PATH}" )
   message( STATUS "CMAKE_GENERATOR:             ${CMAKE_GENERATOR}" )
+  message( STATUS "CMAKE_GENERATOR_PLATFORM:    ${CMAKE_GENERATOR_PLATFORM}" )
+  
   if( CMAKE_CXX_COMPILER_LOADED )
     message( STATUS "CMAKE_CXX_COMPILER_LOADED:   on" )
   endif()
@@ -73,6 +75,7 @@ function( bb_dump_cmake_system_info )
     message( STATUS "CMAKE_CXX_COMPILER_ARCHITECTURE_ID:  ${CMAKE_CXX_COMPILER_ARCHITECTURE_ID}" )
   endif()
   if( MSVC )
+    message( STATUS "CMAKE_VS_PLATFORM_NAME:      ${CMAKE_VS_PLATFORM_NAME}" )
     message( STATUS "CMAKE_VS_PLATFORM_NAME:      ${CMAKE_VS_PLATFORM_NAME}" )
     message( STATUS "CMAKE_VS_PLATFORM_TOOLSET:   ${CMAKE_VS_PLATFORM_TOOLSET}" )
     message( STATUS "MSVC_VERSION:                ${MSVC_VERSION}" )
@@ -119,6 +122,10 @@ endfunction( bb_dump_cmake_system_info )
 
 
 function( bb_dump_target_properties target_ prop1_ )
+  if( NOT TARGET ${target_} )
+    message( WARNING "target ${target_} does not exist." )
+    return()
+  endif()
   set( _prop_list ${prop1_} )
   list( APPEND _prop_list ${ARGN} )
   list( LENGTH _prop_list _prop_list_len )
@@ -126,9 +133,12 @@ function( bb_dump_target_properties target_ prop1_ )
   foreach( _prop ${_prop_list} )
     get_target_property( _prop_value ${target_} ${_prop} )
     if( _prop_value )
-      message( STATUS "bb_dump_target_properties: ${target_}: ${_prop}=${_prop_value}" )
+      message( STATUS "${target_}: ${_prop}=${_prop_value}" )
     endif()  
   endforeach()
   message( STATUS "bb_dump_target_properties: leaving" )
 endfunction()
 
+macro( bb_dump_imported_target_properties target_ )
+  bb_dump_target_properties( ${target_} IMPORTED_LOCATION IMPORTED_IMPLIB INTERFACE_INCLUDE_DIRECTORIES INTERFACE_COMPILE_DEFINITIONS INTERFACE_LINK_LIBRARIES )
+endmacro()
diff --git a/cmake/CMakeBuild/cmake/modules/BBuildEnvVersion.cmake b/cmake/CMakeBuild/cmake/modules/BBuildEnvVersion.cmake
index b4543a8d442aaf20971ec34d2c6bac56f587d6ee..2f5b5ea5b5c9240d99b8b3f764375336438a9db3 100644
--- a/cmake/CMakeBuild/cmake/modules/BBuildEnvVersion.cmake
+++ b/cmake/CMakeBuild/cmake/modules/BBuildEnvVersion.cmake
@@ -5,9 +5,9 @@
 #
 
 set( BBuildEnv_VERSION_MAJOR 3 )
-set( BBuildEnv_VERSION_MINOR 13 ) 
-set( BBuildEnv_VERSION_PATCH 0 ) 
-set( BBuildEnv_VERSION_TWEAK 2 )
+set( BBuildEnv_VERSION_MINOR 14 ) 
+set( BBuildEnv_VERSION_PATCH 4 ) 
+set( BBuildEnv_VERSION_TWEAK 4 )
 
 # BBuildEnv version in decimal dotted format as supported by CMake's version compare operations.
 set( BBuildEnv_VERSION "${BBuildEnv_VERSION_MAJOR}.${BBuildEnv_VERSION_MINOR}.${BBuildEnv_VERSION_PATCH}.${BBuildEnv_VERSION_TWEAK}" ) 
diff --git a/cmake/CMakeBuild/cmake/toolchains/aarch64-linux-gnu-gcc-ubuntu1804.cmake b/cmake/CMakeBuild/cmake/toolchains/aarch64-linux-gnu-gcc-ubuntu1804.cmake
index 9c0ec5c3f62bc2d64d4a18b4adfc2abe63a818d3..3e78dc0815936ddf8c8ada05564b1f51c7110d37 100644
--- a/cmake/CMakeBuild/cmake/toolchains/aarch64-linux-gnu-gcc-ubuntu1804.cmake
+++ b/cmake/CMakeBuild/cmake/toolchains/aarch64-linux-gnu-gcc-ubuntu1804.cmake
@@ -19,7 +19,12 @@ list( APPEND CMAKE_FIND_ROOT_PATH ${ARM_LINUX_SYSROOT} )
 # search headers and libraries in the target environment, search 
 # programs in the host environment
 set( CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER )
-set( CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY )
+
+# Ubuntu/amd64 + foreign architecture arm64
+set( CMAKE_LIBRARY_PATH /usr/lib/${GNU_MACHINE}-linux-gnu )
+set( CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH )
+#set( CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY )
+
 set( CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY )
 
 set( USE_OPENCV_TOOLCHAIN_FLAGS ON )
diff --git a/doc/jvetdoc.cls b/doc/jvetdoc.cls
index bdc22890e30fb89bfc9ac4a60b0b17d920c2af7b..f766488489577b40d4f1401611e89fe51e994dcb 100644
--- a/doc/jvetdoc.cls
+++ b/doc/jvetdoc.cls
@@ -117,7 +117,7 @@
 	\@strutb \it Title: & \@title \\
 	\@strutb \it Status: & \@jvetdocstatus \\
 	\@strutb \it Purpose: & \@jvetdocpurpose \\
-	\@strutb \it Author(s): & %
+	\@strutb \it Editors: & %
 		\setcounter{jvet@author@column}{0}
 		\let\@and\\
 		\renewcommand{\and}{\@and\setcounter{jvet@author@column}{0}}
diff --git a/doc/mainpage.h b/doc/mainpage.h
index efe90264a414e5bca1df89df715511d12fd8c854..69186d6f62bbf41661e8bee5da088368b6f5ef98 100644
--- a/doc/mainpage.h
+++ b/doc/mainpage.h
@@ -24,7 +24,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/doc/software-manual.pdf b/doc/software-manual.pdf
index 34b144cc8b9a980a393cc3c1a4a8bba40a247126..d882b72ad3d0098f52a4e4eaf55fefd37a4f725c 100644
Binary files a/doc/software-manual.pdf and b/doc/software-manual.pdf differ
diff --git a/doc/software-manual.tex b/doc/software-manual.tex
index e816dbe34f6722ddf22932558b173a04cfbcabca..582dcbe4f6685daedad9b725df548eabd84be4b5 100644
--- a/doc/software-manual.tex
+++ b/doc/software-manual.tex
@@ -184,7 +184,6 @@ fontsize=\footnotesize}
 	\email{frank@bossentech.com}
 	\and
 	David Flynn
-	\email{dflynn@blackberry.com}
 	\and
 	Xiang Li
 	\email{xlxiangli@tencent.com}
@@ -206,7 +205,7 @@ fontsize=\footnotesize}
 \maketitle
 \begin{abstract}
 This document is a user manual describing usage of the VTM reference software
-for the VVC project. It applies to version 4.0 of the software.
+for the VVC project. It applies to version 7.1 of the software.
 \end{abstract}
 
 \tableofcontents
@@ -258,8 +257,8 @@ be available in older compilers.
  \thead{Compiler environment} &
  \thead{Versions} \\
 \hline
-MS Visual Studio  & 2015 and 2017 \\
-GCC               & 5.4 and 7.3 \\
+MS Visual Studio  & 2017 and 2019 \\
+GCC               & 5.4, 7.3 and 8.3\\
 Xcode/clang       & latest \\
 \hline
 \end{tabular}
@@ -381,7 +380,7 @@ pacman -S --needed base-devel mingw-w64-i686-toolchain mingw-w64-x86_64-toolchai
 \section{Using the encoder}
 
 \begin{minted}{bash}
-TAppEncoder 	[--help] [-c config.cfg] [--parameter=value]
+EncoderApp [--help] [-li -c config.cfg] [-li --parameter=value]
 \end{minted}
 
 \begin{table}[ht]
@@ -393,6 +392,7 @@ TAppEncoder 	[--help] [-c config.cfg] [--parameter=value]
  \thead{Description} \\
 \hline
 \texttt{--help} & Prints parameter usage. \\
+\texttt{-li} & Applies only to the config file or command line parameter that immediately follows it, defining an encoding option for the i-th layer. If omitted, the configuration file applies to all layers. \\
 \texttt{-c} & Defines configuration file to use.  Multiple configuration files
      may be used with repeated --c options. \\
 \texttt{--}\emph{parameter}\texttt{=}\emph{value}
@@ -467,68 +467,48 @@ predict from a frame with a higher temporal id. If a frame with higher
 temporal IDs is listed among a frame's reference pictures, it is
 not used, but is kept for possible use in future frames.
 
-\item[]\textbf{num_ref_pics_active}: Size of reference picture lists L0
-and L1, indicating how many reference pictures in each direction that
-are used during coding.
+\item[]\textbf{num_ref_pics_active_L0}: Number of reference pictures in list L0
+that are used during coding.
 
-\item[]\textbf{num_ref_pics}: The number of reference pictures kept for
-this frame.  This includes pictures that are used for reference for the
+\item[]\textbf{num_ref_pics_L0}: Size of reference picture list L0.
+This includes pictures that are used for reference for the
 current picture as well as pictures that will be used for reference in
 the future.
 
-\item[]\textbf{reference_pictures}: A space-separated list of
+\item[]\textbf{reference_pictures_L0}: A space-separated list of
 num_ref_pics integers, specifying the POC of the reference pictures
 kept, relative the POC of the current frame. The picture list shall be
-ordered, first with negative numbers from largest to smallest, followed
-by positive numbers from smallest to largest (e.g. \verb|-1 -3 -5 1 3|).
-Note that any pictures not supplied in this list will be discarded and
+ordered according to their intended order in L0.
+Note that any pictures not supplied in this list and in the list of L1 will be discarded and
 therefore not available as reference pictures later.
 
-\item[]\textbf{predict}: Defines the value of the syntax element
-inter_ref_pic_set_prediction_flag. A value of 0 indicates that the
-reference picture set is encoded without inter RPS prediction and the
-subsequent parameters deltaRIdx$-1$, deltaRPS, num_ref_idcs and
-Reference_idcs are ignored and do not need to be present. A value of 1
-indicates that the reference picture set is encoded with inter
-prediction RPS using the subsequent parameters deltaRIdx$-1$, deltaRPS,
-num_ref_idcs and Reference_idcs in the line. A value of 2 indicates that
-the reference picture set is encoded with inter RPS but only the
-deltaRIdx$-1$ parameters is needed. The deltaRPS, num_ref_idcs and
-Reference_idcs values are automatically derived by the encoder based on
-the POC and refPic values of the current line and the RPS pointed to by
-the deltaRIdx$-1$ parameters.
-
-\item[]\textbf{deltaRIdx$-1$}: The difference between the index of the
-curent RPS and the predictor RPS minus 1.
-
-\item[]\textbf{deltaRPS}: The difference between the POC of the
-predictor RPS and POC the current RPS.
-
-\item[]\textbf{num_ref_idcs}: The number of ref_idcs to encode for the
-current RPS.  The value is equal to the value of  num_ref_pics of the
-predictor RPS plus 1.
-
-\item[]\textbf{reference_idcs}: A space-separated list of num_ref_idcs
-integers, specifying the ref idcs of the inter RPS prediction. The value
-of ref_idcs may be 0, 1 or 2 indicating that the reference picture is a
-reference picture used by the current picture, a reference picture used
-for future picture or not a reference picture anymore, respectively. The
-first num_ref_pics of ref_idcs correspond to the Reference pictures in
-the predictor RPS. The last ref_idcs corresponds to the predictor
-picture.
-\end{itemize}
+\item[]\textbf{num_ref_pics_active_L1}: Number of reference pictures in list L1
+that are used during coding.
+
+\item[]\textbf{num_ref_pics_L1}: Size of reference picture list L1.
+This includes pictures that are used for reference for the
+current picture as well as pictures that will be used for reference in
+the future.
+
+\item[]\textbf{reference_pictures_L1}: A space-separated list of
+num_ref_pics_L1 integers, specifying the POC of the reference pictures
+kept, relative the POC of the current frame. The picture list shall be
+ordered according to their intended order in L1.
+Note that any pictures not supplied in this list and in the list of L0 will be discarded and
+therefore not available as reference pictures later.
 
 For example, consider the coding structure of Figure~\ref{fig:gop-example}.
 This coding structure is of size 4. The pictures are listed in decoding
 order. Frame1 shall therefore describe picture with $\textrm{POC}=4$. It
-references picture 0, and therefore has $-4$ as a reference picture.
+references picture 0, and therefore has 4 as a reference picture.
 Similarly, Frame2 has a POC of 2, and since it references pictures 0 and
-4, its reference pictures are listed as \verb|-2 2|. Frame3 is a special
+4, its reference pictures are listed as \verb|2 -2|. Frame3 is a special
 case: even though it only references pictures with POC 0 and 2, it also
 needs to include the picture with POC 4, which must be kept in order to
-be used as a reference picture in the future. The reference picture list
-for Frame3 therefore becomes \verb|-1 1 3|. Frame4 has a POC of 3 and
-its list of reference pictures is \verb|-1 1|.
+be used as a reference picture in the future. Note that picture with POC 4 can be
+included in the L0 or L1. The reference picture list for Frame3 therefore becomes \verb|1 -1 -3|. 
+Frame4 has a POC of 3 and its list of reference pictures is \verb|1 -1|.
+\end{itemize}
 
 \begin{figure}[h]
 \caption{A GOP structure}
@@ -537,27 +517,6 @@ its list of reference pictures is \verb|-1 1|.
 \includegraphics[width=0.7\textwidth]{figures/gop-structure-example}
 \end{figure}
 
-Inter RPS prediction may be used for Frame2, Frame3 and Frame4, hence
-the predict parameter is set to 1 for these frames. Frame2 uses Frame1
-as the predictor hence the deltaRIdx$-1$ is 0.  Similarly for Frame3 and
-Frame4 which use Frame2 and Frame3 as predictors, respectively. The
-deltaRPS is equal to the POC of the predictor minus the POC of the
-current picture, therefore the deltaRPS for Frame2 is $4 -2 = 2$, for
-Frame3 is $2 - 1 = 1$ and for Frame4 is $1 - 3 = -2$.
-
-In Frame2, reference pictures with POC 0 and 2 are used, so the
-reference idcs for Frame2 are \verb|1 1| indicating that the reference
-picture, $-4$, in Frame1 is still a reference picture in Frame2 and
-Frame1 is also a reference picture in Frame2. The reference idcs for
-Frame3 are \verb|1 1 1|. The first and second â€œ1â€s indicating that
-the reference pictures "$-2$ $2$" in Frame2 are still reference pictures in
-Frame3 and the last â€œ1â€ indicating that Frame2 is also a reference
-picture in Frame3. In Frame 4, the reference idcs are \verb|0 1 1 0|.
-The first â€œ0â€ indicates that the reference pictures â€œ-1â€ in Frame 3 is
-no longer a reference picture in Frame4. The next two â€œ1â€s indicate that
-the reference pictures â€œ$1$ $3$â€ are now reference pictures of Frame4.
-The final â€œ0â€ indicates that Frame3 is not a reference picture.
-
 In order to specify this to the encoder, the parameters in
 Table~\ref{tab:gop-example} could be used.
 
@@ -574,25 +533,23 @@ Table~\ref{tab:gop-example} could be used.
  \thead{Frame3} &
  \thead{Frame4} \\
 \hline
-Type                &   P  &    B   &      B   &       B \\
-POC                 &   4  &    2   &      1   &       3 \\
-QPOffset            &   1  &    2   &      3   &       3 \\
-QPOffsetModelOff    & 0.0  &  0.0   &    0.0   &     0.0 \\
-QPOffsetModelScale  & 0.0  &  0.0   &    0.0   &     0.0 \\
-SliceCbQPOffset     &   0  &    0   &      0   &       0 \\
-SliceCrQPOffset     &   0  &    0   &      0   &       0 \\
-QPfactor            & 0.5  &  0.5   &    0.5   &     0.5 \\
-tcOffsetDiv2        &   0  &    1   &      2   &       2 \\  
-betaOffsetDiv2      &   0  &    0   &      0   &       0 \\
-temporal_id         &   0  &    1   &      2   &       2 \\
-num_ref_pics_active &   1  &    1   &      1   &       1 \\
-num_ref_pics        &   1  &    2   &      3   &       2 \\
-reference_pictures  & $-$4 & $-$2 2 & $-$1 1 3 &  $-$1 1 \\
-predict             &   0  &    1   &      1   &       1 \\
-deltaRIdx$-$1       &      &    0   &      0   &       0 \\
-deltaRPS            &      &    2   &      1   &    $-$2 \\
-num_ref_idcs        &      &    2   &      3   &       4 \\
-reference_idcs      &      &  1 1   &  1 1 1   & 0 1 1 0 \\
+Type                   &   P  &    B   &         B   &       B \\
+POC                    &   4  &    2   &         1   &       3 \\
+QPOffset               &   1  &    2   &         3   &       3 \\
+QPOffsetModelOff       & 0.0  &  0.0   &       0.0   &     0.0 \\
+QPOffsetModelScale     & 0.0  &  0.0   &       0.0   &     0.0 \\
+SliceCbQPOffset        &   0  &    0   &         0   &       0 \\
+SliceCrQPOffset        &   0  &    0   &         0   &       0 \\
+QPfactor               & 0.5  &  0.5   &       0.5   &     0.5 \\
+tcOffsetDiv2           &   0  &    1   &         2   &       2 \\  
+betaOffsetDiv2         &   0  &    0   &         0   &       0 \\
+temporal_id            &   0  &    1   &         2   &       2 \\
+num_ref_pics_active_L0 &   1  &    1   &         1   &       1 \\
+num_ref_pics_L0        &   1  &    1   &         1   &       1 \\
+reference_pictures_L0  &   4  &    2   &         1   &       1 \\
+num_ref_pics_active_L1 &   0  &    1   &         1   &       1 \\
+num_ref_pics_L1        &   0  &    1   &         2   &       1 \\
+reference_pictures_L1  &      & $-$2   & $-$1 $-$3   &    $-$1 \\
 \hline
 \end{tabular}
 \end{table}
@@ -605,51 +562,12 @@ line should contain information for one frame, so this
 configuration would be specified as:
 
 \begin{verbatim}
-Frame1: P 4 1 0 0 0.5 0 0 0 1 1 -4 0
-Frame2: B 2 2 0 0 0.5 1 0 1 1 2 -2 2 1 0 2 2 1 1
-Frame3: B 1 3 0 0 0.5 2 0 2 1 3 -1 1 3 1 0 1 3 1 1 1
-Frame4: B 3 3 0 0 0.5 2 0 2 1 2 -1 1 1 0 -2 4 0 1 1 0
+Frame1: P 4 1 0 0 0.5 0 0 0 1 1  4 1 1  4
+Frame2: B 2 2 0 0 0.5 1 0 1 1 1  2 1 1 -2
+Frame3: B 1 3 0 0 0.5 2 0 2 1 1  1 1 2 -1 -3
+Frame4: B 3 3 0 0 0.5 2 0 2 1 1  1 1 1 -1
 \end{verbatim}
 
-The values of deltaRIdx$-1$, deltaRPS, num_ref_idcs and reference
-idcs of Frame$K$ can be derived from the POC value of Frame$_K$ and
-the POC, num_ref_pics and reference_pictures values of Frame$_M$, where
-$K$ is the index of the RPS to be inter coded and the $M$ is the
-index of the reference RPS, as follows.
-
-\setlength{\algomargin}{2em}
-\begin{algorithm}[ht]
-\SetKwData{deltaRIdx}{deltaRIdx}
-\SetKwData{deltaRPS}{deltaRPS}
-\SetKwData{numrefidcs}{num_ref_idcs}
-\SetKwData{numrefpics}{num_ref_pics}
-\SetKwData{referencepictures}{reference_pictures}
-\SetKwData{referenceidcs}{reference_idcs}
-\SetKwData{POC}{POC}
-
-$\deltaRIdx_K - 1  \leftarrow  K - M - 1$ \;
-$\deltaRPS_K       \leftarrow  \POC_M - \POC_K$ \;
-$\numrefidcs_K     \leftarrow  \numrefpics_M + 1$ \;
-
-\For{$j \leftarrow 0$ \KwTo $\numrefpics_M$}{
-	\For{$i \leftarrow 0$ \KwTo $\numrefidcs_K$}{
-		\eIf{$\referencepictures_{M,j} + \deltaRPS_K == \referencepictures_{K,i}$}{
-			\lIf{$\referencepictures_{K,i}$ is used by the current frame}{
-				$\referenceidcs_{K,j} = 1$} \;
-			\lElse{$\referenceidcs_{K,j} = 2$} \;
-		}{
-			$\referenceidcs_K[j] = 0$ \;
-		}
-	}
-}
-
-\tcc{$\referencepictures_{M,\numrefpics_M}$ does not exist and is assumed to be 0}
-\end{algorithm}
-
-Note: The above (automatic) generation of the inter RPS parameter
-values has been integrated into the encoder, and is activated by
-the value of predict $= 2$ followed by the value of deltaRIdx$-1$,
-only, as described above.
 
 
 
@@ -685,7 +603,7 @@ Specifies the output coded bit stream file.
 \Option{ReconFile (-o)} &
 %\ShortOption{-o} &
 \Default{\NotSet} &
-Specifies the output locally reconstructed video file.
+Specifies the output locally reconstructed video file. If more than one layer is encoded (i.e. MaxLayers > 1), a reconstructed file is written for each layer and the layer index is added as suffix to ReconFile. If one or more dots exist in the file name, the layer id is added before the last dot, e.g. 'reconst.yuv' becomes 'reconst0.yuv' for layer id 0, 'reconst' becomes 'reconst0'.
 \\
 
 \Option{SourceWidth (-wdt)}%
@@ -743,12 +661,10 @@ Note: This option has no effect on the decoding process.
 
 \Option{InputBitDepthC}%
 \Option{MSBExtendedBitDepthC}%
-\Option{InternalBitDepthC}%
 \Option{OutputBitDepthC} &
 %\ShortOption{\None} &
 \Default{0}%
 \Default{0}%
-\Default{0}%
 \Default{0} &
 Specifies the various bit-depths for chroma components.  These only need
 to be specified if non-equal luma and chroma bit-depth processing is
@@ -936,10 +852,67 @@ Enables harmonization of Gop first field couple.
 
 \Option{AccessUnitDelimiter} &
 %\ShortOption{\None} &
-\Default{0} &
+\Default{1} &
 Add Access Unit Delimiter NAL units between all Access Units.
 \\
 
+\Option{ScalingRatioHor} &
+%\ShortOption{\None} &
+\Default{1.0} &
+Scaling ratio in horizontal direction for reference picture resampling.
+\\
+
+\Option{ScalingRatioVer} &
+%\ShortOption{\None} &
+\Default{1.0} &
+Scaling ratio in vertical direction for reference picture resampling.
+\\
+
+\Option{FractionNumFrames} &
+%\ShortOption{\None} &
+\Default{1.0} &
+Encode only a fraction of the number of frames specified by FramesToBeEncoded.
+\\
+
+\Option{SwitchPocPeriod} &
+%\ShortOption{\None} &
+\Default{0} &
+POC period at which resolution is changed.
+\\
+
+\Option{UpscaledOutput} &
+%\ShortOption{\None} &
+\Default{0} &
+Picture output options: output upscaled (2), decoded but in full resolution buffer (1) or decoded cropped (0, default) picture for reference picture resampling.
+\\
+
+
+\end{OptionTableNoShorthand}
+
+%%
+%% GOP based temporal filter parameters
+%%
+
+\begin{OptionTableNoShorthand}{GOP based temporal filter parameters}{tab:gop-based-temporal-filter}
+
+\Option{TemporalFilter} &
+%\ShortOption{\None} &
+\Default{false} &
+Enables or disables GOP based temporal filter.
+\\
+\Option{TemporalFilterFutureReference} &
+%\ShortOption{\None} &
+\Default{true} &
+Enables or disables referencing future frames in the GOP based temporal filter. Can be used to disable future referencing for
+low delay configurations.
+\\
+\Option{TemporalFilterStrengthFrame*} &
+%\ShortOption{\None} &
+\Default{} &
+Strength for every * frame in GOP based temporal filter, where * is an integer. E.g. --TemporalFilterStrengthFrame8 0.95 will
+enable GOP based temporal filter at every 8th frame with strength 0.95. Longer intervals override shorter ones when there are
+multiple matches.
+\\
 \end{OptionTableNoShorthand}
 
 %%
@@ -983,10 +956,22 @@ Valid values are: main, high.
 NB: There is currently only limited validation that the encoder configuration complies with the profile, level and tier constraints.
 \\
 
+\Option{SubProfile} &
+%\ShortOption{\None} &
+\Default{0} &
+Indicates interoperability metadata registered as specified by X Recommendation ITU-T T.35.
+\\
+
+\Option{EnableDecodingParameterSet} &
+%\ShortOption{\None} &
+\Default{false} &
+Enables writing of a decoding parameter set. If disabled, no parameter set will be written and the special reserved ID zero will be used in the SPS indicating no constraint.
+\\
+
 \Option{MaxBitDepthConstraint} &
 %\ShortOption{\None} &
 \Default{0} &
-For --profile=main-RExt, specifies the value to use to derive the general_max_bit_depth constraint flags for RExt profiles; when 0, use $\max(InternalBitDepth, InternalBitDepthC)$
+For --profile=main-RExt, specifies the value to use to derive the general_max_bit_depth constraint flags for RExt profiles; when 0, use InternalBitDepth.
 \\
 
 \Option{MaxChromaFormatConstraint} &
@@ -1040,6 +1025,20 @@ Specifies the value of general_frame_only_constraint_flag
 \end{OptionTableNoShorthand}
 
 
+%%
+%% Layer parameters
+%%
+
+\begin{OptionTableNoShorthand}{Layer parameters}{tab:layer}
+\Option{MaxLayers} &
+%\ShortOption{\None} &
+\Default{1} &
+Specifies the value to use to derive the vps_max_layers_minus1 for layered coding
+\\
+
+\end{OptionTableNoShorthand}
+
+
 %%
 %% Unit definition parameters
 %%
@@ -1093,6 +1092,54 @@ Defines the depth of the TU tree for intra CUs.
 Defines the depth of the TU tree for inter CUs.
 \\
 
+\Option{MaxMTTHierarchyDepth} &
+%\ShortOption{\None} &
+\Default{3} &
+Defines the maximum depth of the multi-type tree for inter slices.
+\\
+
+\Option{MaxMTTHierarchyDepthI} &
+%\ShortOption{\None} &
+\Default{3} &
+Defines the maximum depth of the multi-type tree for intra slices.
+\\
+
+\Option{MaxMTTHierarchyDepthISliceC} &
+%\ShortOption{\None} &
+\Default{3} &
+Defines the maximum depth of the multi-type tree in dual tree for chroma components.
+\\
+
+\Option{MaxMTTHierarchyDepthISliceL} &
+%\ShortOption{\None} &
+\Default{3} &
+Defines the maximum depth of the multi-type tree in dual tree for luma component.
+\\
+
+\Option{MinQTChromaISlice} &
+%\ShortOption{\None} &
+\Default{4} &
+Defines the minimum size of the quad tree in dual tree for chroma components.
+\\
+
+\Option{MinQTISlice} &
+%\ShortOption{\None} &
+\Default{8} &
+Defines the minimum size of the quad tree for intra slices.
+\\
+
+\Option{MinQTLumaISlice} &
+%\ShortOption{\None} &
+\Default{8} &
+Defines the minimum size of the quad tree in dual tree for luma component.
+\\
+
+\Option{MinQTNonISlice} &
+%\ShortOption{\None} &
+\Default{8} &
+Defines the minimum size of the quad tree for inter slices.
+\\
+
 \end{OptionTableNoShorthand}
 
 
@@ -1122,6 +1169,13 @@ picture.
 \end{tabular}
 \\
 
+\Option{DRAPPeriod} &
+%\ShortOption{\None} &
+\Default{0} &
+Specifies the DRAP period in frames.  
+Dependent RAP indication SEI messages are disabled if DRAPPeriod is 0.
+\\
+
 \Option{GOPSize (-g)} &
 %\ShortOption{-g} &
 \Default{1} &
@@ -1137,8 +1191,15 @@ elements.
 \par
 See section~\ref{sec:gop-structure} for further details.
 \\
+
+\Option{ReWriteParamSets} &
+%\ShortOption{-ip} &
+\Default{$0$} &
+Enable writing of parameter sets (SPS, PPS, etc.) before every (intra) random access point to enable true random access.
+\\
 \end{OptionTableNoShorthand}
 
+
 %%
 %% Motion estimation parameters
 %%
@@ -1222,12 +1283,41 @@ $}
 Specifies the maximum number of merge candidates to use.
 \\
 
+\Option{MaxNumTriangleCand} &
+%\ShortOption{\None} &
+\Default{5} &
+Specifies the maximum number of triangle merge candidates to use.
+\\
+
+\Option{MaxNumIBCMergeCand} &
+%\ShortOption{\None} &
+\Default{6} &
+Specifies the maximum number of IBC merge candidates to use.
+\\
+
 \Option{DisableIntraInInter} &
 %\ShortOption{\None} &
 \Default{0} &
 Flag to disable intra PUs in inter slices.
 \\
 
+\Option{MMVD} &
+%\ShortOption{\None} &
+\Default{1} &
+Enables or disables the merge mode with motion vector difference (MMVD).
+\\
+
+\Option{MmvdDisNum} &
+%\ShortOption{\None} &
+\Default{6} &
+Specifies the number of MMVD distance entries used from the distance table at encoder.
+\\
+
+\Option{CIIP} &
+%\ShortOption{\None} &
+\Default{1} &
+Enables or disables the merge mode with combined inter merge and intra prediction (CIIP).
+\\
 \end{OptionTableNoShorthand}
 
 
@@ -1319,6 +1409,12 @@ candidate is not evaluated if the merge skip mode was the best merge
 mode for one of the previous candidates.
 \\
 
+\Option{SBTFast64WidthTh} &
+%\ShortOption{\None} &
+\Default{1920} &
+Picture width threshold for testing size-64 SBT in RDO (now for HD and above sequences).
+\\
+
 \Option{RDpenalty} &
 %\ShortOption{\None} &
 \Default{0} &
@@ -1332,6 +1428,19 @@ Enabling this parameter can reduce the visibility of CU boundaries in the coded
 \end{tabular}
 \\
 
+\Option{FastLocalDualTreeMode} &
+%\ShortOption{\None} &
+\Default{0} &
+Controls intra coding speedup introduced with local dual tree mode.
+\par
+\begin{tabular}{cp{0.45\textwidth}}
+ 0 & Disabled\\
+ 1 & Stop testing intra modes in inter slices, if best cost is more than 1.5 times inter cost.\\
+ 2 & Test only one intra mode in inter slices\\
+\end{tabular}
+\\
+
+
 \end{OptionTableNoShorthand}
 
 %%
@@ -1350,6 +1459,12 @@ Specifies the base value of the quantization parameter. If it is non-integer, th
 Specifies a QP offset from the base QP value to be used for intra frames.
 \\
 
+\Option{DepQuant} &
+%\ShortOption{\None} &
+\Default{true} &
+Enables or disables the usage of dependent quantization.
+\\
+
 \Option{LambdaFromQpEnable} &
 %\ShortOption{\None} &
 \Default{false} &
@@ -1380,6 +1495,62 @@ $\lambda = \lambda_{base} \times max(2, min(4, (sliceQP-12)/6))$
 In addition, independent on the IntraQPFactor, if HadamardME=false, then for an inter slice the final $\lambda$ is scaled by a factor of $0.95$.
 \\
 
+\Option{UseIdentityTableForNon420Chroma}&
+\Default{1}&
+Specifies whether identity chroma QP mapping tables are used for 4:2:2 and 4:4:4 content. When set to 1, the identity chroma QP mapping table is used for all the three chroma components for 4:2:2 or 4:4:4 content. When set to 0, chroma QP 
+mapping table may be specified by other parameters in the configuration.
+\\
+
+\Option{SameCQPTablesForAllChroma}&
+\Default{1}&
+Specifies that the Cb, Cr and joint Cb-Cr components all use the same
+chroma mapping table. When set to 1, the values of QpInValCr, 
+QpOutValCr, QpInValCbCr and QpOutValCbCr are ignored. When set to 0, all
+Cb, Cr and joint Cb-Cr components may have different chroma QP mapping tables specified in the configuration file. Note that 
+SameCQPTablesForAllChroma is ignored when UseIdentityTableForNon420Chroma is set to 1 for 4:2:2 and 4:4:4 content.
+\\
+
+\Option{QpInValCb}%
+\Option{QpOutValCb}&
+\Default{\NotSet} &
+Specifies the input and output coordinates of the pivot points used to specify the chroma QP mapping tables for the Cb component. Default values are as follows:
+\par
+\begin{tabular}{cp{0.45\textwidth}}
+ QpInValCb & 25, 33, 43 \\
+ QpOutValCb & 25, 32, 37 \\
+\end{tabular}
+The values specify the pivot points for the chroma QP mapping table, the unspecified QP values are interpolated from the remaining values. E.g., the default values above specify that  the pivot points for the chroma QP mapping table for the Cb component are (25, 25), (33, 32), (43, 37).
+Note that QpInValCb and QpOutValCb are ignored when UseIdentityTableForNon420Chroma is set to 1 for 4:2:2 and 4:4:4 content.
+\\
+
+\Option{QpInValCr}%
+\Option{QpOutValCr}&
+\Default{\NotSet} &
+Specifies the input and output coordinates of the pivot points used to specify the chroma QP mapping tables for the Cr component. Default values are as follows:
+\par
+\begin{tabular}{cp{0.45\textwidth}}
+ QpInValCr  & 0 \\
+ QpOutValCr & 0 \\
+\end{tabular}
+
+The default values specify a pivot point of (0,0) which corresponds to an identity chroma QP mapping table. Note that QpInValCr and QpOutValCr are ignored
+when SameCQPTablesForAllChroma is set to 1 or when UseIdentityTableForNon420Chroma is set to 1 for 4:2:2 and 4:4:4 content.
+\\
+
+\Option{QpInValCbCr}%
+\Option{QpOutValCbCr}&
+\Default{\NotSet} &
+Specifies the input and output coordinates of the pivot points used to specify the chroma QP mapping tables for the joint Cb-Cr component. Default values are as follows:
+\par
+\begin{tabular}{cp{0.45\textwidth}}
+ QpInValCbCr  & 0 \\
+ QpOutValCbCr & 0 \\
+\end{tabular}
+
+The default values specify a pivot point of (0,0) which corresponds to an identity chroma QP mapping table. Note that QpInValCbCr and QpOutValCbCr are ignored
+when SameCQPTablesForAllChroma is set to 1  or when UseIdentityTableForNon420Chroma is set to 1 for 4:2:2 and 4:4:4 content.
+\\
+
 \Option{CbQpOffset (-cbqpofs)}%
 \Option{CrQpOffset (-crqpofs)} &
 %\ShortOption{-cbqpofs}%
@@ -1392,6 +1563,20 @@ and cr_qp_offset, that are transmitted in the PPS.  Valid values are in
 the range $[-12, 12]$.
 \\
 
+\Option{CbCrQpOffset (-cbcrqpofs)} &
+\Default{-1} &
+Global offset to apply to the luma QP to derive the QP for joint Cb-Cr
+residual coding mode.  This option corresponds to the value of cb_cr_qp_offset
+transmitted in the PPS.  Valid values are in the range $[-12, 12]$.
+\\
+
+\Option{CbCrQpOffsetDualTree} &
+\Default{0} &
+Tile group QP offset for joint Cb-Cr residual coding mode when separate luma and
+chroma trees are used.  This option corresponds to the value of tile_group_cb_cr_qp_offset
+transmitted in the tile group header. Valid values are in the range $[-12, 12]$.
+\\
+
 \Option{LumaLevelToDeltaQPMode} &
 \Default{0} &
 Luma-level based Delta QP modulation.
@@ -1515,10 +1700,16 @@ Specifies a file containing a list of QP deltas. The $n$-th line
 value delta for the picture with POC value $n$.
 \\
 
-\Option{AdaptiveQp (-aq)} &
+\Option{PerceptQPA (-qpa)} &
+%\ShortOption{-qpa} &
+\Default{false} &
+Enables or disables the perceptually optimized QP adaptation (QPA) method described in JVET-H0047, JVET-K0206, and JVET-M0091. Use this together with 'SliceChromaQPOffsetPeriodicity=1' and, in case of HDR input, 'LumaLevelToDeltaQPMode=1' for best subjective quality. Cannot be used together with 'SelectiveRDOQ' (see above) or 'AdaptiveQP' (see below).
+\\
+
+\Option{AdaptiveQP (-aq)} &
 %\ShortOption{-aq} &
 \Default{false} &
-Enable or disable QP adaptation based upon a psycho-visual model.
+Enables or disables the legacy QP adaptation method based upon a psycho-visual model.
 \\
 
 \Option{MaxQPAdaptationRange (-aqr)} &
@@ -1560,6 +1751,12 @@ If ScalingList is set to 2 and this parameter is an empty string, information on
 is output and the encoder stops.
 \\
 
+\Option{DisableScalingMatrixForLFNST} &
+%\ShortOption{\None} &
+\Default{true} &
+Specifies whether scaling matrices are to be applied to blocks coded with LFNST.
+\\
+
 \Option{MaxCUChromaQpAdjustmentDepth} &
 %\ShortOption{\None} &
 \Default{-1} &
@@ -1570,125 +1767,110 @@ Specifies the maximum depth for CU chroma QP adjustment; if negative, CU chroma
 
 
 %%
-%% Slice coding parameters
+%% Slice/Tile coding parameters
 %%
-\begin{OptionTableNoShorthand}{Slice coding parameters}{tab:slice-coding}
-%\Option{SliceGranularity} &
-%\ShortOption{\None} &
-%\Default{0} &
-%Determines the depth in an LCU at which slices may begin and end.
-%\par
-%\begin{tabular}{cp{0.45\textwidth}}
-% 0   & Slice addresses are LCU aligned \\
-% $1 \leq n \leq 3$
-%     & Slice start addresses are aligned to CUs at depth $n$ \\
-%\end{tabular}
-%
-%Note: The smallest permissible alignment is 16x16 CUs.
-%Values of $n$ must satisfy this constraint, for example, with a 64x64
-%LCU, $n$ must be less than or equal to 2.
-%\\
+\begin{OptionTableNoShorthand}{Slice and tile coding parameters}{tab:slice-coding}
 
-\Option{SliceMode} &
+\Option{EnablePicPartitioning} &
 %\ShortOption{\None} &
 \Default{0} &
-Controls the slice partitioning method in conjunction with
-SliceArgument.
-\par
-\begin{tabular}{cp{0.45\textwidth}}
- 0 & Single slice \\
- 1 & Maximum number of CTUs per slice \\
- 2 & Maximum number of bytes per slice \\
- 3 & Maximum number of tiles per slice \\
-\end{tabular}
+Enable picture partitioning (0: single tile, single slice, 1: multiple tiles/slices can be used).
+\\
+
+\Option{TileColumnWidthArray} &
+%\ShortOption{\None} &
+\Default{\NotSet} &
+Tile column widths in units of CTUs. Last column width in list will be repeated uniformly to cover any remaining picture width.
 \\
 
-\Option{SliceArgument} &
+\Option{TileRowHeightArray} &
 %\ShortOption{\None} &
 \Default{\NotSet} &
-Specifies the maximum number of CTUs, bytes or tiles in a slice depending on the
-SliceMode setting.
+Tile row heights in units of CTUs. Last row height in list will be repeated uniformly to cover any remaining picture height.
 \\
 
-\Option{SliceSegmentMode} &
+\Option{RasterScanSlices} &
 %\ShortOption{\None} &
 \Default{0} &
-Enables (dependent) slice segment coding in conjunction with 
-SliceSegmentArgument.
-\par
-\begin{tabular}{cp{0.45\textwidth}}
- 0 & Single slice \\
- 1 & Maximum number of CTUs per slice segment\\
- 2 & Maximum number of bytes per slice segment\\
- 3 & Maximum number of tiles per slice segment\\
-\end{tabular}
+Use raster-scan or rectangular slices (0: rectangular, 1: raster-scan).
 \\
 
-\Option{SliceSegmentArgument} &
+\Option{RectSlicePositions} &
 %\ShortOption{\None} &
 \Default{\NotSet} &
-Defines the maximum number of CTUs, bytes or tiles a slice segment
-depending on the SliceSegmentMode setting.
+Rectangular slice positions. List containing pairs of top-left CTU RS address followed by bottom-right CTU RS address.
 \\
 
-\Option{WaveFrontSynchro} &
+\Option{RectSliceFixedWidth} &
 %\ShortOption{\None} &
-\Default{false} &
-Enables the use of specific CABAC probabilities synchronization at the
-beginning of each line of CTBs in order to produce a bitstream that can
-be encoded or decoded using one or more cores.
+\Default{0} &
+Fixed rectangular slice width in units of tiles (0: disable this feature and use RectSlicePositions instead).
 \\
 
-\Option{TileUniformSpacing} &
+\Option{RectSliceFixedHeight} &
 %\ShortOption{\None} &
-\Default{false} &
-Controls the mode used to determine per row and column tile sizes.
-\par
-\begin{tabular}{cp{0.45\textwidth}}
- 0 & Each tile column width and tile row height is explicitly set
-     by TileColumnWidthArray and TileRowHeightArray respectively \\
- 1 & Tile columns and tile rows are uniformly spaced. \\
-\end{tabular}
+\Default{0} &
+Fixed rectangular slice height in units of tiles (0: disable this feature and use RectSlicePositions instead).
+\\
+
+\Option{RasterSliceSizes} &
+%\ShortOption{\None} &
+\Default{\NotSet} &
+Raster-scan slice sizes in units of tiles. Last size in list will be repeated uniformly to cover any remaining tiles in the picture.
 \\
 
-\Option{NumTileColumnsMinus1}%
-\Option{NumTileRowsMinus1} &
+\Option{DisableLoopFilterAcrossTiles} &
 %\ShortOption{\None} &
 \Default{0} &
-Specifies the tile based picture partitioning geometry as
-$\mathrm{NumTileColumnsMinus1} + 1 \times \mathrm{NumTileRowsMinus1} + 1$
-columns and rows.
+Specifies whether loop filtering is applied across tile boundaries (0: filter across tile boundaries, 1: do not filter across tile boundaries).
 \\
 
-\Option{TileColumnWidthArray}%
-\Option{TileRowHeightArray} &
+\Option{DisableLoopFilterAcrossSlices} &
 %\ShortOption{\None} &
-\Default{\NotSet} &
-Specifies a space or comma separated list of widths and heights,
-respectively, of each tile column or tile row.  The first value in the
-list corresponds to the leftmost tile column or topmost tile row.
+\Default{0} &
+Specifies whether loop filtering is applied across slice boundaries (0: filter across slice boundaries, 1: do not filter across slice boundaries).
+\\
+
+\Option{IDRRefParamList} &
+%\ShortOption{\None} &
+\Default{false} &
+Enables the signalling of reference picture list syntax elements in slice headers of IDR pictures
+\\
+
+\Option{WaveFrontSynchro} &
+%\ShortOption{\None} &
+\Default{false} &
+Enables the use of specific CABAC probabilities synchronization at the
+beginning of each line of CTBs in order to produce a bitstream that can
+be encoded or decoded using one or more cores.
 \\
+
+
 \end{OptionTableNoShorthand}
 
+%%
+%% Slice/Sub-Picture coding parameters
+%%
+\begin{OptionTableNoShorthand}{Slice and Sub-Picture coding parameters}{tab:subpicture-coding}
+
+\Option{EnableSubPicPartitioning} &
+%\ShortOption{\None} &
+\Default{1} &
+Enable Sub Picture partitioning (0: single slice per sub-picture, 1: multiple slices per sub-picture can be used).
+\\
 
+\end{OptionTableNoShorthand}
 
 %%
-%% Deblocking filter parameters
+%% In-loop filtering parameters
 %%
-\begin{OptionTableNoShorthand}{Deblocking filter parameters}{tab:deblocking-filter}
+\begin{OptionTableNoShorthand}{In-loop filtering parameters}{tab:inloop-filter}
 \Option{LoopFilterDisable} &
 %\ShortOption{\None} &
 \Default{false} &
 Enables or disables the in-loop deblocking filter.
 \\
 
-\Option{LFCrossSliceBoundaryFlag} &
-%\ShortOption{\None} &
-\Default{true} &
-Enables or disables the use of in-loop filtering across slice
-boundaries.
-\\
-
 \Option{LoopFilterOffsetInPPS}&
 %\ShortOption{\None}&
 \Default{false}&
@@ -1723,21 +1905,64 @@ Specifies the use of a deblocking filter metric to evaluate the suitability of d
 LoopFilterOffsetInPPS and LoopFilterDisable must be 0. Currently excepted values are 0, 1 and 2.
 \\
 
-\Option{LFCrossSliceBoundaryFlag}&
+\Option{LoopFilterAcrossVirtualBoundariesDisabledFlag}&
 %\ShortOption{\None}&
-\Default{true}&
-Enables or disables the use of a deblocking across tile boundaries.
+\Default{false}&
+Disables in-loop filtering operations across the virtual boundaries.
 \\
 
-\end{OptionTableNoShorthand}
+\Option{NumVerVirtualBoundaries}&
+%\ShortOption{\None}&
+\Default{0}&
+Specifies the number of vertical virtual boundaries. The value of NumVerVirtualBoundaries shall be in the range of 0 to 3, inclusive.
+\\
 
+\Option{NumHorVirtualBoundaries}&
+%\ShortOption{\None}&
+\Default{0}&
+Specifies the number of horizontal virtual boundaries. The value of NumHorVirtualBoundaries shall be in the range of 0 to 3, inclusive.
+\\
 
+\Option{VirtualBoundariesPosX}&
+%\ShortOption{\None}&
+\Default{\NotSet}&
+Specifies the locations of the vertical virtual boundaries in units of luma samples.
+\\
 
-%%
-%% Coding tools parameters
-%%
+\Option{VirtualBoundariesPosY}&
+%\ShortOption{\None}&
+\Default{\NotSet}&
+Specifies the locations of the horizontal virtual boundaries in units of luma samples.
+\\
 
-\begin{OptionTableNoShorthand}{Coding tools parameters}{tab:coding-tools}
+\Option{EncDbOpt}&
+%\ShortOption{\None}&
+\Default{false}&
+Enables or disables encoder-side deblocking optimization. When it is enabled, deblocking filter is applied during mode decision.
+\\
+
+
+\end{OptionTableNoShorthand}
+
+
+
+%%
+%% Coding tools parameters
+%%
+
+\begin{OptionTableNoShorthand}{Coding tools parameters}{tab:coding-tools}
+
+\Option{MRL} &
+%\ShortOption{\None} &
+\Default{false} &
+Enables or disables the use of multiple reference line intra prediction (MRL).
+\\
+
+\Option{MIP} &
+%\ShortOption{\None} &
+\Default{true} &
+Enables or disables the use of matrix-based intra prediction (MIP).
+\\
 
 \Option{AMP} &
 %\ShortOption{\None} &
@@ -1745,6 +1970,24 @@ Enables or disables the use of a deblocking across tile boundaries.
 Enables or disables the use of asymmetric motion partitions.
 \\
 
+\Option{ISP} &
+%\ShortOption{\None} &
+\Default{false} &
+Enables or disables the Intra Sub-Partitions coding mode.
+\\
+
+\Option{ISPFast} &
+%\ShortOption{\None} &
+\Default{false} &
+Enables or disables fast encoder methods for ISP.
+\\
+
+\Option{JointCbCr} &
+%\ShortOption{\None} &
+\Default{false} &
+Enables or disables the joint coding of chroma residuals.
+\\
+
 \Option{SAO} &
 %\ShortOption{\None} &
 \Default{true} &
@@ -1782,14 +2025,6 @@ for LCU bottom and right boundary areas.
 When true, resets the encoder's SAO state after an IRAP (POC order).
 \\
 
-\Option{ConstrainedIntraPred} &
-%\ShortOption{\None} &
-\Default{false} &
-Enables or disables constrained intra prediction.  Constrained intra
-prediction only permits samples from intra blocks in the same slice as the
-current block to be used for intra prediction.
-\\
-
 \Option{FastUDIUseMPMEnabled} &
 %\ShortOption{\None} &
 \Default{true} &
@@ -1808,32 +2043,6 @@ If enabled use a fast ME for generalised B Low Delay slices
 Enables use of B-Lambda for non-key low-delay pictures
 \\
 
-\Option{TransquantBypassEnable} &
-%\ShortOption{\None} &
-\Default{false} &
-Enables or disables the ability to bypass the transform,
-quantization and filtering stages at CU level.
-This option corresponds to the value of
-transquant_bypass_enabled_flag that is transmitted in the PPS.
-
-See CUTransquantBypassFlagForce for further details.
-\\
-
-\Option{CUTransquantBypassFlagForce} &
-%\ShortOption{\None} &
-\Default{0} &
-Controls the per CU transformation, quantization and filtering
-mode decision.
-This option controls the value of the per CU cu_transquant_bypass_flag.
-\par
-\begin{tabular}{cp{0.45\textwidth}}
- 0 & Bypass is searched on a CU-by-CU basis and will be used if the cost is lower than not bypassing. \\
- 1 & Bypass is forced for all CUs. \\
-\end{tabular}
-
-This option has no effect if TransquantBypassEnable is disabled.
-\\
-
 \Option{PCMEnabledFlag} &
 %\ShortOption{\None} &
 \Default{false} &
@@ -1887,7 +2096,7 @@ Enables the use of weighted prediction in P slices.
 Enables the use of weighted prediction in B slices.
 \\
 
-\Option{WPMethod (-wpM)} &
+\Option{WeightedPredMethod (-wpM)} &
 %\ShortOption{\-wpM} &
 \Default{0} &
 Sets the Weighted Prediction method to be used.
@@ -1902,12 +2111,6 @@ Sets the Weighted Prediction method to be used.
 \\
 
 
-\Option{Log2ParallelMergeLevel} &
-%\ShortOption{\None} &
-\Default{2} &
-Defines the PPS-derived Log2ParMrgLevel variable.
-\\
-
 \Option{SignHideFlag (-SBH)} &
 %\ShortOption{-SBH} &
 \Default{true} &
@@ -1919,16 +2122,6 @@ bitstream, but may be inferred from the parity of the sum of all nonzero
 coefficients in the current coefficient group.
 \\
 
-\Option{StrongIntraSmoothing (-sis)} &
-%\ShortOption{-sis} &
-\Default{true} &
-If enabled specifies that for 32x32 intra prediction block, the intra smoothing
-when applied is either the 1:2:1 smoothing filter or a stronger bi-linear 
-interpolation filter.  Key reference sample values are tested and if the criteria 
-is satisfied, the stronger intra smoothing filter is applied.
-If disabled, the intra smoothing filter when applied is the 1:2:1 smoothing filter.
-\\
-
 \Option{TMVPMode} &
 %\ShortOption{\None} &
 \Default{1} &
@@ -1941,6 +2134,52 @@ Controls the temporal motion vector prediction mode.
 \end{tabular}
 \\
 
+\Option{PPSorSliceMode} &
+%\ShortOption{\None} &
+\Default{0} &
+Enables signaling the below parameters either in PPS or for each slice according to the following preset modes:
+\par
+\begin{tabular}{cp{0.45\textwidth}}
+  0 & Always signaled per slice. \\
+  1 & RA settings. \\
+  2 & LDB settings. \\
+  3 & LDP settings. \\ 
+\end{tabular}
+\par
+\begin{tabular}{p{5cm}llll}
+  & & & & \\
+  Parameter & \multicolumn{3}{l}{Mode} \\
+  & 0 & 1 & 2 & 3 \\
+  dep_quant_enabled_flag & s & p & p & p \\
+  ref_pic_list_sps_flag0 & s & s & p & p \\
+  ref_pic_list_sps_flag1 & s & s & p & p \\
+  temporal_mvp_enabled_flag & s & s & p & p \\
+  mvd_l1_zero_flag & s & s & p & s \\
+  collocated_from_l0_flag & s & s & p & s \\
+  six_minus_max_num_merge_cand & s & p & p & p \\
+  five_minus_max_num_subblock_merge_cand & s & p & p & p \\
+  max_num_merge_cand_minus_max_num_triangle_cand & s & p & p & s \\
+\end{tabular}
+\\
+
+\Option{SliceLevelRpl} &
+%\ShortOption{\None} &
+\Default{true} &
+Code reference picture lists in slice headers rather than picture header.
+\\
+
+\Option{SliceLevelDblk} &
+%\ShortOption{\None} &
+\Default{true} &
+Code deblocking filter parameters in slice headers rather than picture header.
+\\
+
+\Option{SliceLevelSao} &
+%\ShortOption{\None} &
+\Default{true} &
+Code SAO parameters in slice headers rather than picture header.
+\\
+
 \Option{TransformSkip} &
 %\ShortOption{\None} &
 \Default{false} &
@@ -1957,6 +2196,157 @@ luma TUs are also skipped.
 \par
 This option has no effect if TransformSkip is disabled.
 \\
+
+\Option{ChromaTS} &
+%\ShortOption{\None} &
+\Default{false} &
+Enables or disables reduced testing of the transform-skipping mode
+decision for chroma TUs.  When disabled, no RDO search is performed for
+chroma TUs.
+\par
+This option has no effect if TransformSkip is disabled.
+\\
+
+\Option{ALF} &
+%\ShortOption{\None} &
+\Default{true} &
+Enables or disables adaptive loop filter.
+\\
+
+\Option{UseNonLinearAlfLuma} &
+%\ShortOption{\None} &
+\Default{true} &
+Enables optimization of non-linear filters for ALF on Luma channel.
+\\
+
+\Option{UseNonLinearAlfChroma} &
+%\ShortOption{\None} &
+\Default{true} &
+Enables optimization of non-linear filters for ALF on Chroma channels.
+\\
+
+\Option{MaxNumAlfAlternativesChroma} &
+%\ShortOption{\None} &
+\Default{8} &
+Specifies the maximum number of alternative chroma filters that can be
+switched at CTB level. Set to 1 to disable alternative chroma filters.
+Value shall be in the range 1..8.
+\\
+
+\Option{SMVD} &
+%\ShortOption{\None} &
+\Default{false} &
+Enables or disables symmetric MVD mode.
+\\
+
+\Option{PLT} &
+%\ShortOption{\None} &
+\Default{false} &
+Enables or disables palette mode coding.
+\\
+
+\Option{BDPCM} &
+%\ShortOption{\None} &
+\Default{0} &
+Enables or disables the use of intra block differential pulse code modulation mode.
+\par
+\begin{tabular}{cp{0.45\textwidth}}
+	0 & Disable BDPCM for luma and chroma.\\
+	1 & Enable BDPCM for luma.\\
+	2 & Enable BDPCM for luma and chroma. BDPCM for chroma is available for 444.\\
+\end{tabular}
+\\
+
+\Option{LFNST} &
+%\ShortOption{\None} &
+\Default{false} &
+Enables or disables the use of low frequency non-separable transform (LFNST).
+\\
+
+\Option{FastLFNST} &
+%\ShortOption{\None} &
+\Default{false} &
+Enables or disables the fast encoding of low frequency non-separable transform (LFNST).
+\\
+
+\Option{LMCSEnable} &
+%\ShortOption{\None} &
+\Default{true} &
+Enables or disables the use of LMCS (luma mapping with chroma scaling).
+\\
+
+\Option{LMCSSignalType} &
+%\ShortOption{\None} &
+\Default{0} &
+LMCS signal type: 0:SDR, 1:HDR-PQ, 2:HDR-HLG.
+\\
+
+\Option{LMCSUpdateCtrl} &
+%\ShortOption{\None} &
+\Default{0} &
+LMCS model update control: 0:RA, 1:AI, 2:LDB/LDP.
+\par
+\begin{tabular}{cp{0.45\textwidth}}
+	0 & Random access: derive a new LMCS model at each IRAP.\\
+	1 & All intra: derive a new LMCS model at each intra slice.\\
+	2 & Low delay: derive a new LMCS model every second. \\
+\end{tabular}
+\\
+
+\Option{LMCSAdpOption} &
+%\ShortOption{\None} &
+\Default{0} &
+Adaptive LMCS mapping derivation options: Options 1 to 4 are for experimental testing purposes and need to set parameter LMCSInitialCW.
+\par
+\begin{tabular}{cp{0.45\textwidth}}
+	0 & Automatic adaptive algorithm (default).\\
+	1 & Derives LMCS mapping with input LMCSInitialCW and enables LMCS for all slices. Uses a static LMCS mapping for low QP ($QP \leq 22$). \\
+	2 & Derives LMCS mapping with input LMCSInitialCW and enables LMCS only for slices in lowest temporal layer. \\
+	3 & In addition to 1, disables LMCS for intra slices. \\
+	4 & Derives LMCS mapping with input LMCSInitialCW and enables LMCS only for inter slices. \\
+\end{tabular}
+\\
+
+\Option{LMCSInitialCW} &
+%\ShortOption{\None} &
+\Default{0} &
+LMCS initial total codeword (valid values [$0 - 1023$]) to be used in LMCS mapping derivation when LMCSAdpOption is not equal to 0.
+\\
+
+\Option{LMCSOffset} &
+%\ShortOption{\None} &
+\Default{0} &
+Specifies the LMCS chroma residual scaling offset. This parameter corresponds to the value of lmcsDeltaCrs, derived from lmcs_delta_sign_crs_flag and lmcs_delta_abs_crs, that are transmitted in the APS. Valid values are in the range [-7;7].
+\\
+
+\Option{ColorTransform} &
+%\ShortOption{\None} &
+\Default{false} &
+Enables or disables the use of adaptive color transform (ACT).
+\\
+
+\Option{HorCollocatedChroma} &
+%\ShortOption{\None} &
+\Default{true} &
+Specifies the location of a chroma sample relative to the luma sample in the horizontal direction in the reference picture resampling.
+\par
+\begin{tabular}{cp{0.45\textwidth}}
+	0 & horizontally shifted by 0.5 units of luma samples.\\
+	1 & collocated (default). \\
+\end{tabular}
+\\
+
+\Option{VerCollocatedChroma} &
+%\ShortOption{\None} &
+\Default{false} &
+Specifies the location of a chroma sample relative to the luma sample in the vertical direction in the cross-component linear model intra prediction and the reference picture resampling.
+\par
+\begin{tabular}{cp{0.45\textwidth}}
+	0 & vertically shifted by 0.5 units of luma samples (default).\\
+	1 & collocated. \\
+\end{tabular}
+\\
+
 \end{OptionTableNoShorthand}
 
 %%
@@ -2125,32 +2515,15 @@ Indicates whether cropped decoded pictures are suitable for display using oversc
   1 & Indicates that the decoded pictures may be displayed using overscan. \\
 \end{tabular}
 \\
-\Option{VideoSignalTypePresent} &
-\Default{false} &
-Signals whether video_format, video_full_range_flag, and colour_description_present_flag are present.
-\\
-\Option{VideoFormat} &
-\Default{5} &
-Indicates representation of pictures.
-\\
-\Option{VideoFullRange} &
-\Default{false} &
-Indicates the black level and range of luma and chroma signals.
-\par
-\begin{tabular}{cp{0.45\textwidth}}
-  0 & Indicates that the luma and chroma signals are to be scaled prior to display. \\
-  1 & Indicates that the luma and chroma signals are not to be scaled prior to display. \\
-\end{tabular}
-\\
 \Option{ColourDescriptionPresent} &
 \Default{false} &
-Signals whether colour_primaries, transfer_characteristics and matrix_coefficients are present.
+Signals whether colour_primaries, transfer_characteristics, matrix_coefficients and video_full_range_flag are present.
 \\
 \Option{ColourPrimaries} &
 \Default{2} &
 Indicates chromaticity coordinates of the source primaries.
 \\
-\Option{TransferCharateristics} &
+\Option{TransferCharacteristics} &
 \Default{2} &
 Indicates the opto-electronic transfer characteristics of the source.
 \\
@@ -2158,6 +2531,15 @@ Indicates the opto-electronic transfer characteristics of the source.
 \Default{2} &
 Describes the matrix coefficients used in deriving luma and chroma from RGB primaries.
 \\
+\Option{VideoFullRange} &
+\Default{false} &
+Indicates the black level and range of luma and chroma signals.
+\par
+\begin{tabular}{cp{0.45\textwidth}}
+  0 & Indicates that the luma and chroma signals are to be scaled prior to display. \\
+  1 & Indicates that the luma and chroma signals are not to be scaled prior to display. \\
+\end{tabular}
+\\
 \Option{ChromaLocInfoPresent} &
 \Default{false} &
 Signals whether chroma_sample_loc_type_top_field and chroma_sample_loc_type_bottom_field are present.
@@ -2170,74 +2552,6 @@ Specifies the location of chroma samples for top field.
 \Default{0} &
 Specifies the location of chroma samples for bottom field.
 \\
-\Option{NeutralChromaIndication} &
-\Default{false} &
-Indicates that the value of all decoded chroma samples is equal to 1<<(BitDepthCr-1).
-\\
-
-\Option{DefaultDisplayWindowFlag} &
-\Default{flag} &
-Indicates the presence of the Default Window parameters.
-\par
-\begin{tabular}{cp{0.45\textwidth}}
-false & Disabled \\
-true & Enabled \\
-\end{tabular}
-\\
-
-\Option{DefDispWinLeftOffset}%
-\Option{DefDispWinRightOffset}%
-\Option{DefDispWinTopOffset}%
-\Option{DefDispWinBottomOffset} &
-\Default{0} &
-Specifies the horizontal and vertical offset to be applied to the
-input video from the conformance window in luma samples.
-Must be a multiple of the chroma resolution (e.g. a multiple of two for 4:2:0).
-\\
-
-\Option{FrameFieldInfoPresentFlag} &
-\Default{false} &
-Specificies the value of the VUI syntax element `frame_field_info_present_flag', which indicates that pic_struct and field coding related values are present in picture timing SEI messages.
-\\
-
-\Option{PocProportionalToTimingFlag} &
-\Default{false} &
-Specificies the value of the VUI syntax element `vui_poc_proportional_to_timing_flag', which indicates that the POC value is proportional to the output time with respect to the first picture in the CVS.
-\\
-
-\Option{NumTicksPocDiffOneMinus} &
-\Default{0} &
-Specificies the value of the VUI syntax element `vui_num_ticks_poc_diff_one_minus1', which specifies the number of clock ticks corresponding to a difference of picture order count values equal to 1, and is used only when PocProportionalToTimingFlag is true.
-\\
-
-\Option{BitstreamRestriction} &
-\Default{false} &
-Signals whether bitstream restriction parameters are present.
-\\
-\Option{TilesFixedStructure} &
-\Default{false} &
-Indicates that each active picture parameter set has the same values of the syntax elements related to tiles.
-\\
-\Option{MotionVectorsOverPicBoundaries} &
-\Default{false} &
-Indicates that no samples outside the picture boundaries are used for inter prediction.
-\\
-\Option{MaxBytesPerPicDenom} &
-\Default{2} &
-Indicates a number of bytes not exceeded by the sum of the sizes of the VCL NAL units associated with any coded picture.
-\\
-\Option{MaxBitsPerMinCuDenom} &
-\Default{1} &
-Indicates an upper bound for the number of bits of coding_unit() data.
-\\
-\Option{Log2MaxMvLengthHorizontal} &
-\Default{15} &
-Indicate the maximum absolute value of a decoded horizontal MV component in quarter-pel luma units.
-\\
-\Option{Log2MaxMvLengthVertical} &
-\Default{15} &
-Indicate the maximum absolute value of a decoded vertical MV component in quarter-pel luma units.
-\\
 \end{OptionTableNoShorthand}
 
 
@@ -2249,9 +2563,9 @@ Specifies the cost mode to use.
 \par
 \begin{tabular}{lp{0.3\textwidth}}
   lossy                   & $cost=distortion+\lambda \times bits$ \\
-  sequence_level_lossless & $cost=distortion / \lambda + bits$. \\
-  lossless                & As with sequence_level_lossless, but QP is also set to 0 (this will be deprecated in the future) \\
-  mixed_lossless_lossy    & As with sequence_level_lossless, but QP'=4 is used for pre-estimates of transquant-bypass blocks \\
+%  sequence_level_lossless & $cost=distortion / \lambda + bits$. \\
+  lossless                & $cost = bits$, QP'=0 is used for all transform blocks and the only allowed encoder result is either an empty transform block or a transform-skipped block. \\
+%  mixed_lossless_lossy    & As with sequence_level_lossless, but QP'=4 is used for pre-estimates of transquant-bypass blocks \\
 \end{tabular}
 \\
 
@@ -2288,6 +2602,7 @@ Specifies the shift to apply to the SAO parameters. If negative, an estimate wil
 Specifies the maximum TU size for which transform-skip can be used; the minimum value is 2. Version 1 and some Version 2 (RExt) profiles require this to be 2.
 \\
 
+
 \Option{ImplicitResidualDPCM} &
 \Default{false} &
 When true, specifies the use of the implicitly signalled residual RDPCM tool (for intra). Version 1 and some Version 2 (RExt) profiles require this to be false.
@@ -2341,7 +2656,7 @@ The table below lists the SEI messages defined for Version 1 and Range-Extension
    15 & Picture snapshot                         & (Not handled)\\
    16 & Progressive refinement segment start     & (Not handled)\\
    17 & Progressive refinement segment end       & (Not handled)\\
-   19 & Film grain characteristics               & (Not handled)\\
+   19 & Film grain characteristics               & Table \ref{tab:sei-film-grain} \\
    22 & Post-filter hint                         & (Not handled)\\
    23 & Tone mapping information                 & Table \ref{tab:sei-tone-mapping-info} \\
    45 & Frame packing arrangement                & Table \ref{tab:sei-frame-packing-arrangement} \\
@@ -2363,6 +2678,18 @@ The table below lists the SEI messages defined for Version 1 and Range-Extension
   141 & Knee function information                & Table \ref{tab:sei-knee-function} \\
   142 & Colour remapping information             & Table \ref{tab:sei-colour-remapping}\\
   143 & Deinterlaced field identification        & (Not handled)\\
+  144 & Content light level info                 & Table \ref{tab:sei-content-light-level}\\
+  147 & Alternative transfer characteristics     & Table \ref{tab:sei-alternative-transfer-characteristics}\\
+  148 & Ambient viewing environment              & Table \ref{tab:sei-ambient-viewing-environment}\\
+  149 & Content colour volume                    & Table \ref{tab:sei-content-colour-volume}\\
+  150 & Equirectangular projection               & Table \ref{tab:sei-erp} \\
+  153 & Generalized cubemap projection           & Table \ref{tab:sei-gcmp} \\
+  154 & Sphere rotation                          & Table \ref{tab:sei-sphere-rotation} \\
+  155 & Region-wise packing                      & Table \ref{tab:sei-rwp} \\
+  156 & Omni viewport                            & Table \ref{tab:sei-omni-viewport} \\  
+  168 & Frame-field information                  & Table \ref{tab:sei-frame-field} \\  
+  203 & Subpicture Level Information             & Table \ref{tab:sei-subpic-level} \\  
+  204 & Sample Aspect Ratio Information          & Table \ref{tab:sei-sari} \\  
 \end{SEIListTable}
 %%
 %% SEI messages
@@ -2401,6 +2728,59 @@ SEI messages.
 
 
 
+\begin{OptionTableNoShorthand}{Film grain characteristics SEI message encoder parameters}{tab:sei-film-grain}
+\Option{SEIFGCEnabled} &
+\Default{0} &
+Enables or disables the insertion of the film grain characteristics SEI message.
+\\
+\Option{SEIFGCCancelFlag} &
+\Default{0} &
+Specifies the persistence of any previous film grain characteristics SEI message in output order.
+\\
+\Option{SEIFGCPersistenceFlag} &
+\Default{1} &
+Specifies the persistence of the film grain characteristics SEI message for the current layer.
+\\
+\Option{SEIFGCModelID} &
+\Default{0} &
+Specifies the film grain simulation model.
+\par
+\begin{tabular}{cp{0.35\textwidth}}
+  0 & frequency filtering \\
+  1 & auto-regression \\
+\end{tabular}
+\\
+\Option{SEIFGCSepColourDescPresentFlag} &
+\Default{0} &
+Specifies the presence of a distinct colour space description for the film grain characteristics specified in the SEI message.
+\\
+\Option{SEIFGCBlendingModeID} &
+\Default{0} &
+Specifies the blending mode used to blend the simulated film grain with the decoded images.
+\par
+\begin{tabular}{cp{0.35\textwidth}}
+  0 & additive \\
+  1 & multiplicative \\
+\end{tabular}
+\\
+\Option{SEIFGCLog2ScaleFactor} &
+\Default{0} &
+Specifies a scale factor used in the film grain characterization equations.
+\\
+\Option{SEIFGCCompModelPresentComp0} &
+\Default{0} &
+Specifies the presence of film grain modelling on colour component 0.
+\\
+\Option{SEIFGCCompModelPresentComp1} &
+\Default{0} &
+Specifies the presence of film grain modelling on colour component 1.
+\\
+\Option{SEIFGCCompModelPresentComp2} &
+\Default{0} &
+Specifies the presence of film grain modelling on colour component 2.
+\\
+\end{OptionTableNoShorthand}
+
 \begin{OptionTableNoShorthand}{Tone mapping information SEI message encoder parameters}{tab:sei-tone-mapping-info}
 \Option{SEIToneMappingInfo} &
 \Default{0} &
@@ -2582,7 +2962,7 @@ Specifies luma sample value of the extended dynamic range assigned decoded pictu
 Enables or disables the insertion of the Frame packing arrangement SEI messages.
 \\
 \Option{SEIFramePackingType} &
-\Default{0} &
+\Default{3} &
 Indicates the arrangement type in the Frame packing arrangement SEI message.
 This option has no effect if SEIFramePacking is disabled.
 \par
@@ -2604,7 +2984,7 @@ This option has no effect if SEIFramePacking is disabled.
 \end{tabular}
 \\
 \Option{SEIFramePackingQuincunx} &
-\Default{0} &
+\Default{1} &
 Enables or disables the quincunx_sampling signalling in the
 Frame packing arrangement SEI messages. This option has no
 effect if SEIFramePacking is disabled.
@@ -2967,6 +3347,463 @@ An example file can be found in cfg/misc/example_colour_remapping_sei_encoder_0.
 \\
 \end{OptionTableNoShorthand}
 
+\begin{OptionTableNoShorthand}{Equirectangular Projection SEI message encoder parameters}{tab:sei-erp}
+\Option{SEIErpEnabled} &
+\Default{false} &
+Enables (true) or disables (false) the insertion of equirectangular projection SEI message.
+\\
+\Option{SEIErpCancelFlag} &
+\Default{true} &
+Indicates that equirectangular projection SEI message cancels the persistence (true) or follows (false).
+\\
+\Option{SEIErpPersistenceFlag} &
+\Default{false} &
+Specifies the persistence of the equirectangular projection SEI message.
+\\
+\Option{SEIErpGuardBandFlag} &
+\Default{false} &
+Indicates the existence of guard band areas in the constituent picture.
+\\
+\Option{SEIErpGuardBandType} &
+\Default{0} &
+Indicates the type of the guard bands.
+\\
+\Option{SEIErpLeftGuardBandWidth} &
+\Default{0} &
+Indicates the width of the guard band on the left side of the constituent picture.
+\\
+\Option{SEIErpRightGuardBandWidth} &
+\Default{0} &
+Indicates the width of the guard band on the right side of the constituent picture.
+\\
+\end{OptionTableNoShorthand}
+
+\begin{OptionTableNoShorthand}{Generalized Cubemap Projection SEI message encoder parameters}{tab:sei-gcmp}
+\Option{SEIGcmpEnabled} &
+\Default{false} &
+Enables (true) or disables (false) the insertion of generalized cubemap projection SEI message.
+\\
+\Option{SEIGcmpCancelFlag} &
+\Default{true} &
+Indicates that generalized cubemap projection SEI message cancels the persistence (true) or follows (false).
+\\
+\Option{SEIGcmpPersistenceFlag} &
+\Default{false} &
+Specifies the persistence of the generalized cubemap projection SEI message.
+\\
+\Option{SEIGcmpPackingType} &
+\Default{0} &
+Specifies the packing type.
+\par
+\begin{tabular}{cp{0.35\textwidth}}
+ 0 & 6 rows and 1 column \\
+ 1 & 3 rows and 2 columns \\
+ 2 & 2 rows and 3 columns \\
+ 3 & 1 row and 6 columns \\
+ 4 & 1 row and 5 columns (hemisphere cubemap) \\
+ 5 & 5 rows and 1 column (hemisphere cubemap) \\
+\end{tabular}
+\\
+\Option{SEIGcmpMappingFunctionType} &
+\Default{0} &
+Specifies the mapping function used to adjust the sample locations.
+\par
+\begin{tabular}{cp{0.35\textwidth}}
+ 0 & Disabled (conventional cubemap projection) \\
+ 1 & Equi-angular mapping function \\
+ 2 & Defined by SEIGcmpFunctionCoeffU, SEIGcmpFunctionUAffectedByVFlag, SEIGcmpFunctionCoeffV, and SEIGcmpFunctionVAffectedByUFlag \\
+\end{tabular}
+\\
+\Option{SEIGcmpFaceIndex} &
+\Default{} &
+An array that specifies the face index for the faces packed in the cubemap projected picture.
+\par
+\begin{tabular}{cp{0.35\textwidth}}
+ 0 & Front face \\
+ 1 & Back face \\
+ 2 & Top face \\
+ 3 & Bottom face \\
+ 4 & Right face \\
+ 5 & Left face \\
+\end{tabular}
+\\
+\Option{SEIGcmpFaceRotation} &
+\Default{} &
+An array that specifies the rotation to be applied to the faces.
+\par
+\begin{tabular}{cp{0.35\textwidth}}
+ 0 & No rotation \\
+ 1 & 90 degree anticlockwise \\
+ 2 & 180 degree anticlockwise \\
+ 3 & 270 degree anticlockwise \\
+\end{tabular}
+\\
+\Option{SEIGcmpFunctionCoeffU} &
+\Default{} &
+An array that specifies the coefficients used in the cubemap mapping function of the u-axis for the faces when SEIGcmpMappingFunctionType is set to 2.
+\\
+\Option{SEIGcmpFunctionUAffectedByVFlag} &
+\Default{} &
+An array that specifies whether the cubemap mapping function of the u-axis refers to the v position of the sample location for the faces when SEIGcmpMappingFunctionType is set to 2.
+\\
+\Option{SEIGcmpFunctionCoeffV} &
+\Default{} &
+An array that specifies the coefficients used in the cubemap mapping function of the v-axis for the faces when SEIGcmpMappingFunctionType is set to 2.
+\\
+\Option{SEIGcmpFunctionVAffectedByUFlag} &
+\Default{} &
+An array that specifies whether the cubemap mapping function of the v-axis refers to the u position of the sample location for the faces when SEIGcmpMappingFunctionType is set to 2.
+\\
+\Option{SEIGcmpGuardBandFlag} &
+\Default{false} &
+Indicates the existence of guard band areas in the picture.
+\\
+\Option{SEIGcmpGuardBandBoundaryType} &
+\Default{false} &
+Enables (true) or disables (false) the boundary guard bands.
+\\
+\Option{SEIGcmpGuardBandSamplesMinus1} &
+\Default{0} &
+Specifies the number of guard band samples minus 1 used in the cubemap projected picture.
+\\
+\end{OptionTableNoShorthand}
+
+\begin{OptionTableNoShorthand}{Sphere Rotation SEI message encoder parameters}{tab:sei-sphere-rotation}
+\Option{SEISphereRotationEnabled} &
+\Default{false} &
+Enables (true) or disables (false) the insertion of sphere rotation SEI message.
+\\
+\Option{SEISphereRotationCancelFlag} &
+\Default{true} &
+Indicates that the sphere rotation SEI message cancels the persistence (true) or follows (false).
+\\
+\Option{SEISphereRotationPersistenceFlag} &
+\Default{false} &
+Specifies the persistence of the sphere rotation SEI message.
+\\
+\Option{SEISphereRotationYaw} &
+\Default{0} &
+Specifies the value of the yaw rotation angle.
+\\
+\Option{SEISphereRotationPitch} &
+\Default{0} &
+Specifies the value of the pitch rotation angle.
+\\
+\Option{SEISphereRotationRoll} &
+\Default{0} &
+Specifies the value of the roll rotation angle.
+\\
+\end{OptionTableNoShorthand}
+
+\begin{OptionTableNoShorthand}{Region-wise packing SEI message encoder parameters}{tab:sei-rwp}
+\Option{SEIRwpEnabled} &
+\Default{false} &
+Enables (true) or disables (false) the insertion of region-wise packing SEI message.
+\\
+\Option{SEIRwpCancelFlag} &
+\Default{true} &
+Indicates that RWP SEI message cancels the persistence (true) or follows (false).
+\\
+\Option{SEIRwpPersistenceFlag} &
+\Default{false} &
+Specifies the persistence of the RWP SEI message.
+\\
+\Option{SEIRwpConstituentPictureMatchingFlag} &
+\Default{false} &
+Specifies the RWP SEI message applies individually to each constituent picture (true) or to the projected picture (false).
+\\
+\Option{SEIRwpNumPackedRegions} &
+\Default{0} &
+Specifies the number of packed regions when constituent picture matching flag is equal to 0.
+\\
+\Option{SEIRwpProjPictureWidth} &
+\Default{0} &
+Specifies the width of the projected picture.
+\\
+\Option{SEIRwpProjPictureHeight} &
+\Default{0} &
+Specifies the height of the projected picture.
+\\
+\Option{SEIRwpPackedPictureWidth} &
+\Default{0} &
+Specifies the width of the packed picture.
+\\
+\Option{SEIRwpPackedPictureHeight} &
+\Default{0} &
+Specifies the height of the packed picture.
+\\
+\Option{SEIRwpTransformType} &
+\Default{} &
+An array that specifies the rotation and mirroring to be applied to the packed regions.
+\\
+\Option{SEIRwpGuardBandFlag} &
+\Default{} &
+An array that specifies the existence of guard band in the packed regions.
+\\
+\Option{SEIRwpProjRegionWidth} &
+\Default{} &
+An array that specifies the width of the projected regions.
+\\
+\Option{SEIRwpProjRegionHeight} &
+\Default{} &
+An array that specifies the height of the projected regions.
+\\
+\Option{SEIRwpGuardBandFlag} &
+\Default{} &
+An array that specifies the existence of guard band in the packed regions.
+\\
+\Option{SEIRwpProjRegionTop} &
+\Default{} &
+An array that specifies the top sample row of the projected regions.
+\\
+\Option{SEIRwpProjRegionLeft} &
+\Default{} &
+An array that specifies the left-most sample column of the projected regions. 
+\\
+\Option{SEIRwpPackedRegionWidth} &
+\Default{} &
+An array that specifies the width of the packed regions.
+\\
+\Option{SEIRwpPackedRegionHeight} &
+\Default{} &
+An array that specifies the height of the packed regions.
+\\
+\Option{SEIRwpPackedRegionTop} &
+\Default{} &
+An array that specifies the top luma sample row of the packed regions.
+\\
+\Option{SEIRwpPackedRegionLeft} &
+\Default{} &
+An array that specifies the left-most luma sample column of the packed regions.
+\\
+\Option{SEIRwpLeftGuardBandWidth} &
+\Default{} &
+An array that specifies the width of the guard band on the left side of the packed regions.
+\\
+\Option{SEIRwpRightGuardBandWidth} &
+\Default{} &
+An array that specifies the width of the guard band on the right side of the packed regions.
+\\
+\Option{SEIRwpTopGuardBandHeight} &
+\Default{} &
+An array that specifies the height of the guard band above the packed regions.
+\\
+\Option{SEIRwpBottomGuardBandHeight} &
+\Default{} &
+An array that specifies the height of the guard band below the packed regions.
+\\
+\Option{SEIRwpGuardBandNotUsedForPredFlag} &
+\Default{} &
+An array that specifies whether the guard bands are used in the inter prediction process.
+\\
+\Option{SEIRwpGuardBandType} &
+\Default{} &
+An array that specifies the type of the guard bands for the packed regions.
+\\
+\end{OptionTableNoShorthand}
+
+\begin{OptionTableNoShorthand}{Omni Viewport SEI message encoder parameters}{tab:sei-omni-viewport}
+\Option{SEIOmniViewportEnabled} &
+\Default{false} &
+Enables (true) or disables (false) the insertion of omni viewport SEI message.
+\\
+\Option{SEIOmniViewportId} &
+\Default{0} &
+Contains an identifying number that may be used to identify the purpose of the one or more recommended viewport regions.
+\\
+\Option{SEIOmniViewportCancelFlag} &
+\Default{true} &
+Indicates that the omni viewport SEI message cancels the persistence (true) or follows (false).
+\\
+\Option{SEIOmniViewportPersistenceFlag} &
+\Default{false} &
+Specifies the persistence of the omni viewport SEI message.
+\\
+\Option{SEIOmniViewportCntMinus1} &
+\Default{0} &
+Specifies the number of recommended viewport regions minus 1.
+\\
+\Option{SEIOmniViewportAzimuthCentre} &
+\Default{} &
+An array that indicates the azimuth of the centre of the i-th recommended viewport region.
+\\
+\Option{SEIOmniViewportElevationCentre} &
+\Default{} &
+An array that indicates the elevation of the centre of the i-th recommended viewport region.
+\\
+\Option{SEIOmniViewportTiltCentre} &
+\Default{} &
+An array that indicates the tilt angle of the i-th recommended viewport region.
+\\
+\Option{SEIOmniViewportHorRange} &
+\Default{} &
+An array that indicates the azimuth range of the i-th recommended viewport region.
+\\
+\Option{SEIOmniViewportVerRange} &
+\Default{} &
+An array that indicates the elevation range of the i-th recommended viewport region.
+\\
+\end{OptionTableNoShorthand}
+
+\begin{OptionTableNoShorthand}{Sample Aspect Ratio Information SEI message encoder parameters}{tab:sei-sari}
+\Option{SEISampleAspectRatioInfo} &
+\Default{false} &
+Enables (true) or disables (false) the insertion of Sample Aspect Ratio Information SEI message.
+\\
+\Option{SEISARICancelFlag} &
+\Default{true} &
+Indicates that the Sample Aspect Ratio Information SEI message cancels the persistence (true) or follows (false).
+\\
+\Option{SEISARIPersistenceFlag} &
+\Default{false} &
+Specifies the persistence of the Sample Aspect Ratio Information SEI message.
+\\
+\Option{SEISARIAspectRatioIdc} &
+\Default{0} &
+Specifies aspect ratio IDC as defined in the standard.
+\\
+\Option{SEISARISarWidth} &
+\Default{0} &
+Specifies the horizontal size of the sample aspect ratio, if SEISARIAspectRatioIdc is equal to 255.
+\\
+\Option{SEISARISarHeight} &
+\Default{0} &
+Specifies the vertical size of the sample aspect ratio, if SEISARIAspectRatioIdc is equal to 255.
+\\
+\end{OptionTableNoShorthand}
+
+\begin{OptionTableNoShorthand}{Frame-Field Information SEI message encoder parameters}{tab:sei-frame-field}
+\Option{SEIFrameFieldInfo} &
+\Default{false} &
+Enables (true) or disables (false) the insertion of Frame-Field Information SEI message.
+\\
+\end{OptionTableNoShorthand}
+
+\begin{OptionTableNoShorthand}{Subpicture Level Information SEI message encoder parameters}{tab:sei-subpic-level}
+\Option{SEISubpictureLevelInfo} &
+\Default{false} &
+Enables (true) or disables (false) the insertion of Subpicture Level Information SEI message.
+Note, currently no other configuration options are available, because this depends on the number of subpictures,
+which are still not supported in the software. An example SEI with dummy values is generated, when the option is enabled.
+\\
+\end{OptionTableNoShorthand}
+
+
+\begin{OptionTableNoShorthand}{Content light level info SEI message encoder parameters}{tab:sei-content-light-level}
+\Option{SEICLLEnabled} &
+\Default{false} &
+Enables or disables the insertion of the content light level SEI message.
+\\
+\Option{SEICLLMaxContentLightLevel} &
+\Default{4000} &
+When not equal to 0, specifies an upper bound on the maximum light level among all individual samples in a 4:4:4 representation of red, green, and blue colour primary intensities in the linear light domain for the pictures of the CLVS, in units of candelas per square metre. When equal to 0, no such upper bound is indicated.
+\\
+\Option{SEICLLMaxPicAvgLightLevel} &
+\Default{0} &
+When not equal to 0, specifies an upper bound on the maximum average light level among the samples in a 4:4:4 representation of red, green, and blue colour primary intensities in the linear light domain for any individual picture of the CLVS, in units of candelas per square metre. When equal to 0, no such upper bound is indicated.
+\\
+\end{OptionTableNoShorthand}
+
+
+
+\begin{OptionTableNoShorthand}{Alternative transfer characteristics SEI message encoder parameters}{tab:sei-alternative-transfer-characteristics}
+\Option{SEIPreferredTransferCharacteristics} &
+\Default{18} &
+Indicates a preferred alternative value for the transfer\_characteristics syntax element that is indicated by the colour description syntax of VUI parameters.
+\\
+\end{OptionTableNoShorthand}
+
+
+
+\begin{OptionTableNoShorthand}{Ambient viewing environment SEI message encoder parameters}{tab:sei-ambient-viewing-environment}
+\Option{SEIAVEEnabled} &
+\Default{false} &
+Enables or disables the insertion of the ambient viewing environment SEI message.
+\\
+\Option{SEIAVEAmbientIlluminance} &
+\Default{100000} &
+Specifies the environmental illuminance of the ambient viewing environment in units of 1/10000 lux. The value shall not be 0.
+\\
+\Option{SEIAVEAmbientLightX} &
+\Default{15635} &
+Specifies the x chromaticity coordinate, according to the CIE 1931 definition, of the environmental ambient light in the nominal viewing environment in normalized increments of 1/50000. The value shall be in the range of 0 to 50,000, inclusive.
+\\
+\Option{SEIAVEAmbientLightY} &
+\Default{16450} &
+Specifies the y chromaticity coordinate, according to the CIE 1931 definition, of the environmental ambient light in the nominal viewing environment in normalized increments of 1/50000. The value shall be in the range of 0 to 50,000, inclusive.
+\\
+\end{OptionTableNoShorthand}
+
+
+
+\begin{OptionTableNoShorthand}{Content colour volume SEI message encoder parameters}{tab:sei-content-colour-volume}
+\Option{SEICCVEnabled} &
+\Default{false} &
+Enables or disables the insertion of the content colour volume SEI message.
+\\
+\Option{SEICCVCancelFlag} &
+\Default{0} &
+Specifies the persistence of any previous content colour volume SEI message in output order.
+\\
+\Option{SEICCVPersistenceFlag} &
+\Default{1} &
+Specifies the persistence of the content colour volume SEI message for the current layer.
+\\
+\Option{SEICCVPrimariesPresent} &
+\Default{1} &
+Specifies whether the CCV primaries are present in the content colour volume SEI message.
+\\
+\Option{m_ccvSEIPrimariesX0} &
+\Default{0.300} &
+Specifies the x coordinate, according to the CIE 1931 definition, of the first (green) colour primary component in normalized increments of 1/50000.
+\\
+\Option{m_ccvSEIPrimariesY0} &
+\Default{0.600} &
+Specifies the y coordinate, according to the CIE 1931 definition, of the first (green) colour primary component in normalized increments of 1/50000.
+\\
+\Option{m_ccvSEIPrimariesX1} &
+\Default{0.150} &
+Specifies the x coordinate, according to the CIE 1931 definition, of the second (blue) colour primary component in normalized increments of 1/50000.
+\\
+\Option{m_ccvSEIPrimariesY1} &
+\Default{0.060} &
+Specifies the y coordinate, according to the CIE 1931 definition, of the second (blue) colour primary component in normalized increments of 1/50000.
+\\
+\Option{m_ccvSEIPrimariesX2} &
+\Default{0.640} &
+Specifies the x coordinate, according to the CIE 1931 definition, of the third (red) colour primary component in normalized increments of 1/50000.
+\\
+\Option{m_ccvSEIPrimariesY2} &
+\Default{0.330} &
+Specifies the y coordinate, according to the CIE 1931 definition, of the third (red) colour primary component in normalized increments of 1/50000.
+\\
+\Option{SEICCVMinLuminanceValuePresent} &
+\Default{1} &
+Specifies whether the CCV min luminance value is present in the content colour volume SEI message.
+\\
+\Option{SEICCVMinLuminanceValue} &
+\Default{0.0} &
+Specifies the CCV min luminance value in the content colour volume SEI message.
+\\
+\Option{SEICCVMaxLuminanceValuePresent} &
+\Default{1} &
+Specifies whether the CCV max luminance value is present in the content colour volume SEI message.
+\\
+\Option{SEICCVMaxLuminanceValue} &
+\Default{0.1} &
+Specifies the CCV max luminance value in the content colour volume SEI message.
+\\
+\Option{SEICCVAvgLuminanceValuePresent} &
+\Default{1} &
+Specifies whether the CCV avg luminance value is present in the content colour volume SEI message.
+\\
+\Option{SEICCVAvgLuminanceValue} &
+\Default{0.01} &
+Specifies the CCV avg luminance value in the content colour volume SEI message.
+\\
+\end{OptionTableNoShorthand}
+
+
 
 
 %\Option{SEITimeCode} &
@@ -3054,7 +3891,7 @@ Numerous constants that guard individual adoptions are defined within
 \section{Using the decoder}
 \subsection{General}
 \begin{minted}{bash}
-TAppDecoder -b str.bin -o dec.yuv [options]
+DecoderApp -b str.bin -o dec.yuv [options]
 \end{minted}
 
 \begin{OptionTableNoShorthand}{Decoder options}{tab:decoder-options}
@@ -3073,7 +3910,7 @@ Defines the input bit stream file name.
 \Option{ReconFile (-o)} &
 %\ShortOption{-o} &
 \Default{\NotSet} &
-Defines reconstructed YUV file name. If empty, no file is generated.
+Defines the reconstructed video file name. If empty, no file is generated. If the bitstream contains multiple layers and no single target layer is specified (i.e. TargetLayer=-1), a reconstructed file is written for each layer and the layer index is added as a suffix to ReconFile. If one or more dots exist in the file name, the layer id is added before the last dot, e.g. 'decoded.yuv' becomes 'decoded0.yuv' for layer id 0, 'decoded' becomes 'decoded0'.
 \\
 
 \Option{SkipFrames (-s)} &
@@ -3109,6 +3946,13 @@ Defines the chroma bit-depth of the reconstructed YUV file (the value 0 indicate
 that the native bit-depth is used)
 \\
 
+\Option{TargetLayer (-p)} &
+%\ShortOption{-p} &
+\Default{-1 \\ (Native)} &
+Specifies the target bitstream layer to be decoded (the value -1 indicates
+that the whole bitstream is decoded).
+\\
+
 \Option{SEIDecodedPictureHash} &
 %\ShortOption{\None} &
 \Default{1} &
@@ -3153,17 +3997,6 @@ Specifies that the colour remapping SEI message should be applied to the output
 If no value is specified, the SEI message is ignored and no mapping is applied.
 \\
 
-\Option{RespectDefDispWindow (-w)} &
-%\ShortOption{-w} &
-\Default{0} &
-Video region to be output by the decoder.
-\par
-\begin{tabular}{cp{0.45\textwidth}}
-  0 & Output content inside the conformance window. \\
-  1 & Output content inside the default window. \\
-\end{tabular}
-\\
-
 \Option{OutputColourSpaceConvert} &
 \Default{\NotSet} &
 Specifies the colour space conversion to apply to 444 video. Permitted values are:
@@ -3176,6 +4009,11 @@ Specifies the colour space conversion to apply to 444 video. Permitted values ar
 If no value is specified, no colour space conversion is applied. The list may eventually also include RGB to YCbCr or YCgCo conversions.\\
 \\
 
+\Option{PYUV} &
+\Default{false} &
+When true, output 10-bit and 12-bit YUV data as 5-byte and 3-byte (respectively) packed YUV data. See doc/pyuv_format.pdf for details. Ignored for interlaced output.
+\\
+
 \Option{SEINoDisplay} &
 \Default{false} &
 When true, do not output frames for which there is an SEI NoDisplay message.
@@ -3417,4 +4255,28 @@ DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL,
 \end{minted}
 \end{description}
 
+
+
+
+
+\section{Using the stream merge tool}
+\label{sec:stream-merge-tool}
+
+The StreamMergeApp tool takes multiple single-layer (single nuh\_layer\_id) bitstreams
+as inputs and merges them into a multi-layer bitstream by interleaving the NALUs
+from the input single-layer bitstreams. During the merge, the tool assigns a new unique
+nuh\_layer\_id to each input bitstream. The decoder can then select which layer
+is to be decoded through the command line option "-p nuh\_layer\_id".
+
+\subsection{Usage}
+\label{sec:stream-merge-usage}
+
+\begin{minted}{bash}
+StreamMergeApp 	<bitstream1> <bitstream2> [<bitstream3> ...] <outfile>
+\end{minted}
+
+The command line options bitstreamX specify the file names of the input single-layer
+bitstreams. At least two input bitstreams need to be specified. The merged multi-layer
+bitstream will be stored into the outfile.
+
 \end{document}
diff --git a/source/App/DecoderApp/DecApp.cpp b/source/App/DecoderApp/DecApp.cpp
index 7735309afc36ac6d4e62a2e33522f93f757c0f1a..7d9adcb83ad87c01360e65189c64c0884c574f86 100644
--- a/source/App/DecoderApp/DecApp.cpp
+++ b/source/App/DecoderApp/DecApp.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -111,77 +111,76 @@ uint32_t DecApp::decode()
   }
 
   // main decoder loop
-  bool openedReconFile = false; // reconstruction file not yet opened. (must be performed after SPS is seen)
   bool loopFiltered = false;
 
+  bool bPicSkipped = false;
+
   while (!!bitstreamFile)
   {
-    /* location serves to work around a design fault in the decoder, whereby
-     * the process of reading a new slice that is the first slice of a new frame
-     * requires the DecApp::decode() method to be called again with the same
-     * nal unit. */
-#if RExt__DECODER_DEBUG_STATISTICS
-    CodingStatistics& stat = CodingStatistics::GetSingletonInstance();
-    CHECK(m_statMode < STATS__MODE_NONE || m_statMode > STATS__MODE_ALL, "Wrong coding statistics output mode");
-    stat.m_mode = m_statMode;
-
-    CodingStatistics::CodingStatisticsData* backupStats = new CodingStatistics::CodingStatisticsData(CodingStatistics::GetStatistics());
-#endif
-
-#if RExt__DECODER_DEBUG_BIT_STATISTICS
-    streampos location = bitstreamFile.tellg() - streampos(bytestream.GetNumBufferedBytes());
-#else
-    streampos location = bitstreamFile.tellg();
-#endif
-    AnnexBStats stats = AnnexBStats();
-
     InputNALUnit nalu;
-    byteStreamNALUnit(bytestream, nalu.getBitstream().getFifo(), stats);
-
-    // call actual decoding function
-    bool bNewPicture = false;
-    if (nalu.getBitstream().getFifo().empty())
-    {
-      /* this can happen if the following occur:
-       *  - empty input file
-       *  - two back-to-back start_code_prefixes
-       *  - start_code_prefix immediately followed by EOF
-       */
-      msg( ERROR, "Warning: Attempt to decode an empty NAL unit\n");
-    }
-    else
-    {
-      read(nalu);
-
-      if( (m_iMaxTemporalLayer >= 0 && nalu.m_temporalId > m_iMaxTemporalLayer) || !isNaluWithinTargetDecLayerIdSet(&nalu)  )
+    nalu.m_nalUnitType = NAL_UNIT_INVALID;
+
+    // determine if next NAL unit will be the first one from a new picture
+    bool bNewPicture = isNewPicture(&bitstreamFile, &bytestream);
+    bool bNewAccessUnit = bNewPicture && isNewAccessUnit( bNewPicture, &bitstreamFile, &bytestream );
+    if(!bNewPicture) 
+    { 
+      AnnexBStats stats = AnnexBStats();
+
+      // find next NAL unit in stream
+      byteStreamNALUnit(bytestream, nalu.getBitstream().getFifo(), stats);
+      if (nalu.getBitstream().getFifo().empty())
       {
-        bNewPicture = false;
+        /* this can happen if the following occur:
+         *  - empty input file
+         *  - two back-to-back start_code_prefixes
+         *  - start_code_prefix immediately followed by EOF
+         */
+        msg( ERROR, "Warning: Attempt to decode an empty NAL unit\n");
       }
       else
       {
-        bNewPicture = m_cDecLib.decode(nalu, m_iSkipFrame, m_iPOCLastDisplay);
-        if (bNewPicture)
+        // read NAL unit header
+        read(nalu);
+
+        // flush output for first slice of an IDR picture
+        if(m_cDecLib.getFirstSliceInPicture() &&
+            (nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_W_RADL ||
+             nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_N_LP))
         {
-          bitstreamFile.clear();
-          /* location points to the current nalunit payload[1] due to the
-           * need for the annexB parser to read three extra bytes.
-           * [1] except for the first NAL unit in the file
-           *     (but bNewPicture doesn't happen then) */
-#if RExt__DECODER_DEBUG_BIT_STATISTICS
-          bitstreamFile.seekg(location);
-          bytestream.reset();
-          CodingStatistics::SetStatistics(*backupStats);
-#else
-          bitstreamFile.seekg(location-streamoff(3));
-          bytestream.reset();
-#endif
+          xFlushOutput(pcListPic, nalu.m_nuhLayerId);
+        }
+
+        // parse NAL unit syntax if within target decoding layer
+        if ((m_iMaxTemporalLayer < 0 || nalu.m_temporalId <= m_iMaxTemporalLayer) && isNaluWithinTargetDecLayerIdSet(&nalu))
+        {
+          if (bPicSkipped)
+          {
+            if ((nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_TRAIL) || (nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_STSA) || (nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_RASL) || (nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_RADL) || (nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_W_RADL) || (nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_N_LP) || (nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_CRA) || (nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_GDR))
+            {
+              if (m_cDecLib.isSliceNaluFirstInAU(true, nalu))
+              {
+                m_cDecLib.resetAccessUnitNals();
+                m_cDecLib.resetAccessUnitApsNals();
+              }
+              bPicSkipped = false;
+            }
+          }
+          m_cDecLib.decode(nalu, m_iSkipFrame, m_iPOCLastDisplay);
+          if (nalu.m_nalUnitType == NAL_UNIT_VPS)
+          {
+            deriveOutputLayerSet();
+          }
+        }
+        else
+        {
+          bPicSkipped = true;
         }
       }
     }
 
 
-
-    if( ( bNewPicture || !bitstreamFile || nalu.m_nalUnitType == NAL_UNIT_EOS ) && !m_cDecLib.getFirstSliceInSequence() )
+    if ((bNewPicture || !bitstreamFile || nalu.m_nalUnitType == NAL_UNIT_EOS) && !m_cDecLib.getFirstSliceInSequence() && !bPicSkipped)
     {
       if (!loopFiltered || bitstreamFile)
       {
@@ -203,7 +202,7 @@ uint32_t DecApp::decode()
 
     if( pcListPic )
     {
-      if ( (!m_reconFileName.empty()) && (!openedReconFile) )
+      if( !m_reconFileName.empty() && !m_cVideoIOYuvReconFile[nalu.m_nuhLayerId].isOpen() )
       {
         const BitDepths &bitDepths=pcListPic->front()->cs->sps->getBitDepths(); // use bit depths of first reconstructed picture.
         for( uint32_t channelType = 0; channelType < MAX_NUM_CHANNEL_TYPE; channelType++ )
@@ -219,51 +218,46 @@ uint32_t DecApp::decode()
           EXIT ("Invalid output bit-depth for packed YUV output, aborting\n");
         }
 
-        m_cVideoIOYuvReconFile.open( m_reconFileName, true, m_outputBitDepth, m_outputBitDepth, bitDepths.recon ); // write mode
-        openedReconFile = true;
+        std::string reconFileName = m_reconFileName;
+        if (m_reconFileName.compare("/dev/null") && (m_cDecLib.getVPS() != nullptr) && (m_cDecLib.getVPS()->getMaxLayers() > 1) && (isNaluWithinTargetOutputLayerIdSet(&nalu)))
+        {
+          size_t pos = reconFileName.find_last_of('.');
+          if (pos != string::npos)
+          {
+            reconFileName.insert( pos, std::to_string( nalu.m_nuhLayerId ) );
+          }
+          else
+          {
+            reconFileName.append( std::to_string( nalu.m_nuhLayerId ) );
+          }
+        }
+        if(((m_cDecLib.getVPS() != nullptr) &&
+              ((m_cDecLib.getVPS()->getMaxLayers() == 1) || (isNaluWithinTargetOutputLayerIdSet(&nalu)))) ||
+            (m_cDecLib.getVPS() == nullptr))
+        m_cVideoIOYuvReconFile[nalu.m_nuhLayerId].open(reconFileName, true, m_outputBitDepth, m_outputBitDepth, bitDepths.recon); // write mode
       }
       // write reconstruction to file
       if( bNewPicture )
       {
         xWriteOutput( pcListPic, nalu.m_temporalId );
       }
-      if ( (bNewPicture || nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_CRA) && m_cDecLib.getNoOutputPriorPicsFlag() )
-      {
-        m_cDecLib.checkNoOutputPriorPics( pcListPic );
-        m_cDecLib.setNoOutputPriorPicsFlag (false);
-      }
-      if ( bNewPicture &&
-#if !JVET_M0101_HLS
-           (   nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_W_RADL
-            || nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_N_LP
-            || nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_BLA_N_LP
-            || nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_BLA_W_RADL
-            || nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_BLA_W_LP ) )
-#else
-          (   nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_W_RADL
-            || nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_N_LP) )
-#endif
-      {
-        xFlushOutput( pcListPic );
-      }
       if (nalu.m_nalUnitType == NAL_UNIT_EOS)
       {
         xWriteOutput( pcListPic, nalu.m_temporalId );
         m_cDecLib.setFirstSliceInPicture (false);
       }
       // write reconstruction to file -- for additional bumping as defined in C.5.2.3
-#if !JVET_M0101_HLS
-      if(!bNewPicture && nalu.m_nalUnitType >= NAL_UNIT_CODED_SLICE_TRAIL_N && nalu.m_nalUnitType <= NAL_UNIT_RESERVED_VCL31)
-#else
-      if (!bNewPicture && nalu.m_nalUnitType >= NAL_UNIT_CODED_SLICE_TRAIL && nalu.m_nalUnitType <= NAL_UNIT_RESERVED_VCL15)
-#endif
+      if (!bNewPicture && ((nalu.m_nalUnitType >= NAL_UNIT_CODED_SLICE_TRAIL && nalu.m_nalUnitType <= NAL_UNIT_RESERVED_IRAP_VCL_12)
+        || (nalu.m_nalUnitType >= NAL_UNIT_CODED_SLICE_IDR_W_RADL && nalu.m_nalUnitType <= NAL_UNIT_CODED_SLICE_GDR)))
       {
         xWriteOutput( pcListPic, nalu.m_temporalId );
       }
     }
-#if RExt__DECODER_DEBUG_STATISTICS
-    delete backupStats;
-#endif
+    if(bNewAccessUnit) 
+    {
+        m_cDecLib.resetAccessUnitNals();
+        m_cDecLib.resetAccessUnitApsNals();
+    }
   }
 
   xFlushOutput( pcListPic );
@@ -285,6 +279,366 @@ uint32_t DecApp::decode()
   return nRet;
 }
 
+bool DecApp::deriveOutputLayerSet()  // rebuilds m_targetOutputLayerIdSet and m_targetDecLayerIdSet for OLS index m_iTargetOLS from m_cDecLib.getVPS(); always returns true
+{
+  int vps_max_layers_minus1 = m_cDecLib.getVPS()->getMaxLayers() - 1;
+  int TotalNumOlss = 0;  // NOTE(review): stays 0 if none of the branches below matches; index 0 is then still written further down - confirm this cannot happen
+  int each_layer_is_an_ols_flag = m_cDecLib.getVPS()->getEachLayerIsAnOlsFlag();
+  int ols_mode_idc = m_cDecLib.getVPS()->getOlsModeIdc();
+  int num_output_layer_sets_minus1 = m_cDecLib.getVPS()->getNumOutputLayerSets() - 1;
+  int i = 0, j = 0, k = 0, r = 0;
+  int*  NumOutputLayersInOls;    // [ols] number of output layers
+  int*  NumLayersInOls;          // [ols] number of layers (output layers plus their reference layers)
+  int** OutputLayerIdInOls;      // [ols][n] layer id of the n-th output layer
+  int** OutputLayerIdx;          // [ols][n] layer index of the n-th output layer (only filled for ols_mode_idc == 2)
+  int** layerIncludedInOlsFlag;  // [ols][layerIdx] non-zero if the layer belongs to the OLS (only filled for ols_mode_idc == 2)
+  int** LayerIdInOls;            // [ols][n] layer id of the n-th layer
+  int** dependencyFlag;          // [i][j] non-zero if layer j is a direct or indirect reference layer of layer i
+  int** RefLayerIdx;             // [i][n] layer index of the n-th reference layer of layer i
+  int*  NumRefLayers;            // [i] number of entries used in RefLayerIdx[i]
+
+  if (vps_max_layers_minus1 == 0)  // single layer: exactly one OLS
+    TotalNumOlss = 1;
+  else if (each_layer_is_an_ols_flag || ols_mode_idc == 0 || ols_mode_idc == 1)  // one OLS per layer
+    TotalNumOlss = vps_max_layers_minus1 + 1;
+  else if (ols_mode_idc == 2)  // OLS count taken from the VPS
+    TotalNumOlss = num_output_layer_sets_minus1 + 1;
+
+  NumOutputLayersInOls = new int[TotalNumOlss];  // fix: indexed by OLS index i < TotalNumOlss below, so it must be sized by TotalNumOlss, not getNumOutputLayerSets()
+  NumLayersInOls = new int[TotalNumOlss];        // fix: same sizing rule as NumOutputLayersInOls
+  OutputLayerIdInOls = new int*[TotalNumOlss];
+  OutputLayerIdx = new int*[TotalNumOlss];
+  layerIncludedInOlsFlag = new int*[TotalNumOlss];
+  LayerIdInOls = new int*[TotalNumOlss];
+
+  for (i = 0; i < TotalNumOlss; i++)  // every per-OLS row can hold one entry per layer
+  {
+    OutputLayerIdInOls[i] = new int[vps_max_layers_minus1 + 1];
+    OutputLayerIdx[i] = new int[vps_max_layers_minus1 + 1];
+    layerIncludedInOlsFlag[i] = new int[vps_max_layers_minus1 + 1];
+    LayerIdInOls[i] = new int[vps_max_layers_minus1 + 1];
+  }
+
+  dependencyFlag = new int*[vps_max_layers_minus1 + 1];
+  RefLayerIdx = new int*[vps_max_layers_minus1 + 1];
+  NumRefLayers = new int[vps_max_layers_minus1 + 1];
+
+  for (i = 0; i <= vps_max_layers_minus1; i++)
+  {
+    dependencyFlag[i] = new int[vps_max_layers_minus1 + 1];
+    RefLayerIdx[i] = new int[vps_max_layers_minus1 + 1];
+  }
+
+  for (i = 0; i <= vps_max_layers_minus1; i++) {  // extend the direct reference flags to indirect references
+    for (j = 0; j <= vps_max_layers_minus1; j++) {
+      dependencyFlag[i][j] = m_cDecLib.getVPS()->getDirectRefLayerFlag(i, j);
+      for (k = 0; k < i; k++)  // rows k < i are already complete, so they can be reused here
+        if (m_cDecLib.getVPS()->getDirectRefLayerFlag(i, k) && dependencyFlag[k][j])
+          dependencyFlag[i][j] = 1;
+    }
+  }
+  for (i = 0; i <= vps_max_layers_minus1; i++)  // compact each dependency row into a list of reference layer indices
+  {
+    for (j = 0, r = 0; j <= vps_max_layers_minus1; j++)
+    {
+      if (dependencyFlag[i][j])
+        RefLayerIdx[i][r++] = j;
+    }
+    NumRefLayers[i] = r;
+  }
+
+  NumOutputLayersInOls[0] = 1;  // OLS 0 always outputs only the layer with index 0
+  OutputLayerIdInOls[0][0] = m_cDecLib.getVPS()->getLayerId(0);
+  for (i = 1; i < TotalNumOlss; i++)
+  {
+    if (each_layer_is_an_ols_flag || ols_mode_idc == 0)  // OLS i outputs only layer i
+    {
+      NumOutputLayersInOls[i] = 1;
+      OutputLayerIdInOls[i][0] = m_cDecLib.getVPS()->getLayerId(i);
+    }
+    else if (ols_mode_idc == 1) {  // OLS i outputs layers 0..i
+      NumOutputLayersInOls[i] = i + 1;
+      for (j = 0; j < NumOutputLayersInOls[i]; j++)
+        OutputLayerIdInOls[i][j] = m_cDecLib.getVPS()->getLayerId(j);
+    }
+    else if (ols_mode_idc == 2) {  // output layers are taken from the per-OLS output layer flags of the VPS
+      for (j = 0; j <= vps_max_layers_minus1; j++)
+      {
+        layerIncludedInOlsFlag[i][j] = 0;
+      }
+      for (k = 0, j = 0; k <= vps_max_layers_minus1; k++)
+      {
+        if (m_cDecLib.getVPS()->getOlsOutputLayerFlag(i, k))
+        {
+          layerIncludedInOlsFlag[i][k] = 1;
+          OutputLayerIdx[i][j] = k;
+          OutputLayerIdInOls[i][j++] = m_cDecLib.getVPS()->getLayerId(k);
+        }
+      }
+      NumOutputLayersInOls[i] = j;
+      for (j = 0; j < NumOutputLayersInOls[i]; j++)  // also include every reference layer of each output layer
+      {
+        int idx = OutputLayerIdx[i][j];
+        for (k = 0; k < NumRefLayers[idx]; k++)
+          layerIncludedInOlsFlag[i][RefLayerIdx[idx][k]] = 1;
+      }
+    }
+  }
+
+  m_targetOutputLayerIdSet.clear();  // NOTE(review): m_iTargetOLS is not range-checked against TotalNumOlss here - confirm it is validated by the caller
+  for (i = 0; i < NumOutputLayersInOls[m_iTargetOLS]; i++)
+    m_targetOutputLayerIdSet.push_back(OutputLayerIdInOls[m_iTargetOLS][i]);
+
+  NumLayersInOls[0] = 1;  // OLS 0 contains only the layer with index 0
+  LayerIdInOls[0][0] = m_cDecLib.getVPS()->getLayerId(0);
+  for (i = 1; i < TotalNumOlss; i++)
+  {
+    if (each_layer_is_an_ols_flag)  // OLS i contains only layer i
+    {
+      NumLayersInOls[i] = 1;
+      LayerIdInOls[i][0] = m_cDecLib.getVPS()->getLayerId(i);
+    }
+    else if (ols_mode_idc == 0 || ols_mode_idc == 1)  // OLS i contains layers 0..i
+    {
+      NumLayersInOls[i] = i + 1;
+      for (j = 0; j < NumLayersInOls[i]; j++)
+        LayerIdInOls[i][j] = m_cDecLib.getVPS()->getLayerId(j);
+    }
+    else if (ols_mode_idc == 2)  // OLS i contains every layer flagged in layerIncludedInOlsFlag[i]
+    {
+      for (k = 0, j = 0; k <= vps_max_layers_minus1; k++)
+        if (layerIncludedInOlsFlag[i][k])
+          LayerIdInOls[i][j++] = m_cDecLib.getVPS()->getLayerId(k);
+      NumLayersInOls[i] = j;
+    }
+  }
+
+  m_targetDecLayerIdSet.clear();  // layers that have to be decoded for the target OLS
+  for (i = 0; i < NumLayersInOls[m_iTargetOLS]; i++)
+    m_targetDecLayerIdSet.push_back(LayerIdInOls[m_iTargetOLS][i]);
+
+  delete[] NumOutputLayersInOls;  // release all temporary derivation tables
+  delete[] NumLayersInOls;
+  delete[] NumRefLayers;
+
+  for (i = 0; i < TotalNumOlss; i++)
+  {
+    delete[] OutputLayerIdInOls[i];
+    delete[] OutputLayerIdx[i];
+    delete[] layerIncludedInOlsFlag[i];
+    delete[] LayerIdInOls[i];
+  }
+  delete[] OutputLayerIdInOls;
+  delete[] OutputLayerIdx;
+  delete[] layerIncludedInOlsFlag;
+  delete[] LayerIdInOls;
+
+  for (i = 0; i <= vps_max_layers_minus1; i++)
+  {
+    delete[] dependencyFlag[i];
+    delete[] RefLayerIdx[i];
+  }
+  delete[] dependencyFlag;
+  delete[] RefLayerIdx;
+
+  return true;
+}
+
+/**
+ - Look ahead through the upcoming NAL units to decide whether the next NAL unit is the first one of a new picture. Returns false immediately while m_cDecLib.getFirstSliceInPicture() is true. The position of bitstreamFile is restored and bytestream is reset before returning.
+ */
+bool DecApp::isNewPicture(ifstream *bitstreamFile, class InputByteStream *bytestream)
+{
+  bool ret = false;
+  bool finished = false;
+
+  // cannot be a new picture if there haven't been any slices yet
+  if(m_cDecLib.getFirstSliceInPicture())
+  {
+    return false;
+  }
+
+  // save stream position for backup (NOTE(review): backupStats is allocated under RExt__DECODER_DEBUG_STATISTICS but restored and deleted under RExt__DECODER_DEBUG_BIT_STATISTICS below - confirm both macros are always enabled together)
+#if RExt__DECODER_DEBUG_STATISTICS
+  CodingStatistics::CodingStatisticsData* backupStats = new CodingStatistics::CodingStatisticsData(CodingStatistics::GetStatistics());
+  streampos location = bitstreamFile->tellg() - streampos(bytestream->GetNumBufferedBytes());
+#else
+  streampos location = bitstreamFile->tellg();
+#endif
+
+  // look ahead until picture start location is determined (or the stream goes bad, in which case ret stays false)
+  while (!finished && !!(*bitstreamFile))
+  {
+    AnnexBStats stats = AnnexBStats();
+    InputNALUnit nalu;
+    byteStreamNALUnit(*bytestream, nalu.getBitstream().getFifo(), stats);
+    if (nalu.getBitstream().getFifo().empty())
+    {
+      msg( ERROR, "Warning: Attempt to decode an empty NAL unit\n");
+    }
+    else
+    {
+      // get next NAL unit type
+      read(nalu);
+      switch( nalu.m_nalUnitType ) {
+
+        // NUT that indicate the start of a new picture
+        case NAL_UNIT_ACCESS_UNIT_DELIMITER:
+        case NAL_UNIT_DPS:
+        case NAL_UNIT_VPS:
+        case NAL_UNIT_SPS:
+        case NAL_UNIT_PPS:
+        case NAL_UNIT_PH:
+          ret = true;
+          finished = true;
+          break;
+        
+        // NUT that are not the start of a new picture
+        case NAL_UNIT_CODED_SLICE_TRAIL:
+        case NAL_UNIT_CODED_SLICE_STSA:
+        case NAL_UNIT_CODED_SLICE_RASL:
+        case NAL_UNIT_CODED_SLICE_RADL:
+        case NAL_UNIT_RESERVED_VCL_4:
+        case NAL_UNIT_RESERVED_VCL_5:
+        case NAL_UNIT_RESERVED_VCL_6:
+        case NAL_UNIT_CODED_SLICE_IDR_W_RADL:
+        case NAL_UNIT_CODED_SLICE_IDR_N_LP:
+        case NAL_UNIT_CODED_SLICE_CRA:
+        case NAL_UNIT_CODED_SLICE_GDR:
+        case NAL_UNIT_RESERVED_IRAP_VCL_11:
+        case NAL_UNIT_RESERVED_IRAP_VCL_12:
+        case NAL_UNIT_EOS:
+        case NAL_UNIT_EOB:
+        case NAL_UNIT_SUFFIX_APS:
+        case NAL_UNIT_SUFFIX_SEI:
+        case NAL_UNIT_FD:
+          ret = false;
+          finished = true;
+          break;
+        
+        // NUT that might indicate the start of a new picture - keep looking
+        case NAL_UNIT_PREFIX_APS:
+        case NAL_UNIT_PREFIX_SEI:
+        case NAL_UNIT_RESERVED_NVCL_26:
+        case NAL_UNIT_RESERVED_NVCL_27:
+        case NAL_UNIT_UNSPECIFIED_28:
+        case NAL_UNIT_UNSPECIFIED_29:
+        case NAL_UNIT_UNSPECIFIED_30:
+        case NAL_UNIT_UNSPECIFIED_31:
+        default:
+          break;
+      }
+    }
+  }
+  
+  // restore previous stream location - minus 3 due to the need for the annexB parser to read three extra bytes
+#if RExt__DECODER_DEBUG_BIT_STATISTICS
+  bitstreamFile->clear();
+  bitstreamFile->seekg(location);
+  bytestream->reset();
+  CodingStatistics::SetStatistics(*backupStats);
+  delete backupStats;
+#else
+  bitstreamFile->clear();
+  bitstreamFile->seekg(location-streamoff(3));
+  bytestream->reset();
+#endif
+
+  // return TRUE if next NAL unit is the start of a new picture
+  return ret;
+}
+
+/**
+ - Look ahead through the upcoming NAL units to decide whether the next NAL unit is the first one of a new access unit. Returns false immediately when newPicture is false. The position of bitstreamFile is restored and bytestream is reset before returning.
+ */
+bool DecApp::isNewAccessUnit( bool newPicture, ifstream *bitstreamFile, class InputByteStream *bytestream )
+{
+  bool ret = false;
+  bool finished = false;
+  
+  // can only be the start of an AU if this is the start of a new picture
+  if( newPicture == false )
+  {
+    return false;
+  }
+
+  // save stream position for backup (NOTE(review): backupStats is allocated under RExt__DECODER_DEBUG_STATISTICS but restored and deleted under RExt__DECODER_DEBUG_BIT_STATISTICS below - confirm both macros are always enabled together)
+#if RExt__DECODER_DEBUG_STATISTICS
+  CodingStatistics::CodingStatisticsData* backupStats = new CodingStatistics::CodingStatisticsData(CodingStatistics::GetStatistics());
+  streampos location = bitstreamFile->tellg() - streampos(bytestream->GetNumBufferedBytes());
+#else
+  streampos location = bitstreamFile->tellg();
+#endif
+
+  // look ahead until access unit start location is determined (or the stream goes bad, in which case ret stays false)
+  while (!finished && !!(*bitstreamFile))
+  {
+    AnnexBStats stats = AnnexBStats();
+    InputNALUnit nalu;
+    byteStreamNALUnit(*bytestream, nalu.getBitstream().getFifo(), stats);
+    if (nalu.getBitstream().getFifo().empty())
+    {
+      msg( ERROR, "Warning: Attempt to decode an empty NAL unit\n");
+    }
+    else
+    {
+      // get next NAL unit type
+      read(nalu);
+      switch( nalu.m_nalUnitType ) {
+        
+        // AUD always indicates the start of a new access unit
+        case NAL_UNIT_ACCESS_UNIT_DELIMITER:
+          ret = true;
+          finished = true;
+          break;
+
+        // slice types - decision is delegated to m_cDecLib.isSliceNaluFirstInAU()
+        case NAL_UNIT_CODED_SLICE_TRAIL:
+        case NAL_UNIT_CODED_SLICE_STSA:
+        case NAL_UNIT_CODED_SLICE_RASL:
+        case NAL_UNIT_CODED_SLICE_RADL:
+        case NAL_UNIT_CODED_SLICE_IDR_W_RADL:
+        case NAL_UNIT_CODED_SLICE_IDR_N_LP:
+        case NAL_UNIT_CODED_SLICE_CRA:
+        case NAL_UNIT_CODED_SLICE_GDR:
+          ret = m_cDecLib.isSliceNaluFirstInAU( newPicture, nalu );          
+          finished = true;
+          break;
+          
+        // NUT that are not the start of a new access unit
+        case NAL_UNIT_EOS:
+        case NAL_UNIT_EOB:
+        case NAL_UNIT_SUFFIX_APS:
+        case NAL_UNIT_SUFFIX_SEI:
+        case NAL_UNIT_FD:
+          ret = false;
+          finished = true;
+          break;
+        
+        // all other NUT - keep looking to find first VCL
+        default:
+          break;
+      }
+    }
+  }
+  
+  // restore previous stream location (no extra offset is applied here, unlike in isNewPicture)
+#if RExt__DECODER_DEBUG_BIT_STATISTICS
+  bitstreamFile->clear();
+  bitstreamFile->seekg(location);
+  bytestream->reset();
+  CodingStatistics::SetStatistics(*backupStats);
+  delete backupStats;
+#else
+  bitstreamFile->clear();
+  bitstreamFile->seekg(location);
+  bytestream->reset();
+#endif
+
+  // return TRUE if next NAL unit is the start of a new access unit
+  return ret;
+}
+
 // ====================================================================================================================
 // Protected member functions
 // ====================================================================================================================
@@ -303,18 +657,24 @@ void DecApp::xCreateDecLib()
 #endif
   );
   m_cDecLib.setDecodedPictureHashSEIEnabled(m_decodedPictureHashSEIEnabled);
+
+
   if (!m_outputDecodedSEIMessagesFilename.empty())
   {
     std::ostream &os=m_seiMessageFileStream.is_open() ? m_seiMessageFileStream : std::cout;
     m_cDecLib.setDecodedSEIMessageOutputStream(&os);
   }
+  m_cDecLib.initScalingList();
 }
 
 void DecApp::xDestroyDecLib()
 {
-  if ( !m_reconFileName.empty() )
+  if( !m_reconFileName.empty() )
   {
-    m_cVideoIOYuvReconFile.close();
+    for( auto & recFile : m_cVideoIOYuvReconFile )
+    {
+      recFile.second.close();
+    }
   }
 
   // destroy decoder class
@@ -394,11 +754,11 @@ void DecApp::xWriteOutput( PicList* pcListPic, uint32_t tId )
         numPicsNotYetDisplayed = numPicsNotYetDisplayed-2;
         if ( !m_reconFileName.empty() )
         {
-          const Window &conf = pcPicTop->cs->sps->getConformanceWindow();
-          const Window  defDisp = (m_respectDefDispWindow && pcPicTop->cs->sps->getVuiParametersPresentFlag()) ? pcPicTop->cs->sps->getVuiParameters()->getDefaultDisplayWindow() : Window();
+          const Window &conf = pcPicTop->cs->pps->getConformanceWindow();
           const bool isTff = pcPicTop->topField;
 
           bool display = true;
+#if HEVC_SEI
           if( m_decodedNoDisplaySEIEnabled )
           {
             SEIMessages noDisplay = getSeisByType( pcPic->SEIs, SEI::NO_DISPLAY );
@@ -408,16 +768,17 @@ void DecApp::xWriteOutput( PicList* pcListPic, uint32_t tId )
               display = false;
             }
           }
+#endif
 
           if (display)
           {
-            m_cVideoIOYuvReconFile.write( pcPicTop->getRecoBuf(), pcPicBottom->getRecoBuf(),
+            m_cVideoIOYuvReconFile[pcPicTop->layerId].write( pcPicTop->getRecoBuf(), pcPicBottom->getRecoBuf(),
                                           m_outputColourSpaceConvert,
                                           false, // TODO: m_packedYUVMode,
-                                          conf.getWindowLeftOffset()   + defDisp.getWindowLeftOffset(),
-                                          conf.getWindowRightOffset()  + defDisp.getWindowRightOffset(),
-                                          conf.getWindowTopOffset()    + defDisp.getWindowTopOffset(),
-                                          conf.getWindowBottomOffset() + defDisp.getWindowBottomOffset(),
+                                          conf.getWindowLeftOffset() * SPS::getWinUnitX( pcPicTop->cs->sps->getChromaFormatIdc() ),
+                                          conf.getWindowRightOffset() * SPS::getWinUnitX( pcPicTop->cs->sps->getChromaFormatIdc() ),
+                                          conf.getWindowTopOffset() * SPS::getWinUnitY( pcPicTop->cs->sps->getChromaFormatIdc() ),
+                                          conf.getWindowBottomOffset() * SPS::getWinUnitY( pcPicTop->cs->sps->getChromaFormatIdc() ),
                                           NUM_CHROMA_FORMAT, isTff );
           }
         }
@@ -460,24 +821,32 @@ void DecApp::xWriteOutput( PicList* pcListPic, uint32_t tId )
 
         if (!m_reconFileName.empty())
         {
-          const Window &conf    = pcPic->cs->sps->getConformanceWindow();
-          const Window  defDisp = (m_respectDefDispWindow && pcPic->cs->sps->getVuiParametersPresentFlag()) ? pcPic->cs->sps->getVuiParameters()->getDefaultDisplayWindow() : Window();
-
-          m_cVideoIOYuvReconFile.write( pcPic->getRecoBuf(),
+          const Window &conf = pcPic->getConformanceWindow();
+          const SPS* sps = pcPic->cs->sps;
+          ChromaFormat chromaFormatIDC = sps->getChromaFormatIdc();
+          if( m_upscaledOutput )
+          {
+            m_cVideoIOYuvReconFile[pcPic->layerId].writeUpscaledPicture( *sps, *pcPic->cs->pps, pcPic->getRecoBuf(), m_outputColourSpaceConvert, m_packedYUVMode, m_upscaledOutput, NUM_CHROMA_FORMAT, m_bClipOutputVideoToRec709Range );
+          }
+          else
+          {
+            m_cVideoIOYuvReconFile[pcPic->layerId].write( pcPic->getRecoBuf().get( COMPONENT_Y ).width, pcPic->getRecoBuf().get( COMPONENT_Y ).height, pcPic->getRecoBuf(),
                                         m_outputColourSpaceConvert,
                                         m_packedYUVMode,
-                                        conf.getWindowLeftOffset()   + defDisp.getWindowLeftOffset(),
-                                        conf.getWindowRightOffset()  + defDisp.getWindowRightOffset(),
-                                        conf.getWindowTopOffset()    + defDisp.getWindowTopOffset(),
-                                        conf.getWindowBottomOffset() + defDisp.getWindowBottomOffset(),
+                                        conf.getWindowLeftOffset() * SPS::getWinUnitX( chromaFormatIDC ),
+                                        conf.getWindowRightOffset() * SPS::getWinUnitX( chromaFormatIDC ),
+                                        conf.getWindowTopOffset() * SPS::getWinUnitY( chromaFormatIDC ),
+                                        conf.getWindowBottomOffset() * SPS::getWinUnitY( chromaFormatIDC ),
                                         NUM_CHROMA_FORMAT, m_bClipOutputVideoToRec709Range );
+            }
         }
 
+#if HEVC_SEI
         if (m_seiMessageFileStream.is_open())
         {
           m_cColourRemapping.outputColourRemapPic (pcPic, m_seiMessageFileStream);
         }
-
+#endif
         // update POC of display order
         m_iPOCLastDisplay = pcPic->getPOC();
 
@@ -496,7 +865,7 @@ void DecApp::xWriteOutput( PicList* pcListPic, uint32_t tId )
 
 /** \param pcListPic list of pictures to be written to file
  */
-void DecApp::xFlushOutput( PicList* pcListPic )
+void DecApp::xFlushOutput( PicList* pcListPic, const int layerId )
 {
   if(!pcListPic || pcListPic->empty())
   {
@@ -518,22 +887,26 @@ void DecApp::xFlushOutput( PicList* pcListPic )
       iterPic++;
       pcPicBottom = *(iterPic);
 
+      if( pcPicTop->layerId != layerId && layerId != NOT_VALID )
+      {
+        continue;
+      }
+
       if ( pcPicTop->neededForOutput && pcPicBottom->neededForOutput && !(pcPicTop->getPOC()%2) && (pcPicBottom->getPOC() == pcPicTop->getPOC()+1) )
       {
         // write to file
         if ( !m_reconFileName.empty() )
         {
-          const Window &conf    = pcPicTop->cs->sps->getConformanceWindow();
-          const Window  defDisp = (m_respectDefDispWindow && pcPicTop->cs->sps->getVuiParametersPresentFlag()) ? pcPicTop->cs->sps->getVuiParameters()->getDefaultDisplayWindow() : Window();
+          const Window &conf = pcPicTop->cs->pps->getConformanceWindow();
           const bool    isTff   = pcPicTop->topField;
 
-          m_cVideoIOYuvReconFile.write( pcPicTop->getRecoBuf(), pcPicBottom->getRecoBuf(),
+          m_cVideoIOYuvReconFile[pcPicTop->layerId].write( pcPicTop->getRecoBuf(), pcPicBottom->getRecoBuf(),
                                         m_outputColourSpaceConvert,
                                         false, // TODO: m_packedYUVMode,
-                                        conf.getWindowLeftOffset()   + defDisp.getWindowLeftOffset(),
-                                        conf.getWindowRightOffset()  + defDisp.getWindowRightOffset(),
-                                        conf.getWindowTopOffset()    + defDisp.getWindowTopOffset(),
-                                        conf.getWindowBottomOffset() + defDisp.getWindowBottomOffset(),
+                                        conf.getWindowLeftOffset() * SPS::getWinUnitX( pcPicTop->cs->sps->getChromaFormatIdc() ),
+                                        conf.getWindowRightOffset() * SPS::getWinUnitX( pcPicTop->cs->sps->getChromaFormatIdc() ),
+                                        conf.getWindowTopOffset() * SPS::getWinUnitY( pcPicTop->cs->sps->getChromaFormatIdc() ),
+                                        conf.getWindowBottomOffset() * SPS::getWinUnitY( pcPicTop->cs->sps->getChromaFormatIdc() ),
                                         NUM_CHROMA_FORMAT, isTff );
         }
 
@@ -573,29 +946,44 @@ void DecApp::xFlushOutput( PicList* pcListPic )
     {
       pcPic = *(iterPic);
 
+      if( pcPic->layerId != layerId && layerId != NOT_VALID )
+      {
+        iterPic++;
+        continue;
+      }
+
       if (pcPic->neededForOutput)
       {
         // write to file
 
         if (!m_reconFileName.empty())
         {
-          const Window &conf    = pcPic->cs->sps->getConformanceWindow();
-          const Window  defDisp = (m_respectDefDispWindow && pcPic->cs->sps->getVuiParametersPresentFlag()) ? pcPic->cs->sps->getVuiParameters()->getDefaultDisplayWindow() : Window();
-
-          m_cVideoIOYuvReconFile.write( pcPic->getRecoBuf(),
+          const Window &conf = pcPic->getConformanceWindow();
+          const SPS* sps = pcPic->cs->sps;
+          ChromaFormat chromaFormatIDC = sps->getChromaFormatIdc();
+          if( m_upscaledOutput )
+          {
+            m_cVideoIOYuvReconFile[pcPic->layerId].writeUpscaledPicture( *sps, *pcPic->cs->pps, pcPic->getRecoBuf(), m_outputColourSpaceConvert, m_packedYUVMode, m_upscaledOutput, NUM_CHROMA_FORMAT, m_bClipOutputVideoToRec709Range );
+          }
+          else
+          {
+            m_cVideoIOYuvReconFile[pcPic->layerId].write( pcPic->getRecoBuf().get( COMPONENT_Y ).width, pcPic->getRecoBuf().get( COMPONENT_Y ).height, pcPic->getRecoBuf(),
                                         m_outputColourSpaceConvert,
                                         m_packedYUVMode,
-                                        conf.getWindowLeftOffset()   + defDisp.getWindowLeftOffset(),
-                                        conf.getWindowRightOffset()  + defDisp.getWindowRightOffset(),
-                                        conf.getWindowTopOffset()    + defDisp.getWindowTopOffset(),
-                                        conf.getWindowBottomOffset() + defDisp.getWindowBottomOffset(),
+                                        conf.getWindowLeftOffset() * SPS::getWinUnitX( chromaFormatIDC ),
+                                        conf.getWindowRightOffset() * SPS::getWinUnitX( chromaFormatIDC ),
+                                        conf.getWindowTopOffset() * SPS::getWinUnitY( chromaFormatIDC ),
+                                        conf.getWindowBottomOffset() * SPS::getWinUnitY( chromaFormatIDC ),
                                         NUM_CHROMA_FORMAT, m_bClipOutputVideoToRec709Range );
+            }
         }
 
+#if HEVC_SEI
         if (m_seiMessageFileStream.is_open())
         {
           m_cColourRemapping.outputColourRemapPic (pcPic, m_seiMessageFileStream);
         }
+#endif
 
         // update POC of display order
         m_iPOCLastDisplay = pcPic->getPOC();
@@ -612,10 +1000,17 @@ void DecApp::xFlushOutput( PicList* pcListPic )
         pcPic->destroy();
         delete pcPic;
         pcPic = NULL;
+        *iterPic = nullptr;
       }
       iterPic++;
     }
   }
+
+  if( layerId != NOT_VALID )
+  {
+    pcListPic->remove_if([](Picture* p) { return p == nullptr; });
+  }
+  else
   pcListPic->clear();
   m_iPOCLastDisplay = -MAX_INT;
 }
@@ -638,4 +1033,23 @@ bool DecApp::isNaluWithinTargetDecLayerIdSet( InputNALUnit* nalu )
   return false;
 }
 
+/** \param nalu Input nalu whose LayerId is looked up in targetOutputLayerIdSet
+ *  \return true if the set is empty or contains the nalu's LayerId, false otherwise
+ */
+bool DecApp::isNaluWithinTargetOutputLayerIdSet(InputNALUnit* nalu)
+{
+  // By default, the set is empty, meaning all LayerIds are allowed
+  bool isOutputLayer = m_targetOutputLayerIdSet.empty();
+  for (const int outputLayerId : m_targetOutputLayerIdSet)
+  {
+    if (outputLayerId == nalu->m_nuhLayerId)
+    {
+      isOutputLayer = true;
+      break;
+    }
+  }
+  return isOutputLayer;
+}
+
+
 //! \}
diff --git a/source/App/DecoderApp/DecApp.h b/source/App/DecoderApp/DecApp.h
index a0bf8a6fb4b48fdca0c477bf5ffd74738a64fa08..2d5c0fcdae81b11c04f0ca70ac489f23aac9cc86 100644
--- a/source/App/DecoderApp/DecApp.h
+++ b/source/App/DecoderApp/DecApp.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -61,13 +61,14 @@ class DecApp : public DecAppCfg
 private:
   // class interface
   DecLib          m_cDecLib;                     ///< decoder class
-  VideoIOYuv      m_cVideoIOYuvReconFile;        ///< reconstruction YUV class
+  std::unordered_map<int, VideoIOYuv>      m_cVideoIOYuvReconFile;        ///< reconstruction YUV class
 
   // for output control
   int             m_iPOCLastDisplay;              ///< last POC in display order
   std::ofstream   m_seiMessageFileStream;         ///< Used for outputing SEI messages.
+#if HEVC_SEI
   ColourRemapping m_cColourRemapping;             ///< colour remapping handler
-
+#endif
 
 public:
   DecApp();
@@ -79,8 +80,12 @@ private:
   void  xCreateDecLib     (); ///< create internal classes
   void  xDestroyDecLib    (); ///< destroy internal classes
   void  xWriteOutput      ( PicList* pcListPic , uint32_t tId); ///< write YUV to file
-  void  xFlushOutput      ( PicList* pcListPic ); ///< flush all remaining decoded pictures to file
+  void  xFlushOutput( PicList* pcListPic, const int layerId = NOT_VALID ); ///< flush all remaining decoded pictures to file
   bool  isNaluWithinTargetDecLayerIdSet ( InputNALUnit* nalu ); ///< check whether given Nalu is within targetDecLayerIdSet
+  bool  isNaluWithinTargetOutputLayerIdSet(InputNALUnit* nalu); ///< check whether given Nalu is within targetOutputLayerIdSet
+  bool  deriveOutputLayerSet(); ///< derive OLS and layer sets
+  bool  isNewPicture(ifstream *bitstreamFile, class InputByteStream *bytestream);  ///< check if next NAL unit will be the first NAL unit from a new picture
+  bool  isNewAccessUnit(bool newPicture, ifstream *bitstreamFile, class InputByteStream *bytestream);  ///< check if next NAL unit will be the first NAL unit from a new access unit
 };
 
 //! \}
diff --git a/source/App/DecoderApp/DecAppCfg.cpp b/source/App/DecoderApp/DecAppCfg.cpp
index c058d6fe8c2493163d0dc40596b7654de627442b..b3a3098256b863af1734166d57d1101d9de6687c 100644
--- a/source/App/DecoderApp/DecAppCfg.cpp
+++ b/source/App/DecoderApp/DecAppCfg.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -87,12 +87,12 @@ bool DecAppCfg::parseCfg( int argc, char* argv[] )
   ("OutputBitDepthC,d",         m_outputBitDepth[CHANNEL_TYPE_CHROMA], 0,          "bit depth of YUV output chroma component (default: use luma output bit-depth)")
   ("OutputColourSpaceConvert",  outputColourSpaceConvert,              string(""), "Colour space conversion to apply to input 444 video. Permitted values are (empty string=UNCHANGED) " + getListOfColourSpaceConverts(false))
   ("MaxTemporalLayer,t",        m_iMaxTemporalLayer,                   -1,         "Maximum Temporal Layer to be decoded. -1 to decode all layers")
+  ("TargetOutputLayerSet,p",    m_iTargetOLS,                          -1,         "Target output layer set.")
   ("SEIDecodedPictureHash,-dph",m_decodedPictureHashSEIEnabled,        1,          "Control handling of decoded picture hash SEI messages\n"
                                                                                    "\t1: check hash in SEI messages if available in the bitstream\n"
                                                                                    "\t0: ignore SEI message")
   ("SEINoDisplay",              m_decodedNoDisplaySEIEnabled,          true,       "Control handling of decoded no display SEI messages")
   ("TarDecLayerIdSetFile,l",    cfg_TargetDecLayerIdSetFile,           string(""), "targetDecLayerIdSet file name. The file should include white space separated LayerId values to be decoded. Omitting the option or a value of -1 in the file decodes all layers.")
-  ("RespectDefDispWindow,w",    m_respectDefDispWindow,                0,          "Only output content inside the default display window\n")
   ("SEIColourRemappingInfoFilename",  m_colourRemapSEIFileName,        string(""), "Colour Remapping YUV output file name. If empty, no remapping is applied (ignore SEI message)\n")
   ("OutputDecodedSEIMessagesFilename",  m_outputDecodedSEIMessagesFilename,    string(""), "When non empty, output decoded SEI messages to the indicated file. If file is '-', then output to stdout\n")
   ("ClipOutputVideoToRec709Range",      m_bClipOutputVideoToRec709Range,  false,   "If true then clip output video to the Rec. 709 Range on saving")
@@ -113,6 +113,7 @@ bool DecAppCfg::parseCfg( int argc, char* argv[] )
                                                                                    "\t3: enable bit and tool statistic\n")
 #endif
   ("MCTSCheck",                m_mctsCheck,                           false,       "If enabled, the decoder checks for violations of mc_exact_sample_value_match_flag in Temporal MCTS ")
+  ( "UpscaledOutput",          m_upscaledOutput,                          0,       "Upscaled output for RPR" )
   ;
 
   po::setDefaults(opts);
@@ -222,12 +223,12 @@ DecAppCfg::DecAppCfg()
 , m_iSkipFrame(0)
 // m_outputBitDepth array initialised below
 , m_outputColourSpaceConvert(IPCOLOURSPACE_UNCHANGED)
+, m_iTargetOLS(0)
 , m_iMaxTemporalLayer(-1)
 , m_decodedPictureHashSEIEnabled(0)
 , m_decodedNoDisplaySEIEnabled(false)
 , m_colourRemapSEIFileName()
 , m_targetDecLayerIdSet()
-, m_respectDefDispWindow(0)
 , m_outputDecodedSEIMessagesFilename()
 , m_bClipOutputVideoToRec709Range(false)
 , m_packedYUVMode(false)
diff --git a/source/App/DecoderApp/DecAppCfg.h b/source/App/DecoderApp/DecAppCfg.h
index a27a069284084ccdf974feb3f20c2f835f1291dd..c06ebdd999d3e3082229697826437763b6a2becf 100644
--- a/source/App/DecoderApp/DecAppCfg.h
+++ b/source/App/DecoderApp/DecAppCfg.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -61,13 +61,13 @@ protected:
   int           m_iSkipFrame;                           ///< counter for frames prior to the random access point to skip
   int           m_outputBitDepth[MAX_NUM_CHANNEL_TYPE]; ///< bit depth used for writing output
   InputColourSpaceConversion m_outputColourSpaceConvert;
-
+  int           m_iTargetOLS;                         ///< target output layer set
+  std::vector<int> m_targetOutputLayerIdSet;          ///< set of LayerIds to be outputted           
   int           m_iMaxTemporalLayer;                  ///< maximum temporal layer to be decoded
   int           m_decodedPictureHashSEIEnabled;       ///< Checksum(3)/CRC(2)/MD5(1)/disable(0) acting on decoded picture hash SEI message
   bool          m_decodedNoDisplaySEIEnabled;         ///< Enable(true)/disable(false) writing only pictures that get displayed based on the no display SEI message
   std::string   m_colourRemapSEIFileName;             ///< output Colour Remapping file name
   std::vector<int> m_targetDecLayerIdSet;             ///< set of LayerIds to be included in the sub-bitstream extraction process.
-  int           m_respectDefDispWindow;               ///< Only output content inside the default display window
   std::string   m_outputDecodedSEIMessagesFilename;   ///< filename to output decoded SEI messages to. If '-', then use stdout. If empty, do not output details.
   bool          m_bClipOutputVideoToRec709Range;      ///< If true, clip the output video to the Rec 709 range on saving.
   bool          m_packedYUVMode;                      ///< If true, output 10-bit and 12-bit YUV data as 5-byte and 3-byte (respectively) packed YUV data
@@ -75,6 +75,8 @@ protected:
   int           m_statMode;                           ///< Config statistic mode (0 - bit stat, 1 - tool stat, 3 - both)
   bool          m_mctsCheck;
 
+  int          m_upscaledOutput;                     ///< Output upscaled (2), decoded but in full resolution buffer (1) or decoded cropped (0, default) picture for RPR.
+
 public:
   DecAppCfg();
   virtual ~DecAppCfg();
diff --git a/source/App/DecoderApp/decmain.cpp b/source/App/DecoderApp/decmain.cpp
index aa3c772b8c2b846c1ba5b8048785ae16efae7926..c8a6e3bd7070cdcbd867a95d3f39c27cca6739e1 100644
--- a/source/App/DecoderApp/decmain.cpp
+++ b/source/App/DecoderApp/decmain.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -100,9 +100,9 @@ int main(int argc, char* argv[])
     std::cerr << e.what() << std::endl;
     returnCode = EXIT_FAILURE;
   }
-  catch( ... )
+  catch (const std::bad_alloc &e)
   {
-    std::cerr << "Unspecified error occurred" << std::endl;
+    std::cout << "Memory allocation failed: " << e.what() << std::endl;
     returnCode = EXIT_FAILURE;
   }
 #endif
diff --git a/source/App/EncoderApp/CMakeLists.txt b/source/App/EncoderApp/CMakeLists.txt
index 2299bcf8f1987839181ad36b53cba3e95c844042..dd87e52d1f8244c607c34d42359dd0595e63cd88 100644
--- a/source/App/EncoderApp/CMakeLists.txt
+++ b/source/App/EncoderApp/CMakeLists.txt
@@ -66,6 +66,10 @@ if( EXTENSION_360_VIDEO )
   target_link_libraries( ${EXE_NAME} Lib360 AppEncHelper360 )
 endif()
 
+if( EXTENSION_HDRTOOLS )
+  target_link_libraries( ${EXE_NAME} HDRLib )
+endif()
+
 # lldb custom data formatters
 if( XCODE )
   add_dependencies( ${EXE_NAME} Install${PROJECT_NAME}LldbFiles )
diff --git a/source/App/EncoderApp/EncApp.cpp b/source/App/EncoderApp/EncApp.cpp
index 9cdd07a6932cce8b0cf2446c00f5b7d20293a51a..77f765178b1b70b789850c6e9c59035a7721233f 100644
--- a/source/App/EncoderApp/EncApp.cpp
+++ b/source/App/EncoderApp/EncApp.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -44,9 +44,7 @@
 
 #include "EncApp.h"
 #include "EncoderLib/AnnexBwrite.h"
-#if EXTENSION_360_VIDEO
-#include "AppEncHelper360/TExt360AppEncTop.h"
-#endif
+#include "EncoderLib/EncLibCommon.h"
 
 using namespace std;
 
@@ -57,11 +55,18 @@ using namespace std;
 // Constructor / destructor / initialization / destroy
 // ====================================================================================================================
 
-EncApp::EncApp()
+EncApp::EncApp( fstream& bitStream, EncLibCommon* encLibCommon )
+  : m_cEncLib( encLibCommon )   // encoder library member is constructed from the EncLibCommon pointer passed in
+  , m_bitstream( bitStream )    // initialised from the caller's stream; presumably held by reference - TODO confirm
 {
   m_iFrameRcvd = 0;
   m_totalBytes = 0;
   m_essentialBytes = 0;
+#if JVET_O0756_CALCULATE_HDRMETRICS
+  m_metricTime = std::chrono::milliseconds(0);   // only initialised when JVET_O0756_CALCULATE_HDRMETRICS is enabled
+#endif
+  m_numEncoded = 0;
+  m_flush = false;
 }
 
 EncApp::~EncApp()
@@ -70,33 +75,115 @@ EncApp::~EncApp()
 
 void EncApp::xInitLibCfg()
 {
-#if HEVC_VPS
   VPS vps;
 
-  vps.setMaxTLayers                                               ( m_maxTempLayer );
-  if (m_maxTempLayer == 1)
+  vps.setMaxLayers( m_maxLayers );
+
+  if (vps.getMaxLayers() > 1)
   {
-    vps.setTemporalNestingFlag(true);
+    vps.setVPSId(1);  //JVET_P0205 vps_video_parameter_set_id shall be greater than 0 for multi-layer coding
   }
-  vps.setMaxLayers                                                ( 1 );
-  for(int i = 0; i < MAX_TLAYER; i++)
+  else
+  {
+    vps.setVPSId(0);
+    vps.setEachLayerIsAnOlsFlag(1); // If vps_max_layers_minus1 is equal to 0,
+                                    // the value of each_layer_is_an_ols_flag is inferred to be equal to 1.
+                                    // Otherwise, when vps_all_independent_layers_flag is equal to 0,
+                                    // the value of each_layer_is_an_ols_flag is inferred to be equal to 0.
+  }
+  vps.setMaxSubLayers(m_maxSublayers);
+  if (vps.getMaxLayers() > 1 && vps.getMaxSubLayers() > 1)
+  {
+    vps.setAllLayersSameNumSublayersFlag(m_allLayersSameNumSublayersFlag);
+  }
+  if (vps.getMaxLayers() > 1)
+  {
+    vps.setAllIndependentLayersFlag(m_allIndependentLayersFlag);
+    if (!vps.getAllIndependentLayersFlag())
+    {
+      vps.setEachLayerIsAnOlsFlag(0);
+    }
+  }
+
+  for (int i = 0; i < vps.getMaxLayers(); i++)
+  {
+    vps.setGeneralLayerIdx( m_layerId[i], i );
+    vps.setLayerId(i, m_layerId[i]);
+
+    if (i > 0 && !vps.getAllIndependentLayersFlag())
+    {
+      vps.setIndependentLayerFlag( i, m_numRefLayers[i] ? false : true );
+
+      if (!vps.getIndependentLayerFlag(i))
+      {
+        for (int j = 0, k = 0; j < i; j++)
+        {
+          if (m_refLayerIdxStr[i].find(to_string(j)) != std::string::npos)
+          {
+            vps.setDirectRefLayerFlag(i, j, true);
+            vps.setInterLayerRefIdc( i, j, k );
+            vps.setDirectRefLayerIdx(i, k++, j);
+          }
+          else
+          {
+            vps.setDirectRefLayerFlag(i, j, false);
+          }
+        }
+      }
+    }
+  }
+
+
+  if (vps.getMaxLayers() > 1)
   {
-    vps.setNumReorderPics                                         ( m_numReorderPics[i], i );
-    vps.setMaxDecPicBuffering                                     ( m_maxDecPicBuffering[i], i );
+    if (vps.getAllIndependentLayersFlag())
+    {
+      vps.setEachLayerIsAnOlsFlag(m_eachLayerIsAnOlsFlag);
+      if (vps.getEachLayerIsAnOlsFlag() == 0)
+      {
+        vps.setOlsModeIdc(2); // When vps_all_independent_layers_flag is equal to 1 and each_layer_is_an_ols_flag is equal to 0, the value of ols_mode_idc is inferred to be equal to 2
+      }
+    }
+    if (!vps.getEachLayerIsAnOlsFlag())
+    {
+      if (!vps.getAllIndependentLayersFlag())
+      {
+        vps.setOlsModeIdc(m_olsModeIdc);
+      }
+      if (vps.getOlsModeIdc() == 2)
+      {
+        vps.setNumOutputLayerSets(m_numOutputLayerSets);
+        for (int i = 1; i < vps.getNumOutputLayerSets(); i++)
+        {
+          for (int j = 0; j < vps.getMaxLayers(); j++)
+          {
+            if (m_olsOutputLayerStr[i].find(to_string(j)) != std::string::npos)
+            {
+              vps.setOlsOutputLayerFlag(i, j, 1);
+            }
+            else
+            {
+              vps.setOlsOutputLayerFlag(i, j, 0);
+            }
+          }
+        }
+      }
+    }
   }
+  vps.setVPSExtensionFlag                                        ( false );
   m_cEncLib.setVPS(&vps);
-#endif
   m_cEncLib.setProfile                                           ( m_profile);
   m_cEncLib.setLevel                                             ( m_levelTier, m_level);
+  m_cEncLib.setNumSubProfile                                     ( m_numSubProfile );
+  for (int i = 0; i < m_numSubProfile; i++)
+  {
+    m_cEncLib.setSubProfile(i, m_subProfile[i]);
+  }
   m_cEncLib.setProgressiveSourceFlag                             ( m_progressiveSourceFlag);
   m_cEncLib.setInterlacedSourceFlag                              ( m_interlacedSourceFlag);
   m_cEncLib.setNonPackedConstraintFlag                           ( m_nonPackedConstraintFlag);
   m_cEncLib.setFrameOnlyConstraintFlag                           ( m_frameOnlyConstraintFlag);
-  m_cEncLib.setBitDepthConstraintValue                           ( m_bitDepthConstraint );
-  m_cEncLib.setChromaFormatConstraintValue                       ( m_chromaFormatConstraint );
   m_cEncLib.setIntraConstraintFlag                               ( m_intraConstraintFlag );
-  m_cEncLib.setOnePictureOnlyConstraintFlag                      ( m_onePictureOnlyConstraintFlag );
-  m_cEncLib.setLowerBitRateConstraintFlag                        ( m_lowerBitRateConstraintFlag );
 
   m_cEncLib.setPrintMSEBasedSequencePSNR                         ( m_printMSEBasedSequencePSNR);
   m_cEncLib.setPrintFrameMSE                                     ( m_printFrameMSE);
@@ -109,7 +196,11 @@ void EncApp::xInitLibCfg()
   m_cEncLib.setTemporalSubsampleRatio                            ( m_temporalSubsampleRatio );
   m_cEncLib.setSourceWidth                                       ( m_iSourceWidth );
   m_cEncLib.setSourceHeight                                      ( m_iSourceHeight );
-  m_cEncLib.setConformanceWindow                                 ( m_confWinLeft, m_confWinRight, m_confWinTop, m_confWinBottom );
+  m_cEncLib.setConformanceWindow                                 ( m_confWinLeft / SPS::getWinUnitX( m_InputChromaFormatIDC ), m_confWinRight / SPS::getWinUnitX( m_InputChromaFormatIDC ), m_confWinTop / SPS::getWinUnitY( m_InputChromaFormatIDC ), m_confWinBottom / SPS::getWinUnitY( m_InputChromaFormatIDC ) );
+  m_cEncLib.setScalingRatio                                      ( m_scalingRatioHor, m_scalingRatioVer );
+  m_cEncLib.setRPREnabled                                        ( m_rprEnabled );
+  m_cEncLib.setSwitchPocPeriod                                   ( m_switchPocPeriod );
+  m_cEncLib.setUpscaledOutput                                    ( m_upscaledOutput );
   m_cEncLib.setFramesToBeEncoded                                 ( m_framesToBeEncoded );
 
   //====== SPS constraint flags =======
@@ -118,35 +209,51 @@ void EncApp::xInitLibCfg()
   m_cEncLib.setMaxChromaFormatConstraintIdc                      ( m_chromaFormatConstraint );
   m_cEncLib.setFrameConstraintFlag                               ( m_bFrameConstraintFlag );
   m_cEncLib.setNoQtbttDualTreeIntraConstraintFlag                ( !m_dualTree );
+  m_cEncLib.setNoPartitionConstraintsOverrideConstraintFlag      ( !m_SplitConsOverrideEnabledFlag );
   m_cEncLib.setNoSaoConstraintFlag                               ( !m_bUseSAO );
   m_cEncLib.setNoAlfConstraintFlag                               ( !m_alf );
-  m_cEncLib.setNoPcmConstraintFlag                               ( !m_usePCM );
   m_cEncLib.setNoRefWraparoundConstraintFlag                     ( m_bNoRefWraparoundConstraintFlag );
   m_cEncLib.setNoTemporalMvpConstraintFlag                       ( m_TMVPModeId ? false : true );
   m_cEncLib.setNoSbtmvpConstraintFlag                            ( m_SubPuMvpMode ? false : true );
   m_cEncLib.setNoAmvrConstraintFlag                              ( m_bNoAmvrConstraintFlag );
   m_cEncLib.setNoBdofConstraintFlag                              ( !m_BIO );
+  m_cEncLib.setNoDmvrConstraintFlag                              ( !m_DMVR );
   m_cEncLib.setNoCclmConstraintFlag                              ( m_LMChroma ? false : true );
   m_cEncLib.setNoMtsConstraintFlag                               ( (m_MTS || m_MTSImplicit) ? false : true );
+  m_cEncLib.setNoSbtConstraintFlag                               ( !m_SBT );
   m_cEncLib.setNoAffineMotionConstraintFlag                      ( !m_Affine );
-  m_cEncLib.setNoGbiConstraintFlag                               ( !m_GBi );
-  m_cEncLib.setNoMhIntraConstraintFlag                           ( !m_MHIntra );
+  m_cEncLib.setNoBcwConstraintFlag                               ( !m_bcw );
+  m_cEncLib.setNoIbcConstraintFlag                               ( m_IBCMode ? false : true );
+  m_cEncLib.setNoCiipConstraintFlag                           ( !m_ciip );
+  m_cEncLib.setNoFPelMmvdConstraintFlag                          ( !(m_MMVD && m_allowDisFracMMVD) );
   m_cEncLib.setNoTriangleConstraintFlag                          ( !m_Triangle );
   m_cEncLib.setNoLadfConstraintFlag                              ( !m_LadfEnabed );
-  m_cEncLib.setNoCurrPicRefConstraintFlag                        ( !m_IBCMode );
+  m_cEncLib.setNoTransformSkipConstraintFlag                     ( !m_useTransformSkip );
+  m_cEncLib.setNoBDPCMConstraintFlag                             ( m_useBDPCM == 0 );
+  m_cEncLib.setNoJointCbCrConstraintFlag                         (!m_JointCbCrMode);
   m_cEncLib.setNoQpDeltaConstraintFlag                           ( m_bNoQpDeltaConstraintFlag );
   m_cEncLib.setNoDepQuantConstraintFlag                          ( !m_depQuantEnabledFlag);
   m_cEncLib.setNoSignDataHidingConstraintFlag                    ( !m_signDataHidingEnabledFlag );
+  m_cEncLib.setNoTrailConstraintFlag                             ( m_iIntraPeriod == 1 );
+  m_cEncLib.setNoStsaConstraintFlag                              ( m_iIntraPeriod == 1 || !xHasNonZeroTemporalID() );
+  m_cEncLib.setNoRaslConstraintFlag                              ( m_iIntraPeriod == 1 || !xHasLeadingPicture() );
+  m_cEncLib.setNoRadlConstraintFlag                              ( m_iIntraPeriod == 1 || !xHasLeadingPicture() );
+  m_cEncLib.setNoIdrConstraintFlag                               ( false ); // Not yet possible to encode bitstream starting with a GDR picture
+  m_cEncLib.setNoCraConstraintFlag                               ( m_iDecodingRefreshType != 1 );
+  m_cEncLib.setNoGdrConstraintFlag                               ( false ); // Not yet possible to encode GDR using config parameters
+  m_cEncLib.setNoApsConstraintFlag                               ( !m_alf && !m_lmcsEnabled && m_useScalingListId == SCALING_LIST_OFF);
 
   //====== Coding Structure ========
   m_cEncLib.setIntraPeriod                                       ( m_iIntraPeriod );
   m_cEncLib.setDecodingRefreshType                               ( m_iDecodingRefreshType );
   m_cEncLib.setGOPSize                                           ( m_iGOPSize );
-#if JCTVC_Y0038_PARAMS
+  m_cEncLib.setDrapPeriod                                        ( m_drapPeriod );
   m_cEncLib.setReWriteParamSets                                  ( m_rewriteParamSets );
-#endif
+  m_cEncLib.setRPLList0                                          ( m_RPLList0);
+  m_cEncLib.setRPLList1                                          ( m_RPLList1);
+  m_cEncLib.setIDRRefParamListPresent                            ( m_idrRefParamList );
   m_cEncLib.setGopList                                           ( m_GOPList );
-  m_cEncLib.setExtraRPSs                                         ( m_extraRPSs );
+
   for(int i = 0; i < MAX_TLAYER; i++)
   {
     m_cEncLib.setNumReorderPics                                  ( m_numReorderPics[i], i );
@@ -165,6 +272,8 @@ void EncApp::xInitLibCfg()
   m_cEncLib.setIntraQPOffset                                     ( m_intraQPOffset );
   m_cEncLib.setLambdaFromQPEnable                                ( m_lambdaFromQPEnable );
 #endif
+  m_cEncLib.setChromaQpMappingTableParams                         (m_chromaQpMappingTableParams);
+
   m_cEncLib.setPad                                               ( m_aiPad );
 
   m_cEncLib.setAccessUnitDelimiter                               ( m_AccessUnitDelimiter );
@@ -202,6 +311,8 @@ void EncApp::xInitLibCfg()
   m_cEncLib.setChromaCrQpOffset                                  ( m_crQpOffset  );
   m_cEncLib.setChromaCbQpOffsetDualTree                          ( m_cbQpOffsetDualTree );
   m_cEncLib.setChromaCrQpOffsetDualTree                          ( m_crQpOffsetDualTree );
+  m_cEncLib.setChromaCbCrQpOffset                                ( m_cbCrQpOffset         );
+  m_cEncLib.setChromaCbCrQpOffsetDualTree                        ( m_cbCrQpOffsetDualTree );
 #if ER_CHROMA_QP_WCG_PPS
   m_cEncLib.setWCGChromaQpControl                                ( m_wcgChromaQpControl );
 #endif
@@ -240,25 +351,58 @@ void EncApp::xInitLibCfg()
 #endif
   m_cEncLib.setRDpenalty                                         ( m_rdPenalty );
   m_cEncLib.setCTUSize                                           ( m_uiCTUSize );
+  m_cEncLib.setSubPicPresentFlag                                 ( m_subPicPresentFlag );
+  if(m_subPicPresentFlag)
+  {
+    m_cEncLib.setNumSubPics                                      ( m_numSubPics );
+    for (int i = 0; i < m_numSubPics; i++)
+    {
+      m_cEncLib.setSubPicCtuTopLeftX                             ( m_subPicCtuTopLeftX[i], i );
+      m_cEncLib.setSubPicCtuTopLeftY                             ( m_subPicCtuTopLeftY[i], i );
+      m_cEncLib.setSubPicWidth                                   ( m_subPicWidth[i], i );
+      m_cEncLib.setSubPicHeight                                  ( m_subPicHeight[i], i );
+      m_cEncLib.setSubPicTreatedAsPicFlag                        ( m_subPicTreatedAsPicFlag[i], i );
+      m_cEncLib.setLoopFilterAcrossSubpicEnabledFlag             ( m_loopFilterAcrossSubpicEnabledFlag[i], i );
+    }
+  }
+  m_cEncLib.setSubPicIdPresentFlag                               ( m_subPicIdPresentFlag );
+  if (m_subPicIdPresentFlag) 
+  {
+    m_cEncLib.setSubPicIdSignallingPresentFlag                   ( m_subPicIdSignallingPresentFlag );
+    if(m_subPicIdSignallingPresentFlag)
+    {
+      m_cEncLib.setSubPicIdLen                                   ( m_subPicIdLen );
+      for (int i = 0; i < m_numSubPics; i++)
+      {
+        m_cEncLib.setSubPicId                                    ( m_subPicId[i], i );
+      }
+    }
+  }
   m_cEncLib.setUseSplitConsOverride                              ( m_SplitConsOverrideEnabledFlag );
   m_cEncLib.setMinQTSizes                                        ( m_uiMinQT );
-  m_cEncLib.setMaxBTDepth                                        ( m_uiMaxBTDepth, m_uiMaxBTDepthI, m_uiMaxBTDepthIChroma );
+  m_cEncLib.setMaxMTTHierarchyDepth                              ( m_uiMaxMTTHierarchyDepth, m_uiMaxMTTHierarchyDepthI, m_uiMaxMTTHierarchyDepthIChroma );
   m_cEncLib.setDualITree                                         ( m_dualTree );
+  m_cEncLib.setLFNST                                             ( m_LFNST );
+  m_cEncLib.setUseFastLFNST                                      ( m_useFastLFNST );
   m_cEncLib.setSubPuMvpMode                                      ( m_SubPuMvpMode );
   m_cEncLib.setAffine                                            ( m_Affine );
   m_cEncLib.setAffineType                                        ( m_AffineType );
+  m_cEncLib.setPROF                                              ( m_PROF );
   m_cEncLib.setBIO                                               (m_BIO);
   m_cEncLib.setUseLMChroma                                       ( m_LMChroma );
-  m_cEncLib.setCclmCollocatedChromaFlag                          ( m_cclmCollocatedChromaFlag );
+  m_cEncLib.setHorCollocatedChromaFlag                           ( m_horCollocatedChromaFlag );
+  m_cEncLib.setVerCollocatedChromaFlag                           ( m_verCollocatedChromaFlag );
   m_cEncLib.setIntraMTS                                          ( m_MTS & 1 );
-  m_cEncLib.setIntraMTSMaxCand                                   ( m_MTSIntraMaxCand );
   m_cEncLib.setInterMTS                                          ( ( m_MTS >> 1 ) & 1 );
-  m_cEncLib.setInterMTSMaxCand                                   ( m_MTSInterMaxCand );
+  m_cEncLib.setMTSIntraMaxCand                                   ( m_MTSIntraMaxCand );
+  m_cEncLib.setMTSInterMaxCand                                   ( m_MTSInterMaxCand );
   m_cEncLib.setImplicitMTS                                       ( m_MTSImplicit );
   m_cEncLib.setUseSBT                                            ( m_SBT );
+  m_cEncLib.setSBTFast64WidthTh                                  ( m_SBTFast64WidthTh );
   m_cEncLib.setUseCompositeRef                                   ( m_compositeRefEnabled );
-  m_cEncLib.setUseGBi                                            ( m_GBi );
-  m_cEncLib.setUseGBiFast                                        ( m_GBiFast );
+  m_cEncLib.setUseSMVD                                           ( m_SMVD );
+  m_cEncLib.setUseBcw                                            ( m_bcw );
+  m_cEncLib.setUseBcwFast                                        ( m_BcwFast );
 #if LUMA_ADAPTIVE_DEBLOCKING_FILTER_QP_OFFSET
   m_cEncLib.setUseLadf                                           ( m_LadfEnabed );
   if ( m_LadfEnabed )
@@ -271,7 +415,7 @@ void EncApp::xInitLibCfg()
     }
   }
 #endif
-  m_cEncLib.setUseMHIntra                                        ( m_MHIntra );
+  m_cEncLib.setUseCiip                                        ( m_ciip );
   m_cEncLib.setUseTriangle                                       ( m_Triangle );
   m_cEncLib.setUseHashME                                         ( m_HashME );
 
@@ -279,6 +423,12 @@ void EncApp::xInitLibCfg()
   m_cEncLib.setUseAffineAmvr                                     ( m_AffineAmvr );
   m_cEncLib.setUseAffineAmvrEncOpt                               ( m_AffineAmvrEncOpt );
   m_cEncLib.setDMVR                                              ( m_DMVR );
+  m_cEncLib.setMMVD                                              ( m_MMVD );
+  m_cEncLib.setMmvdDisNum                                        (m_MmvdDisNum);
+  m_cEncLib.setRGBFormatFlag(m_rgbFormat);
+  m_cEncLib.setUseColorTrans(m_useColorTrans);
+  m_cEncLib.setPLTMode                                           ( m_PLTMode );
+  m_cEncLib.setJointCbCr                                         ( m_JointCbCrMode );
   m_cEncLib.setIBCMode                                           ( m_IBCMode );
   m_cEncLib.setIBCLocalSearchRangeX                              ( m_IBCLocalSearchRangeX );
   m_cEncLib.setIBCLocalSearchRangeY                              ( m_IBCLocalSearchRangeY );
@@ -292,13 +442,23 @@ void EncApp::xInitLibCfg()
 
   // ADD_NEW_TOOL : (encoder app) add setting of tool enabling flags and associated parameters here
 
+  m_cEncLib.setLoopFilterAcrossVirtualBoundariesDisabledFlag     ( m_loopFilterAcrossVirtualBoundariesDisabledFlag );
+  m_cEncLib.setNumVerVirtualBoundaries                           ( m_numVerVirtualBoundaries );
+  m_cEncLib.setNumHorVirtualBoundaries                           ( m_numHorVirtualBoundaries );
+  for( unsigned i = 0; i < m_numVerVirtualBoundaries; i++ )
+  {
+    m_cEncLib.setVirtualBoundariesPosX                           ( m_virtualBoundariesPosX[ i ], i );
+  }
+  for( unsigned i = 0; i < m_numHorVirtualBoundaries; i++ )
+  {
+    m_cEncLib.setVirtualBoundariesPosY                           ( m_virtualBoundariesPosY[ i ], i );
+  }
+
   m_cEncLib.setMaxCUWidth                                        ( m_uiCTUSize );
   m_cEncLib.setMaxCUHeight                                       ( m_uiCTUSize );
   m_cEncLib.setMaxCodingDepth                                    ( m_uiMaxCodingDepth );
   m_cEncLib.setLog2DiffMaxMinCodingBlockSize                     ( m_uiLog2DiffMaxMinCodingBlockSize );
-#if MAX_TB_SIZE_SIGNALLING
   m_cEncLib.setLog2MaxTbSize                                     ( m_log2MaxTbSize );
-#endif
   m_cEncLib.setUseEncDbOpt(m_encDbOpt);
   m_cEncLib.setUseFastLCTU                                       ( m_useFastLCTU );
   m_cEncLib.setFastInterSearchMode                               ( m_fastInterSearchMode );
@@ -311,12 +471,21 @@ void EncApp::xInitLibCfg()
   m_cEncLib.setUseAMaxBT                                         ( m_useAMaxBT );
   m_cEncLib.setUseE0023FastEnc                                   ( m_e0023FastEnc );
   m_cEncLib.setUseContentBasedFastQtbt                           ( m_contentBasedFastQtbt );
+  m_cEncLib.setUseNonLinearAlfLuma                               ( m_useNonLinearAlfLuma );
+  m_cEncLib.setUseNonLinearAlfChroma                             ( m_useNonLinearAlfChroma );
+  m_cEncLib.setMaxNumAlfAlternativesChroma                       ( m_maxNumAlfAlternativesChroma );
+  m_cEncLib.setUseMRL                                            ( m_MRL );
+  m_cEncLib.setUseMIP                                            ( m_MIP );
+  m_cEncLib.setUseFastMIP                                        ( m_useFastMIP );
+  m_cEncLib.setFastLocalDualTreeMode                             ( m_fastLocalDualTreeMode );
   m_cEncLib.setCrossComponentPredictionEnabledFlag               ( m_crossComponentPredictionEnabledFlag );
   m_cEncLib.setUseReconBasedCrossCPredictionEstimate             ( m_reconBasedCrossCPredictionEstimate );
   m_cEncLib.setLog2SaoOffsetScale                                ( CHANNEL_TYPE_LUMA  , m_log2SaoOffsetScale[CHANNEL_TYPE_LUMA]   );
   m_cEncLib.setLog2SaoOffsetScale                                ( CHANNEL_TYPE_CHROMA, m_log2SaoOffsetScale[CHANNEL_TYPE_CHROMA] );
   m_cEncLib.setUseTransformSkip                                  ( m_useTransformSkip      );
   m_cEncLib.setUseTransformSkipFast                              ( m_useTransformSkipFast  );
+  m_cEncLib.setUseChromaTS                                       ( m_useChromaTS && m_useTransformSkip);
+  m_cEncLib.setUseBDPCM                                          ( m_useBDPCM );
   m_cEncLib.setTransformSkipRotationEnabledFlag                  ( m_transformSkipRotationEnabledFlag );
   m_cEncLib.setTransformSkipContextEnabledFlag                   ( m_transformSkipContextEnabledFlag   );
   m_cEncLib.setPersistentRiceAdaptationEnabledFlag               ( m_persistentRiceAdaptationEnabledFlag );
@@ -326,12 +495,10 @@ void EncApp::xInitLibCfg()
   {
     m_cEncLib.setRdpcmEnabledFlag                                ( RDPCMSignallingMode(signallingModeIndex), m_rdpcmEnabledFlag[signallingModeIndex]);
   }
-  m_cEncLib.setUseConstrainedIntraPred                           ( m_bUseConstrainedIntraPred );
   m_cEncLib.setFastUDIUseMPMEnabled                              ( m_bFastUDIUseMPMEnabled );
   m_cEncLib.setFastMEForGenBLowDelayEnabled                      ( m_bFastMEForGenBLowDelayEnabled );
   m_cEncLib.setUseBLambdaForNonKeyLowDelayPictures               ( m_bUseBLambdaForNonKeyLowDelayPictures );
-  m_cEncLib.setPCMLog2MinSize                                    ( m_uiPCMLog2MinSize);
-  m_cEncLib.setUsePCM                                            ( m_usePCM );
+  m_cEncLib.setUseISP                                            ( m_ISP );
   m_cEncLib.setUseFastISP                                        ( m_useFastISP );
 
   // set internal bit-depth and constants
@@ -339,35 +506,42 @@ void EncApp::xInitLibCfg()
   {
     m_cEncLib.setBitDepth((ChannelType)channelType, m_internalBitDepth[channelType]);
     m_cEncLib.setInputBitDepth((ChannelType)channelType, m_inputBitDepth[channelType]);
-    m_cEncLib.setPCMBitDepth((ChannelType)channelType, m_bPCMInputBitDepthFlag ? m_MSBExtendedBitDepth[channelType] : m_internalBitDepth[channelType]);
   }
 
-  m_cEncLib.setPCMLog2MaxSize                                    ( m_pcmLog2MaxSize);
   m_cEncLib.setMaxNumMergeCand                                   ( m_maxNumMergeCand );
   m_cEncLib.setMaxNumAffineMergeCand                             ( m_maxNumAffineMergeCand );
+  m_cEncLib.setMaxNumTriangleCand                                ( m_maxNumTriangleCand );
+  m_cEncLib.setMaxNumIBCMergeCand                                ( m_maxNumIBCMergeCand );
 
   //====== Weighted Prediction ========
   m_cEncLib.setUseWP                                             ( m_useWeightedPred     );
   m_cEncLib.setWPBiPred                                          ( m_useWeightedBiPred   );
 
-  //====== Parallel Merge Estimation ========
-  m_cEncLib.setLog2ParallelMergeLevelMinus2                      ( m_log2ParallelMergeLevel - 2 );
-
-  //====== Slice ========
-  m_cEncLib.setSliceMode                                         ( m_sliceMode );
-  m_cEncLib.setSliceArgument                                     ( m_sliceArgument );
-
-#if HEVC_DEPENDENT_SLICES
-  //====== Dependent Slice ========
-  m_cEncLib.setSliceSegmentMode                                  ( m_sliceSegmentMode );
-  m_cEncLib.setSliceSegmentArgument                              ( m_sliceSegmentArgument );
-#endif
-
-  if(m_sliceMode == NO_SLICES )
+  //====== Tiles and Slices ========
+  m_cEncLib.setNoPicPartitionFlag( !m_picPartitionFlag );
+  if( m_picPartitionFlag )
   {
-    m_bLFCrossSliceBoundaryFlag = true;
+    m_cEncLib.setTileColWidths( m_tileColumnWidth );
+    m_cEncLib.setTileRowHeights( m_tileRowHeight );
+    m_cEncLib.setRectSliceFlag( !m_rasterSliceFlag );
+    m_cEncLib.setNumSlicesInPic( m_numSlicesInPic );
+    m_cEncLib.setTileIdxDeltaPresentFlag( m_tileIdxDeltaPresentFlag );
+    m_cEncLib.setRectSlices( m_rectSlices );
+    m_cEncLib.setRasterSliceSizes( m_rasterSliceSize );
+    m_cEncLib.setLFCrossTileBoundaryFlag( !m_disableLFCrossTileBoundaryFlag );
+    m_cEncLib.setLFCrossSliceBoundaryFlag( !m_disableLFCrossSliceBoundaryFlag );
   }
-  m_cEncLib.setLFCrossSliceBoundaryFlag                          ( m_bLFCrossSliceBoundaryFlag );
+  else
+  {
+    m_cEncLib.setRectSliceFlag( true );
+    m_cEncLib.setNumSlicesInPic( 1 );
+    m_cEncLib.setTileIdxDeltaPresentFlag( 0 );
+    m_cEncLib.setLFCrossTileBoundaryFlag( true );
+    m_cEncLib.setLFCrossSliceBoundaryFlag( true );
+  }
+
+  //====== Sub-picture and Slices ========
+  m_cEncLib.setSingleSlicePerSubPicFlagFlag                      ( m_singleSlicePerSubPicFlag );
   m_cEncLib.setUseSAO                                            ( m_bUseSAO );
   m_cEncLib.setTestSAODisableAtPictureLevel                      ( m_bTestSAODisableAtPictureLevel );
   m_cEncLib.setSaoEncodingRate                                   ( m_saoEncodingRate );
@@ -375,121 +549,157 @@ void EncApp::xInitLibCfg()
   m_cEncLib.setMaxNumOffsetsPerPic                               ( m_maxNumOffsetsPerPic);
 
   m_cEncLib.setSaoCtuBoundary                                    ( m_saoCtuBoundary);
-  m_cEncLib.setPCMInputBitDepthFlag                              ( m_bPCMInputBitDepthFlag);
-  m_cEncLib.setPCMFilterDisableFlag                              ( m_bPCMFilterDisableFlag);
 
-#if K0238_SAO_GREEDY_MERGE_ENCODING
   m_cEncLib.setSaoGreedyMergeEnc                                 ( m_saoGreedyMergeEnc);
-#endif
   m_cEncLib.setIntraSmoothingDisabledFlag                        (!m_enableIntraReferenceSmoothing );
   m_cEncLib.setDecodedPictureHashSEIType                         ( m_decodedPictureHashSEIType );
+#if HEVC_SEI
   m_cEncLib.setRecoveryPointSEIEnabled                           ( m_recoveryPointSEIEnabled );
+#endif
+  m_cEncLib.setDependentRAPIndicationSEIEnabled                  ( m_drapPeriod > 0 );
   m_cEncLib.setBufferingPeriodSEIEnabled                         ( m_bufferingPeriodSEIEnabled );
   m_cEncLib.setPictureTimingSEIEnabled                           ( m_pictureTimingSEIEnabled );
-  m_cEncLib.setToneMappingInfoSEIEnabled                         ( m_toneMappingInfoSEIEnabled );
-  m_cEncLib.setTMISEIToneMapId                                   ( m_toneMapId );
-  m_cEncLib.setTMISEIToneMapCancelFlag                           ( m_toneMapCancelFlag );
-  m_cEncLib.setTMISEIToneMapPersistenceFlag                      ( m_toneMapPersistenceFlag );
-  m_cEncLib.setTMISEICodedDataBitDepth                           ( m_toneMapCodedDataBitDepth );
-  m_cEncLib.setTMISEITargetBitDepth                              ( m_toneMapTargetBitDepth );
-  m_cEncLib.setTMISEIModelID                                     ( m_toneMapModelId );
-  m_cEncLib.setTMISEIMinValue                                    ( m_toneMapMinValue );
-  m_cEncLib.setTMISEIMaxValue                                    ( m_toneMapMaxValue );
-  m_cEncLib.setTMISEISigmoidMidpoint                             ( m_sigmoidMidpoint );
-  m_cEncLib.setTMISEISigmoidWidth                                ( m_sigmoidWidth );
-  m_cEncLib.setTMISEIStartOfCodedInterva                         ( m_startOfCodedInterval );
-  m_cEncLib.setTMISEINumPivots                                   ( m_numPivots );
-  m_cEncLib.setTMISEICodedPivotValue                             ( m_codedPivotValue );
-  m_cEncLib.setTMISEITargetPivotValue                            ( m_targetPivotValue );
-  m_cEncLib.setTMISEICameraIsoSpeedIdc                           ( m_cameraIsoSpeedIdc );
-  m_cEncLib.setTMISEICameraIsoSpeedValue                         ( m_cameraIsoSpeedValue );
-  m_cEncLib.setTMISEIExposureIndexIdc                            ( m_exposureIndexIdc );
-  m_cEncLib.setTMISEIExposureIndexValue                          ( m_exposureIndexValue );
-  m_cEncLib.setTMISEIExposureCompensationValueSignFlag           ( m_exposureCompensationValueSignFlag );
-  m_cEncLib.setTMISEIExposureCompensationValueNumerator          ( m_exposureCompensationValueNumerator );
-  m_cEncLib.setTMISEIExposureCompensationValueDenomIdc           ( m_exposureCompensationValueDenomIdc );
-  m_cEncLib.setTMISEIRefScreenLuminanceWhite                     ( m_refScreenLuminanceWhite );
-  m_cEncLib.setTMISEIExtendedRangeWhiteLevel                     ( m_extendedRangeWhiteLevel );
-  m_cEncLib.setTMISEINominalBlackLevelLumaCodeValue              ( m_nominalBlackLevelLumaCodeValue );
-  m_cEncLib.setTMISEINominalWhiteLevelLumaCodeValue              ( m_nominalWhiteLevelLumaCodeValue );
-  m_cEncLib.setTMISEIExtendedWhiteLevelLumaCodeValue             ( m_extendedWhiteLevelLumaCodeValue );
-  m_cEncLib.setChromaResamplingFilterHintEnabled                 ( m_chromaResamplingFilterSEIenabled );
-  m_cEncLib.setChromaResamplingHorFilterIdc                      ( m_chromaResamplingHorFilterIdc );
-  m_cEncLib.setChromaResamplingVerFilterIdc                      ( m_chromaResamplingVerFilterIdc );
+  m_cEncLib.setFrameFieldInfoSEIEnabled                          ( m_frameFieldInfoSEIEnabled );
+   m_cEncLib.setBpDeltasGOPStructure                             ( m_bpDeltasGOPStructure );
+  m_cEncLib.setDecodingUnitInfoSEIEnabled                        ( m_decodingUnitInfoSEIEnabled );
+  m_cEncLib.setHrdParametersPresentFlag                          ( m_hrdParametersPresentFlag );
   m_cEncLib.setFramePackingArrangementSEIEnabled                 ( m_framePackingSEIEnabled );
   m_cEncLib.setFramePackingArrangementSEIType                    ( m_framePackingSEIType );
   m_cEncLib.setFramePackingArrangementSEIId                      ( m_framePackingSEIId );
   m_cEncLib.setFramePackingArrangementSEIQuincunx                ( m_framePackingSEIQuincunx );
   m_cEncLib.setFramePackingArrangementSEIInterpretation          ( m_framePackingSEIInterpretation );
-  m_cEncLib.setSegmentedRectFramePackingArrangementSEIEnabled    ( m_segmentedRectFramePackingSEIEnabled );
-  m_cEncLib.setSegmentedRectFramePackingArrangementSEICancel     ( m_segmentedRectFramePackingSEICancel );
-  m_cEncLib.setSegmentedRectFramePackingArrangementSEIType       ( m_segmentedRectFramePackingSEIType );
-  m_cEncLib.setSegmentedRectFramePackingArrangementSEIPersistence( m_segmentedRectFramePackingSEIPersistence );
-  m_cEncLib.setDisplayOrientationSEIAngle                        ( m_displayOrientationSEIAngle );
-  m_cEncLib.setTemporalLevel0IndexSEIEnabled                     ( m_temporalLevel0IndexSEIEnabled );
-  m_cEncLib.setGradualDecodingRefreshInfoEnabled                 ( m_gradualDecodingRefreshInfoEnabled );
-  m_cEncLib.setNoDisplaySEITLayer                                ( m_noDisplaySEITLayer );
-  m_cEncLib.setDecodingUnitInfoSEIEnabled                        ( m_decodingUnitInfoSEIEnabled );
-  m_cEncLib.setSOPDescriptionSEIEnabled                          ( m_SOPDescriptionSEIEnabled );
-  m_cEncLib.setScalableNestingSEIEnabled                         ( m_scalableNestingSEIEnabled );
-  m_cEncLib.setTMCTSSEIEnabled                                   ( m_tmctsSEIEnabled );
+  m_cEncLib.setErpSEIEnabled                                     ( m_erpSEIEnabled );           
+  m_cEncLib.setErpSEICancelFlag                                  ( m_erpSEICancelFlag );        
+  m_cEncLib.setErpSEIPersistenceFlag                             ( m_erpSEIPersistenceFlag );   
+  m_cEncLib.setErpSEIGuardBandFlag                               ( m_erpSEIGuardBandFlag );     
+  m_cEncLib.setErpSEIGuardBandType                               ( m_erpSEIGuardBandType );     
+  m_cEncLib.setErpSEILeftGuardBandWidth                          ( m_erpSEILeftGuardBandWidth );
+  m_cEncLib.setErpSEIRightGuardBandWidth                         ( m_erpSEIRightGuardBandWidth );
+  m_cEncLib.setSphereRotationSEIEnabled                          ( m_sphereRotationSEIEnabled );
+  m_cEncLib.setSphereRotationSEICancelFlag                       ( m_sphereRotationSEICancelFlag );
+  m_cEncLib.setSphereRotationSEIPersistenceFlag                  ( m_sphereRotationSEIPersistenceFlag );
+  m_cEncLib.setSphereRotationSEIYaw                              ( m_sphereRotationSEIYaw );
+  m_cEncLib.setSphereRotationSEIPitch                            ( m_sphereRotationSEIPitch );
+  m_cEncLib.setSphereRotationSEIRoll                             ( m_sphereRotationSEIRoll );
+  m_cEncLib.setOmniViewportSEIEnabled                            ( m_omniViewportSEIEnabled );          
+  m_cEncLib.setOmniViewportSEIId                                 ( m_omniViewportSEIId );               
+  m_cEncLib.setOmniViewportSEICancelFlag                         ( m_omniViewportSEICancelFlag );       
+  m_cEncLib.setOmniViewportSEIPersistenceFlag                    ( m_omniViewportSEIPersistenceFlag );  
+  m_cEncLib.setOmniViewportSEICntMinus1                          ( m_omniViewportSEICntMinus1 );        
+  m_cEncLib.setOmniViewportSEIAzimuthCentre                      ( m_omniViewportSEIAzimuthCentre );    
+  m_cEncLib.setOmniViewportSEIElevationCentre                    ( m_omniViewportSEIElevationCentre );  
+  m_cEncLib.setOmniViewportSEITiltCentre                         ( m_omniViewportSEITiltCentre );       
+  m_cEncLib.setOmniViewportSEIHorRange                           ( m_omniViewportSEIHorRange );         
+  m_cEncLib.setOmniViewportSEIVerRange                           ( m_omniViewportSEIVerRange );         
+  m_cEncLib.setRwpSEIEnabled                                     (m_rwpSEIEnabled);
+  m_cEncLib.setRwpSEIRwpCancelFlag                               (m_rwpSEIRwpCancelFlag);
+  m_cEncLib.setRwpSEIRwpPersistenceFlag                          (m_rwpSEIRwpPersistenceFlag);
+  m_cEncLib.setRwpSEIConstituentPictureMatchingFlag              (m_rwpSEIConstituentPictureMatchingFlag);
+  m_cEncLib.setRwpSEINumPackedRegions                            (m_rwpSEINumPackedRegions);
+  m_cEncLib.setRwpSEIProjPictureWidth                            (m_rwpSEIProjPictureWidth);
+  m_cEncLib.setRwpSEIProjPictureHeight                           (m_rwpSEIProjPictureHeight);
+  m_cEncLib.setRwpSEIPackedPictureWidth                          (m_rwpSEIPackedPictureWidth);
+  m_cEncLib.setRwpSEIPackedPictureHeight                         (m_rwpSEIPackedPictureHeight);
+  m_cEncLib.setRwpSEIRwpTransformType                            (m_rwpSEIRwpTransformType);
+  m_cEncLib.setRwpSEIRwpGuardBandFlag                            (m_rwpSEIRwpGuardBandFlag);
+  m_cEncLib.setRwpSEIProjRegionWidth                             (m_rwpSEIProjRegionWidth);
+  m_cEncLib.setRwpSEIProjRegionHeight                            (m_rwpSEIProjRegionHeight);
+  m_cEncLib.setRwpSEIRwpSEIProjRegionTop                         (m_rwpSEIRwpSEIProjRegionTop);
+  m_cEncLib.setRwpSEIProjRegionLeft                              (m_rwpSEIProjRegionLeft);
+  m_cEncLib.setRwpSEIPackedRegionWidth                           (m_rwpSEIPackedRegionWidth);
+  m_cEncLib.setRwpSEIPackedRegionHeight                          (m_rwpSEIPackedRegionHeight);
+  m_cEncLib.setRwpSEIPackedRegionTop                             (m_rwpSEIPackedRegionTop);
+  m_cEncLib.setRwpSEIPackedRegionLeft                            (m_rwpSEIPackedRegionLeft);
+  m_cEncLib.setRwpSEIRwpLeftGuardBandWidth                       (m_rwpSEIRwpLeftGuardBandWidth);
+  m_cEncLib.setRwpSEIRwpRightGuardBandWidth                      (m_rwpSEIRwpRightGuardBandWidth);
+  m_cEncLib.setRwpSEIRwpTopGuardBandHeight                       (m_rwpSEIRwpTopGuardBandHeight);
+  m_cEncLib.setRwpSEIRwpBottomGuardBandHeight                    (m_rwpSEIRwpBottomGuardBandHeight);
+  m_cEncLib.setRwpSEIRwpGuardBandNotUsedForPredFlag              (m_rwpSEIRwpGuardBandNotUsedForPredFlag);
+  m_cEncLib.setRwpSEIRwpGuardBandType                            (m_rwpSEIRwpGuardBandType);
+  m_cEncLib.setGcmpSEIEnabled                                    ( m_gcmpSEIEnabled );
+  m_cEncLib.setGcmpSEICancelFlag                                 ( m_gcmpSEICancelFlag );
+  m_cEncLib.setGcmpSEIPersistenceFlag                            ( m_gcmpSEIPersistenceFlag );                                                              
+  m_cEncLib.setGcmpSEIPackingType                                ( (uint8_t)m_gcmpSEIPackingType );
+  m_cEncLib.setGcmpSEIMappingFunctionType                        ( (uint8_t)m_gcmpSEIMappingFunctionType );                                                              
+  m_cEncLib.setGcmpSEIFaceIndex                                  ( m_gcmpSEIFaceIndex );
+  m_cEncLib.setGcmpSEIFaceRotation                               ( m_gcmpSEIFaceRotation );
+  m_cEncLib.setGcmpSEIFunctionCoeffU                             ( m_gcmpSEIFunctionCoeffU );
+  m_cEncLib.setGcmpSEIFunctionUAffectedByVFlag                   ( m_gcmpSEIFunctionUAffectedByVFlag );
+  m_cEncLib.setGcmpSEIFunctionCoeffV                             ( m_gcmpSEIFunctionCoeffV );
+  m_cEncLib.setGcmpSEIFunctionVAffectedByUFlag                   ( m_gcmpSEIFunctionVAffectedByUFlag );                                                              
+  m_cEncLib.setGcmpSEIGuardBandFlag                              ( m_gcmpSEIGuardBandFlag );
+  m_cEncLib.setGcmpSEIGuardBandBoundaryType                      ( m_gcmpSEIGuardBandBoundaryType );
+  m_cEncLib.setGcmpSEIGuardBandSamplesMinus1                     ( (uint8_t)m_gcmpSEIGuardBandSamplesMinus1 );
+  m_cEncLib.setSubpicureLevelInfoSEIEnabled                      (m_subpicureLevelInfoSEIEnabled);
+  m_cEncLib.setSampleAspectRatioInfoSEIEnabled                   (m_sampleAspectRatioInfoSEIEnabled);
+  m_cEncLib.setSariCancelFlag                                    (m_sariCancelFlag);
+  m_cEncLib.setSariPersistenceFlag                               (m_sariPersistenceFlag);
+  m_cEncLib.setSariAspectRatioIdc                                (m_sariAspectRatioIdc);
+  m_cEncLib.setSariSarWidth                                      (m_sariSarWidth);
+  m_cEncLib.setSariSarHeight                                     (m_sariSarHeight);
   m_cEncLib.setMCTSEncConstraint                                 ( m_MCTSEncConstraint);
-  m_cEncLib.setTimeCodeSEIEnabled                                ( m_timeCodeSEIEnabled );
-  m_cEncLib.setNumberOfTimeSets                                  ( m_timeCodeSEINumTs );
-  for(int i = 0; i < m_timeCodeSEINumTs; i++)
-  {
-    m_cEncLib.setTimeSet(m_timeSetArray[i], i);
-  }
-  m_cEncLib.setKneeSEIEnabled                                    ( m_kneeSEIEnabled );
-  m_cEncLib.setKneeSEIId                                         ( m_kneeSEIId );
-  m_cEncLib.setKneeSEICancelFlag                                 ( m_kneeSEICancelFlag );
-  m_cEncLib.setKneeSEIPersistenceFlag                            ( m_kneeSEIPersistenceFlag );
-  m_cEncLib.setKneeSEIInputDrange                                ( m_kneeSEIInputDrange );
-  m_cEncLib.setKneeSEIInputDispLuminance                         ( m_kneeSEIInputDispLuminance );
-  m_cEncLib.setKneeSEIOutputDrange                               ( m_kneeSEIOutputDrange );
-  m_cEncLib.setKneeSEIOutputDispLuminance                        ( m_kneeSEIOutputDispLuminance );
-  m_cEncLib.setKneeSEINumKneePointsMinus1                        ( m_kneeSEINumKneePointsMinus1 );
-  m_cEncLib.setKneeSEIInputKneePoint                             ( m_kneeSEIInputKneePoint );
-  m_cEncLib.setKneeSEIOutputKneePoint                            ( m_kneeSEIOutputKneePoint );
-  m_cEncLib.setColourRemapInfoSEIFileRoot                        ( m_colourRemapSEIFileRoot );
   m_cEncLib.setMasteringDisplaySEI                               ( m_masteringDisplay );
 #if U0033_ALTERNATIVE_TRANSFER_CHARACTERISTICS_SEI
   m_cEncLib.setSEIAlternativeTransferCharacteristicsSEIEnable    ( m_preferredTransferCharacteristics>=0     );
   m_cEncLib.setSEIPreferredTransferCharacteristics               ( uint8_t(m_preferredTransferCharacteristics) );
 #endif
-  m_cEncLib.setSEIGreenMetadataInfoSEIEnable                     ( m_greenMetadataType > 0 );
-  m_cEncLib.setSEIGreenMetadataType                              ( uint8_t(m_greenMetadataType) );
-  m_cEncLib.setSEIXSDMetricType                                  ( uint8_t(m_xsdMetricType) );
-
-#if HEVC_TILES_WPP
-  m_cEncLib.setTileUniformSpacingFlag                            ( m_tileUniformSpacingFlag );
-  m_cEncLib.setNumColumnsMinus1                                  ( m_numTileColumnsMinus1 );
-  m_cEncLib.setNumRowsMinus1                                     ( m_numTileRowsMinus1 );
-  if(!m_tileUniformSpacingFlag)
-  {
-    m_cEncLib.setColumnWidth                                     ( m_tileColumnWidth );
-    m_cEncLib.setRowHeight                                       ( m_tileRowHeight );
+  // film grain characteristics
+  m_cEncLib.setFilmGrainCharactersticsSEIEnabled                 (m_fgcSEIEnabled);
+  m_cEncLib.setFilmGrainCharactersticsSEICancelFlag              (m_fgcSEICancelFlag);
+  m_cEncLib.setFilmGrainCharactersticsSEIPersistenceFlag         (m_fgcSEIPersistenceFlag);
+  m_cEncLib.setFilmGrainCharactersticsSEIModelID                 ((uint8_t)m_fgcSEIModelID);
+  m_cEncLib.setFilmGrainCharactersticsSEISepColourDescPresent    (m_fgcSEISepColourDescPresentFlag);
+  m_cEncLib.setFilmGrainCharactersticsSEIBlendingModeID          ((uint8_t)m_fgcSEIBlendingModeID);
+  m_cEncLib.setFilmGrainCharactersticsSEILog2ScaleFactor         ((uint8_t)m_fgcSEILog2ScaleFactor);
+  for (int i = 0; i < MAX_NUM_COMPONENT; i++) {
+    m_cEncLib.setFGCSEICompModelPresent                          (m_fgcSEICompModelPresent[i], i);
   }
-#endif
-  m_cEncLib.xCheckGSParameters();
-#if HEVC_TILES_WPP
-  int uiTilesCount = (m_numTileRowsMinus1+1) * (m_numTileColumnsMinus1+1);
-  if(uiTilesCount == 1)
-  {
-    m_bLFCrossTileBoundaryFlag = true;
+  // content light level
+  m_cEncLib.setCLLSEIEnabled                                     (m_cllSEIEnabled);
+  m_cEncLib.setCLLSEIMaxContentLightLevel                        ((uint16_t)m_cllSEIMaxContentLevel);
+  m_cEncLib.setCLLSEIMaxPicAvgLightLevel                         ((uint16_t)m_cllSEIMaxPicAvgLevel);
+  // ambient viewing environment
+  m_cEncLib.setAmbientViewingEnvironmentSEIEnabled               (m_aveSEIEnabled);
+  m_cEncLib.setAmbientViewingEnvironmentSEIIlluminance           (m_aveSEIAmbientIlluminance);
+  m_cEncLib.setAmbientViewingEnvironmentSEIAmbientLightX         ((uint16_t)m_aveSEIAmbientLightX);
+  m_cEncLib.setAmbientViewingEnvironmentSEIAmbientLightY         ((uint16_t)m_aveSEIAmbientLightY);
+  // content colour volume SEI
+  m_cEncLib.setCcvSEIEnabled                                     (m_ccvSEIEnabled);
+  m_cEncLib.setCcvSEICancelFlag                                  (m_ccvSEICancelFlag);
+  m_cEncLib.setCcvSEIPersistenceFlag                             (m_ccvSEIPersistenceFlag);
+  m_cEncLib.setCcvSEIEnabled                                     (m_ccvSEIEnabled);
+  m_cEncLib.setCcvSEICancelFlag                                  (m_ccvSEICancelFlag);
+  m_cEncLib.setCcvSEIPersistenceFlag                             (m_ccvSEIPersistenceFlag);
+  m_cEncLib.setCcvSEIPrimariesPresentFlag                        (m_ccvSEIPrimariesPresentFlag);
+  m_cEncLib.setCcvSEIMinLuminanceValuePresentFlag                (m_ccvSEIMinLuminanceValuePresentFlag);
+  m_cEncLib.setCcvSEIMaxLuminanceValuePresentFlag                (m_ccvSEIMaxLuminanceValuePresentFlag);
+  m_cEncLib.setCcvSEIAvgLuminanceValuePresentFlag                (m_ccvSEIAvgLuminanceValuePresentFlag);
+  for(int i = 0; i < MAX_NUM_COMPONENT; i++) {
+    m_cEncLib.setCcvSEIPrimariesX                                (m_ccvSEIPrimariesX[i], i);
+    m_cEncLib.setCcvSEIPrimariesY                                (m_ccvSEIPrimariesY[i], i);
   }
-  m_cEncLib.setLFCrossTileBoundaryFlag                           ( m_bLFCrossTileBoundaryFlag );
+  m_cEncLib.setCcvSEIMinLuminanceValue                           (m_ccvSEIMinLuminanceValue);
+  m_cEncLib.setCcvSEIMaxLuminanceValue                           (m_ccvSEIMaxLuminanceValue);
+  m_cEncLib.setCcvSEIAvgLuminanceValue                           (m_ccvSEIAvgLuminanceValue);
   m_cEncLib.setEntropyCodingSyncEnabledFlag                      ( m_entropyCodingSyncEnabledFlag );
-#endif
   m_cEncLib.setTMVPModeId                                        ( m_TMVPModeId );
-#if HEVC_USE_SCALING_LISTS
+  m_cEncLib.setSliceLevelRpl                                     ( m_sliceLevelRpl  );
+  m_cEncLib.setSliceLevelDblk                                    ( m_sliceLevelDblk );
+  m_cEncLib.setSliceLevelSao                                     ( m_sliceLevelSao  );
+  m_cEncLib.setSliceLevelAlf                                     ( m_sliceLevelAlf  );
+  m_cEncLib.setConstantSliceHeaderParamsEnabledFlag              ( m_constantSliceHeaderParamsEnabledFlag );
+  m_cEncLib.setPPSDepQuantEnabledIdc                             ( m_PPSDepQuantEnabledIdc );
+  m_cEncLib.setPPSRefPicListSPSIdc0                              ( m_PPSRefPicListSPSIdc0 );
+  m_cEncLib.setPPSRefPicListSPSIdc1                              ( m_PPSRefPicListSPSIdc1 );
+  m_cEncLib.setPPSMvdL1ZeroIdc                                   ( m_PPSMvdL1ZeroIdc );
+  m_cEncLib.setPPSCollocatedFromL0Idc                            ( m_PPSCollocatedFromL0Idc );
+  m_cEncLib.setPPSSixMinusMaxNumMergeCandPlus1                   ( m_PPSSixMinusMaxNumMergeCandPlus1 );
+  m_cEncLib.setPPSMaxNumMergeCandMinusMaxNumTriangleCandPlus1    ( m_PPSMaxNumMergeCandMinusMaxNumTriangleCandPlus1 );
   m_cEncLib.setUseScalingListId                                  ( m_useScalingListId  );
   m_cEncLib.setScalingListFileName                               ( m_scalingListFileName );
-#endif
+  m_cEncLib.setDisableScalingMatrixForLfnstBlks                  ( m_disableScalingMatrixForLfnstBlks);
   m_cEncLib.setDepQuantEnabledFlag                               ( m_depQuantEnabledFlag);
-#if HEVC_USE_SIGN_HIDING
   m_cEncLib.setSignDataHidingEnabledFlag                         ( m_signDataHidingEnabledFlag);
-#endif
   m_cEncLib.setUseRateCtrl                                       ( m_RCEnableRateControl );
   m_cEncLib.setTargetBitrate                                     ( m_RCTargetBitrate );
   m_cEncLib.setKeepHierBit                                       ( m_RCKeepHierarchicalBit );
@@ -502,24 +712,17 @@ void EncApp::xInitLibCfg()
   m_cEncLib.setCpbSize                                           ( m_RCCpbSize );
   m_cEncLib.setInitialCpbFullness                                ( m_RCInitialCpbFullness );
 #endif
-  m_cEncLib.setTransquantBypassEnabledFlag                       ( m_TransquantBypassEnabledFlag );
-  m_cEncLib.setCUTransquantBypassFlagForceValue                  ( m_CUTransquantBypassFlagForce );
   m_cEncLib.setCostMode                                          ( m_costMode );
   m_cEncLib.setUseRecalculateQPAccordingToLambda                 ( m_recalculateQPAccordingToLambda );
-#if HEVC_USE_INTRA_SMOOTHING_T32 || HEVC_USE_INTRA_SMOOTHING_T64
-  m_cEncLib.setUseStrongIntraSmoothing                           ( m_useStrongIntraSmoothing );
-#endif
+  m_cEncLib.setDecodingParameterSetEnabled                       ( m_decodingParameterSetEnabled );
+#if HEVC_SEI
   m_cEncLib.setActiveParameterSetsSEIEnabled                     ( m_activeParameterSetsSEIEnabled );
+#endif
   m_cEncLib.setVuiParametersPresentFlag                          ( m_vuiParametersPresentFlag );
   m_cEncLib.setAspectRatioInfoPresentFlag                        ( m_aspectRatioInfoPresentFlag);
   m_cEncLib.setAspectRatioIdc                                    ( m_aspectRatioIdc );
   m_cEncLib.setSarWidth                                          ( m_sarWidth );
   m_cEncLib.setSarHeight                                         ( m_sarHeight );
-  m_cEncLib.setOverscanInfoPresentFlag                           ( m_overscanInfoPresentFlag );
-  m_cEncLib.setOverscanAppropriateFlag                           ( m_overscanAppropriateFlag );
-  m_cEncLib.setVideoSignalTypePresentFlag                        ( m_videoSignalTypePresentFlag );
-  m_cEncLib.setVideoFormat                                       ( m_videoFormat );
-  m_cEncLib.setVideoFullRangeFlag                                ( m_videoFullRangeFlag );
   m_cEncLib.setColourDescriptionPresentFlag                      ( m_colourDescriptionPresentFlag );
   m_cEncLib.setColourPrimaries                                   ( m_colourPrimaries );
   m_cEncLib.setTransferCharacteristics                           ( m_transferCharacteristics );
@@ -527,21 +730,10 @@ void EncApp::xInitLibCfg()
   m_cEncLib.setChromaLocInfoPresentFlag                          ( m_chromaLocInfoPresentFlag );
   m_cEncLib.setChromaSampleLocTypeTopField                       ( m_chromaSampleLocTypeTopField );
   m_cEncLib.setChromaSampleLocTypeBottomField                    ( m_chromaSampleLocTypeBottomField );
-  m_cEncLib.setNeutralChromaIndicationFlag                       ( m_neutralChromaIndicationFlag );
-  m_cEncLib.setDefaultDisplayWindow                              ( m_defDispWinLeftOffset, m_defDispWinRightOffset, m_defDispWinTopOffset, m_defDispWinBottomOffset );
-  m_cEncLib.setFrameFieldInfoPresentFlag                         ( m_frameFieldInfoPresentFlag );
-  m_cEncLib.setPocProportionalToTimingFlag                       ( m_pocProportionalToTimingFlag );
-  m_cEncLib.setNumTicksPocDiffOneMinus1                          ( m_numTicksPocDiffOneMinus1    );
-  m_cEncLib.setBitstreamRestrictionFlag                          ( m_bitstreamRestrictionFlag );
-#if HEVC_TILES_WPP
-  m_cEncLib.setTilesFixedStructureFlag                           ( m_tilesFixedStructureFlag );
-#endif
-  m_cEncLib.setMotionVectorsOverPicBoundariesFlag                ( m_motionVectorsOverPicBoundariesFlag );
-  m_cEncLib.setMinSpatialSegmentationIdc                         ( m_minSpatialSegmentationIdc );
-  m_cEncLib.setMaxBytesPerPicDenom                               ( m_maxBytesPerPicDenom );
-  m_cEncLib.setMaxBitsPerMinCuDenom                              ( m_maxBitsPerMinCuDenom );
-  m_cEncLib.setLog2MaxMvLengthHorizontal                         ( m_log2MaxMvLengthHorizontal );
-  m_cEncLib.setLog2MaxMvLengthVertical                           ( m_log2MaxMvLengthVertical );
+  m_cEncLib.setChromaSampleLocType                               ( m_chromaSampleLocType );
+  m_cEncLib.setOverscanInfoPresentFlag                           ( m_overscanInfoPresentFlag );
+  m_cEncLib.setOverscanAppropriateFlag                           ( m_overscanAppropriateFlag );
+  m_cEncLib.setVideoFullRangeFlag                                ( m_videoFullRangeFlag );
   m_cEncLib.setEfficientFieldIRAPEnabled                         ( m_bEfficientFieldIRAPEnabled );
   m_cEncLib.setHarmonizeGopFirstFieldCoupleEnabled               ( m_bHarmonizeGopFirstFieldCoupleEnabled );
   m_cEncLib.setSummaryOutFilename                                ( m_summaryOutFilename );
@@ -561,29 +753,47 @@ void EncApp::xInitLibCfg()
 #if ENABLE_SPLIT_PARALLELISM
   m_cEncLib.setNumSplitThreads                                   ( m_numSplitThreads );
   m_cEncLib.setForceSingleSplitThread                            ( m_forceSplitSequential );
-#endif
-#if ENABLE_WPP_PARALLELISM
-  m_cEncLib.setNumWppThreads                                     ( m_numWppThreads );
-  m_cEncLib.setNumWppExtraLines                                  ( m_numWppExtraLines );
-  m_cEncLib.setEnsureWppBitEqual                                 ( m_ensureWppBitEqual );
-
 #endif
   m_cEncLib.setUseALF                                            ( m_alf );
-  m_cEncLib.setReshaper                                          ( m_lumaReshapeEnable );
+  m_cEncLib.setLmcs                                              ( m_lmcsEnabled );
   m_cEncLib.setReshapeSignalType                                 ( m_reshapeSignalType );
   m_cEncLib.setReshapeIntraCMD                                   ( m_intraCMD );
   m_cEncLib.setReshapeCW                                         ( m_reshapeCW );
+  m_cEncLib.setReshapeCSoffset                                   ( m_CSoffset );
+
+#if JVET_O0756_CALCULATE_HDRMETRICS
+  for (int i=0; i<hdrtoolslib::NB_REF_WHITE; i++)
+  {
+    m_cEncLib.setWhitePointDeltaE                                (i, m_whitePointDeltaE[i] );
+  }
+  m_cEncLib.setMaxSampleValue                                    (m_maxSampleValue);
+  m_cEncLib.setSampleRange                                       (m_sampleRange);
+  m_cEncLib.setColorPrimaries                                    (m_colorPrimaries);
+  m_cEncLib.setEnableTFunctionLUT                                (m_enableTFunctionLUT);
+  for (int i=0; i<2; i++)
+  {
+    m_cEncLib.setChromaLocation                                    (i, m_chromaLocation);
+    m_cEncLib.setChromaUPFilter                                    (m_chromaUPFilter);
+  }
+  m_cEncLib.setCropOffsetLeft                                    (m_cropOffsetLeft);
+  m_cEncLib.setCropOffsetTop                                     (m_cropOffsetTop);
+  m_cEncLib.setCropOffsetRight                                   (m_cropOffsetRight);
+  m_cEncLib.setCropOffsetBottom                                  (m_cropOffsetBottom);
+  m_cEncLib.setCalculateHdrMetrics                               (m_calculateHdrMetrics);
+#endif
+  m_cEncLib.setGopBasedTemporalFilterEnabled(m_gopBasedTemporalFilterEnabled);
+  m_cEncLib.setNumRefLayers                                       ( m_numRefLayers );
 }
 
-void EncApp::xCreateLib( std::list<PelUnitBuf*>& recBufList
-                        )
+void EncApp::xCreateLib( std::list<PelUnitBuf*>& recBufList, const int layerId )
 {
   // Video I/O
   m_cVideoIOYuvInputFile.open( m_inputFileName,     false, m_inputBitDepth, m_MSBExtendedBitDepth, m_internalBitDepth );  // read  mode
 #if EXTENSION_360_VIDEO
   m_cVideoIOYuvInputFile.skipFrames(m_FrameSkip, m_inputFileWidth, m_inputFileHeight, m_InputChromaFormatIDC);
 #else
-  m_cVideoIOYuvInputFile.skipFrames(m_FrameSkip, m_iSourceWidth - m_aiPad[0], m_iSourceHeight - m_aiPad[1], m_InputChromaFormatIDC);
+  const int sourceHeight = m_isField ? m_iSourceHeightOrg : m_iSourceHeight;
+  m_cVideoIOYuvInputFile.skipFrames(m_FrameSkip, m_iSourceWidth - m_aiPad[0], sourceHeight - m_aiPad[1], m_InputChromaFormatIDC);
 #endif
   if (!m_reconFileName.empty())
   {
@@ -598,11 +808,24 @@ void EncApp::xCreateLib( std::list<PelUnitBuf*>& recBufList
       EXIT ("Invalid chroma output bit-depth or image width for packed YUV output, aborting\n");
     }
 
-    m_cVideoIOYuvReconFile.open(m_reconFileName, true, m_outputBitDepth, m_outputBitDepth, m_internalBitDepth);  // write mode
+    std::string reconFileName = m_reconFileName;
+    if( m_reconFileName.compare( "/dev/null" ) &&  (m_maxLayers > 1) )
+    {
+      size_t pos = reconFileName.find_last_of('.');
+      if (pos != string::npos)
+      {
+        reconFileName.insert( pos, std::to_string( layerId ) );
+      }
+      else
+      {
+        reconFileName.append( std::to_string( layerId ) );
+      }
+    }
+    m_cVideoIOYuvReconFile.open( reconFileName, true, m_outputBitDepth, m_outputBitDepth, m_internalBitDepth );  // write mode
   }
 
   // create the encoder
-  m_cEncLib.create();
+  m_cEncLib.create( layerId );
 
   // create the output buffer
   for( int i = 0; i < (m_iGOPSize + 1 + (m_isField ? 1 : 0)); i++ )
@@ -630,128 +853,172 @@ void EncApp::xInitLib(bool isFieldCoding)
 // Public member functions
 // ====================================================================================================================
 
-/**
- - create internal class
- - initialize internal variable
- - until the end of input YUV file, call encoding function in EncLib class
- - delete allocated buffers
- - destroy internal class
- .
- */
-void EncApp::encode()
+void EncApp::createLib( const int layerId )
 {
-  m_bitstream.open(m_bitstreamFileName.c_str(), fstream::binary | fstream::out);
-  if (!m_bitstream)
+  const int sourceHeight = m_isField ? m_iSourceHeightOrg : m_iSourceHeight;
+  UnitArea unitArea( m_chromaFormatIDC, Area( 0, 0, m_iSourceWidth, sourceHeight ) );
+
+  m_orgPic = new PelStorage;
+  m_trueOrgPic = new PelStorage;
+  m_orgPic->create( unitArea );
+  m_trueOrgPic->create( unitArea );
+
+  if( !m_bitstream.is_open() )
   {
-    EXIT( "Failed to open bitstream file " << m_bitstreamFileName.c_str() << " for writing\n");
+    m_bitstream.open( m_bitstreamFileName.c_str(), fstream::binary | fstream::out );
+    if( !m_bitstream )
+    {
+      EXIT( "Failed to open bitstream file " << m_bitstreamFileName.c_str() << " for writing\n" );
+    }
   }
 
-  std::list<PelUnitBuf*> recBufList;
   // initialize internal class & member variables
   xInitLibCfg();
-  xCreateLib( recBufList
-             );
-  xInitLib(m_isField);
+  xCreateLib( m_recBufList, layerId );
+  xInitLib( m_isField );
 
   printChromaFormat();
 
-  // main encoder loop
-  int   iNumEncoded = 0;
-  bool  bEos = false;
+#if EXTENSION_360_VIDEO
+  m_ext360 = new TExt360AppEncTop( *this, m_cEncLib.getGOPEncoder()->getExt360Data(), *( m_cEncLib.getGOPEncoder() ), *m_orgPic );
+#endif
 
-  const InputColourSpaceConversion ipCSC  =  m_inputColourSpaceConvert;
-  const InputColourSpaceConversion snrCSC = (!m_snrInternalColourSpace) ? m_inputColourSpaceConvert : IPCOLOURSPACE_UNCHANGED;
+  if( m_gopBasedTemporalFilterEnabled )
+  {
+    m_temporalFilter.init( m_FrameSkip, m_inputBitDepth, m_MSBExtendedBitDepth, m_internalBitDepth, m_iSourceWidth, m_iSourceHeight,
+      m_aiPad, m_bClipInputVideoToRec709Range, m_inputFileName, m_chromaFormatIDC,
+      m_inputColourSpaceConvert, m_iQP, m_gopBasedTemporalFilterStrengths,
+      m_gopBasedTemporalFilterFutureReference );
+  }
+}
 
-  PelStorage trueOrgPic;
-  PelStorage orgPic;
-  const int sourceHeight = m_isField ? m_iSourceHeightOrg : m_iSourceHeight;
-  UnitArea unitArea( m_chromaFormatIDC, Area( 0, 0, m_iSourceWidth, sourceHeight ) );
+void EncApp::destroyLib()
+{
+  printf( "\nLayerId %2d", m_cEncLib.getLayerId() );
+
+  m_cEncLib.printSummary( m_isField );
 
-  orgPic.create( unitArea );
-  trueOrgPic.create( unitArea );
+  // delete used buffers in encoder class
+  m_cEncLib.deletePicBuffer();
+
+  for( auto &p : m_recBufList )
+  {
+    delete p;
+  }
+  m_recBufList.clear();
+
+  xDestroyLib();
+
+  if( m_bitstream.is_open() )
+  {
+    m_bitstream.close();
+  }
+
+  m_orgPic->destroy();
+  m_trueOrgPic->destroy();
+  delete m_trueOrgPic;
+  delete m_orgPic;
 #if EXTENSION_360_VIDEO
-  TExt360AppEncTop           ext360(*this, m_cEncLib.getGOPEncoder()->getExt360Data(), *(m_cEncLib.getGOPEncoder()), orgPic);
+  delete m_ext360;
 #endif
 
-  while ( !bEos )
-  {
-    // read input YUV file
+  printRateSummary();
+}
+
+bool EncApp::encodePrep( bool& eos )
+{
+  // main encoder loop
+  const InputColourSpaceConversion ipCSC = m_inputColourSpaceConvert;
+  const InputColourSpaceConversion snrCSC = ( !m_snrInternalColourSpace ) ? m_inputColourSpaceConvert : IPCOLOURSPACE_UNCHANGED;
+
+  // read input YUV file
 #if EXTENSION_360_VIDEO
-    if (ext360.isEnabled())
-    {
-      ext360.read(m_cVideoIOYuvInputFile, orgPic, trueOrgPic, ipCSC);
-    }
-    else
-    {
-      m_cVideoIOYuvInputFile.read(orgPic, trueOrgPic, ipCSC, m_aiPad, m_InputChromaFormatIDC, m_bClipInputVideoToRec709Range);
-    }
+  if( m_ext360->isEnabled() )
+  {
+    m_ext360->read( m_cVideoIOYuvInputFile, *m_orgPic, *m_trueOrgPic, ipCSC );
+  }
+  else
+  {
+    m_cVideoIOYuvInputFile.read( *m_orgPic, *m_trueOrgPic, ipCSC, m_aiPad, m_InputChromaFormatIDC, m_bClipInputVideoToRec709Range );
+  }
 #else
-    m_cVideoIOYuvInputFile.read( orgPic, trueOrgPic, ipCSC, m_aiPad, m_InputChromaFormatIDC, m_bClipInputVideoToRec709Range );
+  m_cVideoIOYuvInputFile.read( *m_orgPic, *m_trueOrgPic, ipCSC, m_aiPad, m_InputChromaFormatIDC, m_bClipInputVideoToRec709Range );
 #endif
 
-    // increase number of received frames
-    m_iFrameRcvd++;
+  if( m_gopBasedTemporalFilterEnabled )
+  {
+    m_temporalFilter.filter( m_orgPic, m_iFrameRcvd );
+  }
+
+  // increase number of received frames
+  m_iFrameRcvd++;
 
-    bEos = (m_isField && (m_iFrameRcvd == (m_framesToBeEncoded >> 1) )) || ( !m_isField && (m_iFrameRcvd == m_framesToBeEncoded) );
+  eos = ( m_isField && ( m_iFrameRcvd == ( m_framesToBeEncoded >> 1 ) ) ) || ( !m_isField && ( m_iFrameRcvd == m_framesToBeEncoded ) );
 
-    bool flush = 0;
-    // if end of file (which is only detected on a read failure) flush the encoder of any queued pictures
-    if (m_cVideoIOYuvInputFile.isEof())
-    {
-      flush = true;
-      bEos = true;
-      m_iFrameRcvd--;
-      m_cEncLib.setFramesToBeEncoded(m_iFrameRcvd);
-    }
+  // if end of file (which is only detected on a read failure) flush the encoder of any queued pictures
+  if( m_cVideoIOYuvInputFile.isEof() )
+  {
+    m_flush = true;
+    eos = true;
+    m_iFrameRcvd--;
+    m_cEncLib.setFramesToBeEncoded( m_iFrameRcvd );
+  }
 
-    // call encoding function for one frame
-    if ( m_isField )
-    {
-      m_cEncLib.encode( bEos, flush ? 0 : &orgPic, flush ? 0 : &trueOrgPic, snrCSC, recBufList,
-                        iNumEncoded, m_isTopFieldFirst );
-    }
-    else
-    {
-      m_cEncLib.encode( bEos, flush ? 0 : &orgPic, flush ? 0 : &trueOrgPic, snrCSC, recBufList,
-                        iNumEncoded );
-    }
+  bool keepDoing = false;
+
+  // call encoding function for one frame
+  if( m_isField )
+  {
+    keepDoing = m_cEncLib.encodePrep( eos, m_flush ? 0 : m_orgPic, m_flush ? 0 : m_trueOrgPic, snrCSC, m_recBufList, m_numEncoded, m_isTopFieldFirst );
+  }
+  else
+  {
+    keepDoing = m_cEncLib.encodePrep( eos, m_flush ? 0 : m_orgPic, m_flush ? 0 : m_trueOrgPic, snrCSC, m_recBufList, m_numEncoded );
+  }
+
+  return keepDoing;
+}
+
+bool EncApp::encode()
+{
+  const InputColourSpaceConversion snrCSC = ( !m_snrInternalColourSpace ) ? m_inputColourSpaceConvert : IPCOLOURSPACE_UNCHANGED;
+  bool keepDoing = false;
+
+  // call encoding function for one frame
+  if( m_isField )
+  {
+    keepDoing = m_cEncLib.encode( snrCSC, m_recBufList, m_numEncoded, m_isTopFieldFirst );
+  }
+  else
+  {
+    keepDoing = m_cEncLib.encode( snrCSC, m_recBufList, m_numEncoded );
+  }
 
+#if JVET_O0756_CALCULATE_HDRMETRICS
+    m_metricTime = m_cEncLib.getMetricTime();
+#endif
+
+  // output when the entire GOP was processed
+  if( !keepDoing )
+  {
     // write bistream to file if necessary
-    if ( iNumEncoded > 0 )
+    if( m_numEncoded > 0 )
     {
-      xWriteOutput( iNumEncoded, recBufList
-      );
+      xWriteOutput( m_numEncoded, m_recBufList );
     }
     // temporally skip frames
     if( m_temporalSubsampleRatio > 1 )
     {
 #if EXTENSION_360_VIDEO
-      m_cVideoIOYuvInputFile.skipFrames(m_temporalSubsampleRatio - 1, m_inputFileWidth, m_inputFileHeight, m_InputChromaFormatIDC);
+      m_cVideoIOYuvInputFile.skipFrames( m_temporalSubsampleRatio - 1, m_inputFileWidth, m_inputFileHeight, m_InputChromaFormatIDC );
 #else
-      m_cVideoIOYuvInputFile.skipFrames(m_temporalSubsampleRatio-1, m_iSourceWidth - m_aiPad[0], m_iSourceHeight - m_aiPad[1], m_InputChromaFormatIDC);
+    const int sourceHeight = m_isField ? m_iSourceHeightOrg : m_iSourceHeight;
+    m_cVideoIOYuvInputFile.skipFrames( m_temporalSubsampleRatio - 1, m_iSourceWidth - m_aiPad[0], sourceHeight - m_aiPad[1], m_InputChromaFormatIDC );
 #endif
     }
   }
 
-  m_cEncLib.printSummary(m_isField);
-
-
-  // delete used buffers in encoder class
-  m_cEncLib.deletePicBuffer();
-
-  for( auto &p : recBufList )
-  {
-    delete p;
-  }
-  recBufList.clear();
-
-  xDestroyLib();
-
-  m_bitstream.close();
-
-  printRateSummary();
-
-  return;
+  return keepDoing;
 }
 
 // ====================================================================================================================
@@ -764,8 +1031,7 @@ void EncApp::encode()
   \param iNumEncoded    number of encoded frames
   \param accessUnits    list of access units to be written
  */
-void EncApp::xWriteOutput( int iNumEncoded, std::list<PelUnitBuf*>& recBufList
-                          )
+void EncApp::xWriteOutput( int iNumEncoded, std::list<PelUnitBuf*>& recBufList )
 {
   const InputColourSpaceConversion ipCSC = (!m_outputInternalColourSpace) ? m_inputColourSpaceConvert : IPCOLOURSPACE_UNCHANGED;
   std::list<PelUnitBuf*>::iterator iterPicYuvRec = recBufList.end();
@@ -800,10 +1066,18 @@ void EncApp::xWriteOutput( int iNumEncoded, std::list<PelUnitBuf*>& recBufList
       const PelUnitBuf* pcPicYuvRec = *(iterPicYuvRec++);
       if (!m_reconFileName.empty())
       {
-        m_cVideoIOYuvReconFile.write( *pcPicYuvRec,
-                                      ipCSC,
-                                      m_packedYUVMode,
-                                      m_confWinLeft, m_confWinRight, m_confWinTop, m_confWinBottom, NUM_CHROMA_FORMAT, m_bClipOutputVideoToRec709Range );
+        if( m_cEncLib.isRPREnabled() && m_cEncLib.getUpscaledOutput() )
+        {
+          const SPS& sps = *m_cEncLib.getSPS( 0 );
+          const PPS& pps = *m_cEncLib.getPPS( ( sps.getMaxPicWidthInLumaSamples() != pcPicYuvRec->get( COMPONENT_Y ).width || sps.getMaxPicHeightInLumaSamples() != pcPicYuvRec->get( COMPONENT_Y ).height ) ? ENC_PPS_ID_RPR : 0 );
+
+          m_cVideoIOYuvReconFile.writeUpscaledPicture( sps, pps, *pcPicYuvRec, ipCSC, m_packedYUVMode, m_cEncLib.getUpscaledOutput(), NUM_CHROMA_FORMAT, m_bClipOutputVideoToRec709Range );
+        }
+        else
+        {
+          m_cVideoIOYuvReconFile.write( pcPicYuvRec->get( COMPONENT_Y ).width, pcPicYuvRec->get( COMPONENT_Y ).height, *pcPicYuvRec, ipCSC, m_packedYUVMode,
+            m_confWinLeft, m_confWinRight, m_confWinTop, m_confWinBottom, NUM_CHROMA_FORMAT, m_bClipOutputVideoToRec709Range );
+        }
       }
     }
   }
@@ -830,38 +1104,21 @@ void EncApp::rateStatsAccum(const AccessUnit& au, const std::vector<uint32_t>& a
   {
     switch ((*it_au)->m_nalUnitType)
     {
-#if !JVET_M0101_HLS
-    case NAL_UNIT_CODED_SLICE_TRAIL_R:
-    case NAL_UNIT_CODED_SLICE_TRAIL_N:
-    case NAL_UNIT_CODED_SLICE_TSA_R:
-    case NAL_UNIT_CODED_SLICE_TSA_N:
-    case NAL_UNIT_CODED_SLICE_STSA_R:
-    case NAL_UNIT_CODED_SLICE_STSA_N:
-    case NAL_UNIT_CODED_SLICE_BLA_W_LP:
-    case NAL_UNIT_CODED_SLICE_BLA_W_RADL:
-    case NAL_UNIT_CODED_SLICE_BLA_N_LP:
-    case NAL_UNIT_CODED_SLICE_IDR_W_RADL:
-    case NAL_UNIT_CODED_SLICE_IDR_N_LP:
-    case NAL_UNIT_CODED_SLICE_CRA:
-    case NAL_UNIT_CODED_SLICE_RADL_N:
-    case NAL_UNIT_CODED_SLICE_RADL_R:
-    case NAL_UNIT_CODED_SLICE_RASL_N:
-    case NAL_UNIT_CODED_SLICE_RASL_R:
-#else
     case NAL_UNIT_CODED_SLICE_TRAIL:
     case NAL_UNIT_CODED_SLICE_STSA:
     case NAL_UNIT_CODED_SLICE_IDR_W_RADL:
     case NAL_UNIT_CODED_SLICE_IDR_N_LP:
     case NAL_UNIT_CODED_SLICE_CRA:
+    case NAL_UNIT_CODED_SLICE_GDR:
     case NAL_UNIT_CODED_SLICE_RADL:
     case NAL_UNIT_CODED_SLICE_RASL:
-#endif
-#if HEVC_VPS
+    case NAL_UNIT_DPS:
     case NAL_UNIT_VPS:
-#endif
     case NAL_UNIT_SPS:
     case NAL_UNIT_PPS:
-    case NAL_UNIT_APS:
+    case NAL_UNIT_PH:
+    case NAL_UNIT_PREFIX_APS:
+    case NAL_UNIT_SUFFIX_APS:
       m_essentialBytes += *it_stats;
       break;
     default:
diff --git a/source/App/EncoderApp/EncApp.h b/source/App/EncoderApp/EncApp.h
index 146e9dc4a30b209fdf0baf2f33d8aeea21a14177..c3d384d12326e31526ede5eabdf8e2eafb6a61f8 100644
--- a/source/App/EncoderApp/EncApp.h
+++ b/source/App/EncoderApp/EncApp.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -45,6 +45,16 @@
 #include "Utilities/VideoIOYuv.h"
 #include "CommonLib/NAL.h"
 #include "EncAppCfg.h"
+#if EXTENSION_360_VIDEO
+#include "AppEncHelper360/TExt360AppEncTop.h"
+#endif
+#include "EncoderLib/EncTemporalFilter.h"
+
+#if JVET_O0756_CALCULATE_HDRMETRICS
+#include <chrono>
+#endif
+
+class EncAppCommon;
 
 //! \ingroup EncoderApp
 //! \{
@@ -62,14 +72,16 @@ private:
   VideoIOYuv        m_cVideoIOYuvInputFile;       ///< input YUV file
   VideoIOYuv        m_cVideoIOYuvReconFile;       ///< output reconstruction file
   int               m_iFrameRcvd;                 ///< number of received frames
-  uint32_t              m_essentialBytes;
-  uint32_t              m_totalBytes;
-  fstream           m_bitstream;
+  uint32_t          m_essentialBytes;
+  uint32_t          m_totalBytes;
+  fstream&          m_bitstream;
+#if JVET_O0756_CALCULATE_HDRMETRICS
+  std::chrono::duration<long long, ratio<1, 1000000000>> m_metricTime;
+#endif
 
 private:
   // initialization
-  void xCreateLib  ( std::list<PelUnitBuf*>& recBufList
-                    );                           ///< create files & encoder class
+  void xCreateLib( std::list<PelUnitBuf*>& recBufList, const int layerId );         ///< create files & encoder class
   void xInitLibCfg ();                           ///< initialize internal variables
   void xInitLib    (bool isFieldCoding);         ///< initialize encoder class
   void xDestroyLib ();                           ///< destroy encoder class
@@ -81,14 +93,33 @@ private:
   void printRateSummary ();
   void printChromaFormat();
 
+  std::list<PelUnitBuf*> m_recBufList;
+  int                    m_numEncoded;
+  PelStorage*            m_trueOrgPic;
+  PelStorage*            m_orgPic;
+#if EXTENSION_360_VIDEO
+  TExt360AppEncTop*      m_ext360;
+#endif
+  EncTemporalFilter      m_temporalFilter;
+  bool m_flush;
+
 public:
-  EncApp();
+  EncApp( fstream& bitStream, EncLibCommon* encLibCommon );
   virtual ~EncApp();
 
-  void  encode();                               ///< main encoding function
+  int   getMaxLayers() const { return m_maxLayers; }  // read-only accessor: returns m_maxLayers unchanged
+  void  createLib( int layerId );
+  void  destroyLib();
+  bool  encodePrep( bool& eos );
+  bool  encode();                               ///< main encoding function
 
   void  outputAU( const AccessUnit& au );
 
+#if JVET_O0756_CALCULATE_HDRMETRICS
+  std::chrono::duration<long long, std::ratio<1, 1000000000>> getMetricTime()    const { return m_metricTime; }  // returns m_metricTime by value
+#endif
+
+
 };// END CLASS DEFINITION EncApp
 
 //! \}
diff --git a/source/App/EncoderApp/EncAppCfg.cpp b/source/App/EncoderApp/EncAppCfg.cpp
index c1c6391d2a9e00f7fbb0ebd2e88d2eeed5dd164a..cdad7cff4d66dfd3bc7e3c999f4ffeb9b2a050e5 100644
--- a/source/App/EncoderApp/EncAppCfg.cpp
+++ b/source/App/EncoderApp/EncAppCfg.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -60,39 +60,10 @@ namespace po = df::program_options_lite;
 
 enum ExtendedProfileName // this is used for determining profile strings, where multiple profiles map to a single profile idc with various constraint flag combinations
 {
-  NONE = 0,
-  MAIN = 1,
-  MAIN10 = 2,
-  MAINSTILLPICTURE = 3,
-  MAINREXT = 4,
-  HIGHTHROUGHPUTREXT = 5, // Placeholder profile for development
-  // The following are RExt profiles, which would map to the MAINREXT profile idc.
-  // The enumeration indicates the bit-depth constraint in the bottom 2 digits
-  //                           the chroma format in the next digit
-  //                           the intra constraint in the next digit
-  //                           If it is a RExt still picture, there is a '1' for the top digit.
-  MONOCHROME_8      = 1008,
-  MONOCHROME_12     = 1012,
-  MONOCHROME_16     = 1016,
-  MAIN_12           = 1112,
-  MAIN_422_10       = 1210,
-  MAIN_422_12       = 1212,
-  MAIN_444          = 1308,
-  MAIN_444_10       = 1310,
-  MAIN_444_12       = 1312,
-  MAIN_444_16       = 1316, // non-standard profile definition, used for development purposes
-  MAIN_INTRA        = 2108,
-  MAIN_10_INTRA     = 2110,
-  MAIN_12_INTRA     = 2112,
-  MAIN_422_10_INTRA = 2210,
-  MAIN_422_12_INTRA = 2212,
-  MAIN_444_INTRA    = 2308,
-  MAIN_444_10_INTRA = 2310,
-  MAIN_444_12_INTRA = 2312,
-  MAIN_444_16_INTRA = 2316,
-  MAIN_444_STILL_PICTURE = 11308,
-  MAIN_444_16_STILL_PICTURE = 12316,
-  NEXT          = 6
+  NONE        = Profile::NONE,  // takes its numeric value from Profile::NONE
+  MAIN_10     = Profile::MAIN_10,  // takes its numeric value from Profile::MAIN_10
+  MAIN_444_10 = Profile::MAIN_444_10,  // takes its numeric value from Profile::MAIN_444_10
+  AUTO = -1  // only entry not defined via a Profile:: constant; NOTE(review): presumably "derive the profile automatically" - confirm against its use
 };
 
 
@@ -113,33 +84,50 @@ EncAppCfg::EncAppCfg()
 , m_maxChromaFormatConstraintIdc(CHROMA_420)
 , m_bFrameConstraintFlag(false)
 , m_bNoQtbttDualTreeIntraConstraintFlag(false)
+, m_noPartitionConstraintsOverrideConstraintFlag(false)
 , m_bNoSaoConstraintFlag(false)
 , m_bNoAlfConstraintFlag(false)
-, m_bNoPcmConstraintFlag(false)
 , m_bNoRefWraparoundConstraintFlag(false)
 , m_bNoTemporalMvpConstraintFlag(false)
 , m_bNoSbtmvpConstraintFlag(false)
 , m_bNoAmvrConstraintFlag(false)
 , m_bNoBdofConstraintFlag(false)
+, m_noDmvrConstraintFlag(false)
 , m_bNoCclmConstraintFlag(false)
 , m_bNoMtsConstraintFlag(false)
+, m_noSbtConstraintFlag(false)
 , m_bNoAffineMotionConstraintFlag(false)
-, m_bNoGbiConstraintFlag(false)
-, m_bNoMhIntraConstraintFlag(false)
+, m_bNoBcwConstraintFlag(false)
+, m_noIbcConstraintFlag(false)
+, m_bNoCiipConstraintFlag(false)
+, m_noFPelMmvdConstraintFlag(false)
 , m_bNoTriangleConstraintFlag(false)
 , m_bNoLadfConstraintFlag(false)
-, m_bNoCurrPicRefConstraintFlag(false)
+, m_noTransformSkipConstraintFlag(false)
+, m_noBDPCMConstraintFlag(false)
+, m_noJointCbCrConstraintFlag(false)
 , m_bNoQpDeltaConstraintFlag(false)
 , m_bNoDepQuantConstraintFlag(false)
 , m_bNoSignDataHidingConstraintFlag(false)
+, m_noTrailConstraintFlag(false)
+, m_noStsaConstraintFlag(false)
+, m_noRaslConstraintFlag(false)
+, m_noRadlConstraintFlag(false)
+, m_noIdrConstraintFlag(false)
+, m_noCraConstraintFlag(false)
+, m_noGdrConstraintFlag(false)
+, m_noApsConstraintFlag(false)
+
 #if EXTENSION_360_VIDEO
 , m_ext360(*this)
 #endif
 {
   m_aidQP = NULL;
+#if HEVC_SEI
   m_startOfCodedInterval = NULL;
   m_codedPivotValue = NULL;
   m_targetPivotValue = NULL;
+#endif
 }
 
 EncAppCfg::~EncAppCfg()
@@ -148,6 +136,7 @@ EncAppCfg::~EncAppCfg()
   {
     delete[] m_aidQP;
   }
+#if HEVC_SEI
   if ( m_startOfCodedInterval )
   {
     delete[] m_startOfCodedInterval;
@@ -163,6 +152,7 @@ EncAppCfg::~EncAppCfg()
     delete[] m_targetPivotValue;
     m_targetPivotValue = NULL;
   }
+#endif
 
 #if ENABLE_TRACING
   tracing_uninit(g_trace_ctx);
@@ -194,29 +184,24 @@ std::istringstream &operator>>(std::istringstream &in, GOPEntry &entry)     //in
   in>>entry.m_tcOffsetDiv2;
   in>>entry.m_betaOffsetDiv2;
   in>>entry.m_temporalId;
-  in>>entry.m_numRefPicsActive;
-  in>>entry.m_numRefPics;
-  for ( int i = 0; i < entry.m_numRefPics; i++ )
-  {
-    in>>entry.m_referencePics[i];
-  }
-  in>>entry.m_interRPSPrediction;
-  if (entry.m_interRPSPrediction==1)
+  in >> entry.m_numRefPicsActive0;
+  in >> entry.m_numRefPics0;
+  for (int i = 0; i < entry.m_numRefPics0; i++)
   {
-    in>>entry.m_deltaRPS;
-    in>>entry.m_numRefIdc;
-    for ( int i = 0; i < entry.m_numRefIdc; i++ )
-    {
-      in>>entry.m_refIdc[i];
-    }
+    in >> entry.m_deltaRefPics0[i];
   }
-  else if (entry.m_interRPSPrediction==2)
+  in >> entry.m_numRefPicsActive1;
+  in >> entry.m_numRefPics1;
+  for (int i = 0; i < entry.m_numRefPics1; i++)
   {
-    in>>entry.m_deltaRPS;
+    in >> entry.m_deltaRefPics1[i];
   }
+
   return in;
 }
 
+
+
 bool confirmPara(bool bflag, const char* message);
 
 static inline ChromaFormat numberToChromaFormat(const int val)
@@ -239,12 +224,8 @@ static const struct MapStrToProfile
 strToProfile[] =
 {
   {"none",                 Profile::NONE               },
-  {"main",                 Profile::MAIN               },
-  {"main10",               Profile::MAIN10             },
-  {"main-still-picture",   Profile::MAINSTILLPICTURE   },
-  {"main-RExt",            Profile::MAINREXT           },
-  {"high-throughput-RExt", Profile::HIGHTHROUGHPUTREXT },
-  {"next",                 Profile::NEXT           }
+  {"main_10",              Profile::MAIN_10            },
+  {"main_444_10",          Profile::MAIN_444_10        }
 };
 
 static const struct MapStrToExtendedProfile
@@ -255,57 +236,11 @@ static const struct MapStrToExtendedProfile
 strToExtendedProfile[] =
 {
     {"none",                      NONE             },
-    {"main",                      MAIN             },
-    {"main10",                    MAIN10           },
-    {"main_still_picture",        MAINSTILLPICTURE },
-    {"main-still-picture",        MAINSTILLPICTURE },
-    {"main_RExt",                 MAINREXT         },
-    {"main-RExt",                 MAINREXT         },
-    {"main_rext",                 MAINREXT         },
-    {"main-rext",                 MAINREXT         },
-    {"high_throughput_RExt",      HIGHTHROUGHPUTREXT },
-    {"high-throughput-RExt",      HIGHTHROUGHPUTREXT },
-    {"high_throughput_rext",      HIGHTHROUGHPUTREXT },
-    {"high-throughput-rext",      HIGHTHROUGHPUTREXT },
-    {"monochrome",                MONOCHROME_8     },
-    {"monochrome12",              MONOCHROME_12    },
-    {"monochrome16",              MONOCHROME_16    },
-    {"main12",                    MAIN_12          },
-    {"main_422_10",               MAIN_422_10      },
-    {"main_422_12",               MAIN_422_12      },
-    {"main_444",                  MAIN_444         },
+    {"main_10",                   MAIN_10          },
     {"main_444_10",               MAIN_444_10      },
-    {"main_444_12",               MAIN_444_12      },
-    {"main_444_16",               MAIN_444_16      },
-    {"main_intra",                MAIN_INTRA       },
-    {"main_10_intra",             MAIN_10_INTRA    },
-    {"main_12_intra",             MAIN_12_INTRA    },
-    {"main_422_10_intra",         MAIN_422_10_INTRA},
-    {"main_422_12_intra",         MAIN_422_12_INTRA},
-    {"main_444_intra",            MAIN_444_INTRA   },
-    {"main_444_still_picture",    MAIN_444_STILL_PICTURE },
-    {"main_444_10_intra",         MAIN_444_10_INTRA},
-    {"main_444_12_intra",         MAIN_444_12_INTRA},
-    {"main_444_16_intra",         MAIN_444_16_INTRA},
-    {"main_444_16_still_picture", MAIN_444_16_STILL_PICTURE },
-    {"next",                      NEXT }
+    {"auto",                      AUTO             }
 };
 
-static const ExtendedProfileName validRExtProfileNames[2/* intraConstraintFlag*/][4/* bit depth constraint 8=0, 10=1, 12=2, 16=3*/][4/*chroma format*/]=
-{
-    {
-        { MONOCHROME_8,  NONE,          NONE,              MAIN_444          }, // 8-bit  inter for 400, 420, 422 and 444
-        { NONE,          NONE,          MAIN_422_10,       MAIN_444_10       }, // 10-bit inter for 400, 420, 422 and 444
-        { MONOCHROME_12, MAIN_12,       MAIN_422_12,       MAIN_444_12       }, // 12-bit inter for 400, 420, 422 and 444
-        { MONOCHROME_16, NONE,          NONE,              MAIN_444_16       }  // 16-bit inter for 400, 420, 422 and 444 (the latter is non standard used for development)
-    },
-    {
-        { NONE,          MAIN_INTRA,    NONE,              MAIN_444_INTRA    }, // 8-bit  intra for 400, 420, 422 and 444
-        { NONE,          MAIN_10_INTRA, MAIN_422_10_INTRA, MAIN_444_10_INTRA }, // 10-bit intra for 400, 420, 422 and 444
-        { NONE,          MAIN_12_INTRA, MAIN_422_12_INTRA, MAIN_444_12_INTRA }, // 12-bit intra for 400, 420, 422 and 444
-        { NONE,          NONE,          NONE,              MAIN_444_16_INTRA }  // 16-bit intra for 400, 420, 422 and 444
-    }
-};
 
 static const struct MapStrToTier
 {
@@ -364,7 +299,6 @@ strToCostMode[] =
   {"mixed_lossless_lossy",      COST_MIXED_LOSSLESS_LOSSY_CODING}
 };
 
-#if HEVC_USE_SCALING_LISTS
 static const struct MapStrToScalingListMode
 {
   const char* str;
@@ -379,7 +313,6 @@ strToScalingListMode[] =
   {"default", SCALING_LIST_DEFAULT},
   {"file",    SCALING_LIST_FILE_READ}
 };
-#endif
 
 template<typename T, typename P>
 static std::string enumToString(P map[], uint32_t mapLen, const T val)
@@ -439,12 +372,10 @@ static inline istream& operator >> (istream &in, CostMode &mode)
   return readStrToEnum(strToCostMode, sizeof(strToCostMode)/sizeof(*strToCostMode), in, mode);
 }
 
-#if HEVC_USE_SCALING_LISTS
 static inline istream& operator >> (istream &in, ScalingListMode &mode)
 {
   return readStrToEnum(strToScalingListMode, sizeof(strToScalingListMode)/sizeof(*strToScalingListMode), in, mode);
 }
-#endif
 
 template <class T>
 struct SMultiValueInput
@@ -484,6 +415,16 @@ uint32_t SMultiValueInput<uint32_t>::readValue(const char *&pStr, bool &bSuccess
   return val;
 }
 
+template<>  // uint8_t specialisation: parse one unsigned token at pStr, range-check it, then narrow
+uint8_t SMultiValueInput<uint8_t>::readValue(const char *&pStr, bool &bSuccess)
+{
+  char *eptr;
+  const unsigned long val = strtoul(pStr, &eptr, 0);  // keep strtoul's full width: narrowing before the range check lets e.g. 2^32+1 wrap into range
+  pStr = eptr;  // advance the caller's cursor to the first unconsumed character
+  bSuccess = (*eptr == 0 || isspace(static_cast<unsigned char>(*eptr)) || *eptr == ',') && val >= minValIncl && val <= maxValIncl;  // valid terminator AND value within [minValIncl, maxValIncl]
+  return static_cast<uint8_t>(val);  // explicit narrowing; only meaningful when bSuccess is true
+}
+
 template<>
 int SMultiValueInput<int>::readValue(const char *&pStr, bool &bSuccess)
 {
@@ -579,81 +520,109 @@ static inline istream& operator >> (std::istream &in, EncAppCfg::OptionalValue<T
 }
 #endif
 
-static void
-automaticallySelectRExtProfile(const bool bUsingGeneralRExtTools,
-                               const bool bUsingChromaQPAdjustment,
-                               const bool bUsingExtendedPrecision,
-                               const bool bIntraConstraintFlag,
-                               uint32_t &bitDepthConstraint,
-                               ChromaFormat &chromaFormatConstraint,
-                               const int  maxBitDepth,
-                               const ChromaFormat chromaFormat)
+template <class T1, class T2>  // extracts one key followed by one value from the stream and stores the pair in the map
+static inline istream& operator >> (std::istream& in, std::map<T1, T2>& map)
 {
-  // Try to choose profile, according to table in Q1013.
-  uint32_t trialBitDepthConstraint=maxBitDepth;
-  if (trialBitDepthConstraint<8)
-  {
-    trialBitDepthConstraint=8;
-  }
-  else if (trialBitDepthConstraint==9 || trialBitDepthConstraint==11)
+  T1 key;
+  T2 value;
+  try
   {
-    trialBitDepthConstraint++;
+    in >> key;
+    in >> value;
   }
-  else if (trialBitDepthConstraint>12)
+  catch (...)  // an extraction that throws is reported to the caller as an ordinary stream failure
   {
-    trialBitDepthConstraint=16;
+    in.setstate(ios::failbit);
   }
 
-  // both format and bit depth constraints are unspecified
-  if (bUsingExtendedPrecision || trialBitDepthConstraint==16)
-  {
-    bitDepthConstraint = 16;
-    chromaFormatConstraint = (!bIntraConstraintFlag && chromaFormat==CHROMA_400) ? CHROMA_400 : CHROMA_444;
-  }
-  else if (bUsingGeneralRExtTools)
-  {
-    if (chromaFormat == CHROMA_400 && !bIntraConstraintFlag)
-    {
-      bitDepthConstraint = 16;
-      chromaFormatConstraint = CHROMA_400;
-    }
-    else
-    {
-      bitDepthConstraint = trialBitDepthConstraint;
-      chromaFormatConstraint = CHROMA_444;
-    }
+  if (in) { map[key] = value; }  // store the pair only when both extractions succeeded; after a failure key/value may never have been written
+  return in;
+}
+
+
+
+static uint32_t getMaxTileColsByLevel( Level::Name level )  // returns the tile-column limit associated with the given level
+{
+  switch( level ) // consecutive case labels share the return value that follows them
+  {
+    case Level::LEVEL1:
+    case Level::LEVEL2:
+    case Level::LEVEL2_1:
+      return 1;
+    case Level::LEVEL3:
+      return 2;
+    case Level::LEVEL3_1:
+      return 3;
+    case Level::LEVEL4:
+    case Level::LEVEL4_1:
+      return 5;
+    case Level::LEVEL5:
+    case Level::LEVEL5_1:
+    case Level::LEVEL5_2:
+      return 10;
+    case Level::LEVEL6:
+    case Level::LEVEL6_1:
+    case Level::LEVEL6_2:
+    default:  // any level value not listed above gets the same limit as level 6.x
+      return 20;
   }
-  else if (chromaFormat == CHROMA_400)
-  {
-    if (bIntraConstraintFlag)
-    {
-      chromaFormatConstraint = CHROMA_420; // there is no intra 4:0:0 profile.
-      bitDepthConstraint     = trialBitDepthConstraint;
-    }
-    else
-    {
-      chromaFormatConstraint = CHROMA_400;
-      bitDepthConstraint     = trialBitDepthConstraint == 8 ? 8 : 12;
-    }
+}
+
+static uint32_t getMaxTileRowsByLevel( Level::Name level )  // returns the tile-row limit associated with the given level
+{
+  switch( level ) // consecutive case labels share the return value that follows them
+  {
+    case Level::LEVEL1:
+    case Level::LEVEL2:
+    case Level::LEVEL2_1:
+      return 1;
+    case Level::LEVEL3:
+      return 2;
+    case Level::LEVEL3_1:
+      return 3;
+    case Level::LEVEL4:
+    case Level::LEVEL4_1:
+      return 5;
+    case Level::LEVEL5:
+    case Level::LEVEL5_1:
+    case Level::LEVEL5_2:
+      return 11;
+    case Level::LEVEL6:
+    case Level::LEVEL6_1:
+    case Level::LEVEL6_2:
+    default:  // any level value not listed above gets the same limit as level 6.x
+      return 22;  // level 6.x allows 22 tile rows in the HEVC/VVC level-limit tables
   }
-  else
-  {
-    bitDepthConstraint = trialBitDepthConstraint;
-    chromaFormatConstraint = chromaFormat;
-    if (bUsingChromaQPAdjustment && chromaFormat == CHROMA_420)
-    {
-      chromaFormatConstraint = CHROMA_422; // 4:2:0 cannot use the chroma qp tool.
-    }
-    if (chromaFormatConstraint == CHROMA_422 && bitDepthConstraint == 8)
-    {
-      bitDepthConstraint = 10; // there is no 8-bit 4:2:2 profile.
-    }
-    if (chromaFormatConstraint == CHROMA_420 && !bIntraConstraintFlag)
-    {
-      bitDepthConstraint = 12; // there is no 8 or 10-bit 4:2:0 inter RExt profile.
-    }
+}
+
+static uint32_t getMaxSlicesByLevel( Level::Name level )  // returns the slice-count limit associated with the given level
+{
+  switch( level ) // consecutive case labels share the return value that follows them
+  {
+    case Level::LEVEL1:
+    case Level::LEVEL2:
+      return 16;
+    case Level::LEVEL2_1:
+      return 20;
+    case Level::LEVEL3:
+      return 30;
+    case Level::LEVEL3_1:
+      return 40;
+    case Level::LEVEL4:
+    case Level::LEVEL4_1:
+      return 75;
+    case Level::LEVEL5:
+    case Level::LEVEL5_1:
+    case Level::LEVEL5_2:
+      return 200;
+    case Level::LEVEL6:
+    case Level::LEVEL6_1:
+    case Level::LEVEL6_2:
+    default:  // any level value not listed above gets the same limit as level 6.x
+      return 600;
   }
 }
+
 // ====================================================================================================================
 // Public member functions
 // ====================================================================================================================
@@ -672,10 +641,6 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   int tmpWeightedPredictionMethod;
   int tmpFastInterSearchMode;
   int tmpMotionEstimationSearchMethod;
-  int tmpSliceMode;
-#if HEVC_DEPENDENT_SLICES
-  int tmpSliceSegmentMode;
-#endif
   int tmpDecodedPictureHashSEIMappedType;
   string inputColourSpaceConvert;
   string inputPathPrefix;
@@ -683,12 +648,15 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   int saoOffsetBitShift[MAX_NUM_CHANNEL_TYPE];
 
   // Multi-value input fields:                                // minval, maxval (incl), min_entries, max_entries (incl) [, default values, number of default values]
-  SMultiValueInput<uint32_t> cfg_ColumnWidth                     (0, std::numeric_limits<uint32_t>::max(), 0, std::numeric_limits<uint32_t>::max());
-  SMultiValueInput<uint32_t> cfg_RowHeight                       (0, std::numeric_limits<uint32_t>::max(), 0, std::numeric_limits<uint32_t>::max());
+  SMultiValueInput<uint32_t>  cfgTileColumnWidth              (0, std::numeric_limits<uint32_t>::max(), 0, std::numeric_limits<uint32_t>::max());
+  SMultiValueInput<uint32_t>  cfgTileRowHeight                (0, std::numeric_limits<uint32_t>::max(), 0, std::numeric_limits<uint32_t>::max());
+  SMultiValueInput<uint32_t>  cfgRectSlicePos                 (0, std::numeric_limits<uint32_t>::max(), 0, std::numeric_limits<uint32_t>::max());
+  SMultiValueInput<uint32_t>  cfgRasterSliceSize              (0, std::numeric_limits<uint32_t>::max(), 0, std::numeric_limits<uint32_t>::max());
   SMultiValueInput<int>  cfg_startOfCodedInterval            (std::numeric_limits<int>::min(), std::numeric_limits<int>::max(), 0, 1<<16);
   SMultiValueInput<int>  cfg_codedPivotValue                 (std::numeric_limits<int>::min(), std::numeric_limits<int>::max(), 0, 1<<16);
   SMultiValueInput<int>  cfg_targetPivotValue                (std::numeric_limits<int>::min(), std::numeric_limits<int>::max(), 0, 1<<16);
 
+
   SMultiValueInput<double> cfg_adIntraLambdaModifier         (0, std::numeric_limits<double>::max(), 0, MAX_TLAYER); ///< Lambda modifier for Intra pictures, one for each temporal layer. If size>temporalLayer, then use [temporalLayer], else if size>0, use [size()-1], else use m_adLambdaModifier.
 
 #if SHARP_LUMA_DELTA_QP
@@ -698,7 +666,15 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   SMultiValueInput<int>  cfg_lumaLeveltoDQPMappingLuma       (0, std::numeric_limits<int>::max(), 0, LUMA_LEVEL_TO_DQP_LUT_MAXSIZE, defaultLumaLevelTodQp_LumaChangePoints, sizeof(defaultLumaLevelTodQp_LumaChangePoints)/sizeof(int));
   uint32_t lumaLevelToDeltaQPMode;
 #endif
-
+  const int qpInVals[] = { 25, 33, 43 };                // qpInVal values used to derive the chroma QP mapping table used in VTM-5.0
+  const int qpOutVals[] = { 25, 32, 37 };               // qpOutVal values used to derive the chroma QP mapping table used in VTM-5.0
+  SMultiValueInput<int> cfg_qpInValCb                   (MIN_QP_VALUE_FOR_16_BIT, MAX_QP, 0, MAX_NUM_QP_VALUES, qpInVals, sizeof(qpInVals)/sizeof(int));
+  SMultiValueInput<int> cfg_qpOutValCb                  (MIN_QP_VALUE_FOR_16_BIT, MAX_QP, 0, MAX_NUM_QP_VALUES, qpOutVals, sizeof(qpOutVals) / sizeof(int));
+  const int zeroVector[] = { 0 };
+  SMultiValueInput<int> cfg_qpInValCr                   (MIN_QP_VALUE_FOR_16_BIT, MAX_QP, 0, MAX_NUM_QP_VALUES, zeroVector, 1);
+  SMultiValueInput<int> cfg_qpOutValCr                  (MIN_QP_VALUE_FOR_16_BIT, MAX_QP, 0, MAX_NUM_QP_VALUES, zeroVector, 1);
+  SMultiValueInput<int> cfg_qpInValCbCr                 (MIN_QP_VALUE_FOR_16_BIT, MAX_QP, 0, MAX_NUM_QP_VALUES, zeroVector, 1);
+  SMultiValueInput<int> cfg_qpOutValCbCr                (MIN_QP_VALUE_FOR_16_BIT, MAX_QP, 0, MAX_NUM_QP_VALUES, zeroVector, 1);
   const uint32_t defaultInputKneeCodes[3]  = { 600, 800, 900 };
   const uint32_t defaultOutputKneeCodes[3] = { 100, 250, 450 };
   SMultiValueInput<uint32_t> cfg_kneeSEIInputKneePointValue      (1,  999, 0, 999, defaultInputKneeCodes,  sizeof(defaultInputKneeCodes )/sizeof(uint32_t));
@@ -723,12 +699,50 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   SMultiValueInput<bool> cfg_timeCodeSeiHoursFlag            (0,  1, 0, MAX_TIMECODE_SEI_SETS);
   SMultiValueInput<int>  cfg_timeCodeSeiTimeOffsetLength     (0, 31, 0, MAX_TIMECODE_SEI_SETS);
   SMultiValueInput<int>  cfg_timeCodeSeiTimeOffsetValue      (std::numeric_limits<int>::min(), std::numeric_limits<int>::max(), 0, MAX_TIMECODE_SEI_SETS);
+  SMultiValueInput<int>      cfg_omniViewportSEIAzimuthCentre    (-11796480, 11796479, 0, 15);
+  SMultiValueInput<int>      cfg_omniViewportSEIElevationCentre  ( -5898240,  5898240, 0, 15);
+  SMultiValueInput<int>      cfg_omniViewportSEITiltCentre       (-11796480, 11796479, 0, 15);
+  SMultiValueInput<uint32_t> cfg_omniViewportSEIHorRange         (        1, 23592960, 0, 15);
+  SMultiValueInput<uint32_t> cfg_omniViewportSEIVerRange         (        1, 11796480, 0, 15);
+  SMultiValueInput<uint32_t>   cfg_rwpSEIRwpTransformType                 (0, 7, 0, std::numeric_limits<uint8_t>::max());
+  SMultiValueInput<bool>       cfg_rwpSEIRwpGuardBandFlag                 (0, 1, 0, std::numeric_limits<uint8_t>::max()); 
+  SMultiValueInput<uint32_t>   cfg_rwpSEIProjRegionWidth                  (0, std::numeric_limits<uint32_t>::max(), 0, std::numeric_limits<uint8_t>::max());
+  SMultiValueInput<uint32_t>   cfg_rwpSEIProjRegionHeight                 (0, std::numeric_limits<uint32_t>::max(), 0, std::numeric_limits<uint8_t>::max());
+  SMultiValueInput<uint32_t>   cfg_rwpSEIRwpSEIProjRegionTop              (0, std::numeric_limits<uint32_t>::max(), 0, std::numeric_limits<uint8_t>::max());
+  SMultiValueInput<uint32_t>   cfg_rwpSEIProjRegionLeft                   (0, std::numeric_limits<uint32_t>::max(), 0, std::numeric_limits<uint8_t>::max());
+  SMultiValueInput<uint32_t>   cfg_rwpSEIPackedRegionWidth                (0, std::numeric_limits<uint16_t>::max(), 0, std::numeric_limits<uint8_t>::max());
+  SMultiValueInput<uint32_t>   cfg_rwpSEIPackedRegionHeight               (0, std::numeric_limits<uint16_t>::max(), 0, std::numeric_limits<uint8_t>::max());
+  SMultiValueInput<uint32_t>   cfg_rwpSEIPackedRegionTop                  (0, std::numeric_limits<uint16_t>::max(), 0, std::numeric_limits<uint8_t>::max());
+  SMultiValueInput<uint32_t>   cfg_rwpSEIPackedRegionLeft                 (0, std::numeric_limits<uint16_t>::max(), 0, std::numeric_limits<uint8_t>::max());
+  SMultiValueInput<uint32_t>   cfg_rwpSEIRwpLeftGuardBandWidth            (0, std::numeric_limits<uint8_t>::max(), 0, std::numeric_limits<uint8_t>::max());
+  SMultiValueInput<uint32_t>   cfg_rwpSEIRwpRightGuardBandWidth           (0, std::numeric_limits<uint8_t>::max(), 0, std::numeric_limits<uint8_t>::max());
+  SMultiValueInput<uint32_t>   cfg_rwpSEIRwpTopGuardBandHeight            (0, std::numeric_limits<uint8_t>::max(), 0, std::numeric_limits<uint8_t>::max());
+  SMultiValueInput<uint32_t>   cfg_rwpSEIRwpBottomGuardBandHeight         (0, std::numeric_limits<uint8_t>::max(), 0, std::numeric_limits<uint8_t>::max());
+  SMultiValueInput<bool>       cfg_rwpSEIRwpGuardBandNotUsedForPredFlag   (0, 1,   0, std::numeric_limits<uint8_t>::max());
+  SMultiValueInput<uint32_t>   cfg_rwpSEIRwpGuardBandType                 (0, 7,   0, 4*std::numeric_limits<uint8_t>::max());
+  SMultiValueInput<uint32_t>   cfg_gcmpSEIFaceIndex                  (0, 5, 5, 6);
+  SMultiValueInput<uint32_t>   cfg_gcmpSEIFaceRotation               (0, 3, 5, 6);
+  SMultiValueInput<double>     cfg_gcmpSEIFunctionCoeffU             (0.0, 1.0, 5, 6);
+  SMultiValueInput<uint32_t>   cfg_gcmpSEIFunctionUAffectedByVFlag   (0, 1, 5, 6);
+  SMultiValueInput<double>     cfg_gcmpSEIFunctionCoeffV             (0.0, 1.0, 5, 6);
+  SMultiValueInput<uint32_t>   cfg_gcmpSEIFunctionVAffectedByUFlag   (0, 1, 5, 6);
 #if LUMA_ADAPTIVE_DEBLOCKING_FILTER_QP_OFFSET
   const int defaultLadfQpOffset[3] = { 1, 0, 1 };
   const int defaultLadfIntervalLowerBound[2] = { 350, 833 };
   SMultiValueInput<int>  cfg_LadfQpOffset                    ( -MAX_QP, MAX_QP, 2, MAX_LADF_INTERVALS, defaultLadfQpOffset, 3 );
   SMultiValueInput<int>  cfg_LadfIntervalLowerBound          ( 0, std::numeric_limits<int>::max(), 1, MAX_LADF_INTERVALS - 1, defaultLadfIntervalLowerBound, 2 );
 #endif
+  SMultiValueInput<unsigned> cfg_virtualBoundariesPosX       (0, std::numeric_limits<uint32_t>::max(), 0, 3);
+  SMultiValueInput<unsigned> cfg_virtualBoundariesPosY       (0, std::numeric_limits<uint32_t>::max(), 0, 3);
+
+  SMultiValueInput<uint8_t> cfg_SubProfile(0, std::numeric_limits<uint8_t>::max(), 0, std::numeric_limits<uint8_t>::max());
+  SMultiValueInput<uint32_t>  cfg_subPicCtuTopLeftX(0, std::numeric_limits<uint32_t>::max(), 0, MAX_NUM_SUB_PICS);
+  SMultiValueInput<uint32_t>  cfg_subPicCtuTopLeftY(0, std::numeric_limits<uint32_t>::max(), 0, MAX_NUM_SUB_PICS);
+  SMultiValueInput<uint32_t>  cfg_subPicWidth(1, std::numeric_limits<uint32_t>::max(), 0, MAX_NUM_SUB_PICS);
+  SMultiValueInput<uint32_t>  cfg_subPicHeight(1, std::numeric_limits<uint32_t>::max(), 0, MAX_NUM_SUB_PICS);
+  SMultiValueInput<uint32_t>  cfg_subPicTreatedAsPicFlag(0, 1, 0, MAX_NUM_SUB_PICS);
+  SMultiValueInput<uint32_t>  cfg_loopFilterAcrossSubpicEnabledFlag(0, 1, 0, MAX_NUM_SUB_PICS);
+  SMultiValueInput<uint32_t>  cfg_subPicId(0, std::numeric_limits<uint32_t>::max(), 0, MAX_NUM_SUB_PICS);
   int warnUnknowParameter = 0;
 
 #if ENABLE_TRACING
@@ -765,7 +779,6 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   ("InputBitDepthC",                                  m_inputBitDepth[CHANNEL_TYPE_CHROMA],                 0, "As per InputBitDepth but for chroma component. (default:InputBitDepth)")
   ("OutputBitDepthC",                                 m_outputBitDepth[CHANNEL_TYPE_CHROMA],                0, "As per OutputBitDepth but for chroma component. (default: use luma output bit-depth)")
   ("MSBExtendedBitDepthC",                            m_MSBExtendedBitDepth[CHANNEL_TYPE_CHROMA],           0, "As per MSBExtendedBitDepth but for chroma component. (default:MSBExtendedBitDepth)")
-  ("InternalBitDepthC",                               m_internalBitDepth[CHANNEL_TYPE_CHROMA],              0, "As per InternalBitDepth but for chroma component. (default:InternalBitDepth)")
   ("ExtendedPrecision",                               m_extendedPrecisionProcessingFlag,                false, "Increased internal accuracies to support high bit depths (not valid in V1 profiles)")
   ("HighPrecisionPredictionWeighting",                m_highPrecisionOffsetsEnabledFlag,                false, "Use high precision option for weighted prediction (not valid in V1 profiles)")
   ("InputColourSpaceConvert",                         inputColourSpaceConvert,                     string(""), "Colour space conversion to apply to input video. Permitted values are (empty string=UNCHANGED) " + getListOfColourSpaceConverts(true))
@@ -803,6 +816,23 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   ("SummaryVerboseness",                              m_summaryVerboseness,                                0u, "Specifies the level of the verboseness of the text output")
   ("Verbosity,v",                                     m_verbosity,                               (int)VERBOSE, "Specifies the level of the verboseness")
 
+#if JVET_O0756_CONFIG_HDRMETRICS || JVET_O0756_CALCULATE_HDRMETRICS
+  ( "WhitePointDeltaE1",                              m_whitePointDeltaE[0],                            100.0, "1st reference white point value")
+  ( "WhitePointDeltaE2",                              m_whitePointDeltaE[1],                           1000.0, "2nd reference white point value")
+  ( "WhitePointDeltaE3",                              m_whitePointDeltaE[2],                           5000.0, "3rd reference white point value")
+  ( "MaxSampleValue",                                 m_maxSampleValue,                               10000.0, "Maximum sample value for floats")
+  ( "InputSampleRange",                               m_sampleRange,                                        0, "Sample Range")
+  ( "InputColorPrimaries",                            m_colorPrimaries,                                     1, "Input Color Primaries")
+  ( "EnableTFunctionLUT",                             m_enableTFunctionLUT,                             false, "Enable look-up table for the transfer function")
+  ( "ChromaLocation",                                 m_chromaLocation,                                     2, "Location of Chroma Samples")
+  ( "ChromaUpsampleFilter",                           m_chromaUPFilter,                                     1, "420 to 444 conversion filters")
+  ( "CropOffsetLeft",                                 m_cropOffsetLeft,                                     0, "Crop Offset Left position")
+  ( "CropOffsetTop",                                  m_cropOffsetTop,                                      0, "Crop Offset Top position")
+  ( "CropOffsetRight",                                m_cropOffsetRight,                                    0, "Crop Offset Right position")
+  ( "CropOffsetBottom",                               m_cropOffsetBottom,                                   0, "Crop Offset Bottom position")
+  ( "CalculateHdrMetrics",                            m_calculateHdrMetrics,                            false, "Enable HDR metric calculation")
+#endif
+
   //Field coding parameters
   ("FieldCoding",                                     m_isField,                                        false, "Signals if it's a field based coding")
   ("TopFieldFirst, Tff",                              m_isTopFieldFirst,                                false, "In case of field based coding, signals whether if it's a top field first or not")
@@ -810,42 +840,61 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   ("HarmonizeGopFirstFieldCoupleEnabled",             m_bHarmonizeGopFirstFieldCoupleEnabled,            true, "Enables harmonization of Gop first field couple")
 
   // Profile and level
-  ("Profile",                                         extendedProfile,                                   NONE, "Profile name to use for encoding. Use main (for main), main10 (for main10), main-still-picture, main-RExt (for Range Extensions profile), any of the RExt specific profile names, or none")
+  ("Profile",                                         extendedProfile,              ExtendedProfileName::NONE, "Profile name to use for encoding. Use main_10, main_444_10, auto, or none")
   ("Level",                                           m_level,                                    Level::NONE, "Level limit to be used, eg 5.1, or none")
   ("Tier",                                            m_levelTier,                                Level::MAIN, "Tier to use for interpretation of --Level (main or high only)")
+  ("SubProfile",                                      cfg_SubProfile,                          cfg_SubProfile,  "Sub-profile idc")
+  ("EnableDecodingParameterSet",                      m_decodingParameterSetEnabled,                    false, "Enables writing of Decoding Parameter Set")
   ("MaxBitDepthConstraint",                           m_bitDepthConstraint,                                0u, "Bit depth to use for profile-constraint for RExt profiles. 0=automatically choose based upon other parameters")
   ("MaxChromaFormatConstraint",                       tmpConstraintChromaFormat,                            0, "Chroma-format to use for the profile-constraint for RExt profiles. 0=automatically choose based upon other parameters")
   ("IntraConstraintFlag",                             m_intraConstraintFlag,                            false, "Value of general_intra_constraint_flag to use for RExt profiles (not used if an explicit RExt sub-profile is specified)")
-  ("OnePictureOnlyConstraintFlag",                    m_onePictureOnlyConstraintFlag,                   false, "Value of general_one_picture_only_constraint_flag to use for RExt profiles (not used if an explicit RExt sub-profile is specified)")
-  ("LowerBitRateConstraintFlag",                      m_lowerBitRateConstraintFlag,                      true, "Value of general_lower_bit_rate_constraint_flag to use for RExt profiles")
 
   ("ProgressiveSource",                               m_progressiveSourceFlag,                          false, "Indicate that source is progressive")
   ("InterlacedSource",                                m_interlacedSourceFlag,                           false, "Indicate that source is interlaced")
   ("NonPackedSource",                                 m_nonPackedConstraintFlag,                        false, "Indicate that source does not contain frame packing")
   ("FrameOnly",                                       m_frameOnlyConstraintFlag,                        false, "Indicate that the bitstream contains only frames")
   ("CTUSize",                                         m_uiCTUSize,                                       128u, "CTUSize (specifies the CTU size if QTBT is on) [default: 128]")
+  ("SubPicPresentFlag",                               m_subPicPresentFlag,                              false, "equal to 1 specifies that subpicture parameters are present in the SPS RBSP syntax")
+  ("NumSubPics",                                      m_numSubPics,                                        0u, "specifies the number of subpictures")
+  ("SubPicCtuTopLeftX",                               cfg_subPicCtuTopLeftX,            cfg_subPicCtuTopLeftX, "specifies horizontal position of top left CTU of i-th subpicture in unit of CtbSizeY")
+  ("SubPicCtuTopLeftY",                               cfg_subPicCtuTopLeftY,            cfg_subPicCtuTopLeftY, "specifies vertical position of top left CTU of i-th subpicture in unit of CtbSizeY")
+  ("SubPicWidth",                                     cfg_subPicWidth,                        cfg_subPicWidth, "specifies the width of the i-th subpicture in units of CtbSizeY")
+  ("SubPicHeight",                                    cfg_subPicHeight,                      cfg_subPicHeight, "specifies the height of the i-th subpicture in units of CtbSizeY")
+  ("SubPicTreatedAsPicFlag",                          cfg_subPicTreatedAsPicFlag,  cfg_subPicTreatedAsPicFlag, "equal to 1 specifies that the i-th subpicture of each coded picture in the CLVS is treated as a picture in the decoding process excluding in-loop filtering operations")
+  ("LoopFilterAcrossSubpicEnabledFlag",               cfg_loopFilterAcrossSubpicEnabledFlag, cfg_loopFilterAcrossSubpicEnabledFlag, "equal to 1 specifies that in-loop filtering operations may be performed across the boundaries of the i-th subpicture in each coded picture in the CLVS")
+  ("SubPicIdPresentFlag",                             m_subPicIdPresentFlag,                            false, "equal to 1 specifies that subpicture ID mapping is present in the SPS")
+  ("SubPicIdSignallingPresentFlag",                   m_subPicIdSignallingPresentFlag,                  false, "equal to 1 specifies that subpicture ID mapping is signalled in the SPS")
+  ("SubPicIdLen",                                     m_subPicIdLen,                                       0u, "specifies the number of bits used to represent the syntax element sps_subpic_id[ i ]. ")
+  ("SubPicId",                                        cfg_subPicId,                              cfg_subPicId, "specifies the subpicture ID of the i-th subpicture")
   ("EnablePartitionConstraintsOverride",              m_SplitConsOverrideEnabledFlag,                    true, "Enable partition constraints override")
   ("MinQTISlice",                                     m_uiMinQT[0],                                        8u, "MinQTISlice")
   ("MinQTLumaISlice",                                 m_uiMinQT[0],                                        8u, "MinQTLumaISlice")
   ("MinQTChromaISlice",                               m_uiMinQT[2],                                        4u, "MinQTChromaISlice")
   ("MinQTNonISlice",                                  m_uiMinQT[1],                                        8u, "MinQTNonISlice")
-  ("MaxBTDepth",                                      m_uiMaxBTDepth,                                      3u, "MaxBTDepth")
-  ("MaxBTDepthI",                                     m_uiMaxBTDepthI,                                     3u, "MaxBTDepthI")
-  ("MaxBTDepthISliceL",                               m_uiMaxBTDepthI,                                     3u, "MaxBTDepthISliceL")
-  ("MaxBTDepthISliceC",                               m_uiMaxBTDepthIChroma,                               3u, "MaxBTDepthISliceC")
+  ("MaxMTTHierarchyDepth",                            m_uiMaxMTTHierarchyDepth,                            3u, "MaxMTTHierarchyDepth")
+  ("MaxMTTHierarchyDepthI",                           m_uiMaxMTTHierarchyDepthI,                           3u, "MaxMTTHierarchyDepthI")
+  ("MaxMTTHierarchyDepthISliceL",                     m_uiMaxMTTHierarchyDepthI,                           3u, "MaxMTTHierarchyDepthISliceL")
+  ("MaxMTTHierarchyDepthISliceC",                     m_uiMaxMTTHierarchyDepthIChroma,                     3u, "MaxMTTHierarchyDepthISliceC")
   ("DualITree",                                       m_dualTree,                                       false, "Use separate QTBT trees for intra slice luma and chroma channel types")
-  ("SubPuMvp",                                       m_SubPuMvpMode,                                       0, "Enable Sub-PU temporal motion vector prediction (0:off, 1:ATMVP, 2:STMVP, 3:ATMVP+STMVP)  [default: off]")
-  ("Affine",                                         m_Affine,                                         false, "Enable affine prediction (0:off, 1:on)  [default: off]")
-  ("AffineType",                                     m_AffineType,                                     true,  "Enable affine type prediction (0:off, 1:on)  [default: on]" )
-  ("BIO",                                            m_BIO,                                             false, "Enable bi-directional optical flow")
+  ( "LFNST",                                          m_LFNST,                                          false, "Enable LFNST (0:off, 1:on)  [default: off]" )
+  ( "FastLFNST",                                      m_useFastLFNST,                                   false, "Fast methods for LFNST" )
+  ("SubPuMvp",                                        m_SubPuMvpMode,                                       0, "Enable Sub-PU temporal motion vector prediction (0:off, 1:ATMVP, 2:STMVP, 3:ATMVP+STMVP)  [default: off]")
+  ("MMVD",                                            m_MMVD,                                            true, "Enable Merge mode with Motion Vector Difference (0:off, 1:on)  [default: 1]")
+  ("Affine",                                          m_Affine,                                         false, "Enable affine prediction (0:off, 1:on)  [default: off]")
+  ("AffineType",                                      m_AffineType,                                      true,  "Enable affine type prediction (0:off, 1:on)  [default: on]" )
+  ("PROF",                                            m_PROF,                                           false, "Enable Prediction refinement with optical flow for affine mode (0:off, 1:on)  [default: off]")
+  ("BIO",                                             m_BIO,                                            false, "Enable bi-directional optical flow")
   ("IMV",                                             m_ImvMode,                                            1, "Adaptive MV precision Mode (IMV)\n"
                                                                                                                "\t0: disabled\n"
-                                                                                                               "\t1: enabled (Full-Pel and 4-PEL)\n")
+                                                                                                               "\t1: enabled (1/2-Pel, Full-Pel and 4-PEL)\n")
   ("IMV4PelFast",                                     m_Imv4PelFast,                                        1, "Fast 4-Pel Adaptive MV precision Mode 0:disabled, 1:enabled)  [default: 1]")
   ("LMChroma",                                        m_LMChroma,                                           1, " LMChroma prediction "
                                                                                                                "\t0:  Disable LMChroma\n"
                                                                                                                "\t1:  Enable LMChroma\n")
-  ("CclmCollocatedChroma",                            m_cclmCollocatedChromaFlag,                       false, "Specifies the location of the top-left downsampled luma sample in cross-component linear model intra prediction relative to the top-left luma sample\n"
+  ("HorCollocatedChroma",                             m_horCollocatedChromaFlag,                         true, "Specifies location of a chroma sample relatively to the luma sample in horizontal direction in the reference picture resampling\n"
+                                                                                                               "\t0:  horizontally shifted by 0.5 units of luma samples\n"
+                                                                                                               "\t1:  collocated (default)\n")
+  ("VerCollocatedChroma",                             m_verCollocatedChromaFlag,                        false, "Specifies location of a chroma sample relatively to the luma sample in vertical direction in the cross-component linear model intra prediction and the reference picture resampling\n"
                                                                                                                "\t0:  horizontally co-sited, vertically shifted by 0.5 units of luma samples\n"
                                                                                                                "\t1:  collocated\n")
   ("MTS",                                             m_MTS,                                                0, "Multiple Transform Set (MTS)\n"
@@ -857,16 +906,19 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   ("MTSInterMaxCand",                                 m_MTSInterMaxCand,                                    4, "Number of additional candidates to test in encoder search for MTS in inter slices\n")
   ("MTSImplicit",                                     m_MTSImplicit,                                        0, "Enable implicit MTS (when explicit MTS is off)\n")
   ( "SBT",                                            m_SBT,                                            false, "Enable Sub-Block Transform for inter blocks\n" )
+  ( "SBTFast64WidthTh",                               m_SBTFast64WidthTh,                                1920, "Picture width threshold for testing size-64 SBT in RDO (now for HD and above sequences)\n")
+  ( "ISP",                                            m_ISP,                                            false, "Enable Intra Sub-Partitions\n" )
+  ("SMVD",                                            m_SMVD,                                           false, "Enable Symmetric MVD\n")
   ("CompositeLTReference",                            m_compositeRefEnabled,                            false, "Enable Composite Long Term Reference Frame")
-  ("GBi",                                             m_GBi,                                            false, "Enable Generalized Bi-prediction(GBi)")
-  ("GBiFast",                                         m_GBiFast,                                        false, "Fast methods for Generalized Bi-prediction(GBi)\n")
+  ("BCW",                                             m_bcw,                                            false, "Enable Generalized Bi-prediction(Bcw)")
+  ("BcwFast",                                         m_BcwFast,                                        false, "Fast methods for Generalized Bi-prediction(Bcw)\n")
 #if LUMA_ADAPTIVE_DEBLOCKING_FILTER_QP_OFFSET
   ("LADF",                                            m_LadfEnabed,                                     false, "Luma adaptive deblocking filter QP Offset(L0414)")
   ("LadfNumIntervals",                                m_LadfNumIntervals,                                   3, "LADF number of intervals (2-5, inclusive)")
   ("LadfQpOffset",                                    cfg_LadfQpOffset,                      cfg_LadfQpOffset, "LADF QP offset")
   ("LadfIntervalLowerBound",                          cfg_LadfIntervalLowerBound,  cfg_LadfIntervalLowerBound, "LADF lower bound for 2nd lowest interval")
 #endif
-  ("MHIntra",                                         m_MHIntra,                                        false, "Enable MHIntra mode")
+  ("CIIP",                                            m_ciip,                                           false, "Enable CIIP mode")
   ("Triangle",                                        m_Triangle,                                       false, "Enable triangular shape motion vector prediction (0:off, 1:on)")
   ("HashME",                                          m_HashME,                                         false, "Enable hash motion estimation (0:off, 1:on)")
 
@@ -874,6 +926,10 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   ("AffineAmvr",                                      m_AffineAmvr,                                     false, "Eanble AMVR for affine inter mode")
   ("AffineAmvrEncOpt",                                m_AffineAmvrEncOpt,                               false, "Enable encoder optimization of affine AMVR")
   ("DMVR",                                            m_DMVR,                                           false, "Decoder-side Motion Vector Refinement")
+  ("MmvdDisNum",                                      m_MmvdDisNum,                                     8,     "Number of MMVD Distance Entries")
+  ("ColorTransform",                                  m_useColorTrans,                                  false, "Enable the color transform")
+  ("PLT",                                             m_PLTMode,                                           0u, "PLTMode (0x1:enabled, 0x0:disabled)  [default: disabled]")
+  ("JointCbCr",                                       m_JointCbCrMode,                                  false, "Enable joint coding of chroma residuals (JointCbCr, 0:off, 1:on)")
   ( "IBC",                                            m_IBCMode,                                           0u, "IBCMode (0x1:enabled, 0x0:disabled)  [default: disabled]")
   ( "IBCLocalSearchRangeX",                           m_IBCLocalSearchRangeX,                            128u, "Search range of IBC local search in x direction")
   ( "IBCLocalSearchRangeY",                           m_IBCLocalSearchRangeY,                            128u, "Search range of IBC local search in y direction")
@@ -886,9 +942,20 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   ("WrapAroundOffset",                                m_wrapAroundOffset,                                  0u, "Offset in luma samples used for computing the horizontal wrap-around position")
 
   // ADD_NEW_TOOL : (encoder app) add parsing parameters here
+  ("LoopFilterAcrossVirtualBoundariesDisabledFlag",   m_loopFilterAcrossVirtualBoundariesDisabledFlag,  false, "Disable in-loop filtering operations across the virtual boundaries (0:off, 1:on)  [default: off]")
+  ("NumVerVirtualBoundaries",                         m_numVerVirtualBoundaries,                           0u, "Number of vertical virtual boundaries (0-3, inclusive)")
+  ("NumHorVirtualBoundaries",                         m_numHorVirtualBoundaries,                           0u, "Number of horizontal virtual boundaries (0-3, inclusive)")
+  ("VirtualBoundariesPosX",                           cfg_virtualBoundariesPosX,    cfg_virtualBoundariesPosX, "Locations of the vertical virtual boundaries in units of luma samples")
+  ("VirtualBoundariesPosY",                           cfg_virtualBoundariesPosY,    cfg_virtualBoundariesPosY, "Locations of the horizontal virtual boundaries in units of luma samples")
   ("EncDbOpt",                                        m_encDbOpt,                                       false, "Encoder optimization with deblocking filter")
-  ("LumaReshapeEnable",                               m_lumaReshapeEnable,                              false, "Enable Reshaping for Luma Channel")
-  ("ReshapeSignalType",                               m_reshapeSignalType,                                 0u, "Input signal type: 0: SDR, 1:PQ, 2:HLG")
+  ("LMCSEnable",                                      m_lmcsEnabled,                                    false, "Enable LMCS (luma mapping with chroma scaling)")
+  ("LMCSSignalType",                                  m_reshapeSignalType,                                 0u, "Input signal type: 0:SDR, 1:HDR-PQ, 2:HDR-HLG")
+  ("LMCSUpdateCtrl",                                  m_updateCtrl,                                         0, "LMCS model update control: 0:RA, 1:AI, 2:LDB/LDP")
+  ("LMCSAdpOption",                                   m_adpOption,                                          0, "LMCS adaptation options: 0:automatic(default),"
+                                                                                                               "1: rsp both (CW66 for QP<=22), 2: rsp TID0 (for all QP),"
+                                                                                                               "3: rsp inter(CW66 for QP<=22), 4: rsp inter(for all QP).")
+  ("LMCSInitialCW",                                   m_initialCW,                                         0u, "LMCS initial total codeword (0~1023) when LMCSAdpOption > 0")
+  ("LMCSOffset",                                      m_CSoffset,                                           0, "LMCS chroma residual scaling offset")
   ("IntraCMD",                                        m_intraCMD,                                          0u, "IntraChroma MD: 0: none, 1:fixed to default wPSNR weight")
   ("LCTUFast",                                        m_useFastLCTU,                                    false, "Fast methods for large CTU")
   ("FastMrg",                                         m_useFastMrg,                                     false, "Fast methods for inter merge")
@@ -896,6 +963,14 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   ("AMaxBT",                                          m_useAMaxBT,                                      false, "Adaptive maximal BT-size")
   ("E0023FastEnc",                                    m_e0023FastEnc,                                    true, "Fast encoding setting for QTBT (proposal E0023)")
   ("ContentBasedFastQtbt",                            m_contentBasedFastQtbt,                           false, "Signal based QTBT speed-up")
+  ("UseNonLinearAlfLuma",                             m_useNonLinearAlfLuma,                             true, "Non-linear adaptive loop filters for Luma Channel")
+  ("UseNonLinearAlfChroma",                           m_useNonLinearAlfChroma,                           true, "Non-linear adaptive loop filters for Chroma Channels")
+  ("MaxNumAlfAlternativesChroma",                     m_maxNumAlfAlternativesChroma,
+                                                                    (unsigned)MAX_NUM_ALF_ALTERNATIVES_CHROMA, std::string("Maximum number of alternative Chroma filters (1-") + std::to_string(MAX_NUM_ALF_ALTERNATIVES_CHROMA) + std::string (", inclusive)") )
+  ("MRL",                                             m_MRL,                                            false,  "Enable MRL (multiple reference line intra prediction)")
+  ("MIP",                                             m_MIP,                                             true,  "Enable MIP (matrix-based intra prediction)")
+  ("FastMIP",                                         m_useFastMIP,                                     false,  "Fast encoder search for MIP (matrix-based intra prediction)")
+  ("FastLocalDualTreeMode",                           m_fastLocalDualTreeMode,                              0,  "Fast intra pass coding for local dual-tree in intra coding region, 0: off, 1: use threshold, 2: one intra mode only")
   // Unit definition parameters
   ("MaxCUWidth",                                      m_uiMaxCUWidth,                                     64u)
   ("MaxCUHeight",                                     m_uiMaxCUHeight,                                    64u)
@@ -904,20 +979,15 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   ("MaxCUSize,s",                                     m_uiMaxCUHeight,                                    64u, "Maximum CU size")
   ("MaxPartitionDepth,h",                             m_uiMaxCUDepth,                                      4u, "CU depth")
 
-#if MAX_TB_SIZE_SIGNALLING
   ("Log2MaxTbSize",                                   m_log2MaxTbSize,                                      6, "Maximum transform block size in logarithm base 2 (Default: 6)")
-#endif
 
   // Coding structure paramters
   ("IntraPeriod,-ip",                                 m_iIntraPeriod,                                      -1, "Intra period in frames, (-1: only first frame)")
   ("DecodingRefreshType,-dr",                         m_iDecodingRefreshType,                               0, "Intra refresh type (0:none 1:CRA 2:IDR 3:RecPointSEI)")
   ("GOPSize,g",                                       m_iGOPSize,                                           1, "GOP size of temporal structure")
-#if JCTVC_Y0038_PARAMS
+  ("DRAPPeriod",                                      m_drapPeriod,                                         0, "DRAP period in frames (0: disable Dependent RAP indication SEI messages)")
   ("ReWriteParamSets",                                m_rewriteParamSets,                           false, "Enable rewriting of Parameter sets before every (intra) random access point")
-  //Alias with same name as in HM
-  ("ReWriteParamSetsFlag",                            m_rewriteParamSets,                           false, "Alias for ReWriteParamSets")
-#endif
-
+  ("IDRRefParamList",                                 m_idrRefParamList,                            false, "Enable indication of reference picture list syntax elements in slice headers of IDR pictures")
   // motion search options
   ("DisableIntraInInter",                             m_bDisableIntraPUsInInterSlices,                  false, "Flag to disable intra PUs in inter slices")
   ("FastSearch",                                      tmpMotionEstimationSearchMethod,  int(MESEARCH_DIAMOND), "0:Full search 1:Diamond 2:Selective 3:Enhanced Diamond")
@@ -967,11 +1037,20 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   ("LumaLevelToDeltaQPMappingLuma",                   cfg_lumaLeveltoDQPMappingLuma,  cfg_lumaLeveltoDQPMappingLuma, "Luma to Delta QP Mapping - luma thresholds")
   ("LumaLevelToDeltaQPMappingDQP",                    cfg_lumaLeveltoDQPMappingQP,  cfg_lumaLeveltoDQPMappingQP, "Luma to Delta QP Mapping - DQP values")
 #endif
-
+  ("UseIdentityTableForNon420Chroma",                 m_useIdentityTableForNon420Chroma,                 true, "True: Indicates that 422/444 chroma uses identity chroma QP mapping tables; False: explicit Qp table may be specified in config")
+  ("SameCQPTablesForAllChroma",                       m_chromaQpMappingTableParams.m_sameCQPTableForAllChromaFlag,                        true, "0: Different tables for Cb, Cr and joint Cb-Cr components, 1 (default): Same tables for all three chroma components")
+  ("QpInValCb",                                       cfg_qpInValCb,                            cfg_qpInValCb, "Input coordinates for the QP table for Cb component")
+  ("QpOutValCb",                                      cfg_qpOutValCb,                          cfg_qpOutValCb, "Output coordinates for the QP table for Cb component")
+  ("QpInValCr",                                       cfg_qpInValCr,                            cfg_qpInValCr, "Input coordinates for the QP table for Cr component")
+  ("QpOutValCr",                                      cfg_qpOutValCr,                          cfg_qpOutValCr, "Output coordinates for the QP table for Cr component")
+  ("QpInValCbCr",                                     cfg_qpInValCbCr,                        cfg_qpInValCbCr, "Input coordinates for the QP table for joint Cb-Cr component")
+  ("QpOutValCbCr",                                    cfg_qpOutValCbCr,                      cfg_qpOutValCbCr, "Output coordinates for the QP table for joint Cb-Cr component")
   ("CbQpOffset,-cbqpofs",                             m_cbQpOffset,                                         0, "Chroma Cb QP Offset")
   ("CrQpOffset,-crqpofs",                             m_crQpOffset,                                         0, "Chroma Cr QP Offset")
   ("CbQpOffsetDualTree",                              m_cbQpOffsetDualTree,                                 0, "Chroma Cb QP Offset for dual tree")
   ("CrQpOffsetDualTree",                              m_crQpOffsetDualTree,                                 0, "Chroma Cr QP Offset for dual tree")
+  ("CbCrQpOffset,-cbcrqpofs",                         m_cbCrQpOffset,                                      -1, "QP Offset for joint Cb-Cr mode")
+  ("CbCrQpOffsetDualTree",                            m_cbCrQpOffsetDualTree,                               0, "QP Offset for joint Cb-Cr mode in dual tree")
 #if ER_CHROMA_QP_WCG_PPS
   ("WCGPPSEnable",                                    m_wcgChromaQpControl.enabled,                     false, "1: Enable the WCG PPS chroma modulation scheme. 0 (default) disabled")
   ("WCGPPSCbQpScale",                                 m_wcgChromaQpControl.chromaCbQpScale,               1.0, "WCG PPS Chroma Cb QP Scale")
@@ -1017,6 +1096,8 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   ("TransformSkip",                                   m_useTransformSkip,                               false, "Intra transform skipping")
   ("TransformSkipFast",                               m_useTransformSkipFast,                           false, "Fast encoder search for transform skipping, winner takes it all mode.")
   ("TransformSkipLog2MaxSize",                        m_log2MaxTransformSkipBlockSize,                     5U, "Specify transform-skip maximum size. Minimum 2, Maximum 5. (not valid in V1 profiles)")
+  ("ChromaTS",                                        m_useChromaTS,                                    false, "Enable encoder search of chromaTS")
+  ("BDPCM",                                           m_useBDPCM,                                           0, "BDPCM (0:off, 1:lumaonly, 2:lumachroma)")
   ("ISPFast",                                         m_useFastISP,                                     false, "Fast encoder search for ISP")
   ("ImplicitResidualDPCM",                            m_rdpcmEnabledFlag[RDPCM_SIGNAL_IMPLICIT],        false, "Enable implicitly signalled residual DPCM for intra (also known as sample-adaptive intra predict) (not valid in V1 profiles)")
   ("ExplicitResidualDPCM",                            m_rdpcmEnabledFlag[RDPCM_SIGNAL_EXPLICIT],        false, "Enable explicitly signalled residual DPCM for inter (not valid in V1 profiles)")
@@ -1030,76 +1111,48 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   ("SaoEncodingRateChroma",                           m_saoEncodingRateChroma,                            0.5, "The SAO early picture termination rate to use for chroma (when m_SaoEncodingRate is >0). If <=0, use results for luma")
   ("MaxNumOffsetsPerPic",                             m_maxNumOffsetsPerPic,                             2048, "Max number of SAO offset per picture (Default: 2048)")
   ("SAOLcuBoundary",                                  m_saoCtuBoundary,                                 false, "0: right/bottom CTU boundary areas skipped from SAO parameter estimation, 1: non-deblocked pixels are used for those areas")
-#if K0238_SAO_GREEDY_MERGE_ENCODING
   ("SAOGreedyEnc",                                    m_saoGreedyMergeEnc,                              false, "SAO greedy merge encoding algorithm")
-#endif
-#if HEVC_TILES_WPP
-  ("SliceMode",                                       tmpSliceMode,                            int(NO_SLICES), "0: Disable all Recon slice limits, 1: Enforce max # of CTUs, 2: Enforce max # of bytes, 3:specify tiles per dependent slice")
-  ("SliceArgument",                                   m_sliceArgument,                                      0, "Depending on SliceMode being:"
-                                                                                                               "\t1: max number of CTUs per slice"
-                                                                                                               "\t2: max number of bytes per slice"
-                                                                                                               "\t3: max number of tiles per slice")
-#else
-  ("SliceMode",                                       tmpSliceMode,                            int(NO_SLICES), "0: Disable all Recon slice limits, 1: Enforce max # of CTUs, 2: Enforce max # of bytes)")
-  ("SliceArgument",                                   m_sliceArgument,                                      0, "Depending on SliceMode being:"
-                                                                                                               "\t1: max number of CTUs per slice"
-                                                                                                               "\t2: max number of bytes per slice")
-#endif
-#if HEVC_DEPENDENT_SLICES
-  ("SliceSegmentMode",                                tmpSliceSegmentMode,                     int(NO_SLICES), "0: Disable all slice segment limits, 1: Enforce max # of CTUs, 2: Enforce max # of bytes, 3:specify tiles per dependent slice")
-  ("SliceSegmentArgument",                            m_sliceSegmentArgument,                               0, "Depending on SliceSegmentMode being:"
-                                                                                                               "\t1: max number of CTUs per slice segment"
-                                                                                                               "\t2: max number of bytes per slice segment"
-                                                                                                               "\t3: max number of tiles per slice segment")
-#endif
-  ("LFCrossSliceBoundaryFlag",                        m_bLFCrossSliceBoundaryFlag,                       true)
-
-  ("ConstrainedIntraPred",                            m_bUseConstrainedIntraPred,                       false, "Constrained Intra Prediction")
+  ("EnablePicPartitioning",                           m_picPartitionFlag,                               false, "Enable picture partitioning (0: single tile, single slice, 1: multiple tiles/slices can be used)")
+  ("TileColumnWidthArray",                            cfgTileColumnWidth,                  cfgTileColumnWidth, "Tile column widths in units of CTUs. Last column width in list will be repeated uniformly to cover any remaining picture width")
+  ("TileRowHeightArray",                              cfgTileRowHeight,                      cfgTileRowHeight, "Tile row heights in units of CTUs. Last row height in list will be repeated uniformly to cover any remaining picture height")
+  ("RasterScanSlices",                                m_rasterSliceFlag,                                false, "Indicates if using raster-scan or rectangular slices (0: rectangular, 1: raster-scan)")
+  ("RectSlicePositions",                              cfgRectSlicePos,                        cfgRectSlicePos, "Rectangular slice positions. List containing pairs of top-left CTU RS address followed by bottom-right CTU RS address")
+  ("RectSliceFixedWidth",                             m_rectSliceFixedWidth,                                0, "Fixed rectangular slice width in units of tiles (0: disable this feature and use RectSlicePositions instead)")
+  ("RectSliceFixedHeight",                            m_rectSliceFixedHeight,                               0, "Fixed rectangular slice height in units of tiles (0: disable this feature and use RectSlicePositions instead)")
+  ("RasterSliceSizes",                                cfgRasterSliceSize,                  cfgRasterSliceSize, "Raster-scan slice sizes in units of tiles. Last size in list will be repeated uniformly to cover any remaining tiles in the picture")
+  ("DisableLoopFilterAcrossTiles",                    m_disableLFCrossTileBoundaryFlag,                 false, "Loop filtering applied across tile boundaries or not (0: filter across tile boundaries  1: do not filter across tile boundaries)")
+  ("DisableLoopFilterAcrossSlices",                   m_disableLFCrossSliceBoundaryFlag,                false, "Loop filtering applied across slice boundaries or not (0: filter across slice boundaries 1: do not filter across slice boundaries)")
+  ("EnableSubPicPartitioning",                        m_subPicPartitionFlag,                             true, "Enable Sub-Picture partitioning (0: single slice per sub-picture, 1: multiple slices per sub-picture can be used)")
   ("FastUDIUseMPMEnabled",                            m_bFastUDIUseMPMEnabled,                           true, "If enabled, adapt intra direction search, accounting for MPM")
   ("FastMEForGenBLowDelayEnabled",                    m_bFastMEForGenBLowDelayEnabled,                   true, "If enabled use a fast ME for generalised B Low Delay slices")
   ("UseBLambdaForNonKeyLowDelayPictures",             m_bUseBLambdaForNonKeyLowDelayPictures,            true, "Enables use of B-Lambda for non-key low-delay pictures")
-  ("PCMEnabledFlag",                                  m_usePCM,                                         false)
-  ("PCMLog2MaxSize",                                  m_pcmLog2MaxSize,                                    5u)
-  ("PCMLog2MinSize",                                  m_uiPCMLog2MinSize,                                  3u)
-
-  ("PCMInputBitDepthFlag",                            m_bPCMInputBitDepthFlag,                           true)
-  ("PCMFilterDisableFlag",                            m_bPCMFilterDisableFlag,                          false)
   ("IntraReferenceSmoothing",                         m_enableIntraReferenceSmoothing,                   true, "0: Disable use of intra reference smoothing (not valid in V1 profiles). 1: Enable use of intra reference smoothing (same as V1)")
   ("WeightedPredP,-wpP",                              m_useWeightedPred,                                false, "Use weighted prediction in P slices")
   ("WeightedPredB,-wpB",                              m_useWeightedBiPred,                              false, "Use weighted (bidirectional) prediction in B slices")
   ("WeightedPredMethod,-wpM",                         tmpWeightedPredictionMethod, int(WP_PER_PICTURE_WITH_SIMPLE_DC_COMBINED_COMPONENT), "Weighted prediction method")
-  ("Log2ParallelMergeLevel",                          m_log2ParallelMergeLevel,                            2u, "Parallel merge estimation region")
-#if HEVC_TILES_WPP
-    //deprecated copies of renamed tile parameters
-  ("UniformSpacingIdc",                               m_tileUniformSpacingFlag,                         false,      "deprecated alias of TileUniformSpacing")
-  ("ColumnWidthArray",                                cfg_ColumnWidth,                        cfg_ColumnWidth, "deprecated alias of TileColumnWidthArray")
-  ("RowHeightArray",                                  cfg_RowHeight,                            cfg_RowHeight, "deprecated alias of TileRowHeightArray")
-
-  ("TileUniformSpacing",                              m_tileUniformSpacingFlag,                         false,      "Indicates that tile columns and rows are distributed uniformly")
-  ("NumTileColumnsMinus1",                            m_numTileColumnsMinus1,                               0,          "Number of tile columns in a picture minus 1")
-  ("NumTileRowsMinus1",                               m_numTileRowsMinus1,                                  0,          "Number of rows in a picture minus 1")
-  ("TileColumnWidthArray",                            cfg_ColumnWidth,                        cfg_ColumnWidth, "Array containing tile column width values in units of CTU")
-  ("TileRowHeightArray",                              cfg_RowHeight,                            cfg_RowHeight, "Array containing tile row height values in units of CTU")
-  ("LFCrossTileBoundaryFlag",                         m_bLFCrossTileBoundaryFlag,                        true, "1: cross-tile-boundary loop filtering. 0:non-cross-tile-boundary loop filtering")
   ("WaveFrontSynchro",                                m_entropyCodingSyncEnabledFlag,                   false, "0: entropy coding sync disabled; 1 entropy coding sync enabled")
-#endif
-#if HEVC_USE_SCALING_LISTS
   ("ScalingList",                                     m_useScalingListId,                    SCALING_LIST_OFF, "0/off: no scaling list, 1/default: default scaling lists, 2/file: scaling lists specified in ScalingListFile")
   ("ScalingListFile",                                 m_scalingListFileName,                       string(""), "Scaling list file name. Use an empty string to produce help.")
-#endif
-  ("DepQuant",                                        m_depQuantEnabledFlag,                                          true )
-#if HEVC_USE_SIGN_HIDING
-  ("SignHideFlag,-SBH",                               m_signDataHidingEnabledFlag,                                    false )
-#endif
+  ("DisableScalingMatrixForLFNST",                    m_disableScalingMatrixForLfnstBlks,                true, "Disable scaling matrices, when enabled, for LFNST-coded blocks")
+  ("DepQuant",                                        m_depQuantEnabledFlag,                                          true, "Enable dependent quantization (Default: 1)" )
+  ("SignHideFlag,-SBH",                               m_signDataHidingEnabledFlag,                                    false,  "Enable sign hiding" )
   ("MaxNumMergeCand",                                 m_maxNumMergeCand,                                   5u, "Maximum number of merge candidates")
   ("MaxNumAffineMergeCand",                           m_maxNumAffineMergeCand,                             5u, "Maximum number of affine merge candidates")
-  /* Misc. */
+  ("MaxNumTriangleCand",                              m_maxNumTriangleCand,                                5u, "Maximum number of triangle candidates")
+  ("MaxNumIBCMergeCand",                              m_maxNumIBCMergeCand,                                6u, "Maximum number of IBC merge candidates")
+    /* Misc. */
   ("SEIDecodedPictureHash,-dph",                      tmpDecodedPictureHashSEIMappedType,                   0, "Control generation of decode picture hash SEI messages\n"
                                                                                                                "\t3: checksum\n"
                                                                                                                "\t2: CRC\n"
                                                                                                                "\t1: use MD5\n"
                                                                                                                "\t0: disable")
   ("TMVPMode",                                        m_TMVPModeId,                                         1, "TMVP mode 0: TMVP disable for all slices. 1: TMVP enable for all slices (default) 2: TMVP enable for certain slices only")
+  ("PPSorSliceMode",                                  m_PPSorSliceMode,                                     0, "Enable signalling certain parameters either in PPS or per slice\n"
+                                                                                                                "\tmode 0: Always per slice (default), 1: RA settings, 2: LDB settings, 3: LDP settings")
+  ("SliceLevelRpl",                                   m_sliceLevelRpl,                                   true, "Code reference picture lists in slice headers rather than picture header.")
+  ("SliceLevelDblk",                                  m_sliceLevelDblk,                                  true, "Code deblocking filter parameters in slice headers rather than picture header.")
+  ("SliceLevelSao",                                   m_sliceLevelSao,                                   true, "Code SAO parameters in slice headers rather than picture header.")
+  ("SliceLevelAlf",                                   m_sliceLevelAlf,                                   true, "Code ALF parameters in slice headers rather than picture header.")
   ("FEN",                                             tmpFastInterSearchMode,   int(FASTINTERSEARCH_DISABLED), "fast encoder setting")
   ("ECU",                                             m_bUseEarlyCU,                                    false, "Early CU setting")
   ("FDM",                                             m_useFastDecisionForMerge,                         true, "Fast decision for Merge RD Cost")
@@ -1117,26 +1170,18 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   ( "RCCpbSize",                                      m_RCCpbSize,                                         0u, "Rate control: CPB size" )
   ( "RCInitialCpbFullness",                           m_RCInitialCpbFullness,                             0.9, "Rate control: initial CPB fullness" )
 #endif
-  ("TransquantBypassEnable",                          m_TransquantBypassEnabledFlag,                    false, "transquant_bypass_enabled_flag indicator in PPS")
-  ("TransquantBypassEnableFlag",                      m_TransquantBypassEnabledFlag,                    false, "deprecated and obsolete, but still needed for compatibility reasons")
-  ("CUTransquantBypassFlagForce",                     m_CUTransquantBypassFlagForce,                    false, "Force transquant bypass mode, when transquant_bypass_enabled_flag is enabled")
   ("CostMode",                                        m_costMode,                         COST_STANDARD_LOSSY, "Use alternative cost functions: choose between 'lossy', 'sequence_level_lossless', 'lossless' (which forces QP to " MACRO_TO_STRING(LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP) ") and 'mixed_lossless_lossy' (which used QP'=" MACRO_TO_STRING(LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP_PRIME) " for pre-estimates of transquant-bypass blocks).")
   ("RecalculateQPAccordingToLambda",                  m_recalculateQPAccordingToLambda,                 false, "Recalculate QP values according to lambda values. Do not suggest to be enabled in all intra case")
-#if HEVC_USE_INTRA_SMOOTHING_T32 || HEVC_USE_INTRA_SMOOTHING_T64
-  ("StrongIntraSmoothing,-sis",                       m_useStrongIntraSmoothing,                         true, "Enable strong intra smoothing for 32x32 blocks")
-#endif
+#if HEVC_SEI
   ("SEIActiveParameterSets",                          m_activeParameterSetsSEIEnabled,                      0, "Enable generation of active parameter sets SEI messages");
   opts.addOptions()
+#endif
+  ("HrdParametersPresent,-hrd",                       m_hrdParametersPresentFlag,                       false, "Enable generation of hrd_parameters()")
   ("VuiParametersPresent,-vui",                       m_vuiParametersPresentFlag,                       false, "Enable generation of vui_parameters()")
   ("AspectRatioInfoPresent",                          m_aspectRatioInfoPresentFlag,                     false, "Signals whether aspect_ratio_idc is present")
   ("AspectRatioIdc",                                  m_aspectRatioIdc,                                     0, "aspect_ratio_idc")
   ("SarWidth",                                        m_sarWidth,                                           0, "horizontal size of the sample aspect ratio")
   ("SarHeight",                                       m_sarHeight,                                          0, "vertical size of the sample aspect ratio")
-  ("OverscanInfoPresent",                             m_overscanInfoPresentFlag,                        false, "Indicates whether conformant decoded pictures are suitable for display using overscan\n")
-  ("OverscanAppropriate",                             m_overscanAppropriateFlag,                        false, "Indicates whether conformant decoded pictures are suitable for display using overscan\n")
-  ("VideoSignalTypePresent",                          m_videoSignalTypePresentFlag,                     false, "Signals whether video_format, video_full_range_flag, and colour_description_present_flag are present")
-  ("VideoFormat",                                     m_videoFormat,                                        5, "Indicates representation of pictures")
-  ("VideoFullRange",                                  m_videoFullRangeFlag,                             false, "Indicates the black level and range of luma and chroma signals")
   ("ColourDescriptionPresent",                        m_colourDescriptionPresentFlag,                   false, "Signals whether colour_primaries, transfer_characteristics and matrix_coefficients are present")
   ("ColourPrimaries",                                 m_colourPrimaries,                                    2, "Indicates chromaticity coordinates of the source primaries")
   ("TransferCharacteristics",                         m_transferCharacteristics,                            2, "Indicates the opto-electronic transfer characteristics of the source")
@@ -1144,70 +1189,19 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   ("ChromaLocInfoPresent",                            m_chromaLocInfoPresentFlag,                       false, "Signals whether chroma_sample_loc_type_top_field and chroma_sample_loc_type_bottom_field are present")
   ("ChromaSampleLocTypeTopField",                     m_chromaSampleLocTypeTopField,                        0, "Specifies the location of chroma samples for top field")
   ("ChromaSampleLocTypeBottomField",                  m_chromaSampleLocTypeBottomField,                     0, "Specifies the location of chroma samples for bottom field")
-  ("NeutralChromaIndication",                         m_neutralChromaIndicationFlag,                    false, "Indicates that the value of all decoded chroma samples is equal to 1<<(BitDepthCr-1)")
-  ("DefaultDisplayWindowFlag",                        m_defaultDisplayWindowFlag,                       false, "Indicates the presence of the Default Window parameters")
-  ("DefDispWinLeftOffset",                            m_defDispWinLeftOffset,                               0, "Specifies the left offset of the default display window from the conformance window")
-  ("DefDispWinRightOffset",                           m_defDispWinRightOffset,                              0, "Specifies the right offset of the default display window from the conformance window")
-  ("DefDispWinTopOffset",                             m_defDispWinTopOffset,                                0, "Specifies the top offset of the default display window from the conformance window")
-  ("DefDispWinBottomOffset",                          m_defDispWinBottomOffset,                             0, "Specifies the bottom offset of the default display window from the conformance window")
-  ("FrameFieldInfoPresentFlag",                       m_frameFieldInfoPresentFlag,                      false, "Indicates that pic_struct and field coding related values are present in picture timing SEI messages")
-  ("PocProportionalToTimingFlag",                     m_pocProportionalToTimingFlag,                    false, "Indicates that the POC value is proportional to the output time w.r.t. first picture in CVS")
-  ("NumTicksPocDiffOneMinus1",                        m_numTicksPocDiffOneMinus1,                           0, "Number of ticks minus 1 that for a POC difference of one")
-  ("BitstreamRestriction",                            m_bitstreamRestrictionFlag,                       false, "Signals whether bitstream restriction parameters are present")
-#if HEVC_TILES_WPP
-  ("TilesFixedStructure",                             m_tilesFixedStructureFlag,                        false, "Indicates that each active picture parameter set has the same values of the syntax elements related to tiles")
-#endif
-  ("MotionVectorsOverPicBoundaries",                  m_motionVectorsOverPicBoundariesFlag,             false, "Indicates that no samples outside the picture boundaries are used for inter prediction")
-  ("MaxBytesPerPicDenom",                             m_maxBytesPerPicDenom,                                2, "Indicates a number of bytes not exceeded by the sum of the sizes of the VCL NAL units associated with any coded picture")
-  ("MaxBitsPerMinCuDenom",                            m_maxBitsPerMinCuDenom,                               1, "Indicates an upper bound for the number of bits of coding_unit() data")
-  ("Log2MaxMvLengthHorizontal",                       m_log2MaxMvLengthHorizontal,                         15, "Indicate the maximum absolute value of a decoded horizontal MV component in quarter-pel luma units")
-  ("Log2MaxMvLengthVertical",                         m_log2MaxMvLengthVertical,                           15, "Indicate the maximum absolute value of a decoded vertical MV component in quarter-pel luma units");
+  ("ChromaSampleLocType",                             m_chromaSampleLocType,                                0, "Specifies the location of chroma samples for progressive content")
+  ("OverscanInfoPresent",                             m_overscanInfoPresentFlag,                        false, "Signals whether overscan_appropriate_flag is present\n")
+  ("OverscanAppropriate",                             m_overscanAppropriateFlag,                        false, "Indicates whether conformant decoded pictures are suitable for display using overscan\n")
+  ("VideoFullRange",                                  m_videoFullRangeFlag,                             false, "Indicates the black level and range of luma and chroma signals");
   opts.addOptions()
+#if HEVC_SEI
   ("SEIColourRemappingInfoFileRoot,-cri",             m_colourRemapSEIFileRoot,                    string(""), "Colour Remapping Information SEI parameters root file name (wo num ext)")
   ("SEIRecoveryPoint",                                m_recoveryPointSEIEnabled,                        false, "Control generation of recovery point SEI messages")
+#endif
   ("SEIBufferingPeriod",                              m_bufferingPeriodSEIEnabled,                      false, "Control generation of buffering period SEI messages")
   ("SEIPictureTiming",                                m_pictureTimingSEIEnabled,                        false, "Control generation of picture timing SEI messages")
-  ("SEIToneMappingInfo",                              m_toneMappingInfoSEIEnabled,                      false, "Control generation of Tone Mapping SEI messages")
-  ("SEIToneMapId",                                    m_toneMapId,                                          0, "Specifies Id of Tone Mapping SEI message for a given session")
-  ("SEIToneMapCancelFlag",                            m_toneMapCancelFlag,                              false, "Indicates that Tone Mapping SEI message cancels the persistence or follows")
-  ("SEIToneMapPersistenceFlag",                       m_toneMapPersistenceFlag,                          true, "Specifies the persistence of the Tone Mapping SEI message")
-  ("SEIToneMapCodedDataBitDepth",                     m_toneMapCodedDataBitDepth,                           8, "Specifies Coded Data BitDepth of Tone Mapping SEI messages")
-  ("SEIToneMapTargetBitDepth",                        m_toneMapTargetBitDepth,                              8, "Specifies Output BitDepth of Tone mapping function")
-  ("SEIToneMapModelId",                               m_toneMapModelId,                                     0, "Specifies Model utilized for mapping coded data into target_bit_depth range\n"
-                                                                                                               "\t0:  linear mapping with clipping\n"
-                                                                                                               "\t1:  sigmoidal mapping\n"
-                                                                                                               "\t2:  user-defined table mapping\n"
-                                                                                                               "\t3:  piece-wise linear mapping\n"
-                                                                                                               "\t4:  luminance dynamic range information ")
-  ("SEIToneMapMinValue",                              m_toneMapMinValue,                                    0, "Specifies the minimum value in mode 0")
-  ("SEIToneMapMaxValue",                              m_toneMapMaxValue,                                 1023, "Specifies the maximum value in mode 0")
-  ("SEIToneMapSigmoidMidpoint",                       m_sigmoidMidpoint,                                  512, "Specifies the centre point in mode 1")
-  ("SEIToneMapSigmoidWidth",                          m_sigmoidWidth,                                     960, "Specifies the distance between 5% and 95% values of the target_bit_depth in mode 1")
-  ("SEIToneMapStartOfCodedInterval",                  cfg_startOfCodedInterval,      cfg_startOfCodedInterval, "Array of user-defined mapping table")
-  ("SEIToneMapNumPivots",                             m_numPivots,                                          0, "Specifies the number of pivot points in mode 3")
-  ("SEIToneMapCodedPivotValue",                       cfg_codedPivotValue,                cfg_codedPivotValue, "Array of pivot point")
-  ("SEIToneMapTargetPivotValue",                      cfg_targetPivotValue,              cfg_targetPivotValue, "Array of pivot point")
-  ("SEIToneMapCameraIsoSpeedIdc",                     m_cameraIsoSpeedIdc,                                  0, "Indicates the camera ISO speed for daylight illumination")
-  ("SEIToneMapCameraIsoSpeedValue",                   m_cameraIsoSpeedValue,                              400, "Specifies the camera ISO speed for daylight illumination of Extended_ISO")
-  ("SEIToneMapExposureIndexIdc",                      m_exposureIndexIdc,                                   0, "Indicates the exposure index setting of the camera")
-  ("SEIToneMapExposureIndexValue",                    m_exposureIndexValue,                               400, "Specifies the exposure index setting of the camera of Extended_ISO")
-  ("SEIToneMapExposureCompensationValueSignFlag",     m_exposureCompensationValueSignFlag,               false, "Specifies the sign of ExposureCompensationValue")
-  ("SEIToneMapExposureCompensationValueNumerator",    m_exposureCompensationValueNumerator,                 0, "Specifies the numerator of ExposureCompensationValue")
-  ("SEIToneMapExposureCompensationValueDenomIdc",     m_exposureCompensationValueDenomIdc,                  2, "Specifies the denominator of ExposureCompensationValue")
-  ("SEIToneMapRefScreenLuminanceWhite",               m_refScreenLuminanceWhite,                          350, "Specifies reference screen brightness setting in units of candela per square metre")
-  ("SEIToneMapExtendedRangeWhiteLevel",               m_extendedRangeWhiteLevel,                          800, "Indicates the luminance dynamic range")
-  ("SEIToneMapNominalBlackLevelLumaCodeValue",        m_nominalBlackLevelLumaCodeValue,                    16, "Specifies luma sample value of the nominal black level assigned decoded pictures")
-  ("SEIToneMapNominalWhiteLevelLumaCodeValue",        m_nominalWhiteLevelLumaCodeValue,                   235, "Specifies luma sample value of the nominal white level assigned decoded pictures")
-  ("SEIToneMapExtendedWhiteLevelLumaCodeValue",       m_extendedWhiteLevelLumaCodeValue,                  300, "Specifies luma sample value of the extended dynamic range assigned decoded pictures")
-  ("SEIChromaResamplingFilterHint",                   m_chromaResamplingFilterSEIenabled,               false, "Control generation of the chroma sampling filter hint SEI message")
-  ("SEIChromaResamplingHorizontalFilterType",         m_chromaResamplingHorFilterIdc,                       2, "Defines the Index of the chroma sampling horizontal filter\n"
-                                                                                                               "\t0: unspecified  - Chroma filter is unknown or is determined by the application"
-                                                                                                               "\t1: User-defined - Filter coefficients are specified in the chroma sampling filter hint SEI message"
-                                                                                                               "\t2: Standards-defined - ITU-T Rec. T.800 | ISO/IEC15444-1, 5/3 filter")
-  ("SEIChromaResamplingVerticalFilterType",           m_chromaResamplingVerFilterIdc,                         2, "Defines the Index of the chroma sampling vertical filter\n"
-                                                                                                               "\t0: unspecified  - Chroma filter is unknown or is determined by the application"
-                                                                                                               "\t1: User-defined - Filter coefficients are specified in the chroma sampling filter hint SEI message"
-                                                                                                               "\t2: Standards-defined - ITU-T Rec. T.800 | ISO/IEC15444-1, 5/3 filter")
+  ("SEIDecodingUnitInfo",                             m_decodingUnitInfoSEIEnabled,                     false, "Control generation of decoding unit information SEI message.")
+  ("SEIFrameFieldInfo",                               m_frameFieldInfoSEIEnabled,                       false, "Control generation of frame field information SEI messages")
   ("SEIFramePacking",                                 m_framePackingSEIEnabled,                         false, "Control generation of frame packing SEI messages")
   ("SEIFramePackingType",                             m_framePackingSEIType,                                0, "Define frame packing arrangement\n"
                                                                                                                "\t3: side by side - frames are displayed horizontally\n"
@@ -1219,52 +1213,7 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
                                                                                                                "\t0: unspecified\n"
                                                                                                                "\t1: stereo pair, frame0 represents left view\n"
                                                                                                                "\t2: stereo pair, frame0 represents right view")
-  ("SEISegmentedRectFramePacking",                    m_segmentedRectFramePackingSEIEnabled,            false, "Controls generation of segmented rectangular frame packing SEI messages")
-  ("SEISegmentedRectFramePackingCancel",              m_segmentedRectFramePackingSEICancel,             false, "If equal to 1, cancels the persistence of any previous SRFPA SEI message")
-  ("SEISegmentedRectFramePackingType",                m_segmentedRectFramePackingSEIType,                   0, "Specifies the arrangement of the frames in the reconstructed picture")
-  ("SEISegmentedRectFramePackingPersistence",         m_segmentedRectFramePackingSEIPersistence,        false, "If equal to 0, the SEI applies to the current frame only")
-  ("SEIDisplayOrientation",                           m_displayOrientationSEIAngle,                         0, "Control generation of display orientation SEI messages\n"
-                                                                                                               "\tN: 0 < N < (2^16 - 1) enable display orientation SEI message with anticlockwise_rotation = N and display_orientation_repetition_period = 1\n"
-                                                                                                               "\t0: disable")
-  ("SEITemporalLevel0Index",                          m_temporalLevel0IndexSEIEnabled,                  false, "Control generation of temporal level 0 index SEI messages")
-  ("SEIGradualDecodingRefreshInfo",                   m_gradualDecodingRefreshInfoEnabled,              false, "Control generation of gradual decoding refresh information SEI message")
-  ("SEINoDisplay",                                    m_noDisplaySEITLayer,                                 0, "Control generation of no display SEI message\n"
-                                                                                                               "\tN: 0 < N enable no display SEI message for temporal layer N or higher\n"
-                                                                                                               "\t0: disable")
-  ("SEIDecodingUnitInfo",                             m_decodingUnitInfoSEIEnabled,                     false, "Control generation of decoding unit information SEI message.")
-  ("SEISOPDescription",                               m_SOPDescriptionSEIEnabled,                       false, "Control generation of SOP description SEI messages")
-  ("SEIScalableNesting",                              m_scalableNestingSEIEnabled,                      false, "Control generation of scalable nesting SEI messages")
-#if HEVC_TILES_WPP
-  ("SEITempMotionConstrainedTileSets",                m_tmctsSEIEnabled,                                false, "Control generation of temporal motion constrained tile sets SEI message")
-#endif
-  ("SEITimeCodeEnabled",                              m_timeCodeSEIEnabled,                             false, "Control generation of time code information SEI message")
-  ("SEITimeCodeNumClockTs",                           m_timeCodeSEINumTs,                                   0, "Number of clock time sets [0..3]")
-  ("SEITimeCodeTimeStampFlag",                        cfg_timeCodeSeiTimeStampFlag,          cfg_timeCodeSeiTimeStampFlag,         "Time stamp flag associated to each time set")
-  ("SEITimeCodeFieldBasedFlag",                       cfg_timeCodeSeiNumUnitFieldBasedFlag,  cfg_timeCodeSeiNumUnitFieldBasedFlag, "Field based flag associated to each time set")
-  ("SEITimeCodeCountingType",                         cfg_timeCodeSeiCountingType,           cfg_timeCodeSeiCountingType,          "Counting type associated to each time set")
-  ("SEITimeCodeFullTsFlag",                           cfg_timeCodeSeiFullTimeStampFlag,      cfg_timeCodeSeiFullTimeStampFlag,     "Full time stamp flag associated to each time set")
-  ("SEITimeCodeDiscontinuityFlag",                    cfg_timeCodeSeiDiscontinuityFlag,      cfg_timeCodeSeiDiscontinuityFlag,     "Discontinuity flag associated to each time set")
-  ("SEITimeCodeCntDroppedFlag",                       cfg_timeCodeSeiCntDroppedFlag,         cfg_timeCodeSeiCntDroppedFlag,        "Counter dropped flag associated to each time set")
-  ("SEITimeCodeNumFrames",                            cfg_timeCodeSeiNumberOfFrames,         cfg_timeCodeSeiNumberOfFrames,        "Number of frames associated to each time set")
-  ("SEITimeCodeSecondsValue",                         cfg_timeCodeSeiSecondsValue,           cfg_timeCodeSeiSecondsValue,          "Seconds value for each time set")
-  ("SEITimeCodeMinutesValue",                         cfg_timeCodeSeiMinutesValue,           cfg_timeCodeSeiMinutesValue,          "Minutes value for each time set")
-  ("SEITimeCodeHoursValue",                           cfg_timeCodeSeiHoursValue,             cfg_timeCodeSeiHoursValue,            "Hours value for each time set")
-  ("SEITimeCodeSecondsFlag",                          cfg_timeCodeSeiSecondsFlag,            cfg_timeCodeSeiSecondsFlag,           "Flag to signal seconds value presence in each time set")
-  ("SEITimeCodeMinutesFlag",                          cfg_timeCodeSeiMinutesFlag,            cfg_timeCodeSeiMinutesFlag,           "Flag to signal minutes value presence in each time set")
-  ("SEITimeCodeHoursFlag",                            cfg_timeCodeSeiHoursFlag,              cfg_timeCodeSeiHoursFlag,             "Flag to signal hours value presence in each time set")
-  ("SEITimeCodeOffsetLength",                         cfg_timeCodeSeiTimeOffsetLength,       cfg_timeCodeSeiTimeOffsetLength,      "Time offset length associated to each time set")
-  ("SEITimeCodeTimeOffset",                           cfg_timeCodeSeiTimeOffsetValue,        cfg_timeCodeSeiTimeOffsetValue,       "Time offset associated to each time set")
-  ("SEIKneeFunctionInfo",                             m_kneeSEIEnabled,                                 false, "Control generation of Knee function SEI messages")
-  ("SEIKneeFunctionId",                               m_kneeSEIId,                                          0, "Specifies Id of Knee function SEI message for a given session")
-  ("SEIKneeFunctionCancelFlag",                       m_kneeSEICancelFlag,                              false, "Indicates that Knee function SEI message cancels the persistence or follows")
-  ("SEIKneeFunctionPersistenceFlag",                  m_kneeSEIPersistenceFlag,                          true, "Specifies the persistence of the Knee function SEI message")
-  ("SEIKneeFunctionInputDrange",                      m_kneeSEIInputDrange,                              1000, "Specifies the peak luminance level for the input picture of Knee function SEI messages")
-  ("SEIKneeFunctionInputDispLuminance",               m_kneeSEIInputDispLuminance,                        100, "Specifies the expected display brightness for the input picture of Knee function SEI messages")
-  ("SEIKneeFunctionOutputDrange",                     m_kneeSEIOutputDrange,                             4000, "Specifies the peak luminance level for the output picture of Knee function SEI messages")
-  ("SEIKneeFunctionOutputDispLuminance",              m_kneeSEIOutputDispLuminance,                       800, "Specifies the expected display brightness for the output picture of Knee function SEI messages")
-  ("SEIKneeFunctionNumKneePointsMinus1",              m_kneeSEINumKneePointsMinus1,                         2, "Specifies the number of knee points - 1")
-  ("SEIKneeFunctionInputKneePointValue",              cfg_kneeSEIInputKneePointValue,   cfg_kneeSEIInputKneePointValue, "Array of input knee point")
-  ("SEIKneeFunctionOutputKneePointValue",             cfg_kneeSEIOutputKneePointValue, cfg_kneeSEIOutputKneePointValue, "Array of output knee point")
+
   ("SEIMasteringDisplayColourVolume",                 m_masteringDisplay.colourVolumeSEIEnabled,         false, "Control generation of mastering display colour volume SEI messages")
   ("SEIMasteringDisplayMaxLuminance",                 m_masteringDisplay.maxLuminance,                  10000u, "Specifies the mastering display maximum luminance value in units of 1/10000 candela per square metre (32-bit code value)")
   ("SEIMasteringDisplayMinLuminance",                 m_masteringDisplay.minLuminance,                      0u, "Specifies the mastering display minimum luminance value in units of 1/10000 candela per square metre (32-bit code value)")
@@ -1273,14 +1222,124 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
 #if U0033_ALTERNATIVE_TRANSFER_CHARACTERISTICS_SEI
   ("SEIPreferredTransferCharacterisics",              m_preferredTransferCharacteristics,                   -1, "Value for the preferred_transfer_characteristics field of the Alternative transfer characteristics SEI which will override the corresponding entry in the VUI. If negative, do not produce the respective SEI message")
 #endif
-  ("SEIGreenMetadataType",                            m_greenMetadataType,                                  0u, "Value for the green_metadata_type specifies the type of metadata that is present in the SEI message. If green_metadata_type is 1, then metadata enabling quality recovery after low-power encoding is present")
-  ("SEIXSDMetricType",                                m_xsdMetricType,                                      0u, "Value for the xsd_metric_type indicates the type of the objective quality metric. PSNR is the only type currently supported")
+
+  ("SEIErpEnabled",                                   m_erpSEIEnabled,                                   false, "Control generation of equirectangular projection SEI messages")
+  ("SEIErpCancelFlag",                                m_erpSEICancelFlag,                                 true, "Indicate that equirectangular projection SEI message cancels the persistence or follows")
+  ("SEIErpPersistenceFlag",                           m_erpSEIPersistenceFlag,                           false, "Specifies the persistence of the equirectangular projection SEI messages")
+  ("SEIErpGuardBandFlag",                             m_erpSEIGuardBandFlag,                             false, "Indicate the existence of guard band areas in the constituent picture")
+  ("SEIErpGuardBandType",                             m_erpSEIGuardBandType,                                0u, "Indicate the type of the guard band")
+  ("SEIErpLeftGuardBandWidth",                        m_erpSEILeftGuardBandWidth,                           0u, "Indicate the width of the guard band on the left side of the constituent picture")
+  ("SEIErpRightGuardBandWidth",                       m_erpSEIRightGuardBandWidth,                          0u, "Indicate the width of the guard band on the right side of the constituent picture")
+  ("SEISphereRotationEnabled",                        m_sphereRotationSEIEnabled,                        false, "Control generation of sphere rotation SEI messages")
+  ("SEISphereRotationCancelFlag",                     m_sphereRotationSEICancelFlag,                      true, "Indicate that sphere rotation SEI message cancels the persistence or follows")
+  ("SEISphereRotationPersistenceFlag",                m_sphereRotationSEIPersistenceFlag,                false, "Specifies the persistence of the sphere rotation SEI messages")
+  ("SEISphereRotationYaw",                            m_sphereRotationSEIYaw,                                0, "Specifies the value of the yaw rotation angle")
+  ("SEISphereRotationPitch",                          m_sphereRotationSEIPitch,                              0, "Specifies the value of the pitch rotation angle")
+  ("SEISphereRotationRoll",                           m_sphereRotationSEIRoll,                               0, "Specifies the value of the roll rotation angle")
+  ("SEIOmniViewportEnabled",                          m_omniViewportSEIEnabled,                          false, "Control generation of omni viewport SEI messages")
+  ("SEIOmniViewportId",                               m_omniViewportSEIId,                                  0u, "An identifying number that may be used to identify the purpose of the one or more recommended viewport regions")
+  ("SEIOmniViewportCancelFlag",                       m_omniViewportSEICancelFlag,                        true, "Indicate that omni viewport SEI message cancels the persistence or follows")
+  ("SEIOmniViewportPersistenceFlag",                  m_omniViewportSEIPersistenceFlag,                  false, "Specifies the persistence of the omni viewport SEI messages")
+  ("SEIOmniViewportCntMinus1",                        m_omniViewportSEICntMinus1,                           0u, "specifies the number of recommended viewport regions minus 1")
+  ("SEIOmniViewportAzimuthCentre",                    cfg_omniViewportSEIAzimuthCentre,     cfg_omniViewportSEIAzimuthCentre,     "Indicate the centre of the i-th recommended viewport region")
+  ("SEIOmniViewportElevationCentre",                  cfg_omniViewportSEIElevationCentre,   cfg_omniViewportSEIElevationCentre,   "Indicate the centre of the i-th recommended viewport region")
+  ("SEIOmniViewportTiltCentre",                       cfg_omniViewportSEITiltCentre,        cfg_omniViewportSEITiltCentre,        "Indicates the tilt angle of the i-th recommended viewport region")
+  ("SEIOmniViewportHorRange",                         cfg_omniViewportSEIHorRange,          cfg_omniViewportSEIHorRange,          "Indicates the azimuth range of the i-th recommended viewport region")
+  ("SEIOmniViewportVerRange",                         cfg_omniViewportSEIVerRange,          cfg_omniViewportSEIVerRange,          "Indicates the elevation range of the i-th recommended viewport region")
+  ("SEIRwpEnabled",                                   m_rwpSEIEnabled,                          false,                                    "Controls if region-wise packing SEI message enabled")
+  ("SEIRwpCancelFlag",                                m_rwpSEIRwpCancelFlag,                    true,                                    "Specifies the persistence of any previous region-wise packing SEI message in output order.")
+  ("SEIRwpPersistenceFlag",                           m_rwpSEIRwpPersistenceFlag,               false,                                    "Specifies the persistence of the region-wise packing SEI message for the current layer.")
+  ("SEIRwpConstituentPictureMatchingFlag",            m_rwpSEIConstituentPictureMatchingFlag,   false,                                    "Specifies the information in the SEI message apply individually to each constituent picture or to the projected picture.")
+  ("SEIRwpNumPackedRegions",                          m_rwpSEINumPackedRegions,                 0,                                        "specifies the number of packed regions when constituent picture matching flag is equal to 0.")
+  ("SEIRwpProjPictureWidth",                          m_rwpSEIProjPictureWidth,                 0,                                        "Specifies the width of the projected picture.")
+  ("SEIRwpProjPictureHeight",                         m_rwpSEIProjPictureHeight,                0,                                        "Specifies the height of the projected picture.")
+  ("SEIRwpPackedPictureWidth",                        m_rwpSEIPackedPictureWidth,               0,                                        "specifies the width of the packed picture.")
+  ("SEIRwpPackedPictureHeight",                       m_rwpSEIPackedPictureHeight,              0,                                        "Specifies the height of the packed picture.")
+  ("SEIRwpTransformType",                             cfg_rwpSEIRwpTransformType,               cfg_rwpSEIRwpTransformType,               "specifies the rotation and mirroring to be applied to the i-th packed region.")
+  ("SEIRwpGuardBandFlag",                             cfg_rwpSEIRwpGuardBandFlag,               cfg_rwpSEIRwpGuardBandFlag,               "specifies the existence of guard band in the i-th packed region.")
+  ("SEIRwpProjRegionWidth",                           cfg_rwpSEIProjRegionWidth,                cfg_rwpSEIProjRegionWidth,                "specifies the width of the i-th projected region.")
+  ("SEIRwpProjRegionHeight",                          cfg_rwpSEIProjRegionHeight,               cfg_rwpSEIProjRegionHeight,               "specifies the height of the i-th projected region.")
+  ("SEIRwpProjRegionTop",                             cfg_rwpSEIRwpSEIProjRegionTop,            cfg_rwpSEIRwpSEIProjRegionTop,            "specifies the top sample row of the i-th projected region.")
+  ("SEIRwpProjRegionLeft",                            cfg_rwpSEIProjRegionLeft,                 cfg_rwpSEIProjRegionLeft,                 "specifies the left-most sample column of the i-th projected region.")
+  ("SEIRwpPackedRegionWidth",                         cfg_rwpSEIPackedRegionWidth,              cfg_rwpSEIPackedRegionWidth,              "specifies the width of the i-th packed region.")
+  ("SEIRwpPackedRegionHeight",                        cfg_rwpSEIPackedRegionHeight,             cfg_rwpSEIPackedRegionHeight,             "specifies the height of the i-th packed region.")
+  ("SEIRwpPackedRegionTop",                           cfg_rwpSEIPackedRegionTop,                cfg_rwpSEIPackedRegionTop,                "specifies the top luma sample row of the i-th packed region.")
+  ("SEIRwpPackedRegionLeft",                          cfg_rwpSEIPackedRegionLeft,               cfg_rwpSEIPackedRegionLeft,               "specifies the left-most luma sample column of the i-th packed region.")
+  ("SEIRwpLeftGuardBandWidth",                        cfg_rwpSEIRwpLeftGuardBandWidth,          cfg_rwpSEIRwpLeftGuardBandWidth,          "specifies the width of the guard band on the left side of the i-th packed region.")
+  ("SEIRwpRightGuardBandWidth",                       cfg_rwpSEIRwpRightGuardBandWidth,         cfg_rwpSEIRwpRightGuardBandWidth,         "specifies the width of the guard band on the right side of the i-th packed region.")
+  ("SEIRwpTopGuardBandHeight",                        cfg_rwpSEIRwpTopGuardBandHeight,          cfg_rwpSEIRwpTopGuardBandHeight,          "specifies the height of the guard band above the i-th packed region.")
+  ("SEIRwpBottomGuardBandHeight",                     cfg_rwpSEIRwpBottomGuardBandHeight,       cfg_rwpSEIRwpBottomGuardBandHeight,       "specifies the height of the guard band below the i-th packed region.")
+  ("SEIRwpGuardBandNotUsedForPredFlag",               cfg_rwpSEIRwpGuardBandNotUsedForPredFlag, cfg_rwpSEIRwpGuardBandNotUsedForPredFlag, "Specifies if the guard bands is used in the inter prediction process.")
+  ("SEIRwpGuardBandType",                             cfg_rwpSEIRwpGuardBandType,               cfg_rwpSEIRwpGuardBandType,               "Specifies the type of the guard bands for the i-th packed region.")
+  ("SEIGcmpEnabled",                                  m_gcmpSEIEnabled,                         false,                                    "Control generation of generalized cubemap projection SEI messages")
+  ("SEIGcmpCancelFlag",                               m_gcmpSEICancelFlag,                      true,                                     "Indicate that generalized cubemap projection SEI message cancels the persistence or follows")
+  ("SEIGcmpPersistenceFlag",                          m_gcmpSEIPersistenceFlag,                 false,                                    "Specifies the persistence of the generalized cubemap projection SEI messages")
+  ("SEIGcmpPackingType",                              m_gcmpSEIPackingType,                     0u,                                       "Specifies the packing type")
+  ("SEIGcmpMappingFunctionType",                      m_gcmpSEIMappingFunctionType,             0u,                                       "Specifies the mapping function used to adjust the sample locations of the cubemap projection")
+  ("SEIGcmpFaceIndex",                                cfg_gcmpSEIFaceIndex,                     cfg_gcmpSEIFaceIndex,                     "Specifies the face index for the i-th face")
+  ("SEIGcmpFaceRotation",                             cfg_gcmpSEIFaceRotation,                  cfg_gcmpSEIFaceRotation,                  "Specifies the rotation to be applied to the i-th face")
+  ("SEIGcmpFunctionCoeffU",                           cfg_gcmpSEIFunctionCoeffU,                cfg_gcmpSEIFunctionCoeffU,                "Specifies the coefficient used in the cubemap mapping function of the u-axis of the i-th face")
+  ("SEIGcmpFunctionUAffectedByVFlag",                 cfg_gcmpSEIFunctionUAffectedByVFlag,      cfg_gcmpSEIFunctionUAffectedByVFlag,      "Specifies whether the cubemap mapping function of the u-axis refers to the v position of the sample location")
+  ("SEIGcmpFunctionCoeffV",                           cfg_gcmpSEIFunctionCoeffV,                cfg_gcmpSEIFunctionCoeffV,                "Specifies the coefficient used in the cubemap mapping function of the v-axis of the i-th face")
+  ("SEIGcmpFunctionVAffectedByUFlag",                 cfg_gcmpSEIFunctionVAffectedByUFlag,      cfg_gcmpSEIFunctionVAffectedByUFlag,      "Specifies whether the cubemap mapping function of the v-axis refers to the u position of the sample location")
+  ("SEIGcmpGuardBandFlag",                            m_gcmpSEIGuardBandFlag,                   false,                                    "Indicate the existence of guard band areas in the picture")
+  ("SEIGcmpGuardBandBoundaryType",                    m_gcmpSEIGuardBandBoundaryType,           false,                                    "Indicate which face boundaries contain guard bands")
+  ("SEIGcmpGuardBandSamplesMinus1",                   m_gcmpSEIGuardBandSamplesMinus1,          0u,                                       "Specifies the number of guard band samples minus1 used in the cubemap projected picture")
+  ("SEISubpicureLevelInfo",                           m_subpicureLevelInfoSEIEnabled,           false, "Control generation of Subpicture Level Information SEI messages")
+  ("SEISampleAspectRatioInfo",                        m_sampleAspectRatioInfoSEIEnabled,        false, "Control generation of Sample Aspect Ratio Information SEI messages")
+  ("SEISARICancelFlag",                               m_sariCancelFlag,                         false, "Indicates that Sample Aspect Ratio Information SEI message cancels the persistence or follows")
+  ("SEISARIPersistenceFlag",                          m_sariPersistenceFlag,                    true, "Specifies the persistence of the Sample Aspect Ratio Information SEI message")
+  ("SEISARIAspectRatioIdc",                           m_sariAspectRatioIdc,                     0, "Specifies the Sample Aspect Ratio IDC of Sample Aspect Ratio Information SEI messages")
+  ("SEISARISarWidth",                                 m_sariSarWidth,                           0, "Specifies the Sample Aspect Ratio Width of Sample Aspect Ratio Information SEI messages, if extended SAR is chosen.")
+  ("SEISARISarHeight",                                m_sariSarHeight,                          0, "Specifies the Sample Aspect Ratio Height of Sample Aspect Ratio Information SEI messages, if extended SAR is chosen.")
   ("MCTSEncConstraint",                               m_MCTSEncConstraint,                               false, "For MCTS, constrain motion vectors at tile boundaries")
 #if ENABLE_TRACING
   ("TraceChannelsList",                               bTracingChannelsList,                              false, "List all available tracing channels")
   ("TraceRule",                                       sTracingRule,                               string( "" ), "Tracing rule (ex: \"D_CABAC:poc==8\" or \"D_REC_CB_LUMA:poc==8\")")
   ("TraceFile",                                       sTracingFile,                               string( "" ), "Tracing file")
 #endif
+// film grain characteristics SEI
+  ("SEIFGCEnabled",                                   m_fgcSEIEnabled,                                   false, "Control generation of the film grain characteristics SEI message")
+  ("SEIFGCCancelFlag",                                m_fgcSEICancelFlag,                                 true, "Specifies the persistence of any previous film grain characteristics SEI message in output order.")
+  ("SEIFGCPersistenceFlag",                           m_fgcSEIPersistenceFlag,                           false, "Specifies the persistence of the film grain characteristics SEI message for the current layer.")
+  ("SEIFGCModelID",                                   m_fgcSEIModelID,                                      0u, "Specifies the film grain simulation model. 0: frequency filtering; 1: auto-regression.")
+  ("SEIFGCSepColourDescPresentFlag",                  m_fgcSEISepColourDescPresentFlag,                  false, "Specifies the presence of a distinct colour space description for the film grain characteristics specified in the SEI message.")
+  ("SEIFGCBlendingModeID",                            m_fgcSEIBlendingModeID,                               0u, "Specifies the blending mode used to blend the simulated film grain with the decoded images. 0: additive; 1: multiplicative.")
+  ("SEIFGCLog2ScaleFactor",                           m_fgcSEILog2ScaleFactor,                              0u, "Specifies a scale factor used in the film grain characterization equations.")
+  ("SEIFGCCompModelPresentComp0",                     m_fgcSEICompModelPresent[0],                       false, "Specifies the presence of film grain modelling on colour component 0.")
+  ("SEIFGCCompModelPresentComp1",                     m_fgcSEICompModelPresent[1],                       false, "Specifies the presence of film grain modelling on colour component 1.")
+  ("SEIFGCCompModelPresentComp2",                     m_fgcSEICompModelPresent[2],                       false, "Specifies the presence of film grain modelling on colour component 2.")
+
+// content light level SEI
+  ("SEICLLEnabled",                                   m_cllSEIEnabled,                                   false, "Control generation of the content light level SEI message")
+  ("SEICLLMaxContentLightLevel",                      m_cllSEIMaxContentLevel,                              0u, "When not equal to 0, specifies an upper bound on the maximum light level among all individual samples in a 4:4:4 representation "
+                                                                                                                "of red, green, and blue colour primary intensities in the linear light domain for the pictures of the CLVS, "
+                                                                                                                "in units of candelas per square metre.When equal to 0, no such upper bound is indicated.")
+  ("SEICLLMaxPicAvgLightLevel",                       m_cllSEIMaxPicAvgLevel,                               0u, "When not equal to 0, specifies an upper bound on the maximum average light level among the samples in a 4:4:4 representation "
+                                                                                                                "of red, green, and blue colour primary intensities in the linear light domain for any individual picture of the CLVS, "
+                                                                                                                "in units of candelas per square metre.When equal to 0, no such upper bound is indicated.")
+// ambient viewing environment SEI
+  ("SEIAVEEnabled",                                   m_aveSEIEnabled,                                   false, "Control generation of the ambient viewing environment SEI message")
+  ("SEIAVEAmbientIlluminance",                        m_aveSEIAmbientIlluminance,                      100000u, "Specifies the environmental illluminance of the ambient viewing environment in units of 1/10000 lux for the ambient viewing environment SEI message")
+  ("SEIAVEAmbientLightX",                             m_aveSEIAmbientLightX,                            15635u, "Specifies the normalized x chromaticity coordinate of the environmental ambient light in the nominal viewing enviornment according to the CIE 1931 definition in units of 1/50000 lux for the ambient viewing enviornment SEI message")
+  ("SEIAVEAmbientLightY",                             m_aveSEIAmbientLightY,                            16450u, "Specifies the normalized y chromaticity coordinate of the environmental ambient light in the nominal viewing enviornment according to the CIE 1931 definition in units of 1/50000 lux for the ambient viewing enviornment SEI message")
+// content colour volume SEI
+  ("SEICCVEnabled",                                   m_ccvSEIEnabled,                                   false, "Control generation of the Content Colour Volume SEI message")
+  ("SEICCVCancelFlag",                                m_ccvSEICancelFlag,                                 true, "Specifies the persistence of any previous content colour volume SEI message in output order.")
+  ("SEICCVPersistenceFlag",                           m_ccvSEIPersistenceFlag,                           false, "Specifies the persistence of the content colour volume SEI message for the current layer.")
+  ("SEICCVPrimariesPresent",                          m_ccvSEIPrimariesPresentFlag,                       true, "Specifies whether the CCV primaries are present in the content colour volume SEI message.")
+  ("m_ccvSEIPrimariesX0",                             m_ccvSEIPrimariesX[0],                             0.300, "Specifies the x coordinate of the first (green) primary for the content colour volume SEI message")
+  ("m_ccvSEIPrimariesY0",                             m_ccvSEIPrimariesY[0],                             0.600, "Specifies the y coordinate of the first (green) primary for the content colour volume SEI message")
+  ("m_ccvSEIPrimariesX1",                             m_ccvSEIPrimariesX[1],                             0.150, "Specifies the x coordinate of the second (blue) primary for the content colour volume SEI message")
+  ("m_ccvSEIPrimariesY1",                             m_ccvSEIPrimariesY[1],                             0.060, "Specifies the y coordinate of the second (blue) primary for the content colour volume SEI message")
+  ("m_ccvSEIPrimariesX2",                             m_ccvSEIPrimariesX[2],                             0.640, "Specifies the x coordinate of the third (red) primary for the content colour volume SEI message")
+  ("m_ccvSEIPrimariesY2",                             m_ccvSEIPrimariesY[2],                             0.330, "Specifies the y coordinate of the third (red) primary for the content colour volume SEI message")
+  ("SEICCVMinLuminanceValuePresent",                  m_ccvSEIMinLuminanceValuePresentFlag,               true, "Specifies whether the CCV min luminance value is present in the content colour volume SEI message")
+  ("SEICCVMinLuminanceValue",                         m_ccvSEIMinLuminanceValue,                           0.0, "specifies the CCV min luminance value  in the content colour volume SEI message")
+  ("SEICCVMaxLuminanceValuePresent",                  m_ccvSEIMaxLuminanceValuePresentFlag,               true, "Specifies whether the CCV max luminance value is present in the content colour volume SEI message")
+  ("SEICCVMaxLuminanceValue",                         m_ccvSEIMaxLuminanceValue,                           0.1, "specifies the CCV max luminance value  in the content colour volume SEI message")
+  ("SEICCVAvgLuminanceValuePresent",                  m_ccvSEIAvgLuminanceValuePresentFlag,               true, "Specifies whether the CCV avg luminance value is present in the content colour volume SEI message")
+  ("SEICCVAvgLuminanceValue",                         m_ccvSEIAvgLuminanceValue,                          0.01, "specifies the CCV avg luminance value  in the content colour volume SEI message")
 
   ("DebugBitstream",                                  m_decodeBitstreams[0],             string( "" ), "Assume the frames up to POC DebugPOC will be the same as in this bitstream. Load those frames from the bitstream instead of encoding them." )
   ("DebugPOC",                                        m_switchPOC,                                 -1, "If DebugBitstream is present, load frames up to this POC from this bitstream. Starting with DebugPOC, return to normal encoding." )
@@ -1297,14 +1356,35 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   ("NumWppThreads",                                   m_numWppThreads,                              1, "Number of threads used to run WPP-style parallelization")
   ("NumWppExtraLines",                                m_numWppExtraLines,                           0, "Number of additional wpp lines to switch when threads are blocked")
   ("DebugCTU",                                        m_debugCTU,                                  -1, "If DebugBitstream is present, load frames up to this POC from this bitstream. Starting with DebugPOC-frame at CTUline containin debug CTU.")
-#if ENABLE_WPP_PARALLELISM
-  ("EnsureWppBitEqual",                               m_ensureWppBitEqual,                       true, "Ensure the results are equal to results with WPP-style parallelism, even if WPP is off")
-#else
   ("EnsureWppBitEqual",                               m_ensureWppBitEqual,                      false, "Ensure the results are equal to results with WPP-style parallelism, even if WPP is off")
-#endif
   ( "ALF",                                             m_alf,                                    true, "Adpative Loop Filter\n" )
+  ( "ScalingRatioHor",                                m_scalingRatioHor,                          1.0, "Scaling ratio in hor direction" )
+  ( "ScalingRatioVer",                                m_scalingRatioVer,                          1.0, "Scaling ratio in ver direction" )
+  ( "FractionNumFrames",                              m_fractionOfFrames,                         1.0, "Encode a fraction of the specified in FramesToBeEncoded frames" )
+  ( "SwitchPocPeriod",                                m_switchPocPeriod,                            0, "Switch POC period for RPR" )
+  ( "UpscaledOutput",                                 m_upscaledOutput,                             0, "Output upscaled (2), decoded but in full resolution buffer (1) or decoded cropped (0, default) picture for RPR" )
+  ( "MaxLayers",                                      m_maxLayers,                                  1, "Max number of layers" )
+
+  ;
+  opts.addOptions()
+  ( "MaxSublayers",                                   m_maxSublayers,                               1, "Max number of Sublayers")
+  ( "AllLayersSameNumSublayersFlag",                  m_allLayersSameNumSublayersFlag,           true, "All layers same num sublayersflag")
+  ( "AllIndependentLayersFlag",                       m_allIndependentLayersFlag,                true, "All layers are independent layer")
+  ( "LayerId%d",                                      m_layerId,                    0, MAX_VPS_LAYERS, "Layer ID of i-th layer")
+  ( "NumRefLayers%d",                                 m_numRefLayers,               0, MAX_VPS_LAYERS, "Number of direct reference layer index of i-th layer")
+  ( "RefLayerIdx%d",                                  m_refLayerIdxStr,    string(""), MAX_VPS_LAYERS, "Reference layer index(es)")
+  ( "EachLayerIsAnOlsFlag",                           m_eachLayerIsAnOlsFlag,                    true, "Each layer is an OLS layer flag")
+  ( "OlsModeIdc",                                     m_olsModeIdc,                                 0, "Output layer set mode")
+  ( "NumOutputLayerSets",                             m_numOutputLayerSets,                         1, "Number of output layer sets")
+  ( "OlsOutputLayer%d",                               m_olsOutputLayerStr, string(""), MAX_VPS_LAYERS, "Output layer index of i-th OLS")
     ;
 
+  opts.addOptions()
+    ("TemporalFilter",                                m_gopBasedTemporalFilterEnabled,          false,            "Enable GOP based temporal filter. Disabled per default")
+    ("TemporalFilterFutureReference",                 m_gopBasedTemporalFilterFutureReference,   true,            "Enable referencing of future frames in the GOP based temporal filter. This is typically disabled for Low Delay configurations.")
+    ("TemporalFilterStrengthFrame*",                  m_gopBasedTemporalFilterStrengths, std::map<int, double>(), "Strength for every * frame in GOP based temporal filter, where * is an integer."
+                                                                                                                  " E.g. --TemporalFilterStrengthFrame8 0.95 will enable GOP based temporal filter at every 8th frame with strength 0.95");
+
 #if EXTENSION_360_VIDEO
   TExt360AppEncCfg::TExt360AppEncCfgContext ext360CfgContext;
   m_ext360.addOptions(opts, ext360CfgContext);
@@ -1316,19 +1396,96 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
     cOSS<<"Frame"<<i;
     opts.addOptions()(cOSS.str(), m_GOPList[i-1], GOPEntry());
   }
+
   po::setDefaults(opts);
   po::ErrorReporter err;
   const list<const char*>& argv_unhandled = po::scanArgv(opts, argc, (const char**) argv, err);
 
+  m_rprEnabled = m_scalingRatioHor != 1.0 || m_scalingRatioVer != 1.0;
+  if( m_fractionOfFrames != 1.0 )
+  {
+    m_framesToBeEncoded = int( m_framesToBeEncoded * m_fractionOfFrames );
+  }
+
+  if( m_rprEnabled && !m_switchPocPeriod )
+  {
+    m_switchPocPeriod = m_iFrameRate / 2 / m_iGOPSize * m_iGOPSize;
+  }
+  m_bpDeltasGOPStructure = false;
+  if(m_iGOPSize == 16)
+  {
+    if ((m_GOPList[0].m_POC == 16 && m_GOPList[0].m_temporalId == 0 )
+        && (m_GOPList[1].m_POC == 8 && m_GOPList[1].m_temporalId == 1 )
+        && (m_GOPList[2].m_POC == 4 && m_GOPList[2].m_temporalId == 2 )
+        && (m_GOPList[3].m_POC == 2 && m_GOPList[3].m_temporalId == 3 )
+        && (m_GOPList[4].m_POC == 1 && m_GOPList[4].m_temporalId == 4 )
+        && (m_GOPList[5].m_POC == 3 && m_GOPList[5].m_temporalId == 4 )
+        && (m_GOPList[6].m_POC == 6 && m_GOPList[6].m_temporalId == 3 )
+        && (m_GOPList[7].m_POC == 5 && m_GOPList[7].m_temporalId == 4 )
+        && (m_GOPList[8].m_POC == 7 && m_GOPList[8].m_temporalId == 4 )
+        && (m_GOPList[9].m_POC == 12 && m_GOPList[9].m_temporalId == 2 )
+        && (m_GOPList[10].m_POC == 10 && m_GOPList[10].m_temporalId == 3 )
+        && (m_GOPList[11].m_POC == 9 && m_GOPList[11].m_temporalId == 4 )
+        && (m_GOPList[12].m_POC == 11 && m_GOPList[12].m_temporalId == 4 )
+        && (m_GOPList[13].m_POC == 14 && m_GOPList[13].m_temporalId == 3 )
+        && (m_GOPList[14].m_POC == 13 && m_GOPList[14].m_temporalId == 4 )
+        && (m_GOPList[15].m_POC == 15 && m_GOPList[15].m_temporalId == 4 ))
+    {
+      m_bpDeltasGOPStructure = true;
+    }
+  }
+  else if(m_iGOPSize == 8)
+  {
+    if ((m_GOPList[0].m_POC == 8 && m_GOPList[0].m_temporalId == 0 )
+        && (m_GOPList[1].m_POC == 4 && m_GOPList[1].m_temporalId == 1 )
+        && (m_GOPList[2].m_POC == 2 && m_GOPList[2].m_temporalId == 2 )
+        && (m_GOPList[3].m_POC == 1 && m_GOPList[3].m_temporalId == 3 )
+        && (m_GOPList[4].m_POC == 3 && m_GOPList[4].m_temporalId == 3 )
+        && (m_GOPList[5].m_POC == 6 && m_GOPList[5].m_temporalId == 2 )
+        && (m_GOPList[6].m_POC == 5 && m_GOPList[6].m_temporalId == 3 )
+        && (m_GOPList[7].m_POC == 7 && m_GOPList[7].m_temporalId == 3 ))
+    {
+      m_bpDeltasGOPStructure = true;
+    }
+  }
+  else
+  {
+    m_bpDeltasGOPStructure = false;
+  }
+  for (int i = 0; i < MAX_GOP + 1 && m_GOPList[i].m_POC != -1; i++) // test the index bound before reading m_GOPList[i]
+  {
+    m_RPLList0[i].m_POC = m_RPLList1[i].m_POC = m_GOPList[i].m_POC;
+    m_RPLList0[i].m_temporalId = m_RPLList1[i].m_temporalId = m_GOPList[i].m_temporalId;
+    m_RPLList0[i].m_refPic = m_RPLList1[i].m_refPic = m_GOPList[i].m_refPic;
+    m_RPLList0[i].m_sliceType = m_RPLList1[i].m_sliceType = m_GOPList[i].m_sliceType;
+    m_RPLList0[i].m_isEncoded = m_RPLList1[i].m_isEncoded = m_GOPList[i].m_isEncoded;
+    // list-specific fields: list 0 takes the GOP entry's *0 members, list 1 takes the *1 members
+    m_RPLList0[i].m_numRefPicsActive = m_GOPList[i].m_numRefPicsActive0;
+    m_RPLList1[i].m_numRefPicsActive = m_GOPList[i].m_numRefPicsActive1;
+    m_RPLList0[i].m_numRefPics = m_GOPList[i].m_numRefPics0;
+    m_RPLList1[i].m_numRefPics = m_GOPList[i].m_numRefPics1;
+    m_RPLList0[i].m_ltrp_in_slice_header_flag = m_GOPList[i].m_ltrp_in_slice_header_flag;
+    m_RPLList1[i].m_ltrp_in_slice_header_flag = m_GOPList[i].m_ltrp_in_slice_header_flag;
+    for (int j = 0; j < m_GOPList[i].m_numRefPics0; j++)
+      m_RPLList0[i].m_deltaRefPics[j] = m_GOPList[i].m_deltaRefPics0[j];
+    for (int j = 0; j < m_GOPList[i].m_numRefPics1; j++)
+      m_RPLList1[i].m_deltaRefPics[j] = m_GOPList[i].m_deltaRefPics1[j];
+  }
+
   if (m_compositeRefEnabled)
   {
     for (int i = 0; i < m_iGOPSize; i++)
     {
       m_GOPList[i].m_POC *= 2;
-      m_GOPList[i].m_deltaRPS *= 2;
-      for (int j = 0; j < m_GOPList[i].m_numRefPics; j++)
+      m_RPLList0[i].m_POC *= 2;
+      m_RPLList1[i].m_POC *= 2;
+      for (int j = 0; j < m_RPLList0[i].m_numRefPics; j++)
       {
-        m_GOPList[i].m_referencePics[j] *= 2;
+        m_RPLList0[i].m_deltaRefPics[j] *= 2;
+      }
+      for (int j = 0; j < m_RPLList1[i].m_numRefPics; j++)
+      {
+        m_RPLList1[i].m_deltaRefPics[j] *= 2;
       }
     }
   }
@@ -1371,6 +1528,12 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
     inputPathPrefix += "/";
   }
   m_inputFileName   = inputPathPrefix + m_inputFileName;
+
+  if( m_temporalSubsampleRatio < 1)
+  {
+    EXIT ( "Error: TemporalSubsampleRatio must be greater than 0" );
+  }
+
   m_framesToBeEncoded = ( m_framesToBeEncoded + m_temporalSubsampleRatio - 1 ) / m_temporalSubsampleRatio;
   m_adIntraLambdaModifier = cfg_adIntraLambdaModifier.values;
   if(m_isField)
@@ -1382,57 +1545,89 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
     //number of fields to encode
     m_framesToBeEncoded *= 2;
   }
-
-#if HEVC_TILES_WPP
-  if( !m_tileUniformSpacingFlag && m_numTileColumnsMinus1 > 0 )
+  if ( m_subPicPresentFlag )
   {
-    if (cfg_ColumnWidth.values.size() > m_numTileColumnsMinus1)
-    {
-      EXIT( "Error: The number of columns whose width are defined is larger than the allowed number of columns." );
-    }
-    else if (cfg_ColumnWidth.values.size() < m_numTileColumnsMinus1)
+    CHECK( m_numSubPics > 255 || m_numSubPics < 1, "Number of subpicture must be within 1 to 255" );
+    m_subPicCtuTopLeftX                 = cfg_subPicCtuTopLeftX.values;
+    m_subPicCtuTopLeftY                 = cfg_subPicCtuTopLeftY.values;
+    m_subPicWidth                       = cfg_subPicWidth.values;
+    m_subPicHeight                      = cfg_subPicHeight.values;
+    m_subPicTreatedAsPicFlag            = cfg_subPicTreatedAsPicFlag.values;
+    m_loopFilterAcrossSubpicEnabledFlag = cfg_loopFilterAcrossSubpicEnabledFlag.values;
+    m_subPicId                          = cfg_subPicId.values;
+    for(int i = 0; i < m_numSubPics; i++)
     {
-      EXIT( "Error: The width of some columns is not defined." );
+      CHECK(m_subPicCtuTopLeftX[i] + m_subPicWidth[i] > (m_iSourceWidth + m_uiCTUSize - 1) / m_uiCTUSize, "subpicture must not exceed picture boundary");
+      CHECK(m_subPicCtuTopLeftY[i] + m_subPicHeight[i] > (m_iSourceHeight + m_uiCTUSize - 1) / m_uiCTUSize, "subpicture must not exceed picture boundary");
     }
-    else
+    if (m_subPicIdPresentFlag) 
     {
-      m_tileColumnWidth.resize(m_numTileColumnsMinus1);
-      for(uint32_t i=0; i<cfg_ColumnWidth.values.size(); i++)
+      if (m_subPicIdSignallingPresentFlag) 
       {
-        m_tileColumnWidth[i]=cfg_ColumnWidth.values[i];
+        CHECK( m_subPicIdLen > 16, "subpic ID length must not exceed 16 bits" );
       }
     }
   }
-  else
+  if( m_picPartitionFlag ) 
   {
-    m_tileColumnWidth.clear();
-  }
+    // store tile column widths
+    m_tileColumnWidth.resize(cfgTileColumnWidth.values.size());
+    for(uint32_t i=0; i<cfgTileColumnWidth.values.size(); i++)
+    {
+      m_tileColumnWidth[i]=cfgTileColumnWidth.values[i];
+    }
 
-  if( !m_tileUniformSpacingFlag && m_numTileRowsMinus1 > 0 )
-  {
-    if (cfg_RowHeight.values.size() > m_numTileRowsMinus1)
+    // store tile row heights
+    m_tileRowHeight.resize(cfgTileRowHeight.values.size());
+    for(uint32_t i=0; i<cfgTileRowHeight.values.size(); i++)
     {
-      EXIT( "Error: The number of rows whose height are defined is larger than the allowed number of rows." );
+      m_tileRowHeight[i]=cfgTileRowHeight.values[i];
     }
-    else if (cfg_RowHeight.values.size() < m_numTileRowsMinus1)
+
+    // store rectangular slice positions
+    if( !m_rasterSliceFlag ) 
     {
-      EXIT( "Error: The height of some rows is not defined." );
+      m_rectSlicePos.resize(cfgRectSlicePos.values.size());
+      for(uint32_t i=0; i<cfgRectSlicePos.values.size(); i++)
+      {
+        m_rectSlicePos[i]=cfgRectSlicePos.values[i];
+      }
     }
-    else
+
+    // store raster-scan slice sizes
+    else 
     {
-      m_tileRowHeight.resize(m_numTileRowsMinus1);
-      for(uint32_t i=0; i<cfg_RowHeight.values.size(); i++)
+      m_rasterSliceSize.resize(cfgRasterSliceSize.values.size());
+      for(uint32_t i=0; i<cfgRasterSliceSize.values.size(); i++)
       {
-        m_tileRowHeight[i]=cfg_RowHeight.values[i];
+        m_rasterSliceSize[i]=cfgRasterSliceSize.values[i];
       }
     }
   }
-  else
+  else 
   {
+    m_tileColumnWidth.clear();
     m_tileRowHeight.clear();
+    m_rectSlicePos.clear();
+    m_rasterSliceSize.clear();
+    m_rectSliceFixedWidth = 0;
+    m_rectSliceFixedHeight = 0;
   }
-#endif
 
+  m_numSubProfile = (uint8_t) cfg_SubProfile.values.size();
+  m_subProfile.resize(m_numSubProfile);
+  for (uint8_t i = 0; i < m_numSubProfile; ++i)
+  {
+    m_subProfile[i] = cfg_SubProfile.values[i];
+  }
+  if (m_subPicPartitionFlag)
+  {
+    m_singleSlicePerSubPicFlag = false;
+  }
+  else
+  {
+    m_singleSlicePerSubPicFlag = true;
+  }
   /* rules for input, output and internal bitdepths as per help text */
   if (m_MSBExtendedBitDepth[CHANNEL_TYPE_LUMA  ] == 0)
   {
@@ -1446,10 +1641,7 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   {
     m_internalBitDepth   [CHANNEL_TYPE_LUMA  ] = m_MSBExtendedBitDepth[CHANNEL_TYPE_LUMA  ];
   }
-  if (m_internalBitDepth   [CHANNEL_TYPE_CHROMA] == 0)
-  {
     m_internalBitDepth   [CHANNEL_TYPE_CHROMA] = m_internalBitDepth   [CHANNEL_TYPE_LUMA  ];
-  }
   if (m_inputBitDepth      [CHANNEL_TYPE_CHROMA] == 0)
   {
     m_inputBitDepth      [CHANNEL_TYPE_CHROMA] = m_inputBitDepth      [CHANNEL_TYPE_LUMA  ];
@@ -1478,22 +1670,11 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   CHECK( tmpMotionEstimationSearchMethod < 0 || tmpMotionEstimationSearchMethod >= MESEARCH_NUMBER_OF_METHODS, "Error in cfg" );
   m_motionEstimationSearchMethod=MESearchMethod(tmpMotionEstimationSearchMethod);
 
-  if (extendedProfile >= 1000 && extendedProfile <= 12316)
+  if (extendedProfile == ExtendedProfileName::AUTO)
   {
-    m_profile = Profile::MAINREXT;
-    if (m_bitDepthConstraint != 0 || tmpConstraintChromaFormat != 0)
-    {
-      EXIT( "Error: The bit depth and chroma format constraints are not used when an explicit RExt profile is specified");
-    }
-    m_bitDepthConstraint           = (extendedProfile%100);
-    m_intraConstraintFlag          = ((extendedProfile%10000)>=2000);
-    m_onePictureOnlyConstraintFlag = (extendedProfile >= 10000);
-    switch ((extendedProfile/100)%10)
+    if (xAutoDetermineProfile())
     {
-      case 0:  tmpConstraintChromaFormat=400; break;
-      case 1:  tmpConstraintChromaFormat=420; break;
-      case 2:  tmpConstraintChromaFormat=422; break;
-      default: tmpConstraintChromaFormat=444; break;
+      EXIT( "Unable to determine profile from configured settings");
     }
   }
   else
@@ -1501,68 +1682,24 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
     m_profile = Profile::Name(extendedProfile);
   }
 
-  if (m_profile == Profile::HIGHTHROUGHPUTREXT )
   {
+    m_chromaFormatConstraint       = (tmpConstraintChromaFormat == 0) ? m_chromaFormatIDC : numberToChromaFormat(tmpConstraintChromaFormat);
     if (m_bitDepthConstraint == 0)
     {
-      m_bitDepthConstraint = 16;
-    }
-    m_chromaFormatConstraint = (tmpConstraintChromaFormat == 0) ? CHROMA_444 : numberToChromaFormat(tmpConstraintChromaFormat);
-  }
-  else if (m_profile == Profile::MAINREXT)
-  {
-    if (m_bitDepthConstraint == 0 && tmpConstraintChromaFormat == 0)
-    {
-      // produce a valid combination, if possible.
-      const bool bUsingGeneralRExtTools  = m_transformSkipRotationEnabledFlag        ||
-                                           m_transformSkipContextEnabledFlag         ||
-                                           m_rdpcmEnabledFlag[RDPCM_SIGNAL_IMPLICIT] ||
-                                           m_rdpcmEnabledFlag[RDPCM_SIGNAL_EXPLICIT] ||
-                                           !m_enableIntraReferenceSmoothing          ||
-                                           m_persistentRiceAdaptationEnabledFlag     ||
-                                           m_log2MaxTransformSkipBlockSize!=2;
-      const bool bUsingChromaQPAdjustment= m_cuChromaQpOffsetSubdiv >= 0;
-      const bool bUsingExtendedPrecision = m_extendedPrecisionProcessingFlag;
-      if (m_onePictureOnlyConstraintFlag)
+      if (m_profile == Profile::MAIN_10 || m_profile == Profile::MAIN_444_10)
       {
-        m_chromaFormatConstraint = CHROMA_444;
-        if (m_intraConstraintFlag != true)
-        {
-          EXIT( "Error: Intra constraint flag must be true when one_picture_only_constraint_flag is true");
-        }
-        const int maxBitDepth = m_chromaFormatIDC==CHROMA_400 ? m_internalBitDepth[CHANNEL_TYPE_LUMA] : std::max(m_internalBitDepth[CHANNEL_TYPE_LUMA], m_internalBitDepth[CHANNEL_TYPE_CHROMA]);
-        m_bitDepthConstraint = maxBitDepth>8 ? 16:8;
+        m_bitDepthConstraint = 10;
       }
-      else
+      else // m_profile == Profile::NONE
       {
-        m_chromaFormatConstraint = NUM_CHROMA_FORMAT;
-        automaticallySelectRExtProfile(bUsingGeneralRExtTools,
-                                       bUsingChromaQPAdjustment,
-                                       bUsingExtendedPrecision,
-                                       m_intraConstraintFlag,
-                                       m_bitDepthConstraint,
-                                       m_chromaFormatConstraint,
-                                       m_chromaFormatIDC==CHROMA_400 ? m_internalBitDepth[CHANNEL_TYPE_LUMA] : std::max(m_internalBitDepth[CHANNEL_TYPE_LUMA], m_internalBitDepth[CHANNEL_TYPE_CHROMA]),
-                                       m_chromaFormatIDC);
+        m_bitDepthConstraint = 8+15; // max value - unconstrained.
       }
     }
-    else if (m_bitDepthConstraint == 0 || tmpConstraintChromaFormat == 0)
-    {
-      EXIT( "Error: The bit depth and chroma format constraints must either both be specified or both be configured automatically");
-    }
-    else
-    {
-      m_chromaFormatConstraint = numberToChromaFormat(tmpConstraintChromaFormat);
-    }
-  }
-  else
-  {
-    m_chromaFormatConstraint = (tmpConstraintChromaFormat == 0) ? m_chromaFormatIDC : numberToChromaFormat(tmpConstraintChromaFormat);
-    m_bitDepthConstraint     = ( ( m_profile == Profile::MAIN10 || m_profile == Profile::NEXT ) ? 10 : 8 );
   }
 
 
   m_inputColourSpaceConvert = stringToInputColourSpaceConvert(inputColourSpaceConvert, true);
+  m_rgbFormat = (m_inputColourSpaceConvert == IPCOLOURSPACE_RGBtoGBR && m_chromaFormatIDC == CHROMA_444) ? true : false;
 
   switch (m_conformanceWindowMode)
   {
@@ -1627,19 +1764,6 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
     }
   }
 
-  if (tmpSliceMode<0 || tmpSliceMode>=int(NUMBER_OF_SLICE_CONSTRAINT_MODES))
-  {
-    EXIT( "Error: bad slice mode");
-  }
-  m_sliceMode = SliceConstraint(tmpSliceMode);
-
-#if HEVC_DEPENDENT_SLICES
-  if (tmpSliceSegmentMode<0 || tmpSliceSegmentMode>=int(NUMBER_OF_SLICE_CONSTRAINT_MODES))
-  {
-    EXIT( "Error: bad slice segment mode");
-  }
-  m_sliceSegmentMode = SliceConstraint(tmpSliceSegmentMode);
-#endif
 
   if (tmpDecodedPictureHashSEIMappedType<0 || tmpDecodedPictureHashSEIMappedType>=int(NUMBER_OF_HASHTYPES))
   {
@@ -1726,12 +1850,70 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   }
 #endif
 
+  CHECK(cfg_qpInValCb.values.size() != cfg_qpOutValCb.values.size(), "Chroma QP table for Cb is incomplete.");
+  CHECK(cfg_qpInValCr.values.size() != cfg_qpOutValCr.values.size(), "Chroma QP table for Cr is incomplete.");
+  CHECK(cfg_qpInValCbCr.values.size() != cfg_qpOutValCbCr.values.size(), "Chroma QP table for CbCr is incomplete.");
+  if (m_useIdentityTableForNon420Chroma && m_chromaFormatIDC != CHROMA_420)
+  {
+    m_chromaQpMappingTableParams.m_sameCQPTableForAllChromaFlag = true;
+    cfg_qpInValCb.values = { 0 };
+    cfg_qpInValCr.values = { 0 };
+    cfg_qpInValCbCr.values = { 0 };
+    cfg_qpOutValCb.values = { 0 };
+    cfg_qpOutValCr.values = { 0 };
+    cfg_qpOutValCbCr.values = { 0 };
+  }
+  int qpBdOffsetC = 6 * (m_internalBitDepth[CHANNEL_TYPE_CHROMA] - 8);
+  m_chromaQpMappingTableParams.m_deltaQpInValMinus1[0].resize(cfg_qpInValCb.values.size());
+  m_chromaQpMappingTableParams.m_deltaQpOutVal[0].resize(cfg_qpOutValCb.values.size());
+  m_chromaQpMappingTableParams.m_numPtsInCQPTableMinus1[0] = (cfg_qpOutValCb.values.size() > 1) ? (int)cfg_qpOutValCb.values.size() - 2 : 0;
+  m_chromaQpMappingTableParams.m_qpTableStartMinus26[0] = (cfg_qpOutValCb.values.size() > 1) ? -26 + cfg_qpInValCb.values[0] : 0;
+  CHECK(m_chromaQpMappingTableParams.m_qpTableStartMinus26[0] < -26 - qpBdOffsetC || m_chromaQpMappingTableParams.m_qpTableStartMinus26[0] > 36, "qpTableStartMinus26[0] is out of valid range of -26 -qpBdOffsetC to 36, inclusive.")
+  CHECK(cfg_qpInValCb.values[0] != cfg_qpOutValCb.values[0], "First qpInValCb value should be equal to first qpOutValCb value");
+  for (int i = 0; i < cfg_qpInValCb.values.size() - 1; i++)
+  {
+    CHECK(cfg_qpInValCb.values[i] < -qpBdOffsetC || cfg_qpInValCb.values[i] > MAX_QP, "Some entries cfg_qpInValCb are out of valid range of -qpBdOffsetC to 63, inclusive.");
+    CHECK(cfg_qpOutValCb.values[i] < -qpBdOffsetC || cfg_qpOutValCb.values[i] > MAX_QP, "Some entries cfg_qpOutValCb are out of valid range of -qpBdOffsetC to 63, inclusive.");
+    m_chromaQpMappingTableParams.m_deltaQpInValMinus1[0][i] = cfg_qpInValCb.values[i + 1] - cfg_qpInValCb.values[i] - 1;
+    m_chromaQpMappingTableParams.m_deltaQpOutVal[0][i] = cfg_qpOutValCb.values[i + 1] - cfg_qpOutValCb.values[i];
+  }
+  if (!m_chromaQpMappingTableParams.m_sameCQPTableForAllChromaFlag)
+  {
+    m_chromaQpMappingTableParams.m_deltaQpInValMinus1[1].resize(cfg_qpInValCr.values.size());
+    m_chromaQpMappingTableParams.m_deltaQpOutVal[1].resize(cfg_qpOutValCr.values.size());
+    m_chromaQpMappingTableParams.m_numPtsInCQPTableMinus1[1] = (cfg_qpOutValCr.values.size() > 1) ? (int)cfg_qpOutValCr.values.size() - 2 : 0;
+    m_chromaQpMappingTableParams.m_qpTableStartMinus26[1] = (cfg_qpOutValCr.values.size() > 1) ? -26 + cfg_qpInValCr.values[0] : 0;
+    CHECK(m_chromaQpMappingTableParams.m_qpTableStartMinus26[1] < -26 - qpBdOffsetC || m_chromaQpMappingTableParams.m_qpTableStartMinus26[1] > 36, "qpTableStartMinus26[1] is out of valid range of -26 -qpBdOffsetC to 36, inclusive.")
+    CHECK(cfg_qpInValCr.values[0] != cfg_qpOutValCr.values[0], "First qpInValCr value should be equal to first qpOutValCr value");
+    for (int i = 0; i < cfg_qpInValCr.values.size() - 1; i++)
+    {
+      CHECK(cfg_qpInValCr.values[i] < -qpBdOffsetC || cfg_qpInValCr.values[i] > MAX_QP, "Some entries cfg_qpInValCr are out of valid range of -qpBdOffsetC to 63, inclusive.");
+      CHECK(cfg_qpOutValCr.values[i] < -qpBdOffsetC || cfg_qpOutValCr.values[i] > MAX_QP, "Some entries cfg_qpOutValCr are out of valid range of -qpBdOffsetC to 63, inclusive.");
+      m_chromaQpMappingTableParams.m_deltaQpInValMinus1[1][i] = cfg_qpInValCr.values[i + 1] - cfg_qpInValCr.values[i] - 1;
+      m_chromaQpMappingTableParams.m_deltaQpOutVal[1][i] = cfg_qpOutValCr.values[i + 1] - cfg_qpOutValCr.values[i];
+    }
+    m_chromaQpMappingTableParams.m_deltaQpInValMinus1[2].resize(cfg_qpInValCbCr.values.size());
+    m_chromaQpMappingTableParams.m_deltaQpOutVal[2].resize(cfg_qpOutValCbCr.values.size());
+    m_chromaQpMappingTableParams.m_numPtsInCQPTableMinus1[2] = (cfg_qpOutValCbCr.values.size() > 1) ? (int)cfg_qpOutValCbCr.values.size() - 2 : 0;
+    m_chromaQpMappingTableParams.m_qpTableStartMinus26[2] = (cfg_qpOutValCbCr.values.size() > 1) ? -26 + cfg_qpInValCbCr.values[0] : 0;
+    CHECK(m_chromaQpMappingTableParams.m_qpTableStartMinus26[2] < -26 - qpBdOffsetC || m_chromaQpMappingTableParams.m_qpTableStartMinus26[2] > 36, "qpTableStartMinus26[2] is out of valid range of -26 -qpBdOffsetC to 36, inclusive.")
+    CHECK(cfg_qpInValCbCr.values[0] != cfg_qpOutValCbCr.values[0], "First qpInValCbCr value should be equal to first qpOutValCbCr value"); // compare input against output, as the Cb and Cr checks do
+    for (int i = 0; i < cfg_qpInValCbCr.values.size() - 1; i++)
+    {
+      CHECK(cfg_qpInValCbCr.values[i] < -qpBdOffsetC || cfg_qpInValCbCr.values[i] > MAX_QP, "Some entries cfg_qpInValCbCr are out of valid range of -qpBdOffsetC to 63, inclusive.");
+      CHECK(cfg_qpOutValCbCr.values[i] < -qpBdOffsetC || cfg_qpOutValCbCr.values[i] > MAX_QP, "Some entries cfg_qpOutValCbCr are out of valid range of -qpBdOffsetC to 63, inclusive.");
+      m_chromaQpMappingTableParams.m_deltaQpInValMinus1[2][i] = cfg_qpInValCbCr.values[i + 1] - cfg_qpInValCbCr.values[i] - 1;
+      m_chromaQpMappingTableParams.m_deltaQpOutVal[2][i] = cfg_qpOutValCbCr.values[i + 1] - cfg_qpOutValCbCr.values[i]; // output deltas are taken from the qpOutVal list
+    }
+  }
+
 #if LUMA_ADAPTIVE_DEBLOCKING_FILTER_QP_OFFSET
   if ( m_LadfEnabed )
   {
     CHECK( m_LadfNumIntervals != cfg_LadfQpOffset.values.size(), "size of LadfQpOffset must be equal to LadfNumIntervals");
     CHECK( m_LadfNumIntervals - 1 != cfg_LadfIntervalLowerBound.values.size(), "size of LadfIntervalLowerBound must be equal to LadfNumIntervals - 1");
     m_LadfQpOffset = cfg_LadfQpOffset.values;
+    m_LadfIntervalLowerBound[0] = 0;
     for (int k = 1; k < m_LadfNumIntervals; k++)
     {
       m_LadfIntervalLowerBound[k] = cfg_LadfIntervalLowerBound.values[k - 1];
@@ -1739,6 +1921,56 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   }
 #endif
 
+
+#if JVET_O0756_CONFIG_HDRMETRICS && !JVET_O0756_CALCULATE_HDRMETRICS
+  if ( m_calculateHdrMetrics == true)
+  {
+    printf ("Warning: Configuration enables HDR metric calculations.  However, HDR metric support was not linked when compiling the VTM.\n");
+    m_calculateHdrMetrics = false;
+  }
+#endif
+
+  if ( m_loopFilterAcrossVirtualBoundariesDisabledFlag )
+  {
+    CHECK( m_numVerVirtualBoundaries > 3, "Number of vertical virtual boundaries must be comprised between 0 and 3 included" );
+    CHECK( m_numHorVirtualBoundaries > 3, "Number of horizontal virtual boundaries must be comprised between 0 and 3 included" );
+    CHECK( m_numVerVirtualBoundaries != cfg_virtualBoundariesPosX.values.size(), "Size of VirtualBoundariesPosX must be equal to NumVerVirtualBoundaries");
+    CHECK( m_numHorVirtualBoundaries != cfg_virtualBoundariesPosY.values.size(), "Size of VirtualBoundariesPosY must be equal to NumHorVirtualBoundaries");
+    m_virtualBoundariesPosX = cfg_virtualBoundariesPosX.values;
+    if (m_numVerVirtualBoundaries > 1)
+    {
+      sort(m_virtualBoundariesPosX.begin(), m_virtualBoundariesPosX.end());
+    }
+    for (unsigned i = 0; i < m_numVerVirtualBoundaries; i++)
+    {
+      CHECK( m_virtualBoundariesPosX[i] == 0 || m_virtualBoundariesPosX[i] >= m_iSourceWidth, "The vertical virtual boundary must be within the picture" );
+      CHECK( m_virtualBoundariesPosX[i] % 8, "The vertical virtual boundary must be a multiple of 8 luma samples" );
+      if (i > 0)
+      {
+        CHECK( m_virtualBoundariesPosX[i] - m_virtualBoundariesPosX[i-1] < m_uiCTUSize, "The distance between any two vertical virtual boundaries shall be greater than or equal to the CTU size" );
+      }
+    }
+    m_virtualBoundariesPosY = cfg_virtualBoundariesPosY.values;
+    if (m_numHorVirtualBoundaries > 1)
+    {
+      sort(m_virtualBoundariesPosY.begin(), m_virtualBoundariesPosY.end());
+    }
+    for (unsigned i = 0; i < m_numHorVirtualBoundaries; i++)
+    {
+      CHECK( m_virtualBoundariesPosY[i] == 0 || m_virtualBoundariesPosY[i] >= m_iSourceHeight, "The horizontal virtual boundary must be within the picture" );
+      CHECK( m_virtualBoundariesPosY[i] % 8, "The horizontal virtual boundary must be a multiple of 8 luma samples" );
+      if (i > 0)
+      {
+        CHECK( m_virtualBoundariesPosY[i] - m_virtualBoundariesPosY[i-1] < m_uiCTUSize, "The distance between any two horizontal virtual boundaries shall be greater than or equal to the CTU size" );
+      }
+    }
+  }
+
+  if ( m_alf )
+  {
+    CHECK( m_maxNumAlfAlternativesChroma < 1 || m_maxNumAlfAlternativesChroma > MAX_NUM_ALF_ALTERNATIVES_CHROMA, std::string("The maximum number of ALF Chroma filter alternatives must be in the range (1-") + std::to_string(MAX_NUM_ALF_ALTERNATIVES_CHROMA) + std::string (", inclusive)") );
+  }
+
   // reading external dQP description from file
   if ( !m_dQPFileName.empty() )
   {
@@ -1771,82 +2003,122 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
       m_masteringDisplay.whitePoint[idx] = uint16_t((cfg_DisplayWhitePointCode.values.size() > idx) ? cfg_DisplayWhitePointCode.values[idx] : 0);
     }
   }
-
-  if( m_toneMappingInfoSEIEnabled && !m_toneMapCancelFlag )
-  {
-    if( m_toneMapModelId == 2 && !cfg_startOfCodedInterval.values.empty() )
-    {
-      const uint32_t num = 1u<< m_toneMapTargetBitDepth;
-      m_startOfCodedInterval = new int[num];
-      for(uint32_t i=0; i<num; i++)
-      {
-        m_startOfCodedInterval[i] = cfg_startOfCodedInterval.values.size() > i ? cfg_startOfCodedInterval.values[i] : 0;
-      }
-    }
-    else
-    {
-      m_startOfCodedInterval = NULL;
-    }
-    if( ( m_toneMapModelId == 3 ) && ( m_numPivots > 0 ) )
-    {
-      if( !cfg_codedPivotValue.values.empty() && !cfg_targetPivotValue.values.empty() )
+  if ( m_omniViewportSEIEnabled && !m_omniViewportSEICancelFlag )
+  {
+    CHECK (!( m_omniViewportSEICntMinus1 >= 0 && m_omniViewportSEICntMinus1 < 16 ), "SEIOmniViewportCntMinus1 must be in the range of 0 to 16");
+    m_omniViewportSEIAzimuthCentre.resize  (m_omniViewportSEICntMinus1+1);
+    m_omniViewportSEIElevationCentre.resize(m_omniViewportSEICntMinus1+1);
+    m_omniViewportSEITiltCentre.resize     (m_omniViewportSEICntMinus1+1);
+    m_omniViewportSEIHorRange.resize       (m_omniViewportSEICntMinus1+1);
+    m_omniViewportSEIVerRange.resize       (m_omniViewportSEICntMinus1+1);
+    for(int i=0; i<(m_omniViewportSEICntMinus1+1); i++)
+    {
+      m_omniViewportSEIAzimuthCentre[i]   = cfg_omniViewportSEIAzimuthCentre  .values.size() > i ? cfg_omniViewportSEIAzimuthCentre  .values[i] : 0;
+      m_omniViewportSEIElevationCentre[i] = cfg_omniViewportSEIElevationCentre.values.size() > i ? cfg_omniViewportSEIElevationCentre.values[i] : 0;
+      m_omniViewportSEITiltCentre[i]      = cfg_omniViewportSEITiltCentre     .values.size() > i ? cfg_omniViewportSEITiltCentre     .values[i] : 0;
+      m_omniViewportSEIHorRange[i]        = cfg_omniViewportSEIHorRange       .values.size() > i ? cfg_omniViewportSEIHorRange       .values[i] : 0;
+      m_omniViewportSEIVerRange[i]        = cfg_omniViewportSEIVerRange       .values.size() > i ? cfg_omniViewportSEIVerRange       .values[i] : 0;
+    }
+  }
+
+  if(!m_rwpSEIRwpCancelFlag && m_rwpSEIEnabled)
+  {
+    CHECK (!( m_rwpSEINumPackedRegions > 0 && m_rwpSEINumPackedRegions <= std::numeric_limits<uint8_t>::max() ), "SEIRwpNumPackedRegions must be in the range of 1 to 255");
+    CHECK (!(cfg_rwpSEIRwpTransformType.values.size() == m_rwpSEINumPackedRegions), "Number of must SEIRwpTransformType values be equal to SEIRwpNumPackedRegions");
+    CHECK (!(cfg_rwpSEIRwpGuardBandFlag.values.size() == m_rwpSEINumPackedRegions), "Number of must SEIRwpGuardBandFlag values must be equal to SEIRwpNumPackedRegions");
+    CHECK (!(cfg_rwpSEIProjRegionWidth.values.size() == m_rwpSEINumPackedRegions), "Number of must SEIProjRegionWidth values must be equal to SEIRwpNumPackedRegions");
+    CHECK (!(cfg_rwpSEIProjRegionHeight.values.size() == m_rwpSEINumPackedRegions), "Number of must SEIProjRegionHeight values must be equal to SEIRwpNumPackedRegions");
+    CHECK (!(cfg_rwpSEIRwpSEIProjRegionTop.values.size() == m_rwpSEINumPackedRegions), "Number of must SEIRwpSEIProjRegionTop values must be equal to SEIRwpNumPackedRegions");
+    CHECK (!(cfg_rwpSEIProjRegionLeft.values.size() == m_rwpSEINumPackedRegions), "Number of must SEIProjRegionLeft values must be equal to SEIRwpNumPackedRegions");
+    CHECK (!(cfg_rwpSEIPackedRegionWidth.values.size() == m_rwpSEINumPackedRegions), "Number of must SEIPackedRegionWidth values must be equal to SEIRwpNumPackedRegions");
+    CHECK (!(cfg_rwpSEIPackedRegionHeight.values.size() == m_rwpSEINumPackedRegions), "Number of must SEIPackedRegionHeight values must be equal to SEIRwpNumPackedRegions");
+    CHECK (!(cfg_rwpSEIPackedRegionTop.values.size() == m_rwpSEINumPackedRegions), "Number of must SEIPackedRegionTop values must be equal to SEIRwpNumPackedRegions");
+    CHECK (!(cfg_rwpSEIPackedRegionLeft.values.size() == m_rwpSEINumPackedRegions), "Number of must SEIPackedRegionLeft values must be equal to SEIRwpNumPackedRegions");
+
+    m_rwpSEIRwpTransformType.resize(m_rwpSEINumPackedRegions);
+    m_rwpSEIRwpGuardBandFlag.resize(m_rwpSEINumPackedRegions);
+    m_rwpSEIProjRegionWidth.resize(m_rwpSEINumPackedRegions);
+    m_rwpSEIProjRegionHeight.resize(m_rwpSEINumPackedRegions);
+    m_rwpSEIRwpSEIProjRegionTop.resize(m_rwpSEINumPackedRegions);
+    m_rwpSEIProjRegionLeft.resize(m_rwpSEINumPackedRegions);
+    m_rwpSEIPackedRegionWidth.resize(m_rwpSEINumPackedRegions);
+    m_rwpSEIPackedRegionHeight.resize(m_rwpSEINumPackedRegions);
+    m_rwpSEIPackedRegionTop.resize(m_rwpSEINumPackedRegions);
+    m_rwpSEIPackedRegionLeft.resize(m_rwpSEINumPackedRegions);
+    m_rwpSEIRwpLeftGuardBandWidth.resize(m_rwpSEINumPackedRegions);
+    m_rwpSEIRwpRightGuardBandWidth.resize(m_rwpSEINumPackedRegions);
+    m_rwpSEIRwpTopGuardBandHeight.resize(m_rwpSEINumPackedRegions);
+    m_rwpSEIRwpBottomGuardBandHeight.resize(m_rwpSEINumPackedRegions);
+    m_rwpSEIRwpGuardBandNotUsedForPredFlag.resize(m_rwpSEINumPackedRegions);
+    m_rwpSEIRwpGuardBandType.resize(4*m_rwpSEINumPackedRegions);
+    for( int i=0; i < m_rwpSEINumPackedRegions; i++ )
+    {
+      m_rwpSEIRwpTransformType[i]                     = cfg_rwpSEIRwpTransformType.values[i];
+      CHECK (!( m_rwpSEIRwpTransformType[i] >= 0 && m_rwpSEIRwpTransformType[i] <= 7 ), "SEIRwpTransformType must be in the range of 0 to 7");
+      m_rwpSEIRwpGuardBandFlag[i]                     = cfg_rwpSEIRwpGuardBandFlag.values[i];
+      m_rwpSEIProjRegionWidth[i]                      = cfg_rwpSEIProjRegionWidth.values[i];
+      m_rwpSEIProjRegionHeight[i]                     = cfg_rwpSEIProjRegionHeight.values[i];
+      m_rwpSEIRwpSEIProjRegionTop[i]                  = cfg_rwpSEIRwpSEIProjRegionTop.values[i];
+      m_rwpSEIProjRegionLeft[i]                       = cfg_rwpSEIProjRegionLeft.values[i];
+      m_rwpSEIPackedRegionWidth[i]                    = cfg_rwpSEIPackedRegionWidth.values[i];
+      m_rwpSEIPackedRegionHeight[i]                   = cfg_rwpSEIPackedRegionHeight.values[i];
+      m_rwpSEIPackedRegionTop[i]                      = cfg_rwpSEIPackedRegionTop.values[i];
+      m_rwpSEIPackedRegionLeft[i]                     = cfg_rwpSEIPackedRegionLeft.values[i]; 
+      if( m_rwpSEIRwpGuardBandFlag[i] )
       {
-        m_codedPivotValue  = new int[m_numPivots];
-        m_targetPivotValue = new int[m_numPivots];
-        for(uint32_t i=0; i<m_numPivots; i++)
+        m_rwpSEIRwpLeftGuardBandWidth[i]              =  cfg_rwpSEIRwpLeftGuardBandWidth.values[i];
+        m_rwpSEIRwpRightGuardBandWidth[i]             =  cfg_rwpSEIRwpRightGuardBandWidth.values[i];
+        m_rwpSEIRwpTopGuardBandHeight[i]              =  cfg_rwpSEIRwpTopGuardBandHeight.values[i];
+        m_rwpSEIRwpBottomGuardBandHeight[i]           =  cfg_rwpSEIRwpBottomGuardBandHeight.values[i];
+        CHECK (! ( m_rwpSEIRwpLeftGuardBandWidth[i] > 0 || m_rwpSEIRwpRightGuardBandWidth[i] > 0 || m_rwpSEIRwpTopGuardBandHeight[i] >0 || m_rwpSEIRwpBottomGuardBandHeight[i] >0 ), "At least one of the RWP guard band parameters mut be greater than zero");
+        m_rwpSEIRwpGuardBandNotUsedForPredFlag[i]     =  cfg_rwpSEIRwpGuardBandNotUsedForPredFlag.values[i];
+        for( int j=0; j < 4; j++ )
         {
-          m_codedPivotValue[i]  = cfg_codedPivotValue.values.size()  > i ? cfg_codedPivotValue.values [i] : 0;
-          m_targetPivotValue[i] = cfg_targetPivotValue.values.size() > i ? cfg_targetPivotValue.values[i] : 0;
+          m_rwpSEIRwpGuardBandType[i*4 + j]           =  cfg_rwpSEIRwpGuardBandType.values[i*4 + j];
         }
+
       }
     }
-    else
-    {
-      m_codedPivotValue = NULL;
-      m_targetPivotValue = NULL;
-    }
   }
-
-  if( m_kneeSEIEnabled && !m_kneeSEICancelFlag )
+  if (m_gcmpSEIEnabled && !m_gcmpSEICancelFlag)
   {
-    CHECK(!( m_kneeSEINumKneePointsMinus1 >= 0 && m_kneeSEINumKneePointsMinus1 < 999 ), "Inconsistent config");
-    m_kneeSEIInputKneePoint  = new int[m_kneeSEINumKneePointsMinus1+1];
-    m_kneeSEIOutputKneePoint = new int[m_kneeSEINumKneePointsMinus1+1];
-    for(int i=0; i<(m_kneeSEINumKneePointsMinus1+1); i++)
+    int numFace = m_gcmpSEIPackingType == 4 || m_gcmpSEIPackingType == 5 ? 5 : 6;
+    CHECK (!(cfg_gcmpSEIFaceIndex.values.size()                  == numFace), "Number of SEIGcmpFaceIndex must be equal to 5 when SEIGcmpPackingType is equal to 4 or 5, otherwise, it must be equal to 6");
+    CHECK (!(cfg_gcmpSEIFaceRotation.values.size()               == numFace), "Number of SEIGcmpFaceRotation must be equal to 5 when SEIGcmpPackingType is equal to 4 or 5, otherwise, it must be equal to 6");
+    m_gcmpSEIFaceIndex.resize(numFace);
+    m_gcmpSEIFaceRotation.resize(numFace);
+    if (m_gcmpSEIMappingFunctionType == 2)
     {
-      m_kneeSEIInputKneePoint[i]  = cfg_kneeSEIInputKneePointValue.values.size()  > i ? cfg_kneeSEIInputKneePointValue.values[i]  : 1;
-      m_kneeSEIOutputKneePoint[i] = cfg_kneeSEIOutputKneePointValue.values.size() > i ? cfg_kneeSEIOutputKneePointValue.values[i] : 0;
+      CHECK (!(cfg_gcmpSEIFunctionCoeffU.values.size()           == numFace), "Number of SEIGcmpFunctionCoeffU must be equal to 5 when SEIGcmpPackingType is equal to 4 or 5, otherwise, it must be equal to 6");
+      CHECK (!(cfg_gcmpSEIFunctionUAffectedByVFlag.values.size() == numFace), "Number of SEIGcmpFunctionUAffectedByVFlag must be equal to 5 when SEIGcmpPackingType is equal to 4 or 5, otherwise, it must be equal to 6");
+      CHECK (!(cfg_gcmpSEIFunctionCoeffV.values.size()           == numFace), "Number of SEIGcmpFunctionCoeffV must be equal to 5 when SEIGcmpPackingType is equal to 4 or 5, otherwise, it must be equal to 6");
+      CHECK (!(cfg_gcmpSEIFunctionVAffectedByUFlag.values.size() == numFace), "Number of SEIGcmpFunctionVAffectedByUFlag must be equal to 5 when SEIGcmpPackingType is equal to 4 or 5, otherwise, it must be equal to 6");
+      m_gcmpSEIFunctionCoeffU.resize(numFace);
+      m_gcmpSEIFunctionUAffectedByVFlag.resize(numFace);
+      m_gcmpSEIFunctionCoeffV.resize(numFace);
+      m_gcmpSEIFunctionVAffectedByUFlag.resize(numFace);
     }
-  }
-
-  if(m_timeCodeSEIEnabled)
-  {
-    for(int i = 0; i < m_timeCodeSEINumTs && i < MAX_TIMECODE_SEI_SETS; i++)
+    for (int i = 0; i < numFace; i++)
     {
-      m_timeSetArray[i].clockTimeStampFlag    = cfg_timeCodeSeiTimeStampFlag        .values.size()>i ? cfg_timeCodeSeiTimeStampFlag        .values [i] : false;
-      m_timeSetArray[i].numUnitFieldBasedFlag = cfg_timeCodeSeiNumUnitFieldBasedFlag.values.size()>i ? cfg_timeCodeSeiNumUnitFieldBasedFlag.values [i] : 0;
-      m_timeSetArray[i].countingType          = cfg_timeCodeSeiCountingType         .values.size()>i ? cfg_timeCodeSeiCountingType         .values [i] : 0;
-      m_timeSetArray[i].fullTimeStampFlag     = cfg_timeCodeSeiFullTimeStampFlag    .values.size()>i ? cfg_timeCodeSeiFullTimeStampFlag    .values [i] : 0;
-      m_timeSetArray[i].discontinuityFlag     = cfg_timeCodeSeiDiscontinuityFlag    .values.size()>i ? cfg_timeCodeSeiDiscontinuityFlag    .values [i] : 0;
-      m_timeSetArray[i].cntDroppedFlag        = cfg_timeCodeSeiCntDroppedFlag       .values.size()>i ? cfg_timeCodeSeiCntDroppedFlag       .values [i] : 0;
-      m_timeSetArray[i].numberOfFrames        = cfg_timeCodeSeiNumberOfFrames       .values.size()>i ? cfg_timeCodeSeiNumberOfFrames       .values [i] : 0;
-      m_timeSetArray[i].secondsValue          = cfg_timeCodeSeiSecondsValue         .values.size()>i ? cfg_timeCodeSeiSecondsValue         .values [i] : 0;
-      m_timeSetArray[i].minutesValue          = cfg_timeCodeSeiMinutesValue         .values.size()>i ? cfg_timeCodeSeiMinutesValue         .values [i] : 0;
-      m_timeSetArray[i].hoursValue            = cfg_timeCodeSeiHoursValue           .values.size()>i ? cfg_timeCodeSeiHoursValue           .values [i] : 0;
-      m_timeSetArray[i].secondsFlag           = cfg_timeCodeSeiSecondsFlag          .values.size()>i ? cfg_timeCodeSeiSecondsFlag          .values [i] : 0;
-      m_timeSetArray[i].minutesFlag           = cfg_timeCodeSeiMinutesFlag          .values.size()>i ? cfg_timeCodeSeiMinutesFlag          .values [i] : 0;
-      m_timeSetArray[i].hoursFlag             = cfg_timeCodeSeiHoursFlag            .values.size()>i ? cfg_timeCodeSeiHoursFlag            .values [i] : 0;
-      m_timeSetArray[i].timeOffsetLength      = cfg_timeCodeSeiTimeOffsetLength     .values.size()>i ? cfg_timeCodeSeiTimeOffsetLength     .values [i] : 0;
-      m_timeSetArray[i].timeOffsetValue       = cfg_timeCodeSeiTimeOffsetValue      .values.size()>i ? cfg_timeCodeSeiTimeOffsetValue      .values [i] : 0;
+      m_gcmpSEIFaceIndex[i]                = cfg_gcmpSEIFaceIndex.values[i];
+      m_gcmpSEIFaceRotation[i]             = cfg_gcmpSEIFaceRotation.values[i];
+      if (m_gcmpSEIMappingFunctionType == 2)
+      {
+        m_gcmpSEIFunctionCoeffU[i]           = cfg_gcmpSEIFunctionCoeffU.values[i];
+        m_gcmpSEIFunctionUAffectedByVFlag[i] = cfg_gcmpSEIFunctionUAffectedByVFlag.values[i];
+        m_gcmpSEIFunctionCoeffV[i]           = cfg_gcmpSEIFunctionCoeffV.values[i];
+        m_gcmpSEIFunctionVAffectedByUFlag[i] = cfg_gcmpSEIFunctionVAffectedByUFlag.values[i];
+      }
     }
   }
-
   m_reshapeCW.binCW.resize(3);
   m_reshapeCW.rspFps = m_iFrameRate;
-  m_reshapeCW.rspIntraPeriod = m_iIntraPeriod;
   m_reshapeCW.rspPicSize = m_iSourceWidth*m_iSourceHeight;
   m_reshapeCW.rspFpsToIp = std::max(16, 16 * (int)(round((double)m_iFrameRate /16.0)));
   m_reshapeCW.rspBaseQP = m_iQP;
+  m_reshapeCW.updateCtrl = m_updateCtrl;
+  m_reshapeCW.adpOption = m_adpOption;
+  m_reshapeCW.initialCW = m_initialCW;
 #if ENABLE_TRACING
   g_trace_ctx = tracing_init(sTracingFile, sTracingRule);
   if( bTracingChannelsList && g_trace_ctx )
@@ -1858,7 +2130,7 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
 #endif
 
 #if ENABLE_QPA
-  if (m_bUsePerceptQPA && !m_bUseAdaptiveQP && m_dualTree && (m_cbQpOffsetDualTree != 0 || m_crQpOffsetDualTree != 0))
+  if (m_bUsePerceptQPA && !m_bUseAdaptiveQP && m_dualTree && (m_cbQpOffsetDualTree != 0 || m_crQpOffsetDualTree != 0 || m_cbCrQpOffsetDualTree != 0))
   {
     msg( WARNING, "*************************************************************************\n" );
     msg( WARNING, "* WARNING: chroma QPA on, ignoring nonzero dual-tree chroma QP offsets! *\n" );
@@ -1871,11 +2143,10 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
  #else
   if (((int)m_fQP < 38) && m_bUsePerceptQPA && !m_bUseAdaptiveQP && (m_iSourceWidth <= 2048) && (m_iSourceHeight <= 1280)
  #endif
-#if MAX_TB_SIZE_SIGNALLING
+ #if WCG_EXT && ER_CHROMA_QP_WCG_PPS
+      && (!m_wcgChromaQpControl.enabled)
+ #endif
       && ((1 << (m_log2MaxTbSize + 1)) == m_uiCTUSize) && (m_iSourceWidth > 512 || m_iSourceHeight > 320))
-#else
-    && ((1 << (MAX_TB_LOG2_SIZEY + 1)) == m_uiCTUSize) && (m_iSourceWidth > 512 || m_iSourceHeight > 320))
-#endif
   {
     m_cuQpDeltaSubdiv = 2;
   }
@@ -1891,13 +2162,18 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
     msg( WARNING, "*************************************************************************\n" );
 
     m_uiCTUSize = m_uiMaxCUWidth;
-#if MAX_TB_SIZE_SIGNALLING
     if( ( 1u << m_log2MaxTbSize         ) > m_uiCTUSize ) m_log2MaxTbSize--;
-#endif
   }
 #endif
 #endif // ENABLE_QPA
 
+#if JVET_AHG14_LOSSLESS_ENC_QP_FIX
+  if( m_costMode == COST_LOSSLESS_CODING )
+  {
+    m_iQP = LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP - ( ( m_internalBitDepth[CHANNEL_TYPE_LUMA] - 8 ) * 6 );
+  }
+#endif
+
   const int minCuSize = 1 << MIN_CU_LOG2;
   m_uiMaxCodingDepth = 0;
   while( ( m_uiCTUSize >> m_uiMaxCodingDepth ) > minCuSize )
@@ -1926,6 +2202,34 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
 // Private member functions
 // ====================================================================================================================
 
+/// Auto-determine m_profile from m_chromaFormatIDC and the internal bit depths. Can select profile 'NONE'.
+/// Returns 1 if the chroma format matches none of the cases below, 0 otherwise.
+int EncAppCfg::xAutoDetermineProfile()
+{
+  const int maxBitDepth= std::max(m_internalBitDepth[CHANNEL_TYPE_LUMA], m_internalBitDepth[m_chromaFormatIDC==ChromaFormat::CHROMA_400 ? CHANNEL_TYPE_LUMA : CHANNEL_TYPE_CHROMA]); // for CHROMA_400 both operands are the luma depth, so the chroma entry is ignored
+  m_profile=Profile::NONE; // default; stays NONE when the bit depth exceeds the limits checked below
+
+  if (m_chromaFormatIDC==ChromaFormat::CHROMA_400 || m_chromaFormatIDC==ChromaFormat::CHROMA_420)
+  {
+    if (maxBitDepth<=10) // deeper content keeps Profile::NONE
+    {
+      m_profile=Profile::MAIN_10;
+    }
+  }
+  else if (m_chromaFormatIDC==ChromaFormat::CHROMA_422 || m_chromaFormatIDC==ChromaFormat::CHROMA_444)
+  {
+    if (maxBitDepth<=10) // deeper content keeps Profile::NONE
+    {
+      m_profile=Profile::MAIN_444_10;
+    }
+  }
+  else
+  {
+    return 1; // unknown chroma format
+  }
+  return 0; // success, even if m_profile was left as Profile::NONE
+}
+
 bool EncAppCfg::xCheckParameter()
 {
   msg( NOTICE, "\n" );
@@ -1954,69 +2258,29 @@ bool EncAppCfg::xCheckParameter()
 #define xConfirmPara(a,b) check_failed |= confirmPara(a,b)
 
 
-  if( m_profile != Profile::NEXT )
+  if( m_depQuantEnabledFlag )
   {
-    THROW( "Next profile with an alternative partitioner has to be enabled if HEVC_USE_RQT is off!" );
-#if ENABLE_WPP_PARALLELISM
-    xConfirmPara( m_numWppThreads > 1, "WPP-style parallelization only supported with NEXT profile" );
-#endif
-    xConfirmPara( m_LMChroma, "LMChroma only allowed with NEXT profile" );
-    xConfirmPara( m_ImvMode, "IMV is only allowed with NEXT profile" );
-    xConfirmPara(m_IBCMode, "IBC Mode only allowed with NEXT profile");
-    xConfirmPara( m_HashME, "Hash motion estimation only allowed with NEXT profile" );
-    xConfirmPara( m_useFastLCTU, "Fast large CTU can only be applied when encoding with NEXT profile" );
-    xConfirmPara( m_MTS, "MTS only allowed with NEXT profile" );
-    xConfirmPara( m_MTSIntraMaxCand, "MTS only allowed with NEXT profile" );
-    xConfirmPara( m_MTSInterMaxCand, "MTS only allowed with NEXT profile" );
-    xConfirmPara( m_compositeRefEnabled, "Composite Reference Frame is only allowed with NEXT profile" );
-    xConfirmPara( m_GBi, "GBi is only allowed with NEXT profile" );
-    xConfirmPara( m_GBiFast, "GBiFast is only allowed with NEXT profile" );
-    xConfirmPara( m_Triangle, "Triangle is only allowed with NEXT profile" );
-    xConfirmPara(m_DMVR, "DMVR only allowed with NEXT profile");
-    // ADD_NEW_TOOL : (parameter check) add a check for next tools here
-  }
-  else
-  {
-    if( m_depQuantEnabledFlag )
-    {
-      xConfirmPara( !m_useRDOQ || !m_useRDOQTS, "RDOQ and RDOQTS must be equal to 1 if dependent quantization is enabled" );
-#if HEVC_USE_SIGN_HIDING
-      xConfirmPara( m_signDataHidingEnabledFlag, "SignHideFlag must be equal to 0 if dependent quantization is enabled" );
-#endif
-    }
-
+    xConfirmPara( !m_useRDOQ || !m_useRDOQTS, "RDOQ and RDOQTS must be equal to 1 if dependent quantization is enabled" );
+    xConfirmPara( m_signDataHidingEnabledFlag, "SignHideFlag must be equal to 0 if dependent quantization is enabled" );
   }
 
   if( m_wrapAround )
   {
-    xConfirmPara( m_wrapAroundOffset == 0, "Wrap-around offset must be greater than 0" );
+    xConfirmPara( m_wrapAroundOffset <= m_uiCTUSize + (m_uiMaxCUWidth >> m_uiLog2DiffMaxMinCodingBlockSize), "Wrap-around offset must be greater than CtbSizeY + MinCbSize" );
     xConfirmPara( m_wrapAroundOffset > m_iSourceWidth, "Wrap-around offset must not be greater than the source picture width" );
-    xConfirmPara( m_wrapAroundOffset % SPS::getWinUnitX(m_chromaFormatIDC) != 0, "Wrap-around offset must be an integer multiple of the specified chroma subsampling" );
+    int minCUSize =  m_uiCTUSize >> m_uiLog2DiffMaxMinCodingBlockSize;
+    xConfirmPara( m_wrapAroundOffset % minCUSize != 0, "Wrap-around offset must be an integer multiple of the specified minimum CU size" );
   }
 
 #if ENABLE_SPLIT_PARALLELISM
   xConfirmPara( m_numSplitThreads < 1, "Number of used threads cannot be smaller than 1" );
   xConfirmPara( m_numSplitThreads > PARL_SPLIT_MAX_NUM_THREADS, "Number of used threads cannot be higher than the number of actual jobs" );
-#if _MSC_VER && ENABLE_WPP_PARALLELISM
-  xConfirmPara( m_numSplitThreads > 1 && m_numSplitThreads != NUM_SPLIT_THREADS_IF_MSVC, "Due to poor implementation by Microsoft, NumSplitThreads cannot be set dynamically on runtime!" );
-#endif
 #else
   xConfirmPara( m_numSplitThreads != 1, "ENABLE_SPLIT_PARALLELISM is disabled, numSplitThreads has to be 1" );
 #endif
 
-#if ENABLE_WPP_PARALLELISM
-  xConfirmPara( m_numWppThreads < 1, "Number of threads used for WPP-style parallelization cannot be smaller than 1" );
-  xConfirmPara( m_numWppThreads > PARL_WPP_MAX_NUM_THREADS, "Number of threads used for WPP-style parallelization cannot be bigger than PARL_WPP_MAX_NUM_THREADS" );
-  xConfirmPara( !m_ensureWppBitEqual && m_numWppThreads > 1, "WPP bit equality is implied when using WPP-style parallelism" );
-#if ENABLE_WPP_STATIC_LINK
-  xConfirmPara( m_numWppExtraLines != 0, "WPP-style extra lines out of range" );
-#else
-  xConfirmPara( m_numWppExtraLines < 0, "WPP-style extra lines out of range" );
-#endif
-#else
   xConfirmPara( m_numWppThreads != 1, "ENABLE_WPP_PARALLELISM is disabled, numWppThreads has to be 1" );
   xConfirmPara( m_ensureWppBitEqual, "ENABLE_WPP_PARALLELISM is disabled, cannot ensure being WPP bit-equal" );
-#endif
 
 
 #if SHARP_LUMA_DELTA_QP && ENABLE_QPA
@@ -2036,76 +2300,22 @@ bool EncAppCfg::xCheckParameter()
 
 
   xConfirmPara(m_bitstreamFileName.empty(), "A bitstream file name must be specified (BitstreamFile)");
-  const uint32_t maxBitDepth=(m_chromaFormatIDC==CHROMA_400) ? m_internalBitDepth[CHANNEL_TYPE_LUMA] : std::max(m_internalBitDepth[CHANNEL_TYPE_LUMA], m_internalBitDepth[CHANNEL_TYPE_CHROMA]);
-  xConfirmPara(m_bitDepthConstraint<maxBitDepth, "The internalBitDepth must not be greater than the bitDepthConstraint value");
-  xConfirmPara(m_chromaFormatConstraint<m_chromaFormatIDC, "The chroma format used must not be greater than the chromaFormatConstraint value");
-
-  if (m_profile==Profile::MAINREXT || m_profile==Profile::HIGHTHROUGHPUTREXT)
-  {
-    xConfirmPara(m_lowerBitRateConstraintFlag==false && m_intraConstraintFlag==false, "The lowerBitRateConstraint flag cannot be false when intraConstraintFlag is false");
-    xConfirmPara(m_cabacBypassAlignmentEnabledFlag && m_profile!=Profile::HIGHTHROUGHPUTREXT, "AlignCABACBeforeBypass must not be enabled unless the high throughput profile is being used.");
-    if (m_profile == Profile::MAINREXT)
-    {
-      const uint32_t intraIdx = m_intraConstraintFlag ? 1:0;
-      const uint32_t bitDepthIdx = (m_bitDepthConstraint == 8 ? 0 : (m_bitDepthConstraint ==10 ? 1 : (m_bitDepthConstraint == 12 ? 2 : (m_bitDepthConstraint == 16 ? 3 : 4 ))));
-      const uint32_t chromaFormatIdx = uint32_t(m_chromaFormatConstraint);
-      const bool bValidProfile = (bitDepthIdx > 3 || chromaFormatIdx>3) ? false : (validRExtProfileNames[intraIdx][bitDepthIdx][chromaFormatIdx] != NONE);
-      xConfirmPara(!bValidProfile, "Invalid intra constraint flag, bit depth constraint flag and chroma format constraint flag combination for a RExt profile");
-      const bool bUsingGeneralRExtTools  = m_transformSkipRotationEnabledFlag        ||
-                                           m_transformSkipContextEnabledFlag         ||
-                                           m_rdpcmEnabledFlag[RDPCM_SIGNAL_IMPLICIT] ||
-                                           m_rdpcmEnabledFlag[RDPCM_SIGNAL_EXPLICIT] ||
-                                           !m_enableIntraReferenceSmoothing          ||
-                                           m_persistentRiceAdaptationEnabledFlag     ||
-                                           m_log2MaxTransformSkipBlockSize!=2;
-      const bool bUsingChromaQPTool      = m_cuChromaQpOffsetSubdiv >= 0;
-      const bool bUsingExtendedPrecision = m_extendedPrecisionProcessingFlag;
-
-      xConfirmPara((m_chromaFormatConstraint==CHROMA_420 || m_chromaFormatConstraint==CHROMA_400) && bUsingChromaQPTool, "CU Chroma QP adjustment cannot be used for 4:0:0 or 4:2:0 RExt profiles");
-      xConfirmPara(m_bitDepthConstraint != 16 && bUsingExtendedPrecision, "Extended precision can only be used in 16-bit RExt profiles");
-      if (!(m_chromaFormatConstraint == CHROMA_400 && m_bitDepthConstraint == 16) && m_chromaFormatConstraint!=CHROMA_444)
-      {
-        xConfirmPara(bUsingGeneralRExtTools, "Combination of tools and profiles are not possible in the specified RExt profile.");
-      }
-      xConfirmPara( m_onePictureOnlyConstraintFlag && m_chromaFormatConstraint!=CHROMA_444, "chroma format constraint must be 4:4:4 when one-picture-only constraint flag is 1");
-      xConfirmPara( m_onePictureOnlyConstraintFlag && m_bitDepthConstraint != 8 && m_bitDepthConstraint != 16, "bit depth constraint must be 8 or 16 when one-picture-only constraint flag is 1");
-      xConfirmPara( m_onePictureOnlyConstraintFlag && m_framesToBeEncoded > 1, "Number of frames to be encoded must be 1 when one-picture-only constraint flag is 1.");
-
-      if (!m_intraConstraintFlag && m_bitDepthConstraint==16 && m_chromaFormatConstraint==CHROMA_444)
-      {
-        msg( WARNING, "********************************************************************************************************\n");
-        msg( WARNING, "** WARNING: The RExt constraint flags describe a non standard combination (used for development only) **\n");
-        msg( WARNING, "********************************************************************************************************\n");
-      }
-    }
-    else
-    {
-      xConfirmPara( m_chromaFormatConstraint != CHROMA_444, "chroma format constraint must be 4:4:4 in the High Throughput 4:4:4 16-bit Intra profile.");
-      xConfirmPara( m_bitDepthConstraint     != 16,         "bit depth constraint must be 4:4:4 in the High Throughput 4:4:4 16-bit Intra profile.");
-      xConfirmPara( m_intraConstraintFlag    != 1,          "intra constraint flag must be 1 in the High Throughput 4:4:4 16-bit Intra profile.");
-    }
-  }
-  else
+  xConfirmPara(m_internalBitDepth[CHANNEL_TYPE_CHROMA] != m_internalBitDepth[CHANNEL_TYPE_LUMA], "The internalBitDepth must be the same for luma and chroma");
+  if (m_profile==Profile::MAIN_10 || m_profile==Profile::MAIN_444_10)
   {
-    xConfirmPara(m_bitDepthConstraint!=((m_profile==Profile::MAIN10 || m_profile==Profile::NEXT)?10:8), "BitDepthConstraint must be 8 for MAIN profile and 10 for MAIN10 profile.");
-    xConfirmPara(m_chromaFormatConstraint!=CHROMA_420 && m_profile!=Profile::NEXT, "ChromaFormatConstraint must be 420 for non main-RExt and non-Next profiles.");
-    xConfirmPara(m_intraConstraintFlag==true, "IntraConstraintFlag must be false for non main_RExt profiles.");
-    xConfirmPara(m_lowerBitRateConstraintFlag==false, "LowerBitrateConstraintFlag must be true for non main-RExt profiles.");
-    xConfirmPara(m_profile == Profile::MAINSTILLPICTURE && m_framesToBeEncoded > 1, "Number of frames to be encoded must be 1 when main still picture profile is used.");
-
-    xConfirmPara(m_crossComponentPredictionEnabledFlag==true, "CrossComponentPrediction must not be used for non main-RExt profiles.");
-    xConfirmPara(m_log2MaxTransformSkipBlockSize>=6, "Transform Skip Log2 Max Size must be less or equal to 5.");
-    xConfirmPara(m_transformSkipRotationEnabledFlag==true, "UseResidualRotation must not be enabled for non main-RExt profiles.");
-    xConfirmPara(m_transformSkipContextEnabledFlag==true, "UseSingleSignificanceMapContext must not be enabled for non main-RExt profiles.");
-    xConfirmPara(m_rdpcmEnabledFlag[RDPCM_SIGNAL_IMPLICIT]==true, "ImplicitResidualDPCM must not be enabled for non main-RExt profiles.");
-    xConfirmPara(m_rdpcmEnabledFlag[RDPCM_SIGNAL_EXPLICIT]==true, "ExplicitResidualDPCM must not be enabled for non main-RExt profiles.");
-    xConfirmPara(m_persistentRiceAdaptationEnabledFlag==true, "GolombRiceParameterAdaption must not be enabled for non main-RExt profiles.");
-    xConfirmPara(m_extendedPrecisionProcessingFlag==true, "UseExtendedPrecision must not be enabled for non main-RExt profiles.");
-    xConfirmPara(m_highPrecisionOffsetsEnabledFlag==true, "UseHighPrecisionPredictionWeighting must not be enabled for non main-RExt profiles.");
-    xConfirmPara(m_enableIntraReferenceSmoothing==false, "EnableIntraReferenceSmoothing must be enabled for non main-RExt profiles.");
-    xConfirmPara(m_cabacBypassAlignmentEnabledFlag, "AlignCABACBeforeBypass cannot be enabled for non main-RExt profiles.");
-  }
-  xConfirmPara( m_chromaFormatIDC==CHROMA_422, "4:2:2 chroma sampling format not supported with current compiler setting. Set compiler flag \"ENABLE_CHROMA_422\" equal to 1 for enabling 4:2:2.\n\n" );
+    xConfirmPara(m_crossComponentPredictionEnabledFlag==true, "CrossComponentPrediction must not be used for given profile.");
+    xConfirmPara(m_log2MaxTransformSkipBlockSize>=6, "Transform Skip Log2 Max Size must be less or equal to 5 for given profile.");
+    xConfirmPara(m_transformSkipRotationEnabledFlag==true, "UseResidualRotation must not be enabled for given profile.");
+    xConfirmPara(m_transformSkipContextEnabledFlag==true, "UseSingleSignificanceMapContext must not be enabled for given profile.");
+    xConfirmPara(m_rdpcmEnabledFlag[RDPCM_SIGNAL_IMPLICIT]==true, "ImplicitResidualDPCM must not be enabled for given profile.");
+    xConfirmPara(m_rdpcmEnabledFlag[RDPCM_SIGNAL_EXPLICIT]==true, "ExplicitResidualDPCM must not be enabled for given profile.");
+    xConfirmPara(m_persistentRiceAdaptationEnabledFlag==true, "GolombRiceParameterAdaption must not be enabled for given profile.");
+    xConfirmPara(m_extendedPrecisionProcessingFlag==true, "UseExtendedPrecision must not be enabled for given profile.");
+    xConfirmPara(m_highPrecisionOffsetsEnabledFlag==true, "UseHighPrecisionPredictionWeighting must not be enabled for given profile.");
+    xConfirmPara(m_enableIntraReferenceSmoothing==false, "EnableIntraReferenceSmoothing must be enabled for given profile.");
+    xConfirmPara(m_cabacBypassAlignmentEnabledFlag, "AlignCABACBeforeBypass cannot be enabled for given profile.");
+  }
+
 
   // check range of parameters
   xConfirmPara( m_inputBitDepth[CHANNEL_TYPE_LUMA  ] < 8,                                   "InputBitDepth must be at least 8" );
@@ -2132,55 +2342,56 @@ bool EncAppCfg::xCheckParameter()
   xConfirmPara( (m_MSBExtendedBitDepth[CHANNEL_TYPE_CHROMA] < m_inputBitDepth[CHANNEL_TYPE_CHROMA]), "MSB-extended bit depth for chroma channel (--MSBExtendedBitDepthC) must be greater than or equal to input bit depth for chroma channel (--InputBitDepthC)" );
 
   xConfirmPara( m_log2SaoOffsetScale[CHANNEL_TYPE_LUMA]   > (m_internalBitDepth[CHANNEL_TYPE_LUMA  ]<10?0:(m_internalBitDepth[CHANNEL_TYPE_LUMA  ]-10)), "SaoLumaOffsetBitShift must be in the range of 0 to InternalBitDepth-10, inclusive");
-  xConfirmPara( m_log2SaoOffsetScale[CHANNEL_TYPE_CHROMA] > (m_internalBitDepth[CHANNEL_TYPE_CHROMA]<10?0:(m_internalBitDepth[CHANNEL_TYPE_CHROMA]-10)), "SaoChromaOffsetBitShift must be in the range of 0 to InternalBitDepthC-10, inclusive");
+  xConfirmPara( m_log2SaoOffsetScale[CHANNEL_TYPE_CHROMA] > (m_internalBitDepth[CHANNEL_TYPE_CHROMA]<10?0:(m_internalBitDepth[CHANNEL_TYPE_CHROMA]-10)), "SaoChromaOffsetBitShift must be in the range of 0 to InternalBitDepth-10, inclusive");
 
   xConfirmPara( m_chromaFormatIDC >= NUM_CHROMA_FORMAT,                                     "ChromaFormatIDC must be either 400, 420, 422 or 444" );
   std::string sTempIPCSC="InputColourSpaceConvert must be empty, "+getListOfColourSpaceConverts(true);
   xConfirmPara( m_inputColourSpaceConvert >= NUMBER_INPUT_COLOUR_SPACE_CONVERSIONS,         sTempIPCSC.c_str() );
   xConfirmPara( m_InputChromaFormatIDC >= NUM_CHROMA_FORMAT,                                "InputChromaFormatIDC must be either 400, 420, 422 or 444" );
   xConfirmPara( m_iFrameRate <= 0,                                                          "Frame rate must be more than 1" );
-  xConfirmPara( m_temporalSubsampleRatio < 1,                                               "Temporal subsample rate must be no less than 1" );
   xConfirmPara( m_framesToBeEncoded <= 0,                                                   "Total Number Of Frames encoded must be more than 0" );
   xConfirmPara( m_framesToBeEncoded < m_switchPOC,                                          "debug POC out of range" );
 
   xConfirmPara( m_iGOPSize < 1 ,                                                            "GOP Size must be greater or equal to 1" );
   xConfirmPara( m_iGOPSize > 1 &&  m_iGOPSize % 2,                                          "GOP Size must be a multiple of 2, if GOP Size is greater than 1" );
   xConfirmPara( (m_iIntraPeriod > 0 && m_iIntraPeriod < m_iGOPSize) || m_iIntraPeriod == 0, "Intra period must be more than GOP size, or -1 , not 0" );
+  xConfirmPara( m_drapPeriod < 0,                                                           "DRAP period must be greater or equal to 0" );
   xConfirmPara( m_iDecodingRefreshType < 0 || m_iDecodingRefreshType > 3,                   "Decoding Refresh Type must be comprised between 0 and 3 included" );
+#if HEVC_SEI
   if(m_iDecodingRefreshType == 3)
   {
     xConfirmPara( !m_recoveryPointSEIEnabled,                                               "When using RecoveryPointSEI messages as RA points, recoveryPointSEI must be enabled" );
   }
+#endif
 
   if (m_isField)
   {
-    if (!m_pictureTimingSEIEnabled)
+    if (!m_frameFieldInfoSEIEnabled)
     {
-      msg( WARNING, "****************************************************************************\n");
-      msg( WARNING, "** WARNING: Picture Timing SEI should be enabled for field coding!        **\n");
-      msg( WARNING, "****************************************************************************\n");
+      msg( WARNING, "*************************************************************************************\n");
+      msg( WARNING, "** WARNING: Frame field information SEI should be enabled for field coding!        **\n");
+      msg( WARNING, "*************************************************************************************\n");
     }
   }
-
-  if(m_crossComponentPredictionEnabledFlag && (m_chromaFormatIDC != CHROMA_444))
+  if ( m_pictureTimingSEIEnabled && (!m_bufferingPeriodSEIEnabled))
   {
     msg( WARNING, "****************************************************************************\n");
-    msg( WARNING, "** WARNING: Cross-component prediction is specified for 4:4:4 format only **\n");
+    msg( WARNING, "** WARNING: Picture Timing SEI requires Buffering Period SEI. Disabling.  **\n");
     msg( WARNING, "****************************************************************************\n");
-
-    m_crossComponentPredictionEnabledFlag = false;
+    m_pictureTimingSEIEnabled = false;
   }
 
-  if ( m_CUTransquantBypassFlagForce && m_bUseHADME )
+  if(m_crossComponentPredictionEnabledFlag && (m_chromaFormatIDC != CHROMA_444))
   {
     msg( WARNING, "****************************************************************************\n");
-    msg( WARNING, "** WARNING: --HadamardME has been disabled due to the enabling of         **\n");
-    msg( WARNING, "**          --CUTransquantBypassFlagForce                                 **\n");
+    msg( WARNING, "** WARNING: Cross-component prediction is specified for 4:4:4 format only **\n");
     msg( WARNING, "****************************************************************************\n");
 
-    m_bUseHADME = false; // this has been disabled so that the lambda is calculated slightly differently for lossless modes (as a result of JCTVC-R0104).
+    m_crossComponentPredictionEnabledFlag = false;
   }
 
+  xConfirmPara( m_bufferingPeriodSEIEnabled == true && m_RCCpbSize == 0,  "RCCpbSize must be greater than zero, when buffering period SEI is enabled" );
+
   xConfirmPara (m_log2MaxTransformSkipBlockSize < 2, "Transform Skip Log2 Max Size must be at least 2 (4x4)");
 
 
@@ -2216,27 +2427,39 @@ bool EncAppCfg::xCheckParameter()
 #if SHARP_LUMA_DELTA_QP
   xConfirmPara( m_lumaLevelToDeltaQPMapping.mode && m_uiDeltaQpRD > 0,                      "Luma-level-based Delta QP cannot be used together with slice level multiple-QP optimization\n" );
 #endif
-  if (m_lumaLevelToDeltaQPMapping.mode && m_lumaReshapeEnable)
+  if (m_lumaLevelToDeltaQPMapping.mode && m_lmcsEnabled)
   {
-    msg(WARNING, "For HDR-PQ, reshaper should be used mutual-exclusively with Luma-level-based Delta QP. If use luma DQP, turn reshaper off.\n");
-    m_lumaReshapeEnable = false;
+    msg(WARNING, "For HDR-PQ, LMCS should be used mutual-exclusively with Luma-level-based Delta QP. If use LMCS, turn lumaDQP off.\n");
+    m_lumaLevelToDeltaQPMapping.mode = LUMALVL_TO_DQP_DISABLED;
   }
-  if (!m_lumaReshapeEnable)
+  if (!m_lmcsEnabled)
   {
     m_reshapeSignalType = RESHAPE_SIGNAL_NULL;
     m_intraCMD = 0;
   }
-  if (m_lumaReshapeEnable && m_reshapeSignalType == RESHAPE_SIGNAL_PQ)
+  if (m_lmcsEnabled && m_reshapeSignalType == RESHAPE_SIGNAL_PQ)
   {
     m_intraCMD = 1;
   }
-  else if (m_lumaReshapeEnable && m_reshapeSignalType == RESHAPE_SIGNAL_SDR)
+  else if (m_lmcsEnabled && (m_reshapeSignalType == RESHAPE_SIGNAL_SDR || m_reshapeSignalType == RESHAPE_SIGNAL_HLG))
   {
     m_intraCMD = 0;
   }
   else
   {
-    m_lumaReshapeEnable = false;
+    m_lmcsEnabled = false;
+  }
+  if (m_lmcsEnabled)
+  {
+    xConfirmPara(m_updateCtrl < 0, "Min. LMCS Update Control is 0");
+    xConfirmPara(m_updateCtrl > 2, "Max. LMCS Update Control is 2");
+    xConfirmPara(m_adpOption < 0, "Min. LMCS Adaptation Option is 0");
+    xConfirmPara(m_adpOption > 4, "Max. LMCS Adaptation Option is 4");
+    xConfirmPara(m_initialCW < 0, "Min. Initial Total Codeword is 0");
+    xConfirmPara(m_initialCW > 1023, "Max. Initial Total Codeword is 1023");
+    xConfirmPara(m_CSoffset < -7, "Min. LMCS Offset value is -7");
+    xConfirmPara(m_CSoffset > 7, "Max. LMCS Offset value is 7");
+    if (m_updateCtrl > 0 && m_adpOption > 2) { m_adpOption -= 2; }
   }
 
   xConfirmPara( m_cbQpOffset < -12,   "Min. Chroma Cb QP Offset is -12" );
@@ -2247,7 +2470,20 @@ bool EncAppCfg::xCheckParameter()
   xConfirmPara( m_cbQpOffsetDualTree >  12,   "Max. Chroma Cb QP Offset for dual tree is  12" );
   xConfirmPara( m_crQpOffsetDualTree < -12,   "Min. Chroma Cr QP Offset for dual tree is -12" );
   xConfirmPara( m_crQpOffsetDualTree >  12,   "Max. Chroma Cr QP Offset for dual tree is  12" );
-
+  if (m_JointCbCrMode && (m_chromaFormatIDC == CHROMA_400))
+  {
+    msg( WARNING, "****************************************************************************\n");
+    msg( WARNING, "** WARNING: --JointCbCr has been disabled because the chromaFormat is 400 **\n");
+    msg( WARNING, "****************************************************************************\n");
+    m_JointCbCrMode = false;
+  }
+  if (m_JointCbCrMode)
+  {
+    xConfirmPara( m_cbCrQpOffset < -12, "Min. Joint Cb-Cr QP Offset is -12");
+    xConfirmPara( m_cbCrQpOffset >  12, "Max. Joint Cb-Cr QP Offset is  12");
+    xConfirmPara( m_cbCrQpOffsetDualTree < -12, "Min. Joint Cb-Cr QP Offset for dual tree is -12");
+    xConfirmPara( m_cbCrQpOffsetDualTree >  12, "Max. Joint Cb-Cr QP Offset for dual tree is  12");
+  }
   xConfirmPara( m_iQPAdaptationRange <= 0,                                                  "QP Adaptation Range must be more than 0" );
   if (m_iDecodingRefreshType == 2)
   {
@@ -2258,70 +2494,46 @@ bool EncAppCfg::xCheckParameter()
 
   xConfirmPara( m_uiMinQT[0] < 1<<MIN_CU_LOG2,                                              "Minimum QT size should be larger than or equal to 4");
   xConfirmPara( m_uiMinQT[1] < 1<<MIN_CU_LOG2,                                              "Minimum QT size should be larger than or equal to 4");
-  xConfirmPara( m_uiCTUSize < 16,                                                           "Maximum partition width size should be larger than or equal to 16");
-  xConfirmPara( m_uiCTUSize < 16,                                                           "Maximum partition height size should be larger than or equal to 16");
-  xConfirmPara( (m_iSourceWidth  % (1<<MIN_CU_LOG2))!=0,                                    "Resulting coded frame width must be a multiple of the minimum unit size");
-  xConfirmPara( (m_iSourceHeight % (1<<MIN_CU_LOG2))!=0,                                    "Resulting coded frame height must be a multiple of the minimum unit size");
-  xConfirmPara( (m_iSourceWidth  % (1<<MIN_CU_LOG2))!=0,                                    "Resulting coded frame width must be a multiple of the minimum unit size");
-  xConfirmPara( (m_iSourceHeight % (1<<MIN_CU_LOG2))!=0,                                    "Resulting coded frame height must be a multiple of the minimum unit size");
-  xConfirmPara( (m_iSourceWidth  % (1<<MIN_CU_LOG2))!=0,                                    "Resulting coded frame width must be a multiple of the minimum unit size");
-  xConfirmPara( (m_iSourceHeight % (1<<MIN_CU_LOG2))!=0,                                    "Resulting coded frame height must be a multiple of the minimum unit size");
+  xConfirmPara( m_uiCTUSize < 32,                                                           "CTUSize must be greater than or equal to 32");
+  xConfirmPara( m_uiCTUSize > 128,                                                          "CTUSize must be less than or equal to 128");
+  xConfirmPara( m_uiCTUSize != 32 && m_uiCTUSize != 64 && m_uiCTUSize != 128,               "CTUSize must be a power of 2 (32, 64, or 128)");
   xConfirmPara( m_uiMaxCUDepth < 1,                                                         "MaxPartitionDepth must be greater than zero");
   xConfirmPara( (m_uiMaxCUWidth  >> m_uiMaxCUDepth) < 4,                                    "Minimum partition width size should be larger than or equal to 8");
   xConfirmPara( (m_uiMaxCUHeight >> m_uiMaxCUDepth) < 4,                                    "Minimum partition height size should be larger than or equal to 8");
   xConfirmPara( m_uiMaxCUWidth < 16,                                                        "Maximum partition width size should be larger than or equal to 16");
   xConfirmPara( m_uiMaxCUHeight < 16,                                                       "Maximum partition height size should be larger than or equal to 16");
-  xConfirmPara( (m_iSourceWidth  % (m_uiMaxCUWidth  >> (m_uiMaxCUDepth-1)))!=0,             "Resulting coded frame width must be a multiple of the minimum CU size");
-  xConfirmPara( (m_iSourceHeight % (m_uiMaxCUHeight >> (m_uiMaxCUDepth-1)))!=0,             "Resulting coded frame height must be a multiple of the minimum CU size");
-
-#if MAX_TB_SIZE_SIGNALLING
+  xConfirmPara( (m_iSourceWidth  % (std::max(8, int(m_uiMaxCUWidth  >> (m_uiMaxCUDepth - 1))))) != 0, "Resulting coded frame width must be a multiple of Max(8, the minimum CU size)");
+  xConfirmPara( (m_iSourceHeight % (std::max(8, int(m_uiMaxCUHeight >> (m_uiMaxCUDepth - 1))))) != 0, "Resulting coded frame height must be a multiple of Max(8, the minimum CU size)");
   xConfirmPara( m_log2MaxTbSize > 6, "Log2MaxTbSize must be 6 or smaller." );
-#endif
+  xConfirmPara( m_log2MaxTbSize < 5,  "Log2MaxTbSize must be 5 or greater." );
   xConfirmPara( m_maxNumMergeCand < 1,  "MaxNumMergeCand must be 1 or greater.");
   xConfirmPara( m_maxNumMergeCand > MRG_MAX_NUM_CANDS, "MaxNumMergeCand must be no more than MRG_MAX_NUM_CANDS." );
-
+  xConfirmPara( m_maxNumTriangleCand > TRIANGLE_MAX_NUM_UNI_CANDS, "MaxNumTriangleCand must be no more than TRIANGLE_MAX_NUM_UNI_CANDS." );
+  xConfirmPara( m_maxNumTriangleCand > m_maxNumMergeCand, "MaxNumTriangleCand must be no more than MaxNumMergeCand." );
+  xConfirmPara( 0 < m_maxNumTriangleCand && m_maxNumTriangleCand < 2, "MaxNumTriangleCand must be no less than 2 unless MaxNumTriangleCand is 0." );
+  xConfirmPara( m_maxNumIBCMergeCand < 1, "MaxNumIBCMergeCand must be 1 or greater." );
+  xConfirmPara( m_maxNumIBCMergeCand > IBC_MRG_MAX_NUM_CANDS, "MaxNumIBCMergeCand must be no more than IBC_MRG_MAX_NUM_CANDS." );
   xConfirmPara( m_maxNumAffineMergeCand < 1, "MaxNumAffineMergeCand must be 1 or greater." );
   xConfirmPara( m_maxNumAffineMergeCand > AFFINE_MRG_MAX_NUM_CANDS, "MaxNumAffineMergeCand must be no more than AFFINE_MRG_MAX_NUM_CANDS." );
   if ( m_Affine == 0 )
   {
     m_maxNumAffineMergeCand = m_SubPuMvpMode;
+    if (m_PROF) msg(WARNING, "PROF is forcefully disabled when Affine is off \n");
+    m_PROF = false;
   }
 
   xConfirmPara( m_MTS < 0 || m_MTS > 3, "MTS must be greater than 0 smaller than 4" );
   xConfirmPara( m_MTSIntraMaxCand < 0 || m_MTSIntraMaxCand > 5, "m_MTSIntraMaxCand must be greater than 0 and smaller than 6" );
   xConfirmPara( m_MTSInterMaxCand < 0 || m_MTSInterMaxCand > 5, "m_MTSInterMaxCand must be greater than 0 and smaller than 6" );
   xConfirmPara( m_MTS != 0 && m_MTSImplicit != 0, "Both explicit and implicit MTS cannot be enabled at the same time" );
-  if( m_usePCM)
-  {
-    for (uint32_t channelType = 0; channelType < MAX_NUM_CHANNEL_TYPE; channelType++)
-    {
-      xConfirmPara(((m_MSBExtendedBitDepth[channelType] > m_internalBitDepth[channelType]) && m_bPCMInputBitDepthFlag), "PCM bit depth cannot be greater than internal bit depth (PCMInputBitDepthFlag cannot be used when InputBitDepth or MSBExtendedBitDepth > InternalBitDepth)");
-    }
-    xConfirmPara(  m_uiPCMLog2MinSize < 3,                                      "PCMLog2MinSize must be 3 or greater.");
-    xConfirmPara(  m_uiPCMLog2MinSize > 5,                                      "PCMLog2MinSize must be 5 or smaller.");
-    xConfirmPara(  m_pcmLog2MaxSize > 5,                                        "PCMLog2MaxSize must be 5 or smaller.");
-    xConfirmPara(  m_pcmLog2MaxSize < m_uiPCMLog2MinSize,                       "PCMLog2MaxSize must be equal to or greater than m_uiPCMLog2MinSize.");
-  }
 
-  if (m_sliceMode!=NO_SLICES)
+  if (m_useBDPCM)
   {
-    xConfirmPara( m_sliceArgument < 1 ,         "SliceArgument should be larger than or equal to 1" );
+    xConfirmPara(!m_useTransformSkip, "BDPCM cannot be used when transform skip is disabled.");
   }
 
-#if HEVC_DEPENDENT_SLICES
-  if (m_sliceSegmentMode!=NO_SLICES)
-  {
-    xConfirmPara( m_sliceSegmentArgument < 1 ,         "SliceSegmentArgument should be larger than or equal to 1" );
-  }
-#endif
 
-#if HEVC_TILES_WPP
-  bool tileFlag = (m_numTileColumnsMinus1 > 0 || m_numTileRowsMinus1 > 0 );
-  if (m_profile!=Profile::HIGHTHROUGHPUTREXT)
-  {
-    xConfirmPara( tileFlag && m_entropyCodingSyncEnabledFlag, "Tiles and entropy-coding-sync (Wavefronts) can not be applied together, except in the High Throughput Intra 4:4:4 16 profile");
-  }
-#endif
+
 
   xConfirmPara( m_iSourceWidth  % SPS::getWinUnitX(m_chromaFormatIDC) != 0, "Picture width must be an integer multiple of the specified chroma subsampling");
   xConfirmPara( m_iSourceHeight % SPS::getWinUnitY(m_chromaFormatIDC) != 0, "Picture height must be an integer multiple of the specified chroma subsampling");
@@ -2334,15 +2546,6 @@ bool EncAppCfg::xCheckParameter()
   xConfirmPara( m_confWinTop    % SPS::getWinUnitY(m_chromaFormatIDC) != 0, "Top conformance window offset must be an integer multiple of the specified chroma subsampling");
   xConfirmPara( m_confWinBottom % SPS::getWinUnitY(m_chromaFormatIDC) != 0, "Bottom conformance window offset must be an integer multiple of the specified chroma subsampling");
 
-  xConfirmPara( m_defaultDisplayWindowFlag && !m_vuiParametersPresentFlag, "VUI needs to be enabled for default display window");
-
-  if (m_defaultDisplayWindowFlag)
-  {
-    xConfirmPara( m_defDispWinLeftOffset   % SPS::getWinUnitX(m_chromaFormatIDC) != 0, "Left default display window offset must be an integer multiple of the specified chroma subsampling");
-    xConfirmPara( m_defDispWinRightOffset  % SPS::getWinUnitX(m_chromaFormatIDC) != 0, "Right default display window offset must be an integer multiple of the specified chroma subsampling");
-    xConfirmPara( m_defDispWinTopOffset    % SPS::getWinUnitY(m_chromaFormatIDC) != 0, "Top default display window offset must be an integer multiple of the specified chroma subsampling");
-    xConfirmPara( m_defDispWinBottomOffset % SPS::getWinUnitY(m_chromaFormatIDC) != 0, "Bottom default display window offset must be an integer multiple of the specified chroma subsampling");
-  }
 
   // max CU width and height should be power of 2
   uint32_t ui = m_uiMaxCUWidth;
@@ -2373,7 +2576,11 @@ bool EncAppCfg::xCheckParameter()
     m_GOPList[0].m_betaOffsetDiv2 = 0;
     m_GOPList[0].m_tcOffsetDiv2 = 0;
     m_GOPList[0].m_POC = 1;
-    m_GOPList[0].m_numRefPicsActive = 4;
+    m_RPLList0[0] = RPLEntry();
+    m_RPLList1[0] = RPLEntry();
+    m_RPLList0[0].m_POC = m_RPLList1[0].m_POC = 1;
+    m_RPLList0[0].m_numRefPicsActive = 4;
+    m_GOPList[0].m_numRefPicsActive0 = 4;
   }
   else
   {
@@ -2385,8 +2592,7 @@ bool EncAppCfg::xCheckParameter()
   bool errorGOP=false;
   int checkGOP=1;
   int numRefs = m_isField ? 2 : 1;
-  int refList[MAX_NUM_REF_PICS+1];
-  refList[0]=0;
+  int refList[MAX_NUM_REF_PICS+1] = {0};
   if(m_isField)
   {
     refList[1] = 1;
@@ -2430,202 +2636,253 @@ bool EncAppCfg::xCheckParameter()
   xConfirmPara( abs(m_sliceChromaQpOffsetIntraOrPeriodic[1]  + m_crQpOffset ) > 12, "Intra/periodic Cr QP Offset, when combined with the PPS Cr offset, exceeds supported range (-12 to 12)" );
 #endif
 
-  m_extraRPSs=0;
+  xConfirmPara( m_fastLocalDualTreeMode < 0 || m_fastLocalDualTreeMode > 2, "FastLocalDualTreeMode must be in range [0..2]" );
+
+  int extraRPLs = 0;
   //start looping through frames in coding order until we can verify that the GOP structure is correct.
-  while(!verifiedGOP&&!errorGOP)
+  while (!verifiedGOP && !errorGOP)
   {
-    int curGOP = (checkGOP-1)%m_iGOPSize;
-    int curPOC = ((checkGOP - 1) / m_iGOPSize)*m_iGOPSize * multipleFactor + m_GOPList[curGOP].m_POC;
-    if(m_GOPList[curGOP].m_POC<0)
+    int curGOP = (checkGOP - 1) % m_iGOPSize;
+    int curPOC = ((checkGOP - 1) / m_iGOPSize)*m_iGOPSize * multipleFactor + m_RPLList0[curGOP].m_POC;
+    if (m_RPLList0[curGOP].m_POC < 0 || m_RPLList1[curGOP].m_POC < 0)
     {
-      msg( WARNING, "\nError: found fewer Reference Picture Sets than GOPSize\n");
-      errorGOP=true;
+      msg(WARNING, "\nError: found fewer Reference Picture Sets than GOPSize\n");
+      errorGOP = true;
     }
     else
     {
       //check that all reference pictures are available, or have a POC < 0 meaning they might be available in the next GOP.
       bool beforeI = false;
-      for(int i = 0; i< m_GOPList[curGOP].m_numRefPics; i++)
+      for (int i = 0; i< m_RPLList0[curGOP].m_numRefPics; i++)
       {
-        int absPOC = curPOC+m_GOPList[curGOP].m_referencePics[i];
-        if(absPOC < 0)
+        int absPOC = curPOC - m_RPLList0[curGOP].m_deltaRefPics[i];
+        if (absPOC < 0)
         {
-          beforeI=true;
+          beforeI = true;
         }
         else
         {
-          bool found=false;
-          for(int j=0; j<numRefs; j++)
+          bool found = false;
+          for (int j = 0; j<numRefs; j++)
           {
-            if(refList[j]==absPOC)
+            if (refList[j] == absPOC)
             {
-              found=true;
-              for(int k=0; k<m_iGOPSize; k++)
+              found = true;
+              for (int k = 0; k<m_iGOPSize; k++)
               {
-                if (absPOC % (m_iGOPSize * multipleFactor) == m_GOPList[k].m_POC % (m_iGOPSize * multipleFactor))
+                if (absPOC % (m_iGOPSize * multipleFactor) == m_RPLList0[k].m_POC % (m_iGOPSize * multipleFactor))
                 {
-                  if(m_GOPList[k].m_temporalId==m_GOPList[curGOP].m_temporalId)
+                  if (m_RPLList0[k].m_temporalId == m_RPLList0[curGOP].m_temporalId)
                   {
-                    m_GOPList[k].m_refPic = true;
+                    m_RPLList0[k].m_refPic = true;
                   }
-                  m_GOPList[curGOP].m_usedByCurrPic[i]=m_GOPList[k].m_temporalId<=m_GOPList[curGOP].m_temporalId;
                 }
               }
             }
           }
-          if(!found)
+          if (!found)
           {
-            msg( WARNING, "\nError: ref pic %d is not available for GOP frame %d\n",m_GOPList[curGOP].m_referencePics[i],curGOP+1);
-            errorGOP=true;
+            msg(WARNING, "\nError: ref pic %d is not available for GOP frame %d\n", m_RPLList0[curGOP].m_deltaRefPics[i], curGOP + 1);
+            errorGOP = true;
           }
         }
       }
-      if(!beforeI&&!errorGOP)
+      if (!beforeI && !errorGOP)
       {
         //all ref frames were present
-        if(!isOK[curGOP])
+        if (!isOK[curGOP])
         {
           numOK++;
-          isOK[curGOP]=true;
-          if(numOK==m_iGOPSize)
+          isOK[curGOP] = true;
+          if (numOK == m_iGOPSize)
           {
-            verifiedGOP=true;
+            verifiedGOP = true;
           }
         }
       }
       else
       {
-        //create a new GOPEntry for this frame containing all the reference pictures that were available (POC > 0)
-        m_GOPList[m_iGOPSize+m_extraRPSs]=m_GOPList[curGOP];
-        int newRefs=0;
-        for(int i = 0; i< m_GOPList[curGOP].m_numRefPics; i++)
+        //create a new RPLEntry for this frame containing all the reference pictures that were available (POC >= 0)
+        m_RPLList0[m_iGOPSize + extraRPLs] = m_RPLList0[curGOP];
+        m_RPLList1[m_iGOPSize + extraRPLs] = m_RPLList1[curGOP];
+        int newRefs0 = 0;
+        for (int i = 0; i< m_RPLList0[curGOP].m_numRefPics; i++)
         {
-          int absPOC = curPOC+m_GOPList[curGOP].m_referencePics[i];
-          if(absPOC>=0)
+          int absPOC = curPOC - m_RPLList0[curGOP].m_deltaRefPics[i];
+          if (absPOC >= 0)
           {
-            m_GOPList[m_iGOPSize+m_extraRPSs].m_referencePics[newRefs]=m_GOPList[curGOP].m_referencePics[i];
-            m_GOPList[m_iGOPSize+m_extraRPSs].m_usedByCurrPic[newRefs]=m_GOPList[curGOP].m_usedByCurrPic[i];
-            newRefs++;
+            m_RPLList0[m_iGOPSize + extraRPLs].m_deltaRefPics[newRefs0] = m_RPLList0[curGOP].m_deltaRefPics[i];
+            newRefs0++;
           }
         }
-        int numPrefRefs = m_GOPList[curGOP].m_numRefPicsActive;
+        int numPrefRefs0 = m_RPLList0[curGOP].m_numRefPicsActive;
 
-        for(int offset = -1; offset>-checkGOP; offset--)
+        int newRefs1 = 0;
+        for (int i = 0; i< m_RPLList1[curGOP].m_numRefPics; i++)
+        {
+          int absPOC = curPOC - m_RPLList1[curGOP].m_deltaRefPics[i];
+          if (absPOC >= 0)
+          {
+            m_RPLList1[m_iGOPSize + extraRPLs].m_deltaRefPics[newRefs1] = m_RPLList1[curGOP].m_deltaRefPics[i];
+            newRefs1++;
+          }
+        }
+        int numPrefRefs1 = m_RPLList1[curGOP].m_numRefPicsActive;
+
+        for (int offset = -1; offset>-checkGOP; offset--)
         {
           //step backwards in coding order and include any extra available pictures we might find useful to replace the ones with POC < 0.
-          int offGOP = (checkGOP-1+offset)%m_iGOPSize;
-          int offPOC = ((checkGOP - 1 + offset) / m_iGOPSize)*(m_iGOPSize * multipleFactor) + m_GOPList[offGOP].m_POC;
-          if(offPOC>=0&&m_GOPList[offGOP].m_temporalId<=m_GOPList[curGOP].m_temporalId)
+          int offGOP = (checkGOP - 1 + offset) % m_iGOPSize;
+          int offPOC = ((checkGOP - 1 + offset) / m_iGOPSize)*(m_iGOPSize * multipleFactor) + m_RPLList0[offGOP].m_POC;
+          if (offPOC >= 0 && m_RPLList0[offGOP].m_temporalId <= m_RPLList0[curGOP].m_temporalId)
           {
-            bool newRef=false;
-            for(int i=0; i<numRefs; i++)
+            bool newRef = false;
+            for (int i = 0; i<(newRefs0 + newRefs1); i++)
             {
-              if(refList[i]==offPOC)
+              if (refList[i] == offPOC)
               {
-                newRef=true;
+                newRef = true;
               }
             }
-            for(int i=0; i<newRefs; i++)
+            for (int i = 0; i<newRefs0; i++)
             {
-              if(m_GOPList[m_iGOPSize+m_extraRPSs].m_referencePics[i]==offPOC-curPOC)
+              if (m_RPLList0[m_iGOPSize + extraRPLs].m_deltaRefPics[i] == curPOC - offPOC)
               {
-                newRef=false;
+                newRef = false;
               }
             }
-            if(newRef)
+            if (newRef)
             {
-              int insertPoint=newRefs;
+              int insertPoint = newRefs0;
               //this picture can be added, find appropriate place in list and insert it.
-              if(m_GOPList[offGOP].m_temporalId==m_GOPList[curGOP].m_temporalId)
+              if (m_RPLList0[offGOP].m_temporalId == m_RPLList0[curGOP].m_temporalId)
               {
-                m_GOPList[offGOP].m_refPic = true;
+                m_RPLList0[offGOP].m_refPic = true;
               }
-              for(int j=0; j<newRefs; j++)
+              for (int j = 0; j<newRefs0; j++)
               {
-                if(m_GOPList[m_iGOPSize+m_extraRPSs].m_referencePics[j]<offPOC-curPOC||m_GOPList[m_iGOPSize+m_extraRPSs].m_referencePics[j]>0)
+                if (m_RPLList0[m_iGOPSize + extraRPLs].m_deltaRefPics[j] > curPOC - offPOC && curPOC - offPOC > 0)
                 {
                   insertPoint = j;
                   break;
                 }
               }
-              int prev = offPOC-curPOC;
-              int prevUsed = m_GOPList[offGOP].m_temporalId<=m_GOPList[curGOP].m_temporalId;
-              for(int j=insertPoint; j<newRefs+1; j++)
+              int prev = curPOC - offPOC;
+              for (int j = insertPoint; j<newRefs0 + 1; j++)
               {
-                int newPrev = m_GOPList[m_iGOPSize+m_extraRPSs].m_referencePics[j];
-                int newUsed = m_GOPList[m_iGOPSize+m_extraRPSs].m_usedByCurrPic[j];
-                m_GOPList[m_iGOPSize+m_extraRPSs].m_referencePics[j]=prev;
-                m_GOPList[m_iGOPSize+m_extraRPSs].m_usedByCurrPic[j]=prevUsed;
-                prevUsed=newUsed;
-                prev=newPrev;
+                int newPrev = m_RPLList0[m_iGOPSize + extraRPLs].m_deltaRefPics[j];
+                m_RPLList0[m_iGOPSize + extraRPLs].m_deltaRefPics[j] = prev;
+                prev = newPrev;
               }
-              newRefs++;
+              newRefs0++;
             }
           }
-          if(newRefs>=numPrefRefs)
+          if (newRefs0 >= numPrefRefs0)
           {
             break;
           }
         }
-        m_GOPList[m_iGOPSize+m_extraRPSs].m_numRefPics=newRefs;
-        m_GOPList[m_iGOPSize+m_extraRPSs].m_POC = curPOC;
-        if (m_extraRPSs == 0)
-        {
-          m_GOPList[m_iGOPSize+m_extraRPSs].m_interRPSPrediction = 0;
-          m_GOPList[m_iGOPSize+m_extraRPSs].m_numRefIdc = 0;
-        }
-        else
+
+        for (int offset = -1; offset>-checkGOP; offset--)
         {
-          int rIdx =  m_iGOPSize + m_extraRPSs - 1;
-          int refPOC = m_GOPList[rIdx].m_POC;
-          int refPics = m_GOPList[rIdx].m_numRefPics;
-          int newIdc=0;
-          for(int i = 0; i<= refPics; i++)
+          //step backwards in coding order and include any extra available pictures we might find useful to replace the ones with POC < 0.
+          int offGOP = (checkGOP - 1 + offset) % m_iGOPSize;
+          int offPOC = ((checkGOP - 1 + offset) / m_iGOPSize)*(m_iGOPSize * multipleFactor) + m_RPLList1[offGOP].m_POC;
+          if (offPOC >= 0 && m_RPLList1[offGOP].m_temporalId <= m_RPLList1[curGOP].m_temporalId)
           {
-            int deltaPOC = ((i != refPics)? m_GOPList[rIdx].m_referencePics[i] : 0);  // check if the reference abs POC is >= 0
-            int absPOCref = refPOC+deltaPOC;
-            int refIdc = 0;
-            for (int j = 0; j < m_GOPList[m_iGOPSize+m_extraRPSs].m_numRefPics; j++)
+            bool newRef = false;
+            for (int i = 0; i<(newRefs0 + newRefs1); i++)
             {
-              if ( (absPOCref - curPOC) == m_GOPList[m_iGOPSize+m_extraRPSs].m_referencePics[j])
+              if (refList[i] == offPOC)
               {
-                if (m_GOPList[m_iGOPSize+m_extraRPSs].m_usedByCurrPic[j])
-                {
-                  refIdc = 1;
-                }
-                else
+                newRef = true;
+              }
+            }
+            for (int i = 0; i<newRefs1; i++)
+            {
+              if (m_RPLList1[m_iGOPSize + extraRPLs].m_deltaRefPics[i] == curPOC - offPOC)
+              {
+                newRef = false;
+              }
+            }
+            if (newRef)
+            {
+              int insertPoint = newRefs1;
+              //this picture can be added, find appropriate place in list and insert it.
+              if (m_RPLList1[offGOP].m_temporalId == m_RPLList1[curGOP].m_temporalId)
+              {
+                m_RPLList1[offGOP].m_refPic = true;
+              }
+              for (int j = 0; j<newRefs1; j++)
+              {
+                if (m_RPLList1[m_iGOPSize + extraRPLs].m_deltaRefPics[j] > curPOC - offPOC && curPOC - offPOC > 0)
                 {
-                  refIdc = 2;
+                  insertPoint = j;
+                  break;
                 }
               }
+              int prev = curPOC - offPOC;
+              for (int j = insertPoint; j<newRefs1 + 1; j++)
+              {
+                int newPrev = m_RPLList1[m_iGOPSize + extraRPLs].m_deltaRefPics[j];
+                m_RPLList1[m_iGOPSize + extraRPLs].m_deltaRefPics[j] = prev;
+                prev = newPrev;
+              }
+              newRefs1++;
             }
-            m_GOPList[m_iGOPSize+m_extraRPSs].m_refIdc[newIdc]=refIdc;
-            newIdc++;
           }
-          m_GOPList[m_iGOPSize+m_extraRPSs].m_interRPSPrediction = 1;
-          m_GOPList[m_iGOPSize+m_extraRPSs].m_numRefIdc = newIdc;
-          m_GOPList[m_iGOPSize+m_extraRPSs].m_deltaRPS = refPOC - m_GOPList[m_iGOPSize+m_extraRPSs].m_POC;
+          if (newRefs1 >= numPrefRefs1)
+          {
+            break;
+          }
         }
-        curGOP=m_iGOPSize+m_extraRPSs;
-        m_extraRPSs++;
+
+        m_RPLList0[m_iGOPSize + extraRPLs].m_numRefPics = newRefs0;
+        m_RPLList0[m_iGOPSize + extraRPLs].m_numRefPicsActive = min(m_RPLList0[m_iGOPSize + extraRPLs].m_numRefPics, m_RPLList0[m_iGOPSize + extraRPLs].m_numRefPicsActive);
+        m_RPLList1[m_iGOPSize + extraRPLs].m_numRefPics = newRefs1;
+        m_RPLList1[m_iGOPSize + extraRPLs].m_numRefPicsActive = min(m_RPLList1[m_iGOPSize + extraRPLs].m_numRefPics, m_RPLList1[m_iGOPSize + extraRPLs].m_numRefPicsActive);
+        curGOP = m_iGOPSize + extraRPLs;
+        extraRPLs++;
       }
-      numRefs=0;
-      for(int i = 0; i< m_GOPList[curGOP].m_numRefPics; i++)
+      numRefs = 0;
+      for (int i = 0; i< m_RPLList0[curGOP].m_numRefPics; i++)
       {
-        int absPOC = curPOC+m_GOPList[curGOP].m_referencePics[i];
-        if(absPOC >= 0)
+        int absPOC = curPOC - m_RPLList0[curGOP].m_deltaRefPics[i];
+        if (absPOC >= 0)
         {
-          refList[numRefs]=absPOC;
+          refList[numRefs] = absPOC;
           numRefs++;
         }
       }
-      refList[numRefs]=curPOC;
+      for (int i = 0; i< m_RPLList1[curGOP].m_numRefPics; i++)
+      {
+        int absPOC = curPOC - m_RPLList1[curGOP].m_deltaRefPics[i];
+        if (absPOC >= 0)
+        {
+          bool alreadyExist = false;
+          for (int j = 0; !alreadyExist && j < numRefs; j++)
+          {
+            if (refList[j] == absPOC)
+            {
+              alreadyExist = true;
+            }
+          }
+          if (!alreadyExist)
+          {
+            refList[numRefs] = absPOC;
+            numRefs++;
+          }
+        }
+      }
+      refList[numRefs] = curPOC;
       numRefs++;
     }
     checkGOP++;
   }
-  xConfirmPara(errorGOP,"Invalid GOP structure given");
+  xConfirmPara(errorGOP, "Invalid GOP structure given");
+
   m_maxTempLayer = 1;
+
   for(int i=0; i<m_iGOPSize; i++)
   {
     if(m_GOPList[i].m_temporalId >= m_maxTempLayer)
@@ -2641,9 +2898,19 @@ bool EncAppCfg::xCheckParameter()
   }
   for(int i=0; i<m_iGOPSize; i++)
   {
-    if(m_GOPList[i].m_numRefPics+1 > m_maxDecPicBuffering[m_GOPList[i].m_temporalId])
+    int numRefPic = m_RPLList0[i].m_numRefPics;
+    for (int tmp = 0; tmp < m_RPLList1[i].m_numRefPics; tmp++)
+    {
+      bool notSame = true;
+      for (int jj = 0; notSame && jj < m_RPLList0[i].m_numRefPics; jj++)
+      {
+        if (m_RPLList1[i].m_deltaRefPics[tmp] == m_RPLList0[i].m_deltaRefPics[jj]) notSame = false;
+      }
+      if (notSame) numRefPic++;
+    }
+    if (numRefPic + 1 > m_maxDecPicBuffering[m_GOPList[i].m_temporalId])
     {
-      m_maxDecPicBuffering[m_GOPList[i].m_temporalId] = m_GOPList[i].m_numRefPics + 1;
+      m_maxDecPicBuffering[m_GOPList[i].m_temporalId] = numRefPic + 1;
     }
     int highestDecodingNumberWithLowerPOC = 0;
     for(int j=0; j<m_iGOPSize; j++)
@@ -2667,6 +2934,7 @@ bool EncAppCfg::xCheckParameter()
       m_numReorderPics[m_GOPList[i].m_temporalId] = numReorder;
     }
   }
+
   for(int i=0; i<MAX_TLAYER-1; i++)
   {
     // a lower layer can not have higher value of m_numReorderPics than a higher layer
@@ -2692,91 +2960,267 @@ bool EncAppCfg::xCheckParameter()
     m_maxDecPicBuffering[MAX_TLAYER-1] = m_numReorderPics[MAX_TLAYER-1] + 1;
   }
 
-  if(m_vuiParametersPresentFlag && m_bitstreamRestrictionFlag)
+  if( m_picPartitionFlag ) 
   {
-    int PicSizeInSamplesY =  m_iSourceWidth * m_iSourceHeight;
-#if HEVC_TILES_WPP
-    if(tileFlag)
+    PPS pps;
+    uint32_t colIdx, rowIdx;
+    uint32_t remSize;
+ 
+    pps.setPicWidthInLumaSamples( m_iSourceWidth );
+    pps.setPicHeightInLumaSamples( m_iSourceHeight );
+    pps.setLog2CtuSize( floorLog2(m_uiCTUSize) );
+
+    // set default tile column if not provided
+    if( m_tileColumnWidth.size() == 0 ) 
+    {
+      m_tileColumnWidth.push_back( pps.getPicWidthInCtu() );
+    }
+    // set default tile row if not provided
+    if( m_tileRowHeight.size() == 0 ) 
+    {
+      m_tileRowHeight.push_back( pps.getPicHeightInCtu() );
+    }
+
+    // remove any tile columns that can be specified implicitly
+    while( m_tileColumnWidth.size() > 1 && m_tileColumnWidth.end()[-1] == m_tileColumnWidth.end()[-2] )
+    {
+      m_tileColumnWidth.pop_back();
+    }
+
+    // remove any tile rows that can be specified implicitly
+    while( m_tileRowHeight.size() > 1 && m_tileRowHeight.end()[-1] == m_tileRowHeight.end()[-2] )
+    {
+      m_tileRowHeight.pop_back();
+    }
+
+    // setup tiles in temporary PPS structure
+    remSize = pps.getPicWidthInCtu();
+    for( colIdx=0; remSize > 0 && colIdx<m_tileColumnWidth.size(); colIdx++ )
     {
-      int maxTileWidth = 0;
-      int maxTileHeight = 0;
-      int widthInCU = (m_iSourceWidth % m_uiMaxCUWidth) ? m_iSourceWidth/m_uiMaxCUWidth + 1: m_iSourceWidth/m_uiMaxCUWidth;
-      int heightInCU = (m_iSourceHeight % m_uiMaxCUHeight) ? m_iSourceHeight/m_uiMaxCUHeight + 1: m_iSourceHeight/m_uiMaxCUHeight;
-      if(m_tileUniformSpacingFlag)
+      xConfirmPara(m_tileColumnWidth[ colIdx ] == 0, "Tile column widths cannot be equal to 0");
+      m_tileColumnWidth[ colIdx ] = std::min( remSize, m_tileColumnWidth[ colIdx ]);
+      pps.addTileColumnWidth( m_tileColumnWidth[ colIdx ] );
+      remSize -= m_tileColumnWidth[ colIdx ];
+    }
+    m_tileColumnWidth.resize( colIdx );
+    pps.setNumExpTileColumns( (uint32_t)m_tileColumnWidth.size() );    
+    remSize = pps.getPicHeightInCtu();
+    for( rowIdx=0; remSize > 0 && rowIdx<m_tileRowHeight.size(); rowIdx++ )
+    {
+      xConfirmPara(m_tileRowHeight[ rowIdx ] == 0, "Tile row heights cannot be equal to 0");
+      m_tileRowHeight[ rowIdx ] = std::min( remSize, m_tileRowHeight[ rowIdx ]);
+      pps.addTileRowHeight( m_tileRowHeight[ rowIdx ] );
+      remSize -= m_tileRowHeight[ rowIdx ];
+    }
+    m_tileRowHeight.resize( rowIdx );
+    pps.setNumExpTileRows( (uint32_t)m_tileRowHeight.size() );
+    pps.initTiles();
+    xConfirmPara(pps.getNumTileColumns() > getMaxTileColsByLevel( m_level ), "Number of tile columns exceeds maximum number allowed according to specified level");
+    xConfirmPara(pps.getNumTileRows()    > getMaxTileRowsByLevel( m_level ), "Number of tile rows exceeds maximum number allowed according to specified level");
+    m_numTileCols = pps.getNumTileColumns();
+    m_numTileRows = pps.getNumTileRows();
+
+    // rectangular slices
+    if( !m_rasterSliceFlag )
+    {
+      uint32_t sliceIdx;
+      bool     needTileIdxDelta = false;
+
+      // generate slice list for the simplified fixed-rectangular-slice-size config option
+      if( m_rectSliceFixedWidth > 0 && m_rectSliceFixedHeight > 0 )
       {
-        maxTileWidth = m_uiMaxCUWidth*((widthInCU+m_numTileColumnsMinus1)/(m_numTileColumnsMinus1+1));
-        maxTileHeight = m_uiMaxCUHeight*((heightInCU+m_numTileRowsMinus1)/(m_numTileRowsMinus1+1));
-        // if only the last tile-row is one treeblock higher than the others
-        // the maxTileHeight becomes smaller if the last row of treeblocks has lower height than the others
-        if(!((heightInCU-1)%(m_numTileRowsMinus1+1)))
-        {
-          maxTileHeight = maxTileHeight - m_uiMaxCUHeight + (m_iSourceHeight % m_uiMaxCUHeight);
-        }
-        // if only the last tile-column is one treeblock wider than the others
-        // the maxTileWidth becomes smaller if the last column of treeblocks has lower width than the others
-        if(!((widthInCU-1)%(m_numTileColumnsMinus1+1)))
+        int tileIdx = 0;
+        m_rectSlicePos.clear();
+        while( tileIdx < pps.getNumTiles() ) 
         {
-          maxTileWidth = maxTileWidth - m_uiMaxCUWidth + (m_iSourceWidth % m_uiMaxCUWidth);
+          uint32_t startTileX = tileIdx % pps.getNumTileColumns();
+          uint32_t startTileY = tileIdx / pps.getNumTileColumns();
+          uint32_t startCtuX  = pps.getTileColumnBd( startTileX );
+          uint32_t startCtuY  = pps.getTileRowBd( startTileY );
+          uint32_t stopCtuX   = (startTileX + m_rectSliceFixedWidth)  >= pps.getNumTileColumns() ? pps.getPicWidthInCtu() - 1  : pps.getTileColumnBd( startTileX + m_rectSliceFixedWidth ) - 1;
+          uint32_t stopCtuY   = (startTileY + m_rectSliceFixedHeight) >= pps.getNumTileRows()    ? pps.getPicHeightInCtu() - 1 : pps.getTileRowBd( startTileY + m_rectSliceFixedHeight ) - 1;
+          uint32_t stopTileX  = pps.ctuToTileCol( stopCtuX );
+          uint32_t stopTileY  = pps.ctuToTileRow( stopCtuY );
+          
+          // add rectangular slice to list
+          m_rectSlicePos.push_back( startCtuY * pps.getPicWidthInCtu() + startCtuX );          
+          m_rectSlicePos.push_back( stopCtuY  * pps.getPicWidthInCtu() + stopCtuX  );
+          
+          // get slice size in tiles
+          uint32_t sliceWidth  = stopTileX - startTileX + 1;
+          uint32_t sliceHeight = stopTileY - startTileY + 1;
+
+          // move to next tile in raster scan order
+          tileIdx += sliceWidth;
+          if( tileIdx % pps.getNumTileColumns() == 0 )
+          {
+            tileIdx += (sliceHeight - 1) * pps.getNumTileColumns();
+          }
         }
       }
-      else // not uniform spacing
+
+      xConfirmPara( m_rectSlicePos.size() & 1, "Odd number of rectangular slice positions provided. Rectangular slice positions must be specified in pairs of (top-left / bottom-right) raster-scan CTU addresses.");
+      
+      // set default slice size if not provided
+      if( m_rectSlicePos.size() == 0 ) 
       {
-        if(m_numTileColumnsMinus1<1)
+        m_rectSlicePos.push_back( 0 );
+        m_rectSlicePos.push_back( pps.getPicWidthInCtu() * pps.getPicHeightInCtu() - 1 );
+      }
+      pps.setNumSlicesInPic( (uint32_t)(m_rectSlicePos.size() >> 1) );
+      xConfirmPara(pps.getNumSlicesInPic() > getMaxSlicesByLevel( m_level ), "Number of rectangular slices exceeds maximum number allowed according to specified level");
+      pps.initRectSlices();
+
+      // set slice parameters from CTU addresses
+      for( sliceIdx = 0; sliceIdx < pps.getNumSlicesInPic(); sliceIdx++ )
+      {
+        xConfirmPara( m_rectSlicePos[2*sliceIdx]     >= pps.getPicWidthInCtu() * pps.getPicHeightInCtu(), "Rectangular slice position exceeds total number of CTU in picture.");
+        xConfirmPara( m_rectSlicePos[2*sliceIdx + 1] >= pps.getPicWidthInCtu() * pps.getPicHeightInCtu(), "Rectangular slice position exceeds total number of CTU in picture.");
+
+        // map raster scan CTU address to X/Y position
+        uint32_t startCtuX = m_rectSlicePos[2*sliceIdx]     % pps.getPicWidthInCtu();
+        uint32_t startCtuY = m_rectSlicePos[2*sliceIdx]     / pps.getPicWidthInCtu();
+        uint32_t stopCtuX  = m_rectSlicePos[2*sliceIdx + 1] % pps.getPicWidthInCtu();
+        uint32_t stopCtuY  = m_rectSlicePos[2*sliceIdx + 1] / pps.getPicWidthInCtu();
+        
+        // get corresponding tile index
+        uint32_t startTileX = pps.ctuToTileCol( startCtuX );
+        uint32_t startTileY = pps.ctuToTileRow( startCtuY );
+        uint32_t stopTileX  = pps.ctuToTileCol( stopCtuX );
+        uint32_t stopTileY  = pps.ctuToTileRow( stopCtuY );
+        uint32_t tileIdx    = startTileY * pps.getNumTileColumns() + startTileX;
+
+        // get slice size in tiles
+        uint32_t sliceWidth  = stopTileX - startTileX + 1;
+        uint32_t sliceHeight = stopTileY - startTileY + 1;
+        
+        // check for slice / tile alignment
+        xConfirmPara( startCtuX != pps.getTileColumnBd( startTileX ), "Rectangular slice position does not align with a left tile edge.");
+        xConfirmPara( stopCtuX  != (pps.getTileColumnBd( stopTileX + 1 ) - 1), "Rectangular slice position does not align with a right tile edge.");
+        if( sliceWidth > 1 || sliceHeight > 1 )
         {
-          maxTileWidth = m_iSourceWidth;
+          xConfirmPara( startCtuY != pps.getTileRowBd( startTileY ), "Rectangular slice position does not align with a top tile edge.");
+          xConfirmPara( stopCtuY  != (pps.getTileRowBd( stopTileY + 1 ) - 1), "Rectangular slice position does not align with a bottom tile edge.");
         }
-        else
+
+        // set slice size and tile index
+        pps.setSliceWidthInTiles( sliceIdx, sliceWidth );
+        pps.setSliceHeightInTiles( sliceIdx, sliceHeight );
+        pps.setSliceTileIdx( sliceIdx, tileIdx );
+        if( sliceIdx > 0 && !needTileIdxDelta )
         {
-          int accColumnWidth = 0;
-          for(int col=0; col<(m_numTileColumnsMinus1); col++)
+          uint32_t lastTileIdx = pps.getSliceTileIdx( sliceIdx-1 );
+          lastTileIdx += pps.getSliceWidthInTiles( sliceIdx-1 );
+          if( lastTileIdx % pps.getNumTileColumns() == 0)
           {
-            maxTileWidth = m_tileColumnWidth[col]>maxTileWidth ? m_tileColumnWidth[col]:maxTileWidth;
-            accColumnWidth += m_tileColumnWidth[col];
+            lastTileIdx += (pps.getSliceHeightInTiles( sliceIdx-1 ) - 1) * pps.getNumTileColumns();
+          }
+          if( lastTileIdx != tileIdx )
+          {
+            needTileIdxDelta = true;
           }
-          maxTileWidth = (widthInCU-accColumnWidth)>maxTileWidth ? m_uiMaxCUWidth*(widthInCU-accColumnWidth):m_uiMaxCUWidth*maxTileWidth;
-        }
-        if(m_numTileRowsMinus1<1)
-        {
-          maxTileHeight = m_iSourceHeight;
         }
-        else
+
+        // special case for multiple slices within a single tile
+        if( sliceWidth == 1 && sliceHeight == 1 )
         {
-          int accRowHeight = 0;
-          for(int row=0; row<(m_numTileRowsMinus1); row++)
+          uint32_t firstSliceIdx = sliceIdx;
+          uint32_t numSlicesInTile = 1;
+          pps.setSliceHeightInCtu( sliceIdx, stopCtuY - startCtuY + 1 );
+          
+          while( sliceIdx < pps.getNumSlicesInPic()-1 ) 
           {
-            maxTileHeight = m_tileRowHeight[row]>maxTileHeight ? m_tileRowHeight[row]:maxTileHeight;
-            accRowHeight += m_tileRowHeight[row];
+            uint32_t nextTileIdx;
+            startCtuX   = m_rectSlicePos[2*(sliceIdx+1)]     % pps.getPicWidthInCtu();
+            startCtuY   = m_rectSlicePos[2*(sliceIdx+1)]     / pps.getPicWidthInCtu();
+            stopCtuX    = m_rectSlicePos[2*(sliceIdx+1) + 1] % pps.getPicWidthInCtu();
+            stopCtuY    = m_rectSlicePos[2*(sliceIdx+1) + 1] / pps.getPicWidthInCtu();          
+            startTileX  = pps.ctuToTileCol( startCtuX );
+            startTileY  = pps.ctuToTileRow( startCtuY );
+            stopTileX   = pps.ctuToTileCol( stopCtuX );
+            stopTileY   = pps.ctuToTileRow( stopCtuY );
+            nextTileIdx = startTileY * pps.getNumTileColumns() + startTileX;
+            sliceWidth  = stopTileX - startTileX + 1;
+            sliceHeight = stopTileY - startTileY + 1;
+            if(nextTileIdx != tileIdx || sliceWidth != 1 || sliceHeight != 1) 
+            {
+              break;
+            }
+            numSlicesInTile++;
+            sliceIdx++;
+            pps.setSliceWidthInTiles( sliceIdx, 1 );
+            pps.setSliceHeightInTiles( sliceIdx, 1 );
+            pps.setSliceTileIdx( sliceIdx, tileIdx );    
+            pps.setSliceHeightInCtu( sliceIdx, stopCtuY - startCtuY + 1 );
           }
-          maxTileHeight = (heightInCU-accRowHeight)>maxTileHeight ? m_uiMaxCUHeight*(heightInCU-accRowHeight):m_uiMaxCUHeight*maxTileHeight;
+          pps.setNumSlicesInTile( firstSliceIdx, numSlicesInTile );
         }
       }
-      int maxSizeInSamplesY = maxTileWidth*maxTileHeight;
-      m_minSpatialSegmentationIdc = 4*PicSizeInSamplesY/maxSizeInSamplesY-4;
-    }
-    else if(m_entropyCodingSyncEnabledFlag)
-    {
-      m_minSpatialSegmentationIdc = 4*PicSizeInSamplesY/((2*m_iSourceHeight+m_iSourceWidth)*m_uiMaxCUHeight)-4;
-    }
-    else if(m_sliceMode == FIXED_NUMBER_OF_CTU)
-    {
-      m_minSpatialSegmentationIdc = 4*PicSizeInSamplesY/(m_sliceArgument*m_uiMaxCUWidth*m_uiMaxCUHeight)-4;
+      pps.setTileIdxDeltaPresentFlag( needTileIdxDelta );
+      m_tileIdxDeltaPresentFlag = needTileIdxDelta;
+      
+      // check rectangular slice mapping and full picture CTU coverage
+      pps.initRectSliceMap();
+
+      // store rectangular slice parameters from temporary PPS structure
+      m_numSlicesInPic = pps.getNumSlicesInPic();
+      m_rectSlices.resize( pps.getNumSlicesInPic() );
+      for( sliceIdx = 0; sliceIdx < pps.getNumSlicesInPic(); sliceIdx++ )
+      {
+        m_rectSlices[sliceIdx].setSliceWidthInTiles( pps.getSliceWidthInTiles(sliceIdx) );
+        m_rectSlices[sliceIdx].setSliceHeightInTiles( pps.getSliceHeightInTiles(sliceIdx) );
+        m_rectSlices[sliceIdx].setNumSlicesInTile( pps.getNumSlicesInTile(sliceIdx) );
+        m_rectSlices[sliceIdx].setSliceHeightInCtu( pps.getSliceHeightInCtu(sliceIdx) );
+        m_rectSlices[sliceIdx].setTileIdx( pps.getSliceTileIdx(sliceIdx) );
+      }
     }
+    // raster-scan slices
     else
     {
-      m_minSpatialSegmentationIdc = 0;
-    }
-#else
-    if(m_sliceMode == FIXED_NUMBER_OF_CTU)
-    {
-      m_minSpatialSegmentationIdc = 4*PicSizeInSamplesY/(m_sliceArgument*m_uiMaxCUWidth*m_uiMaxCUHeight)-4;
+      uint32_t listIdx = 0;
+      uint32_t remTiles = pps.getNumTiles();
+
+      // set default slice size if not provided
+      if( m_rasterSliceSize.size() == 0 ) 
+      {
+        m_rasterSliceSize.push_back( remTiles );
+      }
+
+      // set raster slice sizes
+      while( remTiles > 0 )
+      {
+        // truncate if size exceeds number of remaining tiles
+        if( listIdx < m_rasterSliceSize.size() )
+        {
+          m_rasterSliceSize[listIdx] = std::min( remTiles, m_rasterSliceSize[listIdx] );
+          remTiles -= m_rasterSliceSize[listIdx];
+        }
+        // replicate last size uniformly as needed to cover the remainder of the picture
+        else
+        {
+          m_rasterSliceSize.push_back( std::min( remTiles, m_rasterSliceSize.back() ) );
+          remTiles -= m_rasterSliceSize.back();
+        }
+        listIdx++;
+      }
+      // shrink list if too many sizes were provided
+      m_rasterSliceSize.resize( listIdx );
+      
+      m_numSlicesInPic = (uint32_t)m_rasterSliceSize.size();
+      xConfirmPara(m_rasterSliceSize.size() > getMaxSlicesByLevel( m_level ), "Number of raster-scan slices exceeds maximum number allowed according to specified level");
     }
-#endif
+  }
+  else 
+  {
+    m_numTileCols = 1;
+    m_numTileRows = 1;
+    m_numSlicesInPic = 1;
   }
 
-  if ((m_MCTSEncConstraint) && (m_bLFCrossTileBoundaryFlag))
+  if ((m_MCTSEncConstraint) && (!m_disableLFCrossTileBoundaryFlag))
   {
     printf("Warning: Constrained Encoding for Motion Constrained Tile Sets (MCTS) is enabled. Disabling filtering across tile boundaries!\n");
-    m_bLFCrossTileBoundaryFlag = false;
+    m_disableLFCrossTileBoundaryFlag = true;
   }
   if ((m_MCTSEncConstraint) && (m_TMVPModeId))
   {
@@ -2795,6 +3239,58 @@ bool EncAppCfg::xCheckParameter()
     m_BIO = false;
   }
 
+  // When m_PPSorSliceMode is non-zero (m_constantSliceHeaderParamsEnabledFlag == 1), for each PPS parameter below,
+  //     0:  value is signaled in slice header
+  //     >0: value is derived from PPS parameter as value - 1
+  switch (m_PPSorSliceMode)
+  {
+  case 0: // All parameter values are signaled in slice header
+    m_constantSliceHeaderParamsEnabledFlag = 0;
+    m_PPSDepQuantEnabledIdc = 0;
+    m_PPSRefPicListSPSIdc0 = 0;
+    m_PPSRefPicListSPSIdc1 = 0;
+    m_PPSMvdL1ZeroIdc = 0;
+    m_PPSCollocatedFromL0Idc = 0;
+    m_PPSSixMinusMaxNumMergeCandPlus1 = 0;
+    m_PPSMaxNumMergeCandMinusMaxNumTriangleCandPlus1 = 0;
+    break;
+  case 1: // RA setting
+    m_constantSliceHeaderParamsEnabledFlag = 1;
+    m_PPSDepQuantEnabledIdc = (m_depQuantEnabledFlag ? 1 : 0) + 1;
+    m_PPSRefPicListSPSIdc0 = 0;
+    m_PPSRefPicListSPSIdc1 = 0;
+    m_PPSMvdL1ZeroIdc = 0;
+    m_PPSCollocatedFromL0Idc = 0;
+    m_PPSSixMinusMaxNumMergeCandPlus1 = 6 - m_maxNumMergeCand + 1;
+    m_PPSMaxNumMergeCandMinusMaxNumTriangleCandPlus1 = m_maxNumMergeCand - m_maxNumTriangleCand + 1;
+    break;
+  case 2: // LDB setting
+    m_constantSliceHeaderParamsEnabledFlag = 1;
+    m_PPSDepQuantEnabledIdc = (m_depQuantEnabledFlag ? 1 : 0) + 1;
+    m_PPSRefPicListSPSIdc0 = 2;
+    m_PPSRefPicListSPSIdc1 = 2;
+    m_PPSMvdL1ZeroIdc = 2;
+    m_PPSCollocatedFromL0Idc = 1;
+    m_PPSSixMinusMaxNumMergeCandPlus1 = 6 - m_maxNumMergeCand + 1;
+    m_PPSMaxNumMergeCandMinusMaxNumTriangleCandPlus1 = m_maxNumMergeCand - m_maxNumTriangleCand + 1;
+    break;
+  case 3: // LDP setting
+    m_constantSliceHeaderParamsEnabledFlag = 1;
+    m_PPSDepQuantEnabledIdc = (m_depQuantEnabledFlag ? 1 : 0) + 1;
+    m_PPSRefPicListSPSIdc0 = 2;
+    m_PPSRefPicListSPSIdc1 = 2;
+    m_PPSMvdL1ZeroIdc = 0;
+    m_PPSCollocatedFromL0Idc = 0;
+    m_PPSSixMinusMaxNumMergeCandPlus1 = 6 - m_maxNumMergeCand + 1;
+    m_PPSMaxNumMergeCandMinusMaxNumTriangleCandPlus1 = 0;
+    break;
+  default:
+    THROW("Invalid value for PPSorSliceMode");
+  }
+  xConfirmPara(m_drapPeriod > 0 && m_PPSRefPicListSPSIdc0 > 0, "PPSRefPicListSPSIdc0 shall be 0 when DRAP is used. This can be fixed by setting PPSorSliceMode=0.");
+  xConfirmPara(m_drapPeriod > 0 && m_PPSRefPicListSPSIdc1 > 0, "PPSRefPicListSPSIdc1 shall be 0 when DRAP is used. This can be fixed by setting PPSorSliceMode=0.");
+
+#if HEVC_SEI
   if (m_toneMappingInfoSEIEnabled)
   {
     xConfirmPara( m_toneMapCodedDataBitDepth < 8 || m_toneMapCodedDataBitDepth > 14 , "SEIToneMapCodedDataBitDepth must be in rage 8 to 14");
@@ -2827,6 +3323,8 @@ bool EncAppCfg::xCheckParameter()
     xConfirmPara( (m_chromaFormatIDC == CHROMA_400 ), "chromaResamplingFilterSEI is not allowed to be present when ChromaFormatIDC is equal to zero (4:0:0)" );
     xConfirmPara(m_vuiParametersPresentFlag && m_chromaLocInfoPresentFlag && (m_chromaSampleLocTypeTopField != m_chromaSampleLocTypeBottomField ), "When chromaResamplingFilterSEI is enabled, ChromaSampleLocTypeTopField has to be equal to ChromaSampleLocTypeBottomField" );
   }
+#endif
+  xConfirmPara( m_sariAspectRatioIdc < 0 || m_sariAspectRatioIdc > 255, "SEISARISampleAspectRatioIdc must be in the range of 0 to 255");
 
   if ( m_RCEnableRateControl )
   {
@@ -2853,42 +3351,59 @@ bool EncAppCfg::xCheckParameter()
   {
     xConfirmPara( m_RCCpbSaturationEnabled != 0, "Target bits saturation cannot be processed without Rate control" );
   }
-  if (m_vuiParametersPresentFlag)
-  {
-    xConfirmPara(m_RCTargetBitrate == 0, "A target bit rate is required to be set for VUI/HRD parameters.");
-    if (m_RCCpbSize == 0)
-    {
-      msg( WARNING, "Warning: CPB size is set equal to zero. Adjusting value to be equal to TargetBitrate!\n");
-      m_RCCpbSize = m_RCTargetBitrate;
-    }
-  }
 #endif
 
-  xConfirmPara(!m_TransquantBypassEnabledFlag && m_CUTransquantBypassFlagForce, "CUTransquantBypassFlagForce cannot be 1 when TransquantBypassEnableFlag is 0");
-
-  xConfirmPara(m_log2ParallelMergeLevel < 2, "Log2ParallelMergeLevel should be larger than or equal to 2");
-
   if (m_framePackingSEIEnabled)
   {
     xConfirmPara(m_framePackingSEIType < 3 || m_framePackingSEIType > 5 , "SEIFramePackingType must be in rage 3 to 5");
   }
 
-  if (m_segmentedRectFramePackingSEIEnabled)
+  if( m_erpSEIEnabled && !m_erpSEICancelFlag )
   {
-    xConfirmPara(m_framePackingSEIEnabled , "SEISegmentedRectFramePacking must be 0 when SEIFramePacking is 1");
+    xConfirmPara( m_erpSEIGuardBandType < 0 || m_erpSEIGuardBandType > 7, "SEIEquirectangularprojectionGuardBandType must be in the range of 0 to 7");
+    xConfirmPara( (m_chromaFormatIDC == CHROMA_420 || m_chromaFormatIDC == CHROMA_422) && (m_erpSEILeftGuardBandWidth%2 == 1), "SEIEquirectangularprojectionLeftGuardBandWidth must be an even number for 4:2:0 or 4:2:2 chroma format");
+    xConfirmPara( (m_chromaFormatIDC == CHROMA_420 || m_chromaFormatIDC == CHROMA_422) && (m_erpSEIRightGuardBandWidth%2 == 1), "SEIEquirectangularprojectionRightGuardBandWidth must be an even number for 4:2:0 or 4:2:2 chroma format");
   }
 
-#if HEVC_TILES_WPP
-  if((m_numTileColumnsMinus1 <= 0) && (m_numTileRowsMinus1 <= 0) && m_tmctsSEIEnabled)
+  if( m_sphereRotationSEIEnabled && !m_sphereRotationSEICancelFlag )
   {
-    msg( WARNING, "Warning: SEITempMotionConstrainedTileSets is set to false to disable temporal motion-constrained tile sets SEI message because there are no tiles enabled.\n");
-    m_tmctsSEIEnabled = false;
+    xConfirmPara( m_sphereRotationSEIYaw  < -(180<<16) || m_sphereRotationSEIYaw > (180<<16)-1, "SEISphereRotationYaw must be in the range of -11 796 480 to 11 796 479");
+    xConfirmPara( m_sphereRotationSEIPitch < -(90<<16) || m_sphereRotationSEIPitch > (90<<16),  "SEISphereRotationPitch must be in the range of -5 898 240 to 5 898 240");
+    xConfirmPara( m_sphereRotationSEIRoll < -(180<<16) || m_sphereRotationSEIRoll > (180<<16)-1, "SEISphereRotationRoll must be in the range of -11 796 480 to 11 796 479");
   }
-#endif
 
-  if(m_timeCodeSEIEnabled)
+  if ( m_omniViewportSEIEnabled && !m_omniViewportSEICancelFlag )
   {
-    xConfirmPara(m_timeCodeSEINumTs > MAX_TIMECODE_SEI_SETS, "Number of time sets cannot exceed 3");
+    xConfirmPara( m_omniViewportSEIId < 0 || m_omniViewportSEIId > 1023, "SEIomniViewportId must be in the range of 0 to 1023");
+    xConfirmPara( m_omniViewportSEICntMinus1 < 0 || m_omniViewportSEICntMinus1 > 15, "SEIomniViewportCntMinus1 must be in the range of 0 to 15");
+    for ( uint32_t i=0; i<=m_omniViewportSEICntMinus1; i++ )
+    {
+      xConfirmPara( m_omniViewportSEIAzimuthCentre[i] < -(180<<16)  || m_omniViewportSEIAzimuthCentre[i] > (180<<16)-1, "SEIOmniViewportAzimuthCentre must be in the range of -11 796 480 to 11 796 479");
+      xConfirmPara( m_omniViewportSEIElevationCentre[i] < -(90<<16) || m_omniViewportSEIElevationCentre[i] > (90<<16),  "SEIOmniViewportSEIElevationCentre must be in the range of -5 898 240 to 5 898 240");
+      xConfirmPara( m_omniViewportSEITiltCentre[i] < -(180<<16)     || m_omniViewportSEITiltCentre[i] > (180<<16)-1,    "SEIOmniViewportTiltCentre must be in the range of -11 796 480 to 11 796 479");
+      xConfirmPara( m_omniViewportSEIHorRange[i] < 1 || m_omniViewportSEIHorRange[i] > (360<<16), "SEIOmniViewportHorRange must be in the range of 1 to 360*2^16");
+      xConfirmPara( m_omniViewportSEIVerRange[i] < 1 || m_omniViewportSEIVerRange[i] > (180<<16), "SEIOmniViewportVerRange must be in the range of 1 to 180*2^16");
+    }
+  }
+
+  if (m_gcmpSEIEnabled && !m_gcmpSEICancelFlag)
+  {
+    xConfirmPara( m_gcmpSEIMappingFunctionType < 0 || m_gcmpSEIMappingFunctionType > 2, "SEIGcmpMappingFunctionType must be in the range of 0 to 2");
+    int numFace = m_gcmpSEIPackingType == 4 || m_gcmpSEIPackingType == 5 ? 5 : 6;
+    for ( int i = 0; i < numFace; i++ )
+    {
+      xConfirmPara( m_gcmpSEIFaceIndex[i] < 0 || m_gcmpSEIFaceIndex[i] > 5,       "SEIGcmpFaceIndex must be in the range of 0 to 5");
+      xConfirmPara( m_gcmpSEIFaceRotation[i] < 0 || m_gcmpSEIFaceRotation[i] > 3, "SEIGcmpFaceRotation must be in the range of 0 to 3");
+      if (m_gcmpSEIMappingFunctionType == 2)
+      {
+        xConfirmPara( m_gcmpSEIFunctionCoeffU[i] <= 0.0 || m_gcmpSEIFunctionCoeffU[i] > 1.0, "SEIGcmpFunctionCoeffU must be in the range (0, 1]");
+        xConfirmPara( m_gcmpSEIFunctionCoeffV[i] <= 0.0 || m_gcmpSEIFunctionCoeffV[i] > 1.0, "SEIGcmpFunctionCoeffV must be in the range (0, 1]");
+      }
+    }
+    if (m_gcmpSEIGuardBandFlag)
+    {
+      xConfirmPara( m_gcmpSEIGuardBandSamplesMinus1 < 0 || m_gcmpSEIGuardBandSamplesMinus1 > 15, "SEIGcmpGuardBandSamplesMinus1 must be in the range of 0 to 15");
+    }
   }
 
 #if U0033_ALTERNATIVE_TRANSFER_CHARACTERISTICS_SEI
@@ -2898,10 +3413,16 @@ bool EncAppCfg::xCheckParameter()
   xConfirmPara( m_decodeBitstreams[0] == m_bitstreamFileName, "Debug bitstream and the output bitstream cannot be equal.\n" );
   xConfirmPara( m_decodeBitstreams[1] == m_bitstreamFileName, "Decode2 bitstream and the output bitstream cannot be equal.\n" );
   xConfirmPara(unsigned(m_LMChroma) > 1, "LMMode exceeds range (0 to 1)");
+  if (m_gopBasedTemporalFilterEnabled)
+  {
+    xConfirmPara(m_temporalSubsampleRatio != 1, "GOP Based Temporal Filter only support Temporal sub-sample ratio 1");
+  }
 #if EXTENSION_360_VIDEO
   check_failed |= m_ext360.verifyParameters();
 #endif
 
+  xConfirmPara(m_useBDPCM < 0 || m_useBDPCM > 2, "BDPCM must be in range 0..2");
+
 #undef xConfirmPara
   return check_failed;
 }
@@ -2948,43 +3469,39 @@ void EncAppCfg::xPrintParameter()
     msg( DETAILS, "Frame/Field                            : Frame based coding\n" );
     msg( DETAILS, "Frame index                            : %u - %d (%d frames)\n", m_FrameSkip, m_FrameSkip + m_framesToBeEncoded - 1, m_framesToBeEncoded );
   }
-  if (m_profile == Profile::MAINREXT)
   {
-    ExtendedProfileName validProfileName;
-    if (m_onePictureOnlyConstraintFlag)
-    {
-      validProfileName = m_bitDepthConstraint == 8 ? MAIN_444_STILL_PICTURE : (m_bitDepthConstraint == 16 ? MAIN_444_16_STILL_PICTURE : NONE);
-    }
-    else
-    {
-      const uint32_t intraIdx = m_intraConstraintFlag ? 1:0;
-      const uint32_t bitDepthIdx = (m_bitDepthConstraint == 8 ? 0 : (m_bitDepthConstraint ==10 ? 1 : (m_bitDepthConstraint == 12 ? 2 : (m_bitDepthConstraint == 16 ? 3 : 4 ))));
-      const uint32_t chromaFormatIdx = uint32_t(m_chromaFormatConstraint);
-      validProfileName = (bitDepthIdx > 3 || chromaFormatIdx>3) ? NONE : validRExtProfileNames[intraIdx][bitDepthIdx][chromaFormatIdx];
-    }
-    std::string rextSubProfile;
-    if (validProfileName!=NONE)
-    {
-      rextSubProfile=enumToString(strToExtendedProfile, sizeof(strToExtendedProfile)/sizeof(*strToExtendedProfile), validProfileName);
-    }
-    if (rextSubProfile == "main_444_16")
+    msg( DETAILS, "Profile                                : %s\n", profileToString(m_profile) );
+  }
+  msg( DETAILS, "CU size / depth / total-depth          : %d / %d / %d\n", m_uiMaxCUWidth, m_uiMaxCUDepth, m_uiMaxCodingDepth );
+  msg(DETAILS, "subpicture present flag                            : %d\n", m_subPicPresentFlag);
+  if (m_subPicPresentFlag) 
+  {
+    msg(DETAILS, "number of subpictures                            : %d\n", m_numSubPics);
+    for (int i = 0; i < m_numSubPics; i++) 
     {
-      rextSubProfile="main_444_16 [NON STANDARD]";
+      msg(DETAILS, "[%d]th subpictures location                           :[%d %d]\n", i, m_subPicCtuTopLeftX[i], m_subPicCtuTopLeftY[i]);
+      msg(DETAILS, "[%d]th subpictures size                           :[%d %d]\n", i, m_subPicWidth[i], m_subPicHeight[i]);
+      msg(DETAILS, "[%d]th subpictures treated as picture flag                           :%d\n", i, m_subPicTreatedAsPicFlag[i]);
+      msg(DETAILS, "loop filter cross [%d]th subpictures enabled flag                           :%d\n", i, m_loopFilterAcrossSubpicEnabledFlag[i]);
+
     }
-    msg( DETAILS, "Profile                                : %s (%s)\n", profileToString(m_profile), (rextSubProfile.empty())?"INVALID REXT PROFILE":rextSubProfile.c_str() );
   }
-  else
+  msg(DETAILS, "subpicture ID present flag                            : %d\n", m_subPicIdPresentFlag);
+  if (m_subPicIdPresentFlag) 
   {
-    msg( DETAILS, "Profile                                : %s\n", profileToString(m_profile) );
+    msg(DETAILS, "subpicture ID signalling present flag                            : %d\n", m_subPicIdSignallingPresentFlag);
+    for (int i = 0; i < m_numSubPics; i++) 
+    {
+      msg(DETAILS, "[%d]th subpictures ID length                           :%d\n", i, m_subPicIdLen);
+      msg(DETAILS, "[%d]th subpictures ID                          :%d\n", i, m_subPicId[i]);
+
+    }
   }
-  msg( DETAILS, "CU size / depth / total-depth          : %d / %d / %d\n", m_uiMaxCUWidth, m_uiMaxCUDepth, m_uiMaxCodingDepth );
-#if MAX_TB_SIZE_SIGNALLING
   msg( DETAILS, "Max TB size                            : %d \n", 1 << m_log2MaxTbSize );
-#endif
-  msg( DETAILS, "Min PCM size                           : %d\n", 1 << m_uiPCMLog2MinSize);
   msg( DETAILS, "Motion search range                    : %d\n", m_iSearchRange );
   msg( DETAILS, "Intra period                           : %d\n", m_iIntraPeriod );
   msg( DETAILS, "Decoding refresh type                  : %d\n", m_iDecodingRefreshType );
+  msg( DETAILS, "DRAP period                            : %d\n", m_drapPeriod );
 #if QP_SWITCHING_FOR_PARALLEL
   if (m_qpIncrementAtSourceFrame.bPresent)
   {
@@ -3006,8 +3523,6 @@ void EncAppCfg::xPrintParameter()
   msg( DETAILS, "Input bit depth                        : (Y:%d, C:%d)\n", m_inputBitDepth[CHANNEL_TYPE_LUMA], m_inputBitDepth[CHANNEL_TYPE_CHROMA] );
   msg( DETAILS, "MSB-extended bit depth                 : (Y:%d, C:%d)\n", m_MSBExtendedBitDepth[CHANNEL_TYPE_LUMA], m_MSBExtendedBitDepth[CHANNEL_TYPE_CHROMA] );
   msg( DETAILS, "Internal bit depth                     : (Y:%d, C:%d)\n", m_internalBitDepth[CHANNEL_TYPE_LUMA], m_internalBitDepth[CHANNEL_TYPE_CHROMA] );
-  msg( DETAILS, "PCM sample bit depth                   : (Y:%d, C:%d)\n", m_bPCMInputBitDepthFlag ? m_MSBExtendedBitDepth[CHANNEL_TYPE_LUMA] : m_internalBitDepth[CHANNEL_TYPE_LUMA],
-                                                                    m_bPCMInputBitDepthFlag ? m_MSBExtendedBitDepth[CHANNEL_TYPE_CHROMA] : m_internalBitDepth[CHANNEL_TYPE_CHROMA] );
   msg( DETAILS, "Intra reference smoothing              : %s\n", (m_enableIntraReferenceSmoothing           ? "Enabled" : "Disabled") );
   msg( DETAILS, "cu_chroma_qp_offset_subdiv             : %d\n", m_cuChromaQpOffsetSubdiv);
   msg( DETAILS, "extended_precision_processing_flag     : %s\n", (m_extendedPrecisionProcessingFlag         ? "Enabled" : "Disabled") );
@@ -3035,7 +3550,7 @@ void EncAppCfg::xPrintParameter()
   }
 
   msg( DETAILS, "RateControl                            : %d\n", m_RCEnableRateControl );
-  msg( DETAILS, "WPMethod                               : %d\n", int(m_weightedPredictionMethod));
+  msg( DETAILS, "WeightedPredMethod                     : %d\n", int(m_weightedPredictionMethod));
 
   if(m_RCEnableRateControl)
   {
@@ -3057,6 +3572,8 @@ void EncAppCfg::xPrintParameter()
 
   msg( DETAILS, "Max Num Merge Candidates               : %d\n", m_maxNumMergeCand );
   msg( DETAILS, "Max Num Affine Merge Candidates        : %d\n", m_maxNumAffineMergeCand );
+  msg( DETAILS, "Max Num Triangle Merge Candidates      : %d\n", m_maxNumTriangleCand );
+  msg( DETAILS, "Max Num IBC Merge Candidates           : %d\n", m_maxNumIBCMergeCand );
   msg( DETAILS, "\n");
 
   msg( VERBOSE, "TOOL CFG: ");
@@ -3080,85 +3597,67 @@ void EncAppCfg::xPrintParameter()
   msg( VERBOSE, "TransformSkip:%d ",     m_useTransformSkip     );
   msg( VERBOSE, "TransformSkipFast:%d ", m_useTransformSkipFast );
   msg( VERBOSE, "TransformSkipLog2MaxSize:%d ", m_log2MaxTransformSkipBlockSize);
-  msg( VERBOSE, "Slice: M=%d ", int(m_sliceMode));
-  if (m_sliceMode!=NO_SLICES)
-  {
-    msg( VERBOSE, "A=%d ", m_sliceArgument);
-  }
-#if HEVC_DEPENDENT_SLICES
-  msg( VERBOSE, "SliceSegment: M=%d ",m_sliceSegmentMode);
-  if (m_sliceSegmentMode!=NO_SLICES)
-  {
-    msg( VERBOSE, "A=%d ", m_sliceSegmentArgument);
-  }
-#endif
-  msg( VERBOSE, "Tiles:%dx%d ", m_numTileColumnsMinus1 + 1, m_numTileRowsMinus1 + 1 );
+  msg(VERBOSE, "ChromaTS:%d ", m_useChromaTS);
+  msg( VERBOSE, "BDPCM:%d ", m_useBDPCM                         );
+  msg( VERBOSE, "Tiles: %dx%d ", m_numTileCols, m_numTileRows );
+  msg( VERBOSE, "Slices: %d ", m_numSlicesInPic);
   msg( VERBOSE, "MCTS:%d ", m_MCTSEncConstraint );
-  msg( VERBOSE, "CIP:%d ", m_bUseConstrainedIntraPred);
   msg( VERBOSE, "SAO:%d ", (m_bUseSAO)?(1):(0));
   msg( VERBOSE, "ALF:%d ", m_alf ? 1 : 0 );
-  msg( VERBOSE, "PCM:%d ", (m_usePCM && (1<<m_uiPCMLog2MinSize) <= m_uiMaxCUWidth)? 1 : 0);
-
-  if (m_TransquantBypassEnabledFlag && m_CUTransquantBypassFlagForce)
-  {
-    msg( VERBOSE, "TransQuantBypassEnabled: =1");
-  }
-  else
-  {
-    msg( VERBOSE, "TransQuantBypassEnabled:%d ", (m_TransquantBypassEnabledFlag)? 1:0 );
-  }
 
   msg( VERBOSE, "WPP:%d ", (int)m_useWeightedPred);
   msg( VERBOSE, "WPB:%d ", (int)m_useWeightedBiPred);
-  msg( VERBOSE, "PME:%d ", m_log2ParallelMergeLevel);
-#if HEVC_TILES_WPP
   const int iWaveFrontSubstreams = m_entropyCodingSyncEnabledFlag ? (m_iSourceHeight + m_uiMaxCUHeight - 1) / m_uiMaxCUHeight : 1;
   msg( VERBOSE, " WaveFrontSynchro:%d WaveFrontSubstreams:%d", m_entropyCodingSyncEnabledFlag?1:0, iWaveFrontSubstreams);
-#endif
-#if HEVC_USE_SCALING_LISTS
   msg( VERBOSE, " ScalingList:%d ", m_useScalingListId );
-#endif
-  msg( VERBOSE, "TMVPMode:%d ", m_TMVPModeId     );
-
+  msg( VERBOSE, "TMVPMode:%d ", m_TMVPModeId );
   msg( VERBOSE, " DQ:%d ", m_depQuantEnabledFlag);
-#if HEVC_USE_SIGN_HIDING
   msg( VERBOSE, " SignBitHidingFlag:%d ", m_signDataHidingEnabledFlag);
-#endif
   msg( VERBOSE, "RecalQP:%d ", m_recalculateQPAccordingToLambda ? 1 : 0 );
 
-  if( m_profile == Profile::NEXT )
   {
-    msg( VERBOSE, "\nNEXT TOOL CFG: " );
+    msg( VERBOSE, "\nTOOL CFG: " );
+    msg( VERBOSE, "LFNST:%d ", m_LFNST );
+    msg( VERBOSE, "MMVD:%d ", m_MMVD);
     msg( VERBOSE, "Affine:%d ", m_Affine );
     if ( m_Affine )
     {
       msg( VERBOSE, "AffineType:%d ", m_AffineType );
     }
+    msg(VERBOSE, "PROF:%d ", m_PROF);
     msg(VERBOSE, "SubPuMvp:%d+%d ", m_SubPuMvpMode & 1, (m_SubPuMvpMode & 2) == 2);
     msg( VERBOSE, "DualITree:%d ", m_dualTree );
     msg( VERBOSE, "IMV:%d ", m_ImvMode );
     msg( VERBOSE, "BIO:%d ", m_BIO );
     msg( VERBOSE, "LMChroma:%d ", m_LMChroma );
-    if( m_LMChroma && m_chromaFormatIDC == CHROMA_420 )
-    {
-      msg( VERBOSE, "CclmCollocatedChroma:%d ", m_cclmCollocatedChromaFlag );
-    }
+    msg( VERBOSE, "HorCollocatedChroma:%d ", m_horCollocatedChromaFlag );
+    msg( VERBOSE, "VerCollocatedChroma:%d ", m_verCollocatedChromaFlag );
     msg( VERBOSE, "MTS: %1d(intra) %1d(inter) ", m_MTS & 1, ( m_MTS >> 1 ) & 1 );
     msg( VERBOSE, "SBT:%d ", m_SBT );
+    msg( VERBOSE, "ISP:%d ", m_ISP );
+    msg( VERBOSE, "SMVD:%d ", m_SMVD );
     msg( VERBOSE, "CompositeLTReference:%d ", m_compositeRefEnabled);
-    msg( VERBOSE, "GBi:%d ", m_GBi );
-    msg( VERBOSE, "GBiFast:%d ", m_GBiFast );
+    msg( VERBOSE, "Bcw:%d ", m_bcw );
+    msg( VERBOSE, "BcwFast:%d ", m_BcwFast );
 #if LUMA_ADAPTIVE_DEBLOCKING_FILTER_QP_OFFSET
     msg( VERBOSE, "LADF:%d ", m_LadfEnabed );
 #endif
-    msg(VERBOSE, "MHIntra:%d ", m_MHIntra);
+    msg(VERBOSE, "CIIP:%d ", m_ciip);
     msg( VERBOSE, "Triangle:%d ", m_Triangle );
-    msg( VERBOSE, "AllowDisFracMMVD:%d ", m_allowDisFracMMVD );
+    m_allowDisFracMMVD = m_MMVD ? m_allowDisFracMMVD : false;
+    if ( m_MMVD )
+      msg(VERBOSE, "AllowDisFracMMVD:%d ", m_allowDisFracMMVD);
     msg( VERBOSE, "AffineAmvr:%d ", m_AffineAmvr );
     m_AffineAmvrEncOpt = m_AffineAmvr ? m_AffineAmvrEncOpt : false;
     msg( VERBOSE, "AffineAmvrEncOpt:%d ", m_AffineAmvrEncOpt );
     msg(VERBOSE, "DMVR:%d ", m_DMVR);
+    msg(VERBOSE, "MmvdDisNum:%d ", m_MmvdDisNum);
+    msg(VERBOSE, "JointCbCr:%d ", m_JointCbCrMode);
   }
+  m_useColorTrans = (m_chromaFormatIDC == CHROMA_444 && m_costMode != COST_LOSSLESS_CODING) ? m_useColorTrans : 0u;
+  msg(VERBOSE, "ACT:%d ", m_useColorTrans);
+    m_PLTMode = ( m_chromaFormatIDC == CHROMA_444) ? m_PLTMode : 0u;
+    msg(VERBOSE, "PLT:%d ", m_PLTMode);
     msg(VERBOSE, "IBC:%d ", m_IBCMode);
   msg( VERBOSE, "HashME:%d ", m_HashME );
   msg( VERBOSE, "WrapAround:%d ", m_wrapAround);
@@ -3167,12 +3666,32 @@ void EncAppCfg::xPrintParameter()
     msg( VERBOSE, "WrapAroundOffset:%d ", m_wrapAroundOffset );
   }
   // ADD_NEW_TOOL (add some output indicating the usage of tools)
-    msg(VERBOSE, "Reshape:%d ", m_lumaReshapeEnable);
-    if (m_lumaReshapeEnable)
+  msg(VERBOSE, "LoopFilterAcrossVirtualBoundaries:%d ", m_loopFilterAcrossVirtualBoundariesDisabledFlag);
+  if ( m_loopFilterAcrossVirtualBoundariesDisabledFlag )
+  {
+    msg(VERBOSE, "vertical virtual boundaries:[");
+    for (unsigned i = 0; i < m_numVerVirtualBoundaries; i++)
     {
-      msg(VERBOSE, "(Sigal:%s ", m_reshapeSignalType==0? "SDR" : "HDR-PQ");
+      msg(VERBOSE, " %d", m_virtualBoundariesPosX[i]);
+    }
+    msg(VERBOSE, " ] horizontal virtual boundaries:[");
+    for (unsigned i = 0; i < m_numHorVirtualBoundaries; i++)
+    {
+      msg(VERBOSE, " %d", m_virtualBoundariesPosY[i]);
+    }
+    msg(VERBOSE, " ] ");
+  }
+    msg(VERBOSE, "Reshape:%d ", m_lmcsEnabled);
+    if (m_lmcsEnabled)
+    {
+      msg(VERBOSE, "(Signal:%s ", m_reshapeSignalType == 0 ? "SDR" : (m_reshapeSignalType == 2 ? "HDR-HLG" : "HDR-PQ"));
+      msg(VERBOSE, "Opt:%d", m_adpOption);
+      if (m_adpOption > 0) { msg(VERBOSE, " CW:%d", m_initialCW); }
+      msg(VERBOSE, " CSoffset:%d", m_CSoffset);
       msg(VERBOSE, ") ");
     }
+    msg(VERBOSE, "MRL:%d ", m_MRL);
+    msg(VERBOSE, "MIP:%d ", m_MIP);
     msg(VERBOSE, "EncDbOpt:%d ", m_encDbOpt);
   msg( VERBOSE, "\nFAST TOOL CFG: " );
   msg( VERBOSE, "LCTUFast:%d ", m_useFastLCTU );
@@ -3180,10 +3699,16 @@ void EncAppCfg::xPrintParameter()
   msg( VERBOSE, "PBIntraFast:%d ", m_usePbIntraFast );
   if( m_ImvMode ) msg( VERBOSE, "IMV4PelFast:%d ", m_Imv4PelFast );
   if( m_MTS ) msg( VERBOSE, "MTSMaxCand: %1d(intra) %1d(inter) ", m_MTSIntraMaxCand, m_MTSInterMaxCand );
-  msg( VERBOSE, "ISPFast:%d ", m_useFastISP );
+  if( m_ISP ) msg( VERBOSE, "ISPFast:%d ", m_useFastISP );
+  if( m_LFNST ) msg( VERBOSE, "FastLFNST:%d ", m_useFastLFNST );
   msg( VERBOSE, "AMaxBT:%d ", m_useAMaxBT );
   msg( VERBOSE, "E0023FastEnc:%d ", m_e0023FastEnc );
   msg( VERBOSE, "ContentBasedFastQtbt:%d ", m_contentBasedFastQtbt );
+  msg( VERBOSE, "UseNonLinearAlfLuma:%d ", m_useNonLinearAlfLuma );
+  msg( VERBOSE, "UseNonLinearAlfChroma:%d ", m_useNonLinearAlfChroma );
+  msg( VERBOSE, "MaxNumAlfAlternativesChroma:%d ", m_maxNumAlfAlternativesChroma );
+  if( m_MIP ) msg(VERBOSE, "FastMIP:%d ", m_useFastMIP);
+  msg( VERBOSE, "FastLocalDualTree:%d ", m_fastLocalDualTreeMode );
 
   msg( VERBOSE, "NumSplitThreads:%d ", m_numSplitThreads );
   if( m_numSplitThreads > 1 )
@@ -3193,6 +3718,15 @@ void EncAppCfg::xPrintParameter()
   msg( VERBOSE, "NumWppThreads:%d+%d ", m_numWppThreads, m_numWppExtraLines );
   msg( VERBOSE, "EnsureWppBitEqual:%d ", m_ensureWppBitEqual );
 
+  if( m_rprEnabled )
+  {
+    msg( VERBOSE, "RPR:(%1.2lfx, %1.2lfx)|%d ", m_scalingRatioHor, m_scalingRatioVer, m_switchPocPeriod );
+  }
+  else
+  {
+    msg( VERBOSE, "RPR:%d ", 0 );
+  }
+  msg(VERBOSE, "TemporalFilter:%d ", m_gopBasedTemporalFilterEnabled);
 #if EXTENSION_360_VIDEO
   m_ext360.outputConfigurationSummary();
 #endif
@@ -3204,6 +3738,41 @@ void EncAppCfg::xPrintParameter()
   fflush( stdout );
 }
 
+//! Checks the configured GOP entries for a non-zero temporal ID.
+//! \return true if any of the first m_iGOPSize entries of m_GOPList has m_temporalId != 0
+bool EncAppCfg::xHasNonZeroTemporalID ()
+{
+  // m_iGOPSize is a signed int; a signed index avoids the implicit conversion to unsigned
+  for (int i = 0; i < m_iGOPSize; i++)
+  {
+    if ( m_GOPList[i].m_temporalId != 0 ) { return true; }
+  }
+  return false;
+}
+
+//! Scans both reference picture lists of the configured GOP entries for a negative delta.
+//! \return true if any of the first m_iGOPSize entries of m_GOPList holds a negative delta
+bool EncAppCfg::xHasLeadingPicture ()
+{
+  // m_iGOPSize is a signed int; a signed index avoids the implicit conversion to unsigned
+  for (int i = 0; i < m_iGOPSize; i++)
+  {
+    const GOPEntry &entry = m_GOPList[i];
+    // deltas of reference list 0
+    for ( unsigned int j = 0; j < entry.m_numRefPics0; j++)
+    {
+      if ( entry.m_deltaRefPics0[j] < 0 ) { return true; }
+    }
+    // deltas of reference list 1
+    for ( unsigned int j = 0; j < entry.m_numRefPics1; j++)
+    {
+      if ( entry.m_deltaRefPics1[j] < 0 ) { return true; }
+    }
+  }
+  return false;
+}
+
+
 bool confirmPara(bool bflag, const char* message)
 {
   if (!bflag)
@@ -3215,4 +3784,6 @@ bool confirmPara(bool bflag, const char* message)
   return true;
 }
 
+
+
 //! \}
diff --git a/source/App/EncoderApp/EncAppCfg.h b/source/App/EncoderApp/EncAppCfg.h
index 1debceb530f63da624cf1cbce902bb79306ef40b..4ce37e402f737e4f6cecb69d29b34f4c1b5b426d 100644
--- a/source/App/EncoderApp/EncAppCfg.h
+++ b/source/App/EncoderApp/EncAppCfg.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -40,10 +40,22 @@
 
 #include "CommonLib/CommonDef.h"
 
+#include <map>
+template <class T1, class T2>
+static inline std::istream& operator >> (std::istream &in, std::map<T1, T2> &map);
+
+#include "Utilities/program_options_lite.h"
+
 #include "EncoderLib/EncCfg.h"
 #if EXTENSION_360_VIDEO
 #include "AppEncHelper360/TExt360AppEncCfg.h"
 #endif
+
+#if JVET_O0756_CALCULATE_HDRMETRICS
+#include "HDRLib/inc/DistortionMetric.H"
+#endif
+namespace po = df::program_options_lite;
+
 #include <sstream>
 #include <vector>
 //! \ingroup EncoderApp
@@ -122,35 +134,50 @@ protected:
   uint32_t  m_maxChromaFormatConstraintIdc;
   bool      m_bFrameConstraintFlag;
   bool      m_bNoQtbttDualTreeIntraConstraintFlag;
+  bool      m_noPartitionConstraintsOverrideConstraintFlag;
   bool      m_bNoSaoConstraintFlag;
   bool      m_bNoAlfConstraintFlag;
-  bool      m_bNoPcmConstraintFlag;
   bool      m_bNoRefWraparoundConstraintFlag;
   bool      m_bNoTemporalMvpConstraintFlag;
   bool      m_bNoSbtmvpConstraintFlag;
   bool      m_bNoAmvrConstraintFlag;
   bool      m_bNoBdofConstraintFlag;
+  bool      m_noDmvrConstraintFlag;
   bool      m_bNoCclmConstraintFlag;
   bool      m_bNoMtsConstraintFlag;
+  bool      m_noSbtConstraintFlag;
   bool      m_bNoAffineMotionConstraintFlag;
-  bool      m_bNoGbiConstraintFlag;
-  bool      m_bNoMhIntraConstraintFlag;
+  bool      m_bNoBcwConstraintFlag;
+  bool      m_noIbcConstraintFlag;
+  bool      m_bNoCiipConstraintFlag;
+  bool      m_noFPelMmvdConstraintFlag;
   bool      m_bNoTriangleConstraintFlag;
   bool      m_bNoLadfConstraintFlag;
-  bool      m_bNoCurrPicRefConstraintFlag;
+  bool      m_noTransformSkipConstraintFlag;
+  bool      m_noBDPCMConstraintFlag;
+  bool      m_noJointCbCrConstraintFlag;
   bool      m_bNoQpDeltaConstraintFlag;
   bool      m_bNoDepQuantConstraintFlag;
   bool      m_bNoSignDataHidingConstraintFlag;
+  bool      m_noTrailConstraintFlag;
+  bool      m_noStsaConstraintFlag;
+  bool      m_noRaslConstraintFlag;
+  bool      m_noRadlConstraintFlag;
+  bool      m_noIdrConstraintFlag;
+  bool      m_noCraConstraintFlag;
+  bool      m_noGdrConstraintFlag;
+  bool      m_noApsConstraintFlag;
 
   // profile/level
   Profile::Name m_profile;
   Level::Tier   m_levelTier;
   Level::Name   m_level;
+  std::vector<uint32_t>  m_subProfile;
+  uint8_t      m_numSubProfile;
+
   uint32_t          m_bitDepthConstraint;
   ChromaFormat  m_chromaFormatConstraint;
   bool          m_intraConstraintFlag;
-  bool          m_onePictureOnlyConstraintFlag;
-  bool          m_lowerBitRateConstraintFlag;
   bool          m_progressiveSourceFlag;
   bool          m_interlacedSourceFlag;
   bool          m_nonPackedConstraintFlag;
@@ -160,10 +187,11 @@ protected:
   int       m_iIntraPeriod;                                   ///< period of I-slice (random access period)
   int       m_iDecodingRefreshType;                           ///< random access type
   int       m_iGOPSize;                                       ///< GOP size of hierarchical structure
-#if JCTVC_Y0038_PARAMS
+  int       m_drapPeriod;                                     ///< period of dependent RAP pictures
   bool      m_rewriteParamSets;                              ///< Flag to enable rewriting of parameter sets at random access points
-#endif
-  int       m_extraRPSs;                                      ///< extra RPSs added to handle CRA
+  RPLEntry  m_RPLList0[MAX_GOP];                               ///< the RPL entries from the config file
+  RPLEntry  m_RPLList1[MAX_GOP];                               ///< the RPL entries from the config file
+  bool      m_idrRefParamList;                                ///< indicates if reference picture list syntax elements are present in slice headers of IDR pictures
   GOPEntry  m_GOPList[MAX_GOP];                               ///< the coding structure entries from the config file
   int       m_numReorderPics[MAX_TLAYER];                     ///< total number of reorder pictures
   int       m_maxDecPicBuffering[MAX_TLAYER];                 ///< total number of pictures in the decoded picture buffer
@@ -172,12 +200,14 @@ protected:
   uint32_t      m_log2SaoOffsetScale[MAX_NUM_CHANNEL_TYPE];       ///< number of bits for the upward bit shift operation on the decoded SAO offsets
   bool      m_useTransformSkip;                               ///< flag for enabling intra transform skipping
   bool      m_useTransformSkipFast;                           ///< flag for enabling fast intra transform skipping
+  int       m_useBDPCM;
   uint32_t      m_log2MaxTransformSkipBlockSize;                  ///< transform-skip maximum size (minimum of 2)
   bool      m_transformSkipRotationEnabledFlag;               ///< control flag for transform-skip/transquant-bypass residual rotation
   bool      m_transformSkipContextEnabledFlag;                ///< control flag for transform-skip/transquant-bypass single significance map context
   bool      m_rdpcmEnabledFlag[NUMBER_OF_RDPCM_SIGNALLING_MODES];///< control flags for residual DPCM
   bool      m_persistentRiceAdaptationEnabledFlag;            ///< control flag for Golomb-Rice parameter adaptation over each slice
   bool      m_cabacBypassAlignmentEnabledFlag;
+  bool      m_ISP;
   bool      m_useFastISP;                                    ///< flag for enabling fast methods for ISP
 
   // coding quality
@@ -187,6 +217,8 @@ protected:
   double    m_fQP;                                            ///< QP value of key-picture (floating point)
 #endif
   int       m_iQP;                                            ///< QP value of key-picture (integer)
+  bool      m_useIdentityTableForNon420Chroma;
+  ChromaQpMappingTableParams m_chromaQpMappingTableParams;
 #if X0038_LAMBDA_FROM_QP_CAPABILITY
   int       m_intraQPOffset;                                  ///< QP offset for intra slice (integer)
   bool      m_lambdaFromQPEnable;                             ///< enable flag for QP:lambda fix
@@ -203,6 +235,8 @@ protected:
   int       m_crQpOffset;                                     ///< Chroma Cr QP Offset (0:default)
   int       m_cbQpOffsetDualTree;                             ///< Chroma Cb QP Offset for dual tree (overwrite m_cbQpOffset for dual tree)
   int       m_crQpOffsetDualTree;                             ///< Chroma Cr QP Offset for dual tree (overwrite m_crQpOffset for dual tree)
+  int       m_cbCrQpOffset;                                   ///< QP Offset for joint Cb-Cr mode
+  int       m_cbCrQpOffsetDualTree;                           ///< QP Offset for joint Cb-Cr mode (overwrite m_cbCrQpOffset for dual tree)
 #if ER_CHROMA_QP_WCG_PPS
   WCGChromaQPControl m_wcgChromaQpControl;                    ///< Wide-colour-gamut chroma QP control.
 #endif
@@ -225,27 +259,44 @@ protected:
 
   // coding unit (CU) definition
   unsigned  m_uiCTUSize;
+  bool m_subPicPresentFlag;
+  unsigned m_numSubPics;
+  std::vector<uint32_t> m_subPicCtuTopLeftX;
+  std::vector<uint32_t> m_subPicCtuTopLeftY;
+  std::vector<uint32_t> m_subPicWidth;
+  std::vector<uint32_t> m_subPicHeight;
+  std::vector<uint32_t> m_subPicTreatedAsPicFlag;
+  std::vector<uint32_t> m_loopFilterAcrossSubpicEnabledFlag;
+  bool m_subPicIdPresentFlag;
+  bool m_subPicIdSignallingPresentFlag;
+  unsigned m_subPicIdLen;
+  std::vector<uint32_t> m_subPicId;
   bool      m_SplitConsOverrideEnabledFlag;
   unsigned  m_uiMinQT[3]; // 0: I slice luma; 1: P/B slice; 2: I slice chroma
-  unsigned  m_uiMaxBTDepth;
-  unsigned  m_uiMaxBTDepthI;
-  unsigned  m_uiMaxBTDepthIChroma;
+  unsigned  m_uiMaxMTTHierarchyDepth;
+  unsigned  m_uiMaxMTTHierarchyDepthI;
+  unsigned  m_uiMaxMTTHierarchyDepthIChroma;
   bool      m_dualTree;
+  bool      m_LFNST;
+  bool      m_useFastLFNST;
   int       m_SubPuMvpMode;
   bool      m_Affine;
   bool      m_AffineType;
+  bool      m_PROF;
   bool      m_BIO;
   int       m_LMChroma;
-  bool      m_cclmCollocatedChromaFlag;
+  bool      m_horCollocatedChromaFlag;
+  bool      m_verCollocatedChromaFlag;
   int       m_MTS;                                            ///< XZ: Multiple Transform Set
   int       m_MTSIntraMaxCand;                                ///< XZ: Number of additional candidates to test
   int       m_MTSInterMaxCand;                                ///< XZ: Number of additional candidates to test
   int       m_MTSImplicit;
   bool      m_SBT;                                            ///< Sub-Block Transform for inter blocks
-
+  int       m_SBTFast64WidthTh;
+  bool      m_SMVD;
   bool      m_compositeRefEnabled;
-  bool      m_GBi;
-  bool      m_GBiFast;
+  bool      m_bcw;
+  bool      m_BcwFast;
 #if LUMA_ADAPTIVE_DEBLOCKING_FILTER_QP_OFFSET
   bool      m_LadfEnabed;
   int       m_LadfNumIntervals;
@@ -253,14 +304,20 @@ protected:
   int       m_LadfIntervalLowerBound[MAX_LADF_INTERVALS];
 #endif
 
-  bool      m_MHIntra;
+  bool      m_ciip;
   bool      m_Triangle;
   bool      m_HashME;
   bool      m_allowDisFracMMVD;
   bool      m_AffineAmvr;
   bool      m_AffineAmvrEncOpt;
   bool      m_DMVR;
-
+  bool      m_MMVD;
+  int       m_MmvdDisNum;
+  bool      m_rgbFormat;
+  bool      m_useColorTrans;
+  unsigned  m_PLTMode;
+  bool      m_JointCbCrMode;
+  bool      m_useChromaTS;
   unsigned  m_IBCMode;
   unsigned  m_IBCLocalSearchRangeX;
   unsigned  m_IBCLocalSearchRangeY;
@@ -273,10 +330,19 @@ protected:
   unsigned  m_wrapAroundOffset;
 
   // ADD_NEW_TOOL : (encoder app) add tool enabling flags and associated parameters here
-  bool      m_lumaReshapeEnable;
+  bool      m_loopFilterAcrossVirtualBoundariesDisabledFlag;
+  unsigned  m_numVerVirtualBoundaries;
+  unsigned  m_numHorVirtualBoundaries;
+  std::vector<unsigned> m_virtualBoundariesPosX;
+  std::vector<unsigned> m_virtualBoundariesPosY;
+  bool      m_lmcsEnabled;
   uint32_t  m_reshapeSignalType;
   uint32_t  m_intraCMD;
   ReshapeCW m_reshapeCW;
+  int       m_updateCtrl;
+  int       m_adpOption;
+  uint32_t  m_initialCW;
+  int       m_CSoffset;
   bool      m_encDbOpt;
   unsigned  m_uiMaxCUWidth;                                   ///< max. CU width in pixel
   unsigned  m_uiMaxCUHeight;                                  ///< max. CU height in pixel
@@ -290,6 +356,13 @@ protected:
   bool      m_useFastMrg;
   bool      m_e0023FastEnc;
   bool      m_contentBasedFastQtbt;
+  bool      m_useNonLinearAlfLuma;
+  bool      m_useNonLinearAlfChroma;
+  unsigned  m_maxNumAlfAlternativesChroma;
+  bool      m_MRL;
+  bool      m_MIP;
+  bool      m_useFastMIP;
+  int       m_fastLocalDualTreeMode;
 
 
   int       m_numSplitThreads;
@@ -298,9 +371,7 @@ protected:
   int       m_numWppExtraLines;
   bool      m_ensureWppBitEqual;
 
-#if MAX_TB_SIZE_SIGNALLING
   int       m_log2MaxTbSize;
-#endif
   // coding tools (bit-depth)
   int       m_inputBitDepth   [MAX_NUM_CHANNEL_TYPE];         ///< bit-depth of input file
   int       m_outputBitDepth  [MAX_NUM_CHANNEL_TYPE];         ///< bit-depth of output file
@@ -312,8 +383,6 @@ protected:
   //coding tools (chroma format)
   ChromaFormat m_chromaFormatIDC;
 
-  // coding tools (PCM bit-depth)
-  bool      m_bPCMInputBitDepthFlag;                          ///< 0: PCM bit-depth is internal bit-depth. 1: PCM bit-depth is input bit-depth.
 
   // coding tool (SAO)
   bool      m_bUseSAO;
@@ -322,9 +391,7 @@ protected:
   double    m_saoEncodingRateChroma;                          ///< The SAO early picture termination rate to use for chroma (when m_SaoEncodingRate is >0). If <=0, use results for luma.
   int       m_maxNumOffsetsPerPic;                            ///< SAO maximun number of offset per picture
   bool      m_saoCtuBoundary;                                 ///< SAO parameter estimation using non-deblocked pixels for CTU bottom and right boundary areas
-#if K0238_SAO_GREEDY_MERGE_ENCODING
   bool      m_saoGreedyMergeEnc;                              ///< SAO greedy merge encoding algorithm
-#endif
   // coding tools (loop filter)
   bool      m_bLoopFilterDisable;                             ///< flag for using deblocking filter
   bool      m_loopFilterOffsetInPPS;                         ///< offset for deblocking filter in 0 = slice header, 1 = PPS
@@ -335,11 +402,6 @@ protected:
 #else
   bool      m_DeblockingFilterMetric;                         ///< blockiness metric in encoder
 #endif
-  // coding tools (PCM)
-  bool      m_usePCM;                                         ///< flag for using IPCM
-  uint32_t      m_pcmLog2MaxSize;                                 ///< log2 of maximum PCM block size
-  uint32_t      m_uiPCMLog2MinSize;                               ///< log2 of minimum PCM block size
-  bool      m_bPCMFilterDisableFlag;                          ///< PCM filter disable flag
   bool      m_enableIntraReferenceSmoothing;                  ///< flag for enabling(default)/disabling intra reference smoothing/filtering
 
   // coding tools (encoder-only parameters)
@@ -364,99 +426,153 @@ protected:
   bool      m_useFastDecisionForMerge;                        ///< flag for using Fast Decision Merge RD-Cost
   bool      m_bUseCbfFastMode;                                ///< flag for using Cbf Fast PU Mode Decision
   bool      m_useEarlySkipDetection;                          ///< flag for using Early SKIP Detection
-  SliceConstraint m_sliceMode;
-  int             m_sliceArgument;                            ///< argument according to selected slice mode
-#if HEVC_DEPENDENT_SLICES
-  SliceConstraint m_sliceSegmentMode;
-  int             m_sliceSegmentArgument;                     ///< argument according to selected slice segment mode
-#endif
-
-  bool      m_bLFCrossSliceBoundaryFlag;  ///< 1: filter across slice boundaries 0: do not filter across slice boundaries
-#if HEVC_TILES_WPP
-  bool      m_bLFCrossTileBoundaryFlag;   ///< 1: filter across tile boundaries  0: do not filter across tile boundaries
-  bool      m_tileUniformSpacingFlag;
-  int       m_numTileColumnsMinus1;
-  int       m_numTileRowsMinus1;
-  std::vector<int> m_tileColumnWidth;
-  std::vector<int> m_tileRowHeight;
+  bool      m_picPartitionFlag;                               ///< enable picture partitioning (0: single tile, single slice, 1: multiple tiles/slices can be used)
+  std::vector<uint32_t> m_tileColumnWidth;                    ///< tile column widths in units of CTUs (last column width will be repeated uniformly to cover any remaining picture width)
+  std::vector<uint32_t> m_tileRowHeight;                      ///< tile row heights in units of CTUs (last row height will be repeated uniformly to cover any remaining picture height)
+  bool      m_rasterSliceFlag;                                ///< indicates if using raster-scan or rectangular slices (0: rectangular, 1: raster-scan)
+  std::vector<uint32_t> m_rectSlicePos;                       ///< rectangular slice positions (pairs of top-left CTU address followed by bottom-right CTU address)
+  int       m_rectSliceFixedWidth;                            ///< fixed rectangular slice width in units of tiles (0: disable this feature and use RectSlicePositions instead)
+  int       m_rectSliceFixedHeight;                           ///< fixed rectangular slice height in units of tiles (0: disable this feature and use RectSlicePositions instead)
+  std::vector<uint32_t> m_rasterSliceSize;                    ///< raster-scan slice sizes in units of tiles (last size will be repeated uniformly to cover any remaining tiles in the picture)
+  bool      m_disableLFCrossTileBoundaryFlag;                 ///< 0: filter across tile boundaries  1: do not filter across tile boundaries
+  bool      m_disableLFCrossSliceBoundaryFlag;                ///< 0: filter across slice boundaries 1: do not filter across slice boundaries
+  uint32_t  m_numSlicesInPic;                                 ///< derived number of rectangular slices in the picture (raster-scan slice specified at slice level)
+  bool      m_tileIdxDeltaPresentFlag;                        ///< derived tile index delta present flag
+  std::vector<RectSlice> m_rectSlices;                        ///< derived list of rectangular slice signalling parameters
+  uint32_t  m_numTileCols;                                    ///< derived number of tile columns
+  uint32_t  m_numTileRows;                                    ///< derived number of tile rows
+  bool      m_subPicPartitionFlag;
+  bool      m_singleSlicePerSubPicFlag;
   bool      m_entropyCodingSyncEnabledFlag;
-#endif
 
-  bool      m_bUseConstrainedIntraPred;                       ///< flag for using constrained intra prediction
+
   bool      m_bFastUDIUseMPMEnabled;
   bool      m_bFastMEForGenBLowDelayEnabled;
   bool      m_bUseBLambdaForNonKeyLowDelayPictures;
 
   HashType  m_decodedPictureHashSEIType;                      ///< Checksum mode for decoded picture hash SEI message
+#if HEVC_SEI
   bool      m_recoveryPointSEIEnabled;
+#endif
   bool      m_bufferingPeriodSEIEnabled;
   bool      m_pictureTimingSEIEnabled;
-  bool      m_toneMappingInfoSEIEnabled;
-  bool      m_chromaResamplingFilterSEIenabled;
-  int       m_chromaResamplingHorFilterIdc;
-  int       m_chromaResamplingVerFilterIdc;
-  int       m_toneMapId;
-  bool      m_toneMapCancelFlag;
-  bool      m_toneMapPersistenceFlag;
-  int       m_toneMapCodedDataBitDepth;
-  int       m_toneMapTargetBitDepth;
-  int       m_toneMapModelId;
-  int       m_toneMapMinValue;
-  int       m_toneMapMaxValue;
-  int       m_sigmoidMidpoint;
-  int       m_sigmoidWidth;
-  int       m_numPivots;
-  int       m_cameraIsoSpeedIdc;
-  int       m_cameraIsoSpeedValue;
-  int       m_exposureIndexIdc;
-  int       m_exposureIndexValue;
-  bool      m_exposureCompensationValueSignFlag;
-  int       m_exposureCompensationValueNumerator;
-  int       m_exposureCompensationValueDenomIdc;
-  int       m_refScreenLuminanceWhite;
-  int       m_extendedRangeWhiteLevel;
-  int       m_nominalBlackLevelLumaCodeValue;
-  int       m_nominalWhiteLevelLumaCodeValue;
-  int       m_extendedWhiteLevelLumaCodeValue;
-  int*      m_startOfCodedInterval;
-  int*      m_codedPivotValue;
-  int*      m_targetPivotValue;
+  bool      m_bpDeltasGOPStructure;
+  bool      m_decodingUnitInfoSEIEnabled;
+  bool      m_frameFieldInfoSEIEnabled;
   bool      m_framePackingSEIEnabled;
   int       m_framePackingSEIType;
   int       m_framePackingSEIId;
   int       m_framePackingSEIQuincunx;
   int       m_framePackingSEIInterpretation;
-  bool      m_segmentedRectFramePackingSEIEnabled;
-  bool      m_segmentedRectFramePackingSEICancel;
-  int       m_segmentedRectFramePackingSEIType;
-  bool      m_segmentedRectFramePackingSEIPersistence;
-  int       m_displayOrientationSEIAngle;
-  bool      m_temporalLevel0IndexSEIEnabled;
-  bool      m_gradualDecodingRefreshInfoEnabled;
-  int       m_noDisplaySEITLayer;
-  bool      m_decodingUnitInfoSEIEnabled;
-  bool      m_SOPDescriptionSEIEnabled;
-  bool      m_scalableNestingSEIEnabled;
-  bool      m_tmctsSEIEnabled;
-  bool      m_timeCodeSEIEnabled;
-  int       m_timeCodeSEINumTs;
-  SEITimeSet m_timeSetArray[MAX_TIMECODE_SEI_SETS];
-  bool      m_kneeSEIEnabled;
-  int       m_kneeSEIId;
-  bool      m_kneeSEICancelFlag;
-  bool      m_kneeSEIPersistenceFlag;
-  int       m_kneeSEIInputDrange;
-  int       m_kneeSEIInputDispLuminance;
-  int       m_kneeSEIOutputDrange;
-  int       m_kneeSEIOutputDispLuminance;
-  int       m_kneeSEINumKneePointsMinus1;
-  int*      m_kneeSEIInputKneePoint;
-  int*      m_kneeSEIOutputKneePoint;
 #if U0033_ALTERNATIVE_TRANSFER_CHARACTERISTICS_SEI
   int       m_preferredTransferCharacteristics;
 #endif
-  uint32_t      m_greenMetadataType;
-  uint32_t      m_xsdMetricType;
+  // film grain characteristics sei
+  bool      m_fgcSEIEnabled;
+  bool      m_fgcSEICancelFlag;
+  bool      m_fgcSEIPersistenceFlag;
+  uint32_t  m_fgcSEIModelID;
+  bool      m_fgcSEISepColourDescPresentFlag;
+  uint32_t  m_fgcSEIBlendingModeID;
+  uint32_t  m_fgcSEILog2ScaleFactor;
+  bool      m_fgcSEICompModelPresent[MAX_NUM_COMPONENT];
+  // content light level SEI
+  bool      m_cllSEIEnabled;
+  uint32_t  m_cllSEIMaxContentLevel;
+  uint32_t  m_cllSEIMaxPicAvgLevel;
+  // ambient viewing environment sei
+  bool      m_aveSEIEnabled;
+  uint32_t  m_aveSEIAmbientIlluminance;
+  uint32_t  m_aveSEIAmbientLightX;
+  uint32_t  m_aveSEIAmbientLightY;
+  // content colour volume sei
+  bool      m_ccvSEIEnabled;
+  bool      m_ccvSEICancelFlag;
+  bool      m_ccvSEIPersistenceFlag;
+  bool      m_ccvSEIPrimariesPresentFlag;
+  bool      m_ccvSEIMinLuminanceValuePresentFlag;
+  bool      m_ccvSEIMaxLuminanceValuePresentFlag;
+  bool      m_ccvSEIAvgLuminanceValuePresentFlag;
+  double    m_ccvSEIPrimariesX[MAX_NUM_COMPONENT];
+  double    m_ccvSEIPrimariesY[MAX_NUM_COMPONENT];
+  double    m_ccvSEIMinLuminanceValue;
+  double    m_ccvSEIMaxLuminanceValue;
+  double    m_ccvSEIAvgLuminanceValue;
+
+  bool      m_erpSEIEnabled;
+  bool      m_erpSEICancelFlag;
+  bool      m_erpSEIPersistenceFlag;
+  bool      m_erpSEIGuardBandFlag;
+  uint32_t  m_erpSEIGuardBandType;
+  uint32_t  m_erpSEILeftGuardBandWidth;
+  uint32_t  m_erpSEIRightGuardBandWidth;
+
+  bool      m_sphereRotationSEIEnabled;
+  bool      m_sphereRotationSEICancelFlag;
+  bool      m_sphereRotationSEIPersistenceFlag;
+  int       m_sphereRotationSEIYaw;
+  int       m_sphereRotationSEIPitch;
+  int       m_sphereRotationSEIRoll;
+
+  bool      m_omniViewportSEIEnabled;
+  uint32_t  m_omniViewportSEIId;
+  bool      m_omniViewportSEICancelFlag;
+  bool      m_omniViewportSEIPersistenceFlag;
+  uint32_t  m_omniViewportSEICntMinus1;
+  std::vector<int>      m_omniViewportSEIAzimuthCentre;
+  std::vector<int>      m_omniViewportSEIElevationCentre;
+  std::vector<int>      m_omniViewportSEITiltCentre;
+  std::vector<uint32_t> m_omniViewportSEIHorRange;
+  std::vector<uint32_t> m_omniViewportSEIVerRange;
+  bool                  m_rwpSEIEnabled;
+  bool                  m_rwpSEIRwpCancelFlag;
+  bool                  m_rwpSEIRwpPersistenceFlag;
+  bool                  m_rwpSEIConstituentPictureMatchingFlag;
+  int                   m_rwpSEINumPackedRegions;
+  int                   m_rwpSEIProjPictureWidth;
+  int                   m_rwpSEIProjPictureHeight;
+  int                   m_rwpSEIPackedPictureWidth;
+  int                   m_rwpSEIPackedPictureHeight;
+  std::vector<uint8_t>  m_rwpSEIRwpTransformType;
+  std::vector<bool>     m_rwpSEIRwpGuardBandFlag;
+  std::vector<uint32_t> m_rwpSEIProjRegionWidth;
+  std::vector<uint32_t> m_rwpSEIProjRegionHeight;
+  std::vector<uint32_t> m_rwpSEIRwpSEIProjRegionTop;
+  std::vector<uint32_t> m_rwpSEIProjRegionLeft;
+  std::vector<uint16_t> m_rwpSEIPackedRegionWidth;
+  std::vector<uint16_t> m_rwpSEIPackedRegionHeight;
+  std::vector<uint16_t> m_rwpSEIPackedRegionTop;
+  std::vector<uint16_t> m_rwpSEIPackedRegionLeft;
+  std::vector<uint8_t>  m_rwpSEIRwpLeftGuardBandWidth;
+  std::vector<uint8_t>  m_rwpSEIRwpRightGuardBandWidth;
+  std::vector<uint8_t>  m_rwpSEIRwpTopGuardBandHeight;
+  std::vector<uint8_t>  m_rwpSEIRwpBottomGuardBandHeight;
+  std::vector<bool>     m_rwpSEIRwpGuardBandNotUsedForPredFlag;
+  std::vector<uint8_t>  m_rwpSEIRwpGuardBandType;
+
+  bool                 m_gcmpSEIEnabled;
+  bool                 m_gcmpSEICancelFlag;
+  bool                 m_gcmpSEIPersistenceFlag;
+  uint32_t             m_gcmpSEIPackingType;
+  uint32_t             m_gcmpSEIMappingFunctionType;
+  std::vector<uint8_t> m_gcmpSEIFaceIndex;
+  std::vector<uint8_t> m_gcmpSEIFaceRotation;
+  std::vector<double>  m_gcmpSEIFunctionCoeffU;
+  std::vector<bool>    m_gcmpSEIFunctionUAffectedByVFlag;
+  std::vector<double>  m_gcmpSEIFunctionCoeffV;
+  std::vector<bool>    m_gcmpSEIFunctionVAffectedByUFlag;
+  bool                 m_gcmpSEIGuardBandFlag;
+  bool                 m_gcmpSEIGuardBandBoundaryType;
+  uint32_t             m_gcmpSEIGuardBandSamplesMinus1;
+
+  bool m_subpicureLevelInfoSEIEnabled;
+
+  bool                  m_sampleAspectRatioInfoSEIEnabled;
+  bool                  m_sariCancelFlag;
+  bool                  m_sariPersistenceFlag;
+  int                   m_sariAspectRatioIdc;
+  int                   m_sariSarWidth;
+  int                   m_sariSarHeight;
 
   bool      m_MCTSEncConstraint;
 
@@ -465,15 +581,27 @@ protected:
   bool      m_useWeightedBiPred;                  ///< Use of bi-directional weighted prediction in B slices
   WeightedPredictionMethod m_weightedPredictionMethod;
 
-  uint32_t      m_log2ParallelMergeLevel;                         ///< Parallel merge estimation region
   uint32_t      m_maxNumMergeCand;                                ///< Max number of merge candidates
   uint32_t      m_maxNumAffineMergeCand;                          ///< Max number of affine merge candidates
+  uint32_t      m_maxNumTriangleCand;
+  uint32_t      m_maxNumIBCMergeCand;                             ///< Max number of IBC merge candidates
 
+  bool      m_sliceLevelRpl;                                      ///< code reference picture lists in slice headers rather than picture header
+  bool      m_sliceLevelDblk;                                     ///< code deblocking filter parameters in slice headers rather than picture header
+  bool      m_sliceLevelSao;                                      ///< code SAO parameters in slice headers rather than picture header
+  bool      m_sliceLevelAlf;                                      ///< code ALF parameters in slice headers rather than picture header
   int       m_TMVPModeId;
+  int       m_PPSorSliceMode;
+  bool      m_constantSliceHeaderParamsEnabledFlag;
+  int       m_PPSDepQuantEnabledIdc;
+  int       m_PPSRefPicListSPSIdc0;
+  int       m_PPSRefPicListSPSIdc1;
+  int       m_PPSMvdL1ZeroIdc;
+  int       m_PPSCollocatedFromL0Idc;
+  uint32_t  m_PPSSixMinusMaxNumMergeCandPlus1;
+  uint32_t  m_PPSMaxNumMergeCandMinusMaxNumTriangleCandPlus1;
   bool      m_depQuantEnabledFlag;
-#if HEVC_USE_SIGN_HIDING
   bool      m_signDataHidingEnabledFlag;
-#endif
   bool      m_RCEnableRateControl;                ///< enable rate control or not
   int       m_RCTargetBitrate;                    ///< target bitrate when rate control is enabled
   int       m_RCKeepHierarchicalBit;              ///< 0: equal bit allocation; 1: fixed ratio bit allocation; 2: adaptive ratio bit allocation
@@ -486,30 +614,23 @@ protected:
   uint32_t      m_RCCpbSize;                          ///< CPB size
   double    m_RCInitialCpbFullness;               ///< initial CPB fullness
 #endif
-#if HEVC_USE_SCALING_LISTS
   ScalingListMode m_useScalingListId;                         ///< using quantization matrix
   std::string m_scalingListFileName;                          ///< quantization matrix file name
-#endif
-  bool      m_TransquantBypassEnabledFlag;                    ///< transquant_bypass_enabled_flag setting in PPS.
-  bool      m_CUTransquantBypassFlagForce;                    ///< if transquant_bypass_enabled_flag, then, if true, all CU transquant bypass flags will be set to true.
+  bool      m_disableScalingMatrixForLfnstBlks;
   CostMode  m_costMode;                                       ///< Cost mode to use
 
   bool      m_recalculateQPAccordingToLambda;                 ///< recalculate QP value according to the lambda value
-#if HEVC_USE_INTRA_SMOOTHING_T32 || HEVC_USE_INTRA_SMOOTHING_T64
-  bool      m_useStrongIntraSmoothing;                        ///< enable strong intra smoothing for 32x32 blocks where the reference samples are flat
-#endif
+#if HEVC_SEI
   int       m_activeParameterSetsSEIEnabled;
+#endif
+  bool      m_decodingParameterSetEnabled;                   ///< enable decoding parameter set
 
+  bool      m_hrdParametersPresentFlag;                       ///< enable generation of HRD parameters
   bool      m_vuiParametersPresentFlag;                       ///< enable generation of VUI parameters
   bool      m_aspectRatioInfoPresentFlag;                     ///< Signals whether aspect_ratio_idc is present
   int       m_aspectRatioIdc;                                 ///< aspect_ratio_idc
   int       m_sarWidth;                                       ///< horizontal size of the sample aspect ratio
   int       m_sarHeight;                                      ///< vertical size of the sample aspect ratio
-  bool      m_overscanInfoPresentFlag;                        ///< Signals whether overscan_appropriate_flag is present
-  bool      m_overscanAppropriateFlag;                        ///< Indicates whether conformant decoded pictures are suitable for display using overscan
-  bool      m_videoSignalTypePresentFlag;                     ///< Signals whether video_format, video_full_range_flag, and colour_description_present_flag are present
-  int       m_videoFormat;                                    ///< Indicates representation of pictures
-  bool      m_videoFullRangeFlag;                             ///< Indicates the black level and range of luma and chroma signals
   bool      m_colourDescriptionPresentFlag;                   ///< Signals whether colour_primaries, transfer_characteristics and matrix_coefficients are present
   int       m_colourPrimaries;                                ///< Indicates chromaticity coordinates of the source primaries
   int       m_transferCharacteristics;                        ///< Indicates the opto-electronic transfer characteristics of the source
@@ -517,28 +638,15 @@ protected:
   bool      m_chromaLocInfoPresentFlag;                       ///< Signals whether chroma_sample_loc_type_top_field and chroma_sample_loc_type_bottom_field are present
   int       m_chromaSampleLocTypeTopField;                    ///< Specifies the location of chroma samples for top field
   int       m_chromaSampleLocTypeBottomField;                 ///< Specifies the location of chroma samples for bottom field
-  bool      m_neutralChromaIndicationFlag;                    ///< Indicates that the value of all decoded chroma samples is equal to 1<<(BitDepthCr-1)
-  bool      m_defaultDisplayWindowFlag;                       ///< Indicates the presence of the default window parameters
-  int       m_defDispWinLeftOffset;                           ///< Specifies the left offset from the conformance window of the default window
-  int       m_defDispWinRightOffset;                          ///< Specifies the right offset from the conformance window of the default window
-  int       m_defDispWinTopOffset;                            ///< Specifies the top offset from the conformance window of the default window
-  int       m_defDispWinBottomOffset;                         ///< Specifies the bottom offset from the conformance window of the default window
-  bool      m_frameFieldInfoPresentFlag;                      ///< Indicates that pic_struct values are present in picture timing SEI messages
-  bool      m_pocProportionalToTimingFlag;                    ///< Indicates that the POC value is proportional to the output time w.r.t. first picture in CVS
-  int       m_numTicksPocDiffOneMinus1;                       ///< Number of ticks minus 1 that for a POC difference of one
-  bool      m_bitstreamRestrictionFlag;                       ///< Signals whether bitstream restriction parameters are present
-#if HEVC_TILES_WPP
-  bool      m_tilesFixedStructureFlag;                        ///< Indicates that each active picture parameter set has the same values of the syntax elements related to tiles
-#endif
-  bool      m_motionVectorsOverPicBoundariesFlag;             ///< Indicates that no samples outside the picture boundaries are used for inter prediction
-  int       m_minSpatialSegmentationIdc;                      ///< Indicates the maximum size of the spatial segments in the pictures in the coded video sequence
-  int       m_maxBytesPerPicDenom;                            ///< Indicates a number of bytes not exceeded by the sum of the sizes of the VCL NAL units associated with any coded picture
-  int       m_maxBitsPerMinCuDenom;                           ///< Indicates an upper bound for the number of bits of coding_unit() data
-  int       m_log2MaxMvLengthHorizontal;                      ///< Indicate the maximum absolute value of a decoded horizontal MV component in quarter-pel luma units
-  int       m_log2MaxMvLengthVertical;                        ///< Indicate the maximum absolute value of a decoded vertical MV component in quarter-pel luma units
+  int       m_chromaSampleLocType;                            ///< Specifies the location of chroma samples for progressive content
+  bool      m_overscanInfoPresentFlag;                        ///< Signals whether overscan_appropriate_flag is present
+  bool      m_overscanAppropriateFlag;                        ///< Indicates whether conformant decoded pictures are suitable for display using overscan
+  bool      m_videoFullRangeFlag;                             ///< Indicates the black level and range of luma and chroma signals
   int       m_ImvMode;                                        ///< imv mode
   int       m_Imv4PelFast;                                    ///< imv 4-Pel fast mode
+#if HEVC_SEI
   std::string m_colourRemapSEIFileRoot;
+#endif
 
   std::string m_summaryOutFilename;                           ///< filename to use for producing summary output file.
   std::string m_summaryPicFilenameBase;                       ///< Base filename to use for producing summary picture output files. The actual filenames used will have I.txt, P.txt and B.txt appended.
@@ -555,7 +663,32 @@ protected:
   bool        m_bs2ModPOCAndType;
   bool        m_forceDecodeBitstream1;
 
-  bool        m_alf;                                          ///> Adaptive Loop Filter
+  bool        m_alf;                                          ///< Adaptive Loop Filter
+
+  double      m_scalingRatioHor;
+  double      m_scalingRatioVer;
+  bool        m_rprEnabled;
+  double      m_fractionOfFrames;                             ///< encode a fraction of the frames as specified in FramesToBeEncoded
+  int         m_switchPocPeriod;
+  int         m_upscaledOutput;                               ///< Output upscaled (2), decoded cropped but in full resolution buffer (1) or decoded cropped (0, default) picture for RPR.
+
+  bool                  m_gopBasedTemporalFilterEnabled;               ///< GOP-based Temporal Filter enable/disable
+  bool                  m_gopBasedTemporalFilterFutureReference;       ///< Enable/disable future frame references in the GOP-based Temporal Filter
+  std::map<int, double> m_gopBasedTemporalFilterStrengths;             ///< Filter strength per frame for the GOP-based Temporal Filter
+
+  int         m_maxLayers;
+
+  int         m_layerId[MAX_VPS_LAYERS];
+  int         m_layerIdx;
+  int         m_maxSublayers;
+  bool        m_allLayersSameNumSublayersFlag;
+  bool        m_allIndependentLayersFlag;
+  int         m_numRefLayers[MAX_VPS_LAYERS];
+  std::string m_refLayerIdxStr[MAX_VPS_LAYERS];
+  bool        m_eachLayerIsAnOlsFlag;
+  int         m_olsModeIdc;
+  int         m_numOutputLayerSets;
+  std::string m_olsOutputLayerStr[MAX_VPS_LAYERS];
 
 #if EXTENSION_360_VIDEO
   TExt360AppEncCfg m_ext360;
@@ -563,11 +696,32 @@ protected:
   friend class TExt360AppEncTop;
 #endif
 
+#if JVET_O0756_CONFIG_HDRMETRICS || JVET_O0756_CALCULATE_HDRMETRICS
+#if JVET_O0756_CALCULATE_HDRMETRICS
+  double      m_whitePointDeltaE[hdrtoolslib::NB_REF_WHITE];
+#else
+  double      m_whitePointDeltaE[3];
+#endif
+  double      m_maxSampleValue;
+  int         m_sampleRange;
+  int         m_colorPrimaries;
+  bool        m_enableTFunctionLUT;
+  int         m_chromaLocation;
+  int         m_chromaUPFilter;
+  int         m_cropOffsetLeft;
+  int         m_cropOffsetTop;
+  int         m_cropOffsetRight;
+  int         m_cropOffsetBottom;
+  bool        m_calculateHdrMetrics;
+#endif
 
   // internal member functions
   bool  xCheckParameter ();                                   ///< check validity of configuration values
   void  xPrintParameter ();                                   ///< print configuration values
   void  xPrintUsage     ();                                   ///< print usage
+  bool  xHasNonZeroTemporalID();                             ///< check presence of a non-zero temporal ID in GOP structure (wording follows the function name; definition not visible here - TODO confirm)
+  bool  xHasLeadingPicture();                                 ///< check presence of leading pictures in GOP structure
+  int   xAutoDetermineProfile();                              ///< auto determine the profile to use given the other configuration settings. Returns 1 on error. Can select profile 'NONE'
 public:
   EncAppCfg();
   virtual ~EncAppCfg();
diff --git a/source/App/EncoderApp/encmain.cpp b/source/App/EncoderApp/encmain.cpp
index 444c6aff32bce814cc0e227a38a7d60eeff4acc3..1b752b0c1483df3d535d5ebadd53b5e206c09c4c 100644
--- a/source/App/EncoderApp/encmain.cpp
+++ b/source/App/EncoderApp/encmain.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -40,6 +40,7 @@
 #include <chrono>
 #include <ctime>
 
+#include "EncoderLib/EncLibCommon.h"
 #include "EncApp.h"
 #include "Utilities/program_options_lite.h"
 
@@ -104,10 +105,7 @@ int main(int argc, char* argv[])
 #if ENABLE_SPLIT_PARALLELISM
   fprintf( stdout, "[SPLIT_PARALLEL (%d jobs)]", PARL_SPLIT_MAX_NUM_JOBS );
 #endif
-#if ENABLE_WPP_PARALLELISM
-  fprintf( stdout, "[WPP_PARALLEL]" );
-#endif
-#if ENABLE_WPP_PARALLELISM || ENABLE_SPLIT_PARALLELISM
+#if ENABLE_SPLIT_PARALLELISM
   const char* waitPolicy = getenv( "OMP_WAIT_POLICY" );
   const char* maxThLim   = getenv( "OMP_THREAD_LIMIT" );
   fprintf( stdout, waitPolicy ? "[OMP: WAIT_POLICY=%s," : "[OMP: WAIT_POLICY=,", waitPolicy );
@@ -116,24 +114,72 @@ int main(int argc, char* argv[])
 #endif
   fprintf( stdout, "\n" );
 
-  EncApp* pcEncApp = new EncApp;
-  // create application encoder class
-  pcEncApp->create();
+  std::fstream bitstream;
+  EncLibCommon encLibCommon;
+
+  std::vector<EncApp*> pcEncApp(1);
+  bool resized = false;
+  int layerIdx = 0;
 
-  // parse configuration
-  try
+  initROM();
+  TComHash::initBlockSizeToIndex();
+
+  char** layerArgv = new char*[argc];
+
+  do
   {
-    if(!pcEncApp->parseCfg( argc, argv ))
+    pcEncApp[layerIdx] = new EncApp( bitstream, &encLibCommon );
+    // create application encoder class per layer
+    pcEncApp[layerIdx]->create();
+
+    // parse configuration per layer
+    try
     {
-      pcEncApp->destroy();
+      // Build this layer's argument list: common options are copied as-is, "-l<idx> <opt> <val>" triples only when <idx> == layerIdx.
+      int j = 0;
+      for( int i = 0; i < argc; i++ )
+      {
+        // Treat "-l<idx>" as a layer prefix only if both following arguments exist; otherwise argv[i + 2] would be read out of bounds.
+        if( argv[i][0] == '-' && argv[i][1] == 'l' && i + 2 < argc )
+        {
+          if( std::to_string( layerIdx ) == ( argv[i] + 2 ) ) // compare the whole index, so "-l1" is not mistaken for layers 10..19
+          {
+            layerArgv[j++] = argv[i + 1];
+            layerArgv[j++] = argv[i + 2];
+          }
+          i += 2; // skip the option/value pair whether or not it was addressed to this layer
+        }
+        else
+        {
+          layerArgv[j++] = argv[i]; // common argument (or a truncated "-l<idx>" left for parseCfg to reject)
+        }
+      }
+
+      if( !pcEncApp[layerIdx]->parseCfg( j, layerArgv ) )
+      {
+        pcEncApp[layerIdx]->destroy();
+        return 1;
+      }
+    }
+    catch( df::program_options_lite::ParseFailure &e )
+    {
+      std::cerr << "Error parsing option \"" << e.arg << "\" with argument \"" << e.val << "\"." << std::endl;
       return 1;
     }
-  }
-  catch (df::program_options_lite::ParseFailure &e)
-  {
-    std::cerr << "Error parsing option \""<< e.arg <<"\" with argument \""<< e.val <<"\"." << std::endl;
-    return 1;
-  }
+
+    int layerId = layerIdx; //VS: layerIdx should be converted to layerId after VPS is implemented
+    pcEncApp[layerIdx]->createLib( layerId );
+
+    if( !resized )
+    {
+      pcEncApp.resize( pcEncApp[layerIdx]->getMaxLayers() );
+      resized = true;
+    }
+
+    layerIdx++;
+  } while( layerIdx < pcEncApp.size() );
+
+  delete[] layerArgv;
 
 #if PRINT_MACRO_VALUES
   printMacroSettings();
@@ -145,40 +191,111 @@ int main(int argc, char* argv[])
   fprintf(stdout, " started @ %s", std::ctime(&startTime2) );
   clock_t startClock = clock();
 
-  // call encoding function
-#ifndef _DEBUG
-  try
+  // call encoding function per layer
+  bool eos = false;
+
+  while( !eos )
   {
+    // read GOP
+    bool keepLoop = true;
+    while( keepLoop )
+    {
+      for( auto & encApp : pcEncApp )
+      {
+#ifndef _DEBUG
+        try
+        {
 #endif
-    pcEncApp->encode();
+          keepLoop = encApp->encodePrep( eos );
 #ifndef _DEBUG
-  }
-  catch( Exception &e )
-  {
-    std::cerr << e.what() << std::endl;
-    return 1;
-  }
-  catch( ... )
-  {
-    std::cerr << "Unspecified error occurred" << std::endl;
-    return 1;
-  }
+        }
+        catch( Exception &e )
+        {
+          std::cerr << e.what() << std::endl;
+          return EXIT_FAILURE;
+        }
+        catch( const std::bad_alloc &e )
+        {
+          std::cout << "Memory allocation failed: " << e.what() << std::endl;
+          return EXIT_FAILURE;
+        }
+#endif
+      }
+    }
+
+    // encode GOP
+    keepLoop = true;
+    while( keepLoop )
+    {
+      for( auto & encApp : pcEncApp )
+      {
+#ifndef _DEBUG
+        try
+        {
+#endif
+          keepLoop = encApp->encode();
+#ifndef _DEBUG
+        }
+        catch( Exception &e )
+        {
+          std::cerr << e.what() << std::endl;
+          return EXIT_FAILURE;
+        }
+        catch( const std::bad_alloc &e )
+        {
+          std::cout << "Memory allocation failed: " << e.what() << std::endl;
+          return EXIT_FAILURE;
+        }
 #endif
+      }
+    }
+  }
   // ending time
   clock_t endClock = clock();
   auto endTime = std::chrono::steady_clock::now();
   std::time_t endTime2 = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());
-  auto encTime = std::chrono::duration_cast<std::chrono::milliseconds>( endTime- startTime ).count();
-  // destroy application encoder class
-  pcEncApp->destroy();
+#if JVET_O0756_CALCULATE_HDRMETRICS
+  auto metricTime = pcEncApp[0]->getMetricTime();
 
-  delete pcEncApp;
+  for( int layerIdx = 1; layerIdx < pcEncApp.size(); layerIdx++ )
+  {
+    metricTime += pcEncApp[layerIdx]->getMetricTime();
+  }
+  auto totalTime      = std::chrono::duration_cast<std::chrono::milliseconds>( endTime - startTime ).count();
+  auto encTime        = std::chrono::duration_cast<std::chrono::milliseconds>( endTime - startTime - metricTime ).count();
+  auto metricTimeuser = std::chrono::duration_cast<std::chrono::milliseconds>( metricTime ).count();
+#else
+  auto encTime = std::chrono::duration_cast<std::chrono::milliseconds>( endTime - startTime).count();
+#endif
+
+  for( auto & encApp : pcEncApp )
+  {
+    encApp->destroyLib();
+
+    // destroy application encoder class per layer
+    encApp->destroy();
+
+    delete encApp;
+  }
+
+  // destroy ROM
+  destroyROM();
+
+  pcEncApp.clear();
 
   printf( "\n finished @ %s", std::ctime(&endTime2) );
 
+#if JVET_O0756_CALCULATE_HDRMETRICS
+  printf(" Encoding Time (Total Time): %12.3f ( %12.3f ) sec. [user] %12.3f ( %12.3f ) sec. [elapsed]\n",
+         ((endClock - startClock) * 1.0 / CLOCKS_PER_SEC) - (metricTimeuser/1000.0),
+         (endClock - startClock) * 1.0 / CLOCKS_PER_SEC,
+         encTime / 1000.0,
+         totalTime / 1000.0);
+#else
   printf(" Total Time: %12.3f sec. [user] %12.3f sec. [elapsed]\n",
          (endClock - startClock) * 1.0 / CLOCKS_PER_SEC,
          encTime / 1000.0);
+#endif
 
   return 0;
 }
diff --git a/source/App/Parcat/CMakeLists.txt b/source/App/Parcat/CMakeLists.txt
index 55b144cc0671b5c2bd0e8c279bf689aabe2f7abf..12edc317ad223c8274d0234332fe9b2edf109db7 100644
--- a/source/App/Parcat/CMakeLists.txt
+++ b/source/App/Parcat/CMakeLists.txt
@@ -10,12 +10,12 @@ file( GLOB INC_FILES "*.h" )
 # add executable
 add_executable( ${EXE_NAME} ${SRC_FILES} ${INC_FILES} )
 
-target_link_libraries( ${EXE_NAME} Threads::Threads ${ADDITIONAL_LIBS} )
+target_link_libraries( ${EXE_NAME} CommonLib DecoderLib Utilities Threads::Threads ${ADDITIONAL_LIBS} )
 
 # include the output directory, where the svnrevision.h file is generated
 include_directories(${CMAKE_CURRENT_BINARY_DIR})
 
-include_directories(${CMAKE_SOURCE_DIR}/source/Lib/CommonLib)
+include_directories(${CMAKE_SOURCE_DIR}/source/Lib)
 
 if( CMAKE_SYSTEM_NAME STREQUAL "Linux" )
   add_custom_command( TARGET ${EXE_NAME} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy
diff --git a/source/App/Parcat/parcat.cpp b/source/App/Parcat/parcat.cpp
index d85c356d804143423a7cce3315fd047bfb3f5313..03997c6004a81b63899c7fdb60fcbdb3649e7701 100644
--- a/source/App/Parcat/parcat.cpp
+++ b/source/App/Parcat/parcat.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -36,10 +36,26 @@
 #include <cstdlib>
 #include <cstdio>
 #include <cassert>
-#include "CommonDef.h"
+#include "CommonLib/CommonDef.h"
+#include "DecoderLib/NALread.h"
+#include "VLCReader.h"
+#if ENABLE_TRACING
+#include "CommonLib/dtrace_next.h"
+#endif
+
+#define PRINT_NALUS 1
 
-#define PRINT_NALUS 0
+class ParcatHLSyntaxReader : public VLCReader // VLCReader subclass that adds parcat's slice-header helper
+{
+  public:
+    void  parseSliceHeaderUpToPoc ( ParameterSetManager *parameterSetManager ); // defined below as a no-op: the argument is unused and nothing is parsed
+};
 
+void ParcatHLSyntaxReader::parseSliceHeaderUpToPoc ( ParameterSetManager *parameterSetManager )
+{ // intentionally empty; parameterSetManager is not used
+  // POC is the first syntax element in the slice header, so nothing has to be parsed before it
+  return;
+}
 
 /**
  Find the beginning and end of a NAL (Network Abstraction Layer) unit in a byte buffer containing H264 bitstream data.
@@ -102,56 +118,6 @@ const bool verbose = false;
 
 const char * NALU_TYPE[] =
 {
-#if !JVET_M0101_HLS
-    "TRAIL_N",
-    "TRAIL_R",
-    "TSA_N",
-    "TSA_R",
-    "STSA_N",
-    "STSA_R",
-    "RADL_N",
-    "RADL_R",
-    "RASL_N",
-    "RASL_R",
-    "RSV_VCL_N10",
-    "RSV_VCL_N12",
-    "RSV_VCL_N14",
-    "RSV_VCL_R11",
-    "RSV_VCL_R13",
-    "RSV_VCL_R15",
-    "BLA_W_LP",
-    "BLA_W_RADL",
-    "BLA_N_LP",
-    "IDR_W_RADL",
-    "IDR_N_LP",
-    "CRA_NUT",
-    "RSV_IRAP_VCL22",
-    "RSV_IRAP_VCL23",
-    "unk",
-    "unk",
-    "unk",
-    "unk",
-    "unk",
-    "unk",
-    "unk",
-    "unk",
-#if HEVC_VPS
-    "VPS_NUT",
-#else
-    "unk",
-#endif
-    "SPS_NUT",
-    "PPS_NUT",
-#if JVET_M0132
-    "APS_NUT",
-#endif
-    "AUD_NUT",
-    "EOS_NUT",
-    "EOB_NUT",
-    "FD_NUT",
-    "PREFIX_SEI_NUT",
-    "SUFFIX_SEI_NUT",
-#else
     "NAL_UNIT_CODED_SLICE_TRAIL",
     "NAL_UNIT_CODED_SLICE_STSA",
     "NAL_UNIT_CODED_SLICE_RADL",
@@ -159,43 +125,31 @@ const char * NALU_TYPE[] =
     "NAL_UNIT_RESERVED_VCL_4",
     "NAL_UNIT_RESERVED_VCL_5",
     "NAL_UNIT_RESERVED_VCL_6",
-    "NAL_UNIT_RESERVED_VCL_7",
-
     "NAL_UNIT_CODED_SLICE_IDR_W_RADL",
     "NAL_UNIT_CODED_SLICE_IDR_N_LP",
     "NAL_UNIT_CODED_SLICE_CRA",
-
+    "NAL_UNIT_CODED_SLICE_GDR",
     "NAL_UNIT_RESERVED_IRAP_VCL11",
     "NAL_UNIT_RESERVED_IRAP_VCL12",
-    "NAL_UNIT_RESERVED_IRAP_VCL13",
-
-    "NAL_UNIT_RESERVED_VCL14",
-
-#if HEVC_VPS
+    "NAL_UNIT_DPS",
     "NAL_UNIT_VPS",
-#else
-    "NAL_UNIT_RESERVED_VCL15",
-#endif
-
-    "NAL_UNIT_RESERVED_NVCL16",
-
     "NAL_UNIT_SPS",
     "NAL_UNIT_PPS",
-    "NAL_UNIT_APS",
+    "NAL_UNIT_PREFIX_APS",
+    "NAL_UNIT_SUFFIX_APS",
+    "NAL_UNIT_PH",
     "NAL_UNIT_ACCESS_UNIT_DELIMITER",
     "NAL_UNIT_EOS",
     "NAL_UNIT_EOB",
     "NAL_UNIT_PREFIX_SEI",
     "NAL_UNIT_SUFFIX_SEI",
-    "NAL_UNIT_FILLER_DATA",
-
+    "NAL_UNIT_FD",
     "NAL_UNIT_RESERVED_NVCL26",
     "NAL_UNIT_RESERVED_NVCL27",
     "NAL_UNIT_UNSPECIFIED_28",
     "NAL_UNIT_UNSPECIFIED_29",
     "NAL_UNIT_UNSPECIFIED_30",
     "NAL_UNIT_UNSPECIFIED_31"
-#endif
 };
 
 int calc_poc(int iPOClsb, int prevTid0POC, int getBitsForPOC, int nalu_type)
@@ -217,15 +171,6 @@ int calc_poc(int iPOClsb, int prevTid0POC, int getBitsForPOC, int nalu_type)
   {
     iPOCmsb = iPrevPOCmsb;
   }
-#if !JVET_M0101_HLS
-  if ( nalu_type == NAL_UNIT_CODED_SLICE_BLA_W_LP
-    || nalu_type == NAL_UNIT_CODED_SLICE_BLA_W_RADL
-    || nalu_type == NAL_UNIT_CODED_SLICE_BLA_N_LP )
-  {
-    // For BLA picture types, POCmsb is set to 0.
-    iPOCmsb = 0;
-  }
-#endif
 
   return iPOCmsb + iPOClsb;
 }
@@ -245,6 +190,7 @@ std::vector<uint8_t> filter_segment(const std::vector<uint8_t> & v, int idx, int
 
   int bits_for_poc = 8;
   bool skip_next_sei = false;
+  bool first_slice_segment_in_pic_flag = false;
 
   while(find_nal_unit(p, sz, &nal_start, &nal_end) > 0)
   {
@@ -260,50 +206,57 @@ std::vector<uint8_t> filter_segment(const std::vector<uint8_t> & v, int idx, int
     p += nal_start;
 
     std::vector<uint8_t> nalu(p, p + nal_end - nal_start);
-    int nalu_type = nalu[0] >> 1;
+    int nalu_type = nalu[1] >> 3;
+#if ENABLE_TRACING
+    printf ("NALU Type: %d (%s)\n", nalu_type, NALU_TYPE[nalu_type]);
+#endif
     int poc = -1;
     int poc_lsb = -1;
     int new_poc = -1;
-    
+
+    HLSyntaxReader HLSReader;
+    static ParameterSetManager parameterSetManager;
+    ParcatHLSyntaxReader parcatHLSReader;
+    InputNALUnit inp_nalu;
+    std::vector<uint8_t> & nalu_bs = inp_nalu.getBitstream().getFifo();
+    nalu_bs = nalu;
+    read(inp_nalu);
+
+    if( inp_nalu.m_nalUnitType == NAL_UNIT_SPS )
+    {
+      SPS* sps = new SPS();
+      HLSReader.setBitstream( &inp_nalu.getBitstream() );
+      HLSReader.parseSPS( sps );
+      parameterSetManager.storeSPS( sps, inp_nalu.getBitstream().getFifo() );
+    }
+
+    if( inp_nalu.m_nalUnitType == NAL_UNIT_PPS )
+    {
+      PPS* pps = new PPS();
+      HLSReader.setBitstream( &inp_nalu.getBitstream() );
+      HLSReader.parsePPS( pps, &parameterSetManager );
+      parameterSetManager.storePPS( pps, inp_nalu.getBitstream().getFifo() );
+    }
+    if( inp_nalu.m_nalUnitType == NAL_UNIT_PH )
+    {
+      first_slice_segment_in_pic_flag = true;
+    }
+
     if(nalu_type == NAL_UNIT_CODED_SLICE_IDR_W_RADL || nalu_type == NAL_UNIT_CODED_SLICE_IDR_N_LP)
     {
       poc = 0;
       new_poc = *poc_base + poc;
+      first_slice_segment_in_pic_flag = false;
     }
-
-#if !JVET_M0101_HLS
-    if(nalu_type < 32 && nalu_type != NAL_UNIT_CODED_SLICE_IDR_W_RADL && nalu_type != NAL_UNIT_CODED_SLICE_IDR_N_LP)
-#else
-      if(nalu_type < 15 && nalu_type != NAL_UNIT_CODED_SLICE_IDR_W_RADL && nalu_type != NAL_UNIT_CODED_SLICE_IDR_N_LP)
-#endif
+    if((nalu_type < NAL_UNIT_CODED_SLICE_IDR_W_RADL) || (nalu_type > NAL_UNIT_CODED_SLICE_IDR_N_LP && nalu_type <= NAL_UNIT_RESERVED_IRAP_VCL_12) )
     {
-      int offset = 16;
+      parcatHLSReader.setBitstream( &inp_nalu.getBitstream() );
+      
+      // beginning of slice header parsing, taken from VLCReader
+      parcatHLSReader.parseSliceHeaderUpToPoc( &parameterSetManager );
+      int num_bits_up_to_poc_lsb = parcatHLSReader.getBitstream()->getNumBitsRead();
+      int offset = num_bits_up_to_poc_lsb;
 
-      offset += 1; //first_slice_segment_in_pic_flag
-#if !JVET_M0101_HLS
-      if (nalu_type >= NAL_UNIT_CODED_SLICE_BLA_W_LP && nalu_type <= NAL_UNIT_RESERVED_IRAP_VCL23)
-#else
-      if (nalu_type >= NAL_UNIT_CODED_SLICE_IDR_W_RADL && nalu_type <= NAL_UNIT_RESERVED_IRAP_VCL13)
-#endif
-      {
-        offset += 1; //no_output_of_prior_pics_flag
-      }
-
-      // determine offset for slice_pic_parameter_set_id TODO: ue(v)
-      int byte_offset2 = offset / 8;
-      int hi_bits2 = offset % 8;
-      uint16_t data2 = (nalu[byte_offset2] << 8) | nalu[byte_offset2 + 1];
-      int low_bits2 = 16 - hi_bits2 - 1;
-      if(((data2 >> low_bits2) % 2))
-        offset += 1; // PPSId=0
-      else
-        offset += 3; // PPSId=1
-      offset += 1; // slice_type TODO: ue(v)
-      // separate_colour_plane_flag is not supported in JEM1.0
-      if (nalu_type == NAL_UNIT_CODED_SLICE_CRA)
-      {
-        offset += 2;
-      }
       int byte_offset = offset / 8;
       int hi_bits = offset % 8;
       uint16_t data = (nalu[byte_offset] << 8) | nalu[byte_offset + 1];
@@ -315,14 +268,21 @@ std::vector<uint8_t> filter_segment(const std::vector<uint8_t> & v, int idx, int
       // int picOrderCntLSB = (pcSlice->getPOC()-pcSlice->getLastIDR()+(1<<pcSlice->getSPS()->getBitsForPOC())) & ((1<<pcSlice->getSPS()->getBitsForPOC())-1);
       unsigned picOrderCntLSB = (new_poc - *last_idr_poc +(1 << bits_for_poc)) & ((1<<bits_for_poc)-1);
 
-      int low = data & ((1 << (low_bits + 1)) - 1);
+      int low = data & ((1 << low_bits) - 1);
       int hi = data >> (16 - hi_bits);
       data = (hi << (16 - hi_bits)) | (picOrderCntLSB << low_bits) | low;
 
       nalu[byte_offset] = data >> 8;
       nalu[byte_offset + 1] = data & 0xff;
 
-      ++cnt;
+      if( first_slice_segment_in_pic_flag )
+      {
+#if ENABLE_TRACING
+        std::cout << "Changed poc " << poc << " to " << new_poc << std::endl;
+#endif
+        ++cnt;
+        first_slice_segment_in_pic_flag = false;
+      }
     }
 
     if(idx > 1 && (nalu_type == NAL_UNIT_CODED_SLICE_IDR_W_RADL || nalu_type == NAL_UNIT_CODED_SLICE_IDR_N_LP))
@@ -331,11 +291,7 @@ std::vector<uint8_t> filter_segment(const std::vector<uint8_t> & v, int idx, int
       idr_found = true;
     }
 
-#if HEVC_VPS
-    if((idx > 1 && (nalu_type == NAL_UNIT_CODED_SLICE_IDR_W_RADL || nalu_type == NAL_UNIT_CODED_SLICE_IDR_N_LP )) || ((idx>1 && !idr_found) && ( nalu_type == NAL_UNIT_VPS || nalu_type == NAL_UNIT_SPS || nalu_type == NAL_UNIT_PPS))
-#else
-    if((idx > 1 && (nalu_type == NAL_UNIT_CODED_SLICE_IDR_W_RADL || nalu_type == NAL_UNIT_CODED_SLICE_IDR_N_LP)) || ((idx > 1 && !idr_found) && (nalu_type == NAL_UNIT_SPS || nalu_type == NAL_UNIT_PPS || nalu_type == NAL_UNIT_APS))
-#endif
+    if( ( idx > 1 && ( nalu_type == NAL_UNIT_CODED_SLICE_IDR_W_RADL || nalu_type == NAL_UNIT_CODED_SLICE_IDR_N_LP ) ) || ( ( idx > 1 && !idr_found ) && ( nalu_type == NAL_UNIT_DPS || nalu_type == NAL_UNIT_VPS || nalu_type == NAL_UNIT_SPS || nalu_type == NAL_UNIT_PPS || nalu_type == NAL_UNIT_PREFIX_APS || nalu_type == NAL_UNIT_SUFFIX_APS || nalu_type == NAL_UNIT_PH || nalu_type == NAL_UNIT_ACCESS_UNIT_DELIMITER ) )
       || (nalu_type == NAL_UNIT_SUFFIX_SEI && skip_next_sei))
     {
     }
@@ -389,6 +345,12 @@ std::vector<uint8_t> process_segment(const char * path, int idx, int * poc_base,
 
 int main(int argc, char * argv[])
 {
+#if ENABLE_TRACING
+  std::string tracingFile;
+  std::string tracingRule;
+
+  g_trace_ctx = tracing_init(tracingFile, tracingRule);
+#endif
   if(argc < 3)
   {
     printf("parcat version VTM %s\n", VTM_VERSION);
@@ -405,6 +367,8 @@ int main(int argc, char * argv[])
   int poc_base = 0;
   int last_idr_poc = 0;
 
+  initROM();
+
   for(int i = 1; i < argc - 1; ++i)
   {
     std::vector<uint8_t> v = process_segment(argv[i], i, &poc_base, &last_idr_poc);
@@ -413,4 +377,7 @@ int main(int argc, char * argv[])
   }
 
   fclose(fdo);
+#if ENABLE_TRACING
+  tracing_uninit(g_trace_ctx);
+#endif
 }
diff --git a/source/App/SEIRemovalApp/SEIRemovalApp.cpp b/source/App/SEIRemovalApp/SEIRemovalApp.cpp
index dd22baa5291c4d962591b8354e98a8e19be63843..e10c99da882d7df9985db68d04239c880f4d251a 100644
--- a/source/App/SEIRemovalApp/SEIRemovalApp.cpp
+++ b/source/App/SEIRemovalApp/SEIRemovalApp.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -72,11 +72,11 @@ void read2(InputNALUnit& nalu)
 {
   InputBitstream& bs = nalu.getBitstream();
 
-  bool forbidden_zero_bit = bs.read(1);           // forbidden_zero_bit
-  if(forbidden_zero_bit != 0) { THROW( "Forbidden zero-bit not '0'" );}
-  nalu.m_nalUnitType = (NalUnitType) bs.read(6);  // nal_unit_type
-  nalu.m_nuhLayerId = bs.read(6);                 // nuh_layer_id
-  nalu.m_temporalId = bs.read(3) - 1;             // nuh_temporal_id_plus1
+  nalu.m_forbiddenZeroBit   = bs.read(1);                 // forbidden zero bit
+  nalu.m_nuhReservedZeroBit = bs.read(1);                 // nuh_reserved_zero_bit
+  nalu.m_nuhLayerId         = bs.read(6);                 // nuh_layer_id
+  nalu.m_nalUnitType        = (NalUnitType) bs.read(5);   // nal_unit_type
+  nalu.m_temporalId         = bs.read(3) - 1;             // nuh_temporal_id_plus1
 }
 
 uint32_t SEIRemovalApp::decode()
@@ -129,7 +129,7 @@ uint32_t SEIRemovalApp::decode()
       // just kick out all suffix SEIS
       bWrite &= (( !m_discardSuffixSEIs || nalu.m_nalUnitType != NAL_UNIT_SUFFIX_SEI ) && ( !m_discardPrefixSEIs || nalu.m_nalUnitType != NAL_UNIT_PREFIX_SEI ));
       bWrite &= unitCnt >= m_numNALUnitsToSkip;
-      bWrite &= m_numNALUnitsToWrite > 0 && unitCnt <= m_numNALUnitsToWrite;
+      bWrite &= m_numNALUnitsToWrite < 0 || unitCnt <= m_numNALUnitsToWrite;
 
       if( bWrite )
       {
diff --git a/source/App/SEIRemovalApp/SEIRemovalApp.h b/source/App/SEIRemovalApp/SEIRemovalApp.h
index ec585ea5610cac993616f1836d446eeaace6bba9..32bb23ac26d974025866f8df9eb28d359d40e65c 100644
--- a/source/App/SEIRemovalApp/SEIRemovalApp.h
+++ b/source/App/SEIRemovalApp/SEIRemovalApp.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/App/SEIRemovalApp/SEIRemovalAppCfg.cpp b/source/App/SEIRemovalApp/SEIRemovalAppCfg.cpp
index 8f03fa9df5a2881a42cc36528b81e01c613c494f..f4ab6e1f0c46ff63065465c81edacb81d144bf38 100644
--- a/source/App/SEIRemovalApp/SEIRemovalAppCfg.cpp
+++ b/source/App/SEIRemovalApp/SEIRemovalAppCfg.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/App/SEIRemovalApp/SEIRemovalAppCfg.h b/source/App/SEIRemovalApp/SEIRemovalAppCfg.h
index 7daa16afe5161bc87e79dc4bb5aec16f1993dc10..593ba838316762c62ac1bae26d23d0e6cff21d12 100644
--- a/source/App/SEIRemovalApp/SEIRemovalAppCfg.h
+++ b/source/App/SEIRemovalApp/SEIRemovalAppCfg.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/App/SEIRemovalApp/seiremovalmain.cpp b/source/App/SEIRemovalApp/seiremovalmain.cpp
index 572ed84fa8bc0709f8a61d1f060579410d1d636f..5fba16f98c0820dc7d1273f8adcff2ddbe9b9432 100644
--- a/source/App/SEIRemovalApp/seiremovalmain.cpp
+++ b/source/App/SEIRemovalApp/seiremovalmain.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/App/StreamMergeApp/CMakeLists.txt b/source/App/StreamMergeApp/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..77c53ece6b5e192580b86a7f3b7dac9e4c0b6290
--- /dev/null
+++ b/source/App/StreamMergeApp/CMakeLists.txt
@@ -0,0 +1,84 @@
+# name of the executable target built from this directory
+set( EXE_NAME StreamMergeApp )
+
+# collect all .cpp files of this directory as source files
+file( GLOB SRC_FILES "*.cpp" )
+
+# collect all .h files of this directory (listed on the target next to the sources)
+file( GLOB INC_FILES "*.h" )
+
+# link against asan when building with GCC on Linux and USE_ADDRESS_SANITIZER is set
+if( CMAKE_SYSTEM_NAME STREQUAL "Linux" )
+  if( CMAKE_CXX_COMPILER_ID STREQUAL "GNU" )
+    if( USE_ADDRESS_SANITIZER )
+      set( ADDITIONAL_LIBS asan )
+    endif()
+  endif()
+endif()
+
+# NATVIS files for Visual Studio (only collected for MSVC builds)
+if( MSVC )
+  file( GLOB NATVIS_FILES "../../VisualStudio/*.natvis" )
+endif()
+
+# add executable
+add_executable( ${EXE_NAME} ${SRC_FILES} ${INC_FILES} ${NATVIS_FILES} )
+include_directories(${CMAKE_CURRENT_BINARY_DIR})
+
+if( SET_ENABLE_TRACING )
+  if( ENABLE_TRACING )
+    target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_TRACING=1 )
+  else()
+    target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_TRACING=0 )
+  endif()
+endif()
+
+if( OpenMP_FOUND )
+  if( SET_ENABLE_SPLIT_PARALLELISM )
+    if( ENABLE_SPLIT_PARALLELISM )
+      target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=1 )
+    else()
+      target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=0 )
+    endif()
+  endif()
+  if( SET_ENABLE_WPP_PARALLELISM )
+    if( ENABLE_WPP_PARALLELISM )
+      target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_PARALLELISM=1 )
+    else()
+      target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_PARALLELISM=0 )
+    endif()
+  endif()
+else()
+  target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=0 )
+  target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_PARALLELISM=0 )
+endif()
+
+if( CMAKE_COMPILER_IS_GNUCC AND BUILD_STATIC )
+  set( ADDITIONAL_LIBS ${ADDITIONAL_LIBS} -static -static-libgcc -static-libstdc++ )
+  target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_STATIC_LINK=1 )
+endif()
+
+target_link_libraries( ${EXE_NAME} CommonLib EncoderLib DecoderLib Utilities Threads::Threads ${ADDITIONAL_LIBS} )
+
+# lldb custom data formatters (Xcode generator only)
+if( XCODE )
+  add_dependencies( ${EXE_NAME} Install${PROJECT_NAME}LldbFiles )
+endif()
+# on Linux, copy the built binary to <source dir>/bin/StreamMergeAppStatic{d,,p,m} after each build
+if( CMAKE_SYSTEM_NAME STREQUAL "Linux" )
+  add_custom_command( TARGET ${EXE_NAME} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy
+                                                          $<$<CONFIG:Debug>:${CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG}/StreamMergeApp>
+                                                          $<$<CONFIG:Release>:${CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE}/StreamMergeApp>
+                                                          $<$<CONFIG:RelWithDebInfo>:${CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELWITHDEBINFO}/StreamMergeApp>
+                                                          $<$<CONFIG:MinSizeRel>:${CMAKE_RUNTIME_OUTPUT_DIRECTORY_MINSIZEREL}/StreamMergeApp>
+                                                          $<$<CONFIG:Debug>:${CMAKE_SOURCE_DIR}/bin/StreamMergeAppStaticd>
+                                                          $<$<CONFIG:Release>:${CMAKE_SOURCE_DIR}/bin/StreamMergeAppStatic>
+                                                          $<$<CONFIG:RelWithDebInfo>:${CMAKE_SOURCE_DIR}/bin/StreamMergeAppStaticp>
+                                                          $<$<CONFIG:MinSizeRel>:${CMAKE_SOURCE_DIR}/bin/StreamMergeAppStaticm> )
+endif()
+
+# group the NATVIS files under a "Natvis Files" folder in the generated IDE project
+source_group( "Natvis Files" FILES ${NATVIS_FILES} )
+
+# place the target in the "app" project folder and link it as C++
+set_target_properties( ${EXE_NAME}         PROPERTIES FOLDER app LINKER_LANGUAGE CXX )
diff --git a/source/App/StreamMergeApp/StreamMergeApp.cpp b/source/App/StreamMergeApp/StreamMergeApp.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b9943e8ec3ee89f0de0ca8856ec57e93c3ca0b18
--- /dev/null
+++ b/source/App/StreamMergeApp/StreamMergeApp.cpp
@@ -0,0 +1,345 @@
+/* The copyright in this software is being made available under the BSD
+ * License, included below. This software may be subject to other third party
+ * and contributor rights, including patent rights, and no such rights are
+ * granted under this license.
+ *
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *  * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
+ *    be used to endorse or promote products derived from this software without
+ *    specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+ /** \file     StreamMergeApp.cpp
+     \brief    Stream merge application class
+ */
+
+#include <list>
+#include <vector>
+#include <stdio.h>
+#include <fcntl.h>
+
+#include "StreamMergeApp.h"
+#include "DecoderLib/AnnexBread.h"
+#include "DecoderLib/NALread.h"
+#if RExt__DECODER_DEBUG_BIT_STATISTICS
+#include "CommonLib/CodingStatistics.h"
+#endif
+
+ //! \ingroup DecoderApp
+ //! \{
+
+ // ====================================================================================================================
+ // Constructor / destructor / initialization / destroy
+ // ====================================================================================================================
+
+/// Default constructor; performs no work of its own.
+StreamMergeApp::StreamMergeApp()
+{
+}
+
+// ====================================================================================================================
+// Public member functions
+// ====================================================================================================================
+
+/**
+ * Parse the NAL unit header from the next 16 bits of the NAL unit's bitstream
+ * and store each field in the corresponding member of nalu. Fields are read in
+ * bitstream order: forbidden_zero_bit (1 bit), nuh_reserved_zero_bit (1 bit),
+ * nuh_layer_id (6 bits), nal_unit_type (5 bits), nuh_temporal_id_plus1 (3 bits).
+ *
+ * \param nalu NAL unit whose bitstream is consumed and whose header members are set
+ */
+
+void read2(InputNALUnit& nalu)
+{
+  InputBitstream& headerBits = nalu.getBitstream();
+
+  nalu.m_forbiddenZeroBit   = headerBits.read(1);
+  nalu.m_nuhReservedZeroBit = headerBits.read(1);
+  nalu.m_nuhLayerId         = headerBits.read(6);
+  nalu.m_nalUnitType        = static_cast<NalUnitType>(headerBits.read(5));
+  nalu.m_temporalId         = headerBits.read(3) - 1;   // the stream carries nuh_temporal_id_plus1
+}
+
+static void
+_byteStreamNALUnit(
+  SingleLayerStream& bs,
+  std::istream& istream,
+  vector<uint8_t>& nalUnit,
+  AnnexBStats& stats)
+{
+  /* Extracts one NAL unit from the Annex B byte stream into nalUnit and
+   * accumulates the byte counts of the surrounding syntax elements in stats.
+   *
+   * First, each leading_zero_8bits syntax element (if present) is extracted
+   * and discarded, moving the current position in the byte stream forward one
+   * byte at a time, until the next bytes in the bitstream form the three-byte
+   * sequence 0x000001 or the four-byte sequence 0x00000001.
+   */
+#if RExt__DECODER_DEBUG_BIT_STATISTICS
+  CodingStatistics::SStat &statBits = CodingStatistics::GetStatisticEP(STATS__NAL_UNIT_PACKING);
+#endif
+  while ((bs.eofBeforeNBytes(24 / 8, istream) || bs.peekBytes(24 / 8, istream) != 0x000001)
+    && (bs.eofBeforeNBytes(32 / 8, istream) || bs.peekBytes(32 / 8, istream) != 0x00000001))
+  {
+    uint8_t leading_zero_8bits = bs.readByte(istream);
+#if RExt__DECODER_DEBUG_BIT_STATISTICS
+    statBits.bits += 8; statBits.count++;
+#endif
+    if (leading_zero_8bits != 0) { THROW("Leading zero bits not zero"); }
+    stats.m_numLeadingZero8BitsBytes++;
+  }
+
+  /* 1. When the next four bytes in the bitstream form the four-byte sequence
+   * 0x00000001, the next byte in the byte stream (which is a zero_byte
+   * syntax element) is extracted and discarded and the current position in
+   * the byte stream is set equal to the position of the byte following this
+   * discarded byte.
+   */
+   /* NB, the previous step guarantees this will succeed -- if EOF was
+    * encountered, an exception will stop execution getting this far */
+  if (bs.peekBytes(24 / 8, istream) != 0x000001)
+  {
+    uint8_t zero_byte = bs.readByte(istream);
+#if RExt__DECODER_DEBUG_BIT_STATISTICS
+    statBits.bits += 8; statBits.count++;
+#endif
+    CHECK(zero_byte != 0, "Zero byte not '0'");
+    stats.m_numZeroByteBytes++;
+  }
+
+  /* 2. The next three-byte sequence in the byte stream (which is a
+   * start_code_prefix_one_3bytes) is extracted and discarded and the current
+   * position in the byte stream is set equal to the position of the byte
+   * following this three-byte sequence.
+   */
+   /* NB, (1) guarantees that the next three bytes are 0x00 00 01 */
+  uint32_t start_code_prefix_one_3bytes = bs.readBytes(24 / 8, istream);
+#if RExt__DECODER_DEBUG_BIT_STATISTICS
+  statBits.bits += 24; statBits.count += 3;
+#endif
+  if (start_code_prefix_one_3bytes != 0x000001) { THROW("Invalid code prefix"); }
+  stats.m_numStartCodePrefixBytes += 3;
+
+  /* 3. NumBytesInNALunit is set equal to the number of bytes starting with
+   * the byte at the current position in the byte stream up to and including
+   * the last byte that precedes the location of any of the following
+   * conditions:
+   *   a. A subsequent byte-aligned three-byte sequence equal to 0x000000, or
+   *   b. A subsequent byte-aligned three-byte sequence equal to 0x000001, or
+   *   c. The end of the byte stream, as determined by unspecified means.
+   */
+   /* 4. NumBytesInNALunit bytes are removed from the bitstream and the
+    * current position in the byte stream is advanced by NumBytesInNALunit
+    * bytes. This sequence of bytes is nal_unit( NumBytesInNALunit ) and is
+    * decoded using the NAL unit decoding process
+    */
+    /* NB, a peeked three-byte value > 2 is neither 0x000000 nor 0x000001 (0x000002 also stops the loop) */
+#if RExt__DECODER_DEBUG_BIT_STATISTICS
+  CodingStatistics::SStat &bodyStats = CodingStatistics::GetStatisticEP(STATS__NAL_UNIT_TOTAL_BODY);
+#endif
+  while (bs.eofBeforeNBytes(24 / 8, istream) || bs.peekBytes(24 / 8, istream) > 2)
+  {
+#if RExt__DECODER_DEBUG_BIT_STATISTICS
+    uint8_t thebyte = bs.readByte(istream); bodyStats.bits += 8; bodyStats.count++;
+    nalUnit.push_back(thebyte);
+#else
+    nalUnit.push_back(bs.readByte(istream));
+#endif
+  }
+
+  /* 5. When the current position in the byte stream is:
+   *  - not at the end of the byte stream (as determined by unspecified means)
+   *  - and the next bytes in the byte stream do not start with a three-byte
+   *    sequence equal to 0x000001
+   *  - and the next bytes in the byte stream do not start with a four byte
+   *    sequence equal to 0x00000001,
+   * the decoder extracts and discards each trailing_zero_8bits syntax
+   * element, moving the current position in the byte stream forward one byte
+   * at a time, until the current position in the byte stream is such that:
+   *  - the next bytes in the byte stream form the four-byte sequence
+   *    0x00000001 or
+   *  - the end of the byte stream has been encountered (as determined by
+   *    unspecified means).
+   */
+   /* NB, (3) guarantees there are at least three bytes available or none */
+  while ((bs.eofBeforeNBytes(24 / 8, istream) || bs.peekBytes(24 / 8, istream) != 0x000001)
+    && (bs.eofBeforeNBytes(32 / 8, istream) || bs.peekBytes(32 / 8, istream) != 0x00000001))
+  {
+    uint8_t trailing_zero_8bits = bs.readByte(istream);
+#if RExt__DECODER_DEBUG_BIT_STATISTICS
+    statBits.bits += 8; statBits.count++;
+#endif
+    CHECK(trailing_zero_8bits != 0, "Trailing zero bits not '0'");
+    stats.m_numTrailingZero8BitsBytes++;
+  }
+}
+
+/**
+ * Extract a single NAL unit from the Annex B byte stream read through bs and
+ * istream, appending its bytes to nalUnit and accumulating byte stream
+ * statistics into stats.
+ *
+ * \return true if an exception (normally end of stream) interrupted the
+ *         extraction, in which case nalUnit may still hold valid data; false otherwise.
+ */
+bool byteStreamNALUnit(SingleLayerStream& bs, std::istream& istream, vector<uint8_t>& nalUnit, AnnexBStats& stats)
+{
+  bool interrupted;
+  try
+  {
+    _byteStreamNALUnit(bs, istream, nalUnit, stats);
+    interrupted = false;
+  }
+  catch (...)
+  {
+    // any failure, including the stream exception raised at EOF, ends the NAL unit here
+    interrupted = true;
+  }
+
+  // record the payload size even when extraction stopped early
+  stats.m_numBytesInNALUnit = static_cast<uint32_t>(nalUnit.size());
+  return interrupted;
+}
+
+/**
+ * Write a VPS NAL unit to out: a four-byte start code, the NAL unit header
+ * built from nLayerId (nuh_layer_id) and nTemporalId (written as
+ * nuh_temporal_id_plus1), then the member vps as coded by HLSWriter::codeVPS().
+ */
+void StreamMergeApp::writeNewVPS(ostream& out, int nLayerId, int nTemporalId)
+{
+  static const uint8_t startCode[] = { 0, 0, 0, 1 };
+
+  // NAL unit header, 16 bits in total
+  OutputBitstream header;
+  header.write(0, 1);                // forbidden_zero_bit
+  header.write(0, 1);                // nuh_reserved_zero_bit
+  header.write(nLayerId, 6);         // nuh_layer_id
+  header.write(NAL_UNIT_VPS, 5);     // nal_unit_type
+  header.write(nTemporalId + 1, 3);  // nuh_temporal_id_plus1
+
+  out.write(reinterpret_cast<const char*>(startCode), 4);
+  out.write(reinterpret_cast<const char*>(header.getByteStream()), header.getByteStreamLength());
+
+  // VPS payload
+  OutputBitstream payload;
+  HLSWriter       vpsWriter;
+  vpsWriter.setBitstream(&payload);
+  vpsWriter.codeVPS(&vps);
+
+  out.write(reinterpret_cast<const char*>(payload.getByteStream()), payload.getByteStreamLength());
+}
+
+/** Interleave the NAL units of all inputs (one NAL unit per input in turn) into the output
+ *  bitstream, giving input i its own nuh_layer_id and replacing each VPS by a newly written one.
+ *  \return always 0 */
+uint32_t StreamMergeApp::mergeStreams()
+{
+  ifstream bitstreamFileIn[MAX_VPS_LAYERS];
+  ofstream bitstreamFileOut(m_bitstreamFileNameOut.c_str(), ofstream::out | ofstream::binary);
+  int nNumValidStr = m_numInputStreams;
+
+  for (int i = 0; i < m_numInputStreams; i++)
+  {
+    bitstreamFileIn[i].open(m_bitstreamFileNameIn[i].c_str(), ifstream::in | ifstream::binary);
+    if (!bitstreamFileIn[i])
+    {
+      EXIT("failed to open bitstream file " << m_bitstreamFileNameIn[i].c_str() << " for reading");
+    }
+    bitstreamFileIn[i].clear();
+    bitstreamFileIn[i].seekg(0, ios::beg);
+  }
+
+  SingleLayerStream bytestream[MAX_VPS_LAYERS];
+
+  for (int i = 0; i < m_numInputStreams; i++)
+    bytestream[i].init(bitstreamFileIn[i]);
+
+  //set VPS which will be replicated for all layers but with different nuh_layer_id
+  vps.setMaxLayers(m_numInputStreams);
+  vps.setVPSExtensionFlag(false);
+
+  //Loop all input bitstreams to interleave their NALUs
+  while (nNumValidStr)
+  {
+    //loop over all input streams
+    for (int i = 0; i < m_numInputStreams; i++)
+    {
+      uint8_t layerId = i < 63 ? i : i + 1;
+
+      // a stream in a failed state was already counted as finished
+      if (!bitstreamFileIn[i])
+        continue;
+
+      AnnexBStats stats = AnnexBStats();
+      InputNALUnit nalu;
+
+      byteStreamNALUnit(bytestream[i], bitstreamFileIn[i], nalu.getBitstream().getFifo(), stats);
+
+      if (nalu.getBitstream().getFifo().empty())
+      {
+        /* this can happen with an empty input file, two back-to-back
+         * start_code_prefixes, or a start_code_prefix immediately followed by EOF */
+        std::cerr << "Warning: Attempt to decode an empty NAL unit" << std::endl;
+      }
+      else
+      {
+        read2(nalu);
+
+        if (nalu.m_nalUnitType == NAL_UNIT_VPS)
+        {
+          // no 'continue' here, so that an input ending with a VPS is still counted as finished below
+          writeNewVPS(bitstreamFileOut, layerId, nalu.m_temporalId);
+          printf("Write new VPS for stream %d\n", i);
+        }
+        else
+        {
+          // re-emit the start code with its leading zero bytes: (count - 1) zeros followed by 0x01
+          int iNumZeros = stats.m_numLeadingZero8BitsBytes + stats.m_numZeroByteBytes + stats.m_numStartCodePrefixBytes - 1;
+          char ch = 0;
+          for (int z = 0; z < iNumZeros; z++) { bitstreamFileOut.write(&ch, 1); }
+          ch = 1; bitstreamFileOut.write(&ch, 1);
+
+          // update nuh_layer_id: per the header layout parsed by read2() it is the low six bits of
+          // the first header byte; the second byte (nal_unit_type, temporal id) stays untouched
+          uint8_t *p = (uint8_t*)nalu.getBitstream().getFifo().data();
+          p[0] = (p[0] & 0xc0) | (layerId & 0x3f);
+
+          bitstreamFileOut.write((const char*)p, nalu.getBitstream().getFifo().size());
+          printf("Merge NALU type %d from stream %d\n", nalu.m_nalUnitType, i);
+        }
+      }
+
+      // reading up to EOF leaves the input in a failed state: count it as finished
+      if (!bitstreamFileIn[i])
+        nNumValidStr--;
+    }
+  }
+
+  return 0;
+}
+
+//! \}
diff --git a/source/App/StreamMergeApp/StreamMergeApp.h b/source/App/StreamMergeApp/StreamMergeApp.h
new file mode 100644
index 0000000000000000000000000000000000000000..b4dc15ae8673800ec65c470ed6a948e56ad0fd84
--- /dev/null
+++ b/source/App/StreamMergeApp/StreamMergeApp.h
@@ -0,0 +1,196 @@
+/* The copyright in this software is being made available under the BSD
+ * License, included below. This software may be subject to other third party
+ * and contributor rights, including patent rights, and no such rights are
+ * granted under this license.
+ *
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *  * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
+ *    be used to endorse or promote products derived from this software without
+ *    specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file     StreamMergeApp.h
+    \brief    Stream merge application class (header)
+*/
+
+#ifndef __STREAMMERGEAPP__
+#define __STREAMMERGEAPP__
+
+#if _MSC_VER > 1000
+#pragma once
+#endif // _MSC_VER > 1000
+
+#include <stdio.h>
+#include <fstream>
+#include <iostream>
+#include "CommonLib/CommonDef.h"
+#include "VLCWriter.h"
+#include "CABACWriter.h"
+#include "AnnexBread.h"
+#include "StreamMergeAppCfg.h"
+
+using namespace std;
+
+// ====================================================================================================================
+// Class definition
+// ====================================================================================================================
+
+/// stream merge application class: interleaves the input bitstreams into one output bitstream
+class StreamMergeApp : public StreamMergeAppCfg
+{
+public:
+  StreamMergeApp();
+  virtual ~StreamMergeApp         ()  {}
+
+  VPS vps; ///< VPS configured by mergeStreams() and coded by writeNewVPS()
+
+  uint32_t  mergeStreams            (); ///< main stream merging function
+  /// write a start code, a VPS NAL unit header with the given ids and the coded vps to out
+  void      writeNewVPS             (ostream& out, int nLayerId, int nTemporalId);
+};
+
+
+/// Byte reader with up to four bytes of look-ahead, used to find start codes in an Annex B byte
+/// stream. The input stream is passed to each call; peeked-but-unread bytes are buffered here.
+class SingleLayerStream
+{
+public:
+  /** Create a reader with an empty look-ahead buffer. */
+  SingleLayerStream()
+    : m_numFutureBytes(0)
+    , m_futureBytes(0)
+  {
+  }
+
+  /**
+  * Reset the internal state.  Must be called if input stream is
+  * modified externally to this class
+  */
+  void reset()
+  {
+    m_numFutureBytes = 0;
+    m_futureBytes = 0;
+  }
+
+  /**
+  * Prepare istream for use with this reader.
+  *
+  * Side-effects: the exception mask of istream is set to eofbit | badbit,
+  * so that reaching EOF or a stream error raises an exception.
+  */
+  void init(std::istream& istream)
+  {
+    istream.exceptions(std::istream::eofbit | std::istream::badbit);
+  }
+
+  /**
+  * returns true if an EOF will be encountered within the next
+  * n bytes. n must not exceed 4, the capacity of the look-ahead buffer.
+  */
+  bool eofBeforeNBytes(uint32_t n, std::istream& m_Input)
+  {
+    CHECK(n > 4, "Unsupported look-ahead value");
+    if (m_numFutureBytes >= n)
+    {
+      return false;
+    }
+
+    n -= m_numFutureBytes;
+    try
+    {
+      for (uint32_t i = 0; i < n; i++)
+      {
+        m_futureBytes = (m_futureBytes << 8) | m_Input.get();
+        m_numFutureBytes++;
+      }
+    }
+    catch (...)
+    {
+      return true;
+    }
+    return false;
+  }
+
+  /**
+  * return the next n bytes in the stream, as an unsigned n byte bigendian
+  * word, without advancing the stream pointer.
+  *
+  * If an attempt is made to read past EOF, the bytes beyond EOF are zero.
+  */
+  uint32_t peekBytes(uint32_t n, std::istream& m_Input)
+  {
+    eofBeforeNBytes(n, m_Input);
+    if (m_numFutureBytes < n)
+    {
+      // EOF: (m_numFutureBytes - n) would wrap to an out-of-range shift count, so pad the
+      // missing bytes with zeros; the 64-bit intermediate keeps a 32-bit shift well defined
+      return uint32_t(uint64_t(m_futureBytes) << 8 * (n - m_numFutureBytes));
+    }
+    return m_futureBytes >> 8 * (m_numFutureBytes - n);
+  }
+
+  /**
+  * consume and return one byte from the input.
+  *
+  * If the stream is already at EOF prior to a call to readByte() and init()
+  * was applied to it, an exception std::ios_base::failure is thrown.
+  */
+  uint8_t readByte(std::istream& m_Input)
+  {
+    if (!m_numFutureBytes)
+    {
+      uint8_t byte = m_Input.get();
+      return byte;
+    }
+    m_numFutureBytes--;
+    uint8_t wanted_byte = m_futureBytes >> 8 * m_numFutureBytes;
+    m_futureBytes &= ~(0xffu << 8 * m_numFutureBytes); // unsigned: 0xff << 24 overflows a signed int
+    return wanted_byte;
+  }
+
+  /**
+  * consume and return n bytes from the input.  n bytes from
+  * bytestream are interpreted as bigendian when assembling
+  * the return value.
+  */
+  uint32_t readBytes(uint32_t n, std::istream& m_Input)
+  {
+    uint32_t val = 0;
+    for (uint32_t i = 0; i < n; i++)
+    {
+      val = (val << 8) | readByte(m_Input);
+    }
+    return val;
+  }
+
+private:
+  uint32_t m_numFutureBytes; /* number of valid bytes in m_futureBytes */
+  uint32_t m_futureBytes; /* bytes that have been peeked */
+};
+
+bool byteStreamNALUnit(SingleLayerStream& bs, std::istream& istream, vector<uint8_t>& nalUnit, AnnexBStats& stats);
+
+#endif // __STREAMMERGEAPP__
+
diff --git a/source/App/StreamMergeApp/StreamMergeAppCfg.cpp b/source/App/StreamMergeApp/StreamMergeAppCfg.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..88432c360fd9a5e9304961c2872fae8a841119f3
--- /dev/null
+++ b/source/App/StreamMergeApp/StreamMergeAppCfg.cpp
@@ -0,0 +1,86 @@
+/* The copyright in this software is being made available under the BSD
+ * License, included below. This software may be subject to other third party
+ * and contributor rights, including patent rights, and no such rights are
+ * granted under this license.
+ *
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *  * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
+ *    be used to endorse or promote products derived from this software without
+ *    specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+ /** \file     StreamMergeAppCfg.cpp
+     \brief    Stream merge app configuration class
+ */
+
+#include <cstdio>
+#include <cstring>
+#include <string>
+#include "StreamMergeAppCfg.h"
+#include "Utilities/program_options_lite.h"
+
+using namespace std;
+namespace po = df::program_options_lite;
+
+//! \ingroup DecoderApp
+//! \{
+
+// ====================================================================================================================
+// Public member functions
+// ====================================================================================================================
+
+/** Parse the command line: <app> <input bitstream 1> ... <input bitstream N> <output bitstream>.
+    \param argc number of arguments
+    \param argv array of arguments
+    \return true if 1 <= N <= MAX_VPS_LAYERS; false when too few or too many arguments are given
+ */
+bool StreamMergeAppCfg::parseCfg(int argc, char* argv[])
+{
+  const int numInputs = argc - 2;
+  if (numInputs < 1 || numInputs > MAX_VPS_LAYERS)
+  {
+    return false; // would otherwise index argv[] / m_bitstreamFileNameIn[] out of bounds
+  }
+
+  m_numInputStreams = numInputs;
+  for (int i = 0; i < m_numInputStreams; i++) { m_bitstreamFileNameIn[i] = argv[i + 1]; }
+  m_bitstreamFileNameOut = argv[argc - 1];
+  return true;
+}
+
+/// Start with no input streams and with every input/output file name empty.
+StreamMergeAppCfg::StreamMergeAppCfg()
+  : m_bitstreamFileNameOut(), m_numInputStreams(0)
+{
+  for (int layer = 0; layer < MAX_VPS_LAYERS; ++layer)
+    m_bitstreamFileNameIn[layer] = "";
+}
+
+/// Destructor; there is nothing to release explicitly.
+StreamMergeAppCfg::~StreamMergeAppCfg()
+{
+}
+
+//! \}
diff --git a/source/App/StreamMergeApp/StreamMergeAppCfg.h b/source/App/StreamMergeApp/StreamMergeAppCfg.h
new file mode 100644
index 0000000000000000000000000000000000000000..6ef3e791ce8f0fbe708759cfd5026f99c5bf4c34
--- /dev/null
+++ b/source/App/StreamMergeApp/StreamMergeAppCfg.h
@@ -0,0 +1,74 @@
+/* The copyright in this software is being made available under the BSD
+ * License, included below. This software may be subject to other third party
+ * and contributor rights, including patent rights, and no such rights are
+ * granted under this license.
+ *
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *  * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
+ *    be used to endorse or promote products derived from this software without
+ *    specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file     StreamMergeAppCfg.h
+    \brief    Stream merge app configuration class (header)
+*/
+
+#ifndef __STREAMMERGEAPPCFG__
+#define __STREAMMERGEAPPCFG__
+
+#if _MSC_VER > 1000
+#pragma once
+#endif // _MSC_VER > 1000
+
+#include "CommonLib/CommonDef.h"
+#include <vector>
+
+//! \ingroup DecoderApp
+//! \{
+
+// ====================================================================================================================
+// Class definition
+// ====================================================================================================================
+
+/// Stream merge app configuration class
+class StreamMergeAppCfg
+{
+protected:
+  std::string   m_bitstreamFileNameIn[MAX_VPS_LAYERS];                ///< input bitstream file names
+  std::string   m_bitstreamFileNameOut;               ///< output bitstream file name
+  int           m_numInputStreams;                    ///< number of input bitstreams
+
+public:
+  StreamMergeAppCfg();
+  virtual ~StreamMergeAppCfg();
+
+  bool  parseCfg        ( int argc, char* argv[] );   ///< fill the file names from the command line arguments
+};
+
+//! \}
+
+#endif  // __STREAMMERGEAPPCFG__
+
+
diff --git a/source/App/StreamMergeApp/StreamMergeMain.cpp b/source/App/StreamMergeApp/StreamMergeMain.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..7404d8751ae8f398ca4559d9afab2eccca11fe31
--- /dev/null
+++ b/source/App/StreamMergeApp/StreamMergeMain.cpp
@@ -0,0 +1,94 @@
+/* The copyright in this software is being made available under the BSD
+ * License, included below. This software may be subject to other third party
+ * and contributor rights, including patent rights, and no such rights are
+ * granted under this license.
+ *
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *  * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
+ *    be used to endorse or promote products derived from this software without
+ *    specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+ /** \file     StreamMergeMain.cpp
+     \brief    Stream merge application main
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <time.h>
+#include "StreamMergeApp.h"
+#include "program_options_lite.h"
+
+ //! \ingroup DecoderApp
+ //! \{
+
+ // ====================================================================================================================
+ // Main function
+ // ====================================================================================================================
+
+/// Entry point: checks the argument count, runs StreamMergeApp::mergeStreams() and prints the processor time used.
+/// \return -1 if too few arguments are given, EXIT_FAILURE on a configuration or merge error, else EXIT_SUCCESS
+int main(int argc, char* argv[])
+{
+  int returnCode = EXIT_SUCCESS;
+
+  // program name + at least two input bitstreams + output file
+  if (argc < 4)
+  {
+    printf("usage: %s <bitstream1> <bitstream2> [<bitstream3> ...] <outfile>\n", argv[0]);
+    return -1;
+  }
+
+  // print information
+  fprintf(stdout, "\n");
+  fprintf(stdout, "VVCSoftware: VTM Version %s ", VTM_VERSION);
+  fprintf(stdout, "\n");
+
+  StreamMergeApp *pStrMergeApp = new StreamMergeApp;
+  // parse configuration
+  if (!pStrMergeApp->parseCfg(argc, argv))
+  {
+    delete pStrMergeApp;   // do not leak the application object on the early exit
+    return EXIT_FAILURE;
+  }
+
+  // starting time (clock() counts processor time)
+  const clock_t lBefore = clock();
+
+  // call merging function; a non-zero result is reported as an error
+  if (0 != pStrMergeApp->mergeStreams())
+  {
+    printf("\n\n***ERROR*** A merge error happened\n");
+    returnCode = EXIT_FAILURE;
+  }
+
+  // ending time
+  const double dResult = (double)(clock() - lBefore) / CLOCKS_PER_SEC;
+  printf("\n Total Time: %12.3f sec.\n", dResult);
+  delete pStrMergeApp;
+  return returnCode;
+}
+
+//! \}
diff --git a/source/App/utils/BitrateTargeting/ExtractBitrates.cpp b/source/App/utils/BitrateTargeting/ExtractBitrates.cpp
index c13fa5e73218919c77157b732f94b38232528d93..1740a22d0c356fabf455da6dee91a7a5a9264a3f 100644
--- a/source/App/utils/BitrateTargeting/ExtractBitrates.cpp
+++ b/source/App/utils/BitrateTargeting/ExtractBitrates.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/App/utils/BitrateTargeting/ExtractBitrates.h b/source/App/utils/BitrateTargeting/ExtractBitrates.h
index dd73ffe9588fec1540ba4412a0c23a437c22075f..a3732fc9f03414445d06827246becb1c9a3e3703 100644
--- a/source/App/utils/BitrateTargeting/ExtractBitrates.h
+++ b/source/App/utils/BitrateTargeting/ExtractBitrates.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/App/utils/BitrateTargeting/ExtractBitratesMain.cpp b/source/App/utils/BitrateTargeting/ExtractBitratesMain.cpp
index 562096a895cd60af1656407cbbcc47f8b7087e15..f44ac2d7b932f74b6d43dffb4296cdf51886c024 100644
--- a/source/App/utils/BitrateTargeting/ExtractBitratesMain.cpp
+++ b/source/App/utils/BitrateTargeting/ExtractBitratesMain.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/App/utils/BitrateTargeting/GuessLambdaModifiers.cpp b/source/App/utils/BitrateTargeting/GuessLambdaModifiers.cpp
index 2e591804d47e86369e2ebec446d0513141ab6b61..039a1ff98f3fe3608c658e954e791d46fded04d7 100644
--- a/source/App/utils/BitrateTargeting/GuessLambdaModifiers.cpp
+++ b/source/App/utils/BitrateTargeting/GuessLambdaModifiers.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/App/utils/BitrateTargeting/GuessLambdaModifiers.h b/source/App/utils/BitrateTargeting/GuessLambdaModifiers.h
index 151e9b830a16e3b0022c9f1b95e487c71411e5de..16e4a6a036562ec77050d6141a03bd09a7eaebfe 100644
--- a/source/App/utils/BitrateTargeting/GuessLambdaModifiers.h
+++ b/source/App/utils/BitrateTargeting/GuessLambdaModifiers.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/App/utils/BitrateTargeting/GuessLambdaModifiersMain.cpp b/source/App/utils/BitrateTargeting/GuessLambdaModifiersMain.cpp
index cddb75b46f3f72f68f540bfa995267f4965729c8..c4ad9388ac896836b07d32289d6efb32d6e30846 100644
--- a/source/App/utils/BitrateTargeting/GuessLambdaModifiersMain.cpp
+++ b/source/App/utils/BitrateTargeting/GuessLambdaModifiersMain.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/App/utils/BitrateTargeting/RuntimeError.h b/source/App/utils/BitrateTargeting/RuntimeError.h
index e777654e87cbbbe08921ed2b601337f1642a6e35..01f720a9f85aa1052fc5d07e56d6101887c0423c 100644
--- a/source/App/utils/BitrateTargeting/RuntimeError.h
+++ b/source/App/utils/BitrateTargeting/RuntimeError.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/App/utils/annexBbytecount.cpp b/source/App/utils/annexBbytecount.cpp
index 2f6c1095943c6b34a3aa09e13e500b1ba8c27705..593a9a12d96e05064d310071699848286308866a 100644
--- a/source/App/utils/annexBbytecount.cpp
+++ b/source/App/utils/annexBbytecount.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/App/utils/convert_NtoMbit_YCbCr.cpp b/source/App/utils/convert_NtoMbit_YCbCr.cpp
index 843b1ecc8b0ebfca445e33b601655abd711b0da0..79663cfc4da71080afe9777a3a46e6b09708a2f6 100644
--- a/source/App/utils/convert_NtoMbit_YCbCr.cpp
+++ b/source/App/utils/convert_NtoMbit_YCbCr.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonAnalyserLib/CMakeLists.txt b/source/Lib/CommonAnalyserLib/CMakeLists.txt
index 0fd16c8367e1c2756ebb970de74309327d285668..7f5f76558de44e7170c31fe438e444568bd6b87c 100644
--- a/source/Lib/CommonAnalyserLib/CMakeLists.txt
+++ b/source/Lib/CommonAnalyserLib/CMakeLists.txt
@@ -46,12 +46,17 @@ set( INC_FILES ${BASE_INC_FILES} ${X86_INC_FILES} ${MD5_INC_FILES} )
 
 # library
 add_library( ${LIB_NAME} STATIC ${SRC_FILES} ${INC_FILES} ${NATVIS_FILES} )
+target_compile_definitions( ${LIB_NAME} PUBLIC RExt__DECODER_DEBUG_BIT_STATISTICS=1 )
 target_compile_definitions( ${LIB_NAME} PUBLIC RExt__DECODER_DEBUG_TOOL_STATISTICS=1 )
 
 if( EXTENSION_360_VIDEO )
   target_compile_definitions( ${LIB_NAME} PUBLIC EXTENSION_360_VIDEO=1 )
 endif()
 
+if( EXTENSION_HDRTOOLS )
+  target_compile_definitions( ${LIB_NAME} PUBLIC EXTENSION_HDRTOOLS=1 )
+endif()
+
 if( SET_ENABLE_TRACING )
   if( ENABLE_TRACING )
     target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_TRACING=1 )
diff --git a/source/Lib/CommonLib/AdaptiveLoopFilter.cpp b/source/Lib/CommonLib/AdaptiveLoopFilter.cpp
index b1aef8429b7e98beaf45a5abfacc7aa74a85e8bb..60beb504f1719e3b7dbfa659184e455782bb83f8 100644
--- a/source/Lib/CommonLib/AdaptiveLoopFilter.cpp
+++ b/source/Lib/CommonLib/AdaptiveLoopFilter.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -39,18 +39,27 @@
 
 #include "CodingStructure.h"
 #include "Picture.h"
+#include <array>
+#include <cmath>
+
+constexpr int AdaptiveLoopFilter::AlfNumClippingValues[];
 
 AdaptiveLoopFilter::AdaptiveLoopFilter()
   : m_classifier( nullptr )
 {
-  for( int i = 0; i < NUM_DIRECTIONS; i++ )
+  for (size_t i = 0; i < NUM_DIRECTIONS; i++)
   {
-    m_laplacian[i] = nullptr;
+    m_laplacian[i] = m_laplacianPtr[i];
+    for (size_t j = 0; j < sizeof(m_laplacianPtr[i]) / sizeof(m_laplacianPtr[i][0]); j++)
+    {
+      m_laplacianPtr[i][j] = m_laplacianData[i][j];
+    }
   }
 
   for( int compIdx = 0; compIdx < MAX_NUM_COMPONENT; compIdx++ )
   {
     m_ctuEnableFlag[compIdx] = nullptr;
+    m_ctuAlternative[compIdx] = nullptr;
   }
 
   m_deriveClassificationBlk = deriveClassificationBlk;
@@ -64,15 +73,220 @@ AdaptiveLoopFilter::AdaptiveLoopFilter()
 #endif
 }
 
-void AdaptiveLoopFilter::ALFProcess( CodingStructure& cs, AlfSliceParam& alfSliceParam )
+bool AdaptiveLoopFilter::isCrossedByVirtualBoundaries( const CodingStructure& cs, const int xPos, const int yPos, const int width, const int height, bool& clipTop, bool& clipBottom, bool& clipLeft, bool& clipRight, int& numHorVirBndry, int& numVerVirBndry, int horVirBndryPos[], int verVirBndryPos[], int& rasterSliceAlfPad )
 {
-  if( !alfSliceParam.enabledFlag[COMPONENT_Y] && !alfSliceParam.enabledFlag[COMPONENT_Cb] && !alfSliceParam.enabledFlag[COMPONENT_Cr] )
+  clipTop = false; clipBottom = false; clipLeft = false; clipRight = false;
+  numHorVirBndry = 0; numVerVirBndry = 0;
+  const PPS*   pps = cs.pps;
+  const PicHeader* picHeader = cs.picHeader;
+
+  if( picHeader->getLoopFilterAcrossVirtualBoundariesDisabledFlag() )
   {
-    return;
+    for( int i = 0; i < picHeader->getNumHorVirtualBoundaries(); i++ )
+    {
+      if( picHeader->getVirtualBoundariesPosY(i) == yPos )
+      {
+        clipTop = true;
+      }
+      else if( picHeader->getVirtualBoundariesPosY(i) == yPos + height )
+      {
+        clipBottom = true;
+      }
+      else if( yPos < picHeader->getVirtualBoundariesPosY(i) && picHeader->getVirtualBoundariesPosY(i) < yPos + height )
+      {
+        horVirBndryPos[numHorVirBndry++] = picHeader->getVirtualBoundariesPosY(i);
+      }
+    }
+    for( int i = 0; i < picHeader->getNumVerVirtualBoundaries(); i++ )
+    {
+      if( picHeader->getVirtualBoundariesPosX(i) == xPos )
+      {
+        clipLeft = true;
+      }
+      else if( picHeader->getVirtualBoundariesPosX(i) == xPos + width )
+      {
+        clipRight = true;
+      }
+      else if( xPos < picHeader->getVirtualBoundariesPosX(i) && picHeader->getVirtualBoundariesPosX(i) < xPos + width )
+      {
+        verVirBndryPos[numVerVirBndry++] = picHeader->getVirtualBoundariesPosX(i);
+      }
+    }
   }
 
-  // set available filter shapes
-  alfSliceParam.filterShapes = m_filterShapes;
+  const Slice& slice = *(cs.slice);
+  int   ctuSize = slice.getSPS()->getCTUSize();
+  const Position currCtuPos(xPos, yPos);
+  const CodingUnit *currCtu = cs.getCU(currCtuPos, CHANNEL_TYPE_LUMA);
+  //top
+  if (yPos >= ctuSize && clipTop == false)
+  {
+    const Position prevCtuPos(xPos, yPos - ctuSize);
+    const CodingUnit *prevCtu = cs.getCU(prevCtuPos, CHANNEL_TYPE_LUMA);
+    if ((!pps->getLoopFilterAcrossSlicesEnabledFlag() && !CU::isSameSlice(*currCtu, *prevCtu)) || 
+        (!pps->getLoopFilterAcrossTilesEnabledFlag()  && !CU::isSameTile(*currCtu,  *prevCtu)))
+    {
+      clipTop = true;
+    }
+  }
+
+  //bottom
+  if (yPos + ctuSize < cs.pcv->lumaHeight && clipBottom == false)
+  {
+    const Position nextCtuPos(xPos, yPos + ctuSize);
+    const CodingUnit *nextCtu = cs.getCU(nextCtuPos, CHANNEL_TYPE_LUMA);
+    if ((!pps->getLoopFilterAcrossSlicesEnabledFlag() && !CU::isSameSlice(*currCtu, *nextCtu)) || 
+        (!pps->getLoopFilterAcrossTilesEnabledFlag()  && !CU::isSameTile(*currCtu,  *nextCtu)))
+    {
+      clipBottom = true;
+    }
+  }
+
+  //left
+  if (xPos >= ctuSize && clipLeft == false)
+  {
+    const Position prevCtuPos(xPos - ctuSize, yPos);
+    const CodingUnit *prevCtu = cs.getCU(prevCtuPos, CHANNEL_TYPE_LUMA);
+    if ((!pps->getLoopFilterAcrossSlicesEnabledFlag() && !CU::isSameSlice(*currCtu, *prevCtu)) || 
+        (!pps->getLoopFilterAcrossTilesEnabledFlag()  && !CU::isSameTile(*currCtu,  *prevCtu)))
+    {
+      clipLeft = true;
+    }
+  }
+
+  //right
+  if (xPos + ctuSize < cs.pcv->lumaWidth && clipRight == false)
+  {
+    const Position nextCtuPos(xPos + ctuSize, yPos);
+    const CodingUnit *nextCtu = cs.getCU(nextCtuPos, CHANNEL_TYPE_LUMA);
+    if ((!pps->getLoopFilterAcrossSlicesEnabledFlag() && !CU::isSameSlice(*currCtu, *nextCtu)) || 
+        (!pps->getLoopFilterAcrossTilesEnabledFlag()  && !CU::isSameTile(*currCtu,  *nextCtu)))
+    {
+      clipRight = true;
+    }
+  }
+
+  rasterSliceAlfPad = 0;
+  if ( !clipTop && !clipLeft )
+  {
+    //top-left CTU
+    if ( xPos >= ctuSize && yPos >= ctuSize )
+    {
+      const Position prevCtuPos( xPos - ctuSize, yPos - ctuSize );
+      const CodingUnit *prevCtu = cs.getCU( prevCtuPos, CHANNEL_TYPE_LUMA );
+      if ( !pps->getLoopFilterAcrossSlicesEnabledFlag() && !CU::isSameSlice( *currCtu, *prevCtu ) )
+      {
+        rasterSliceAlfPad = 1;
+      }
+    }
+  }
+
+  if ( !clipBottom && !clipRight )
+  {
+    //bottom-right CTU
+    if ( xPos + ctuSize < cs.pcv->lumaWidth && yPos + ctuSize < cs.pcv->lumaHeight )
+    {
+      const Position nextCtuPos( xPos + ctuSize, yPos + ctuSize );
+      const CodingUnit *nextCtu = cs.getCU( nextCtuPos, CHANNEL_TYPE_LUMA );
+      if ( !pps->getLoopFilterAcrossSlicesEnabledFlag() && !CU::isSameSlice( *currCtu, *nextCtu ) )
+      {
+        rasterSliceAlfPad += 2;
+      }
+    }
+  }
+
+  return numHorVirBndry > 0 || numVerVirBndry > 0 || clipTop || clipBottom || clipLeft || clipRight || rasterSliceAlfPad;
+}
+
+const int AdaptiveLoopFilter::m_fixedFilterSetCoeff[ALF_FIXED_FILTER_NUM][MAX_NUM_ALF_LUMA_COEFF] =
+{ // one row of coefficients per fixed filter; the last entry of every row listed here is 0
+  { 0,   0,   2,  -3,   1,  -4,   1,   7,  -1,   1,  -1,   5, 0 },
+  { 0,   0,   0,   0,   0,  -1,   0,   1,   0,   0,  -1,   2, 0 },
+  { 0,   0,   0,   0,   0,   0,   0,   1,   0,   0,   0,   0, 0 },
+  { 0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  -1,   1, 0 },
+  { 2,   2,  -7,  -3,   0,  -5,  13,  22,  12,  -3,  -3,  17,  0 },
+  { -1,   0,   6,  -8,   1,  -5,   1,  23,   0,   2,  -5,  10,  0 },
+  { 0,   0,  -1,  -1,   0,  -1,   2,   1,   0,   0,  -1,   4, 0 },
+  { 0,   0,   3, -11,   1,   0,  -1,  35,   5,   2,  -9,   9,  0 },
+  { 0,   0,   8,  -8,  -2,  -7,   4,   4,   2,   1,  -1,  25,  0 },
+  { 0,   0,   1,  -1,   0,  -3,   1,   3,  -1,   1,  -1,   3, 0 },
+  { 0,   0,   3,  -3,   0,  -6,   5,  -1,   2,   1,  -4,  21,  0 },
+  { -7,   1,   5,   4,  -3,   5,  11,  13,  12,  -8,  11,  12,  0 },
+  { -5,  -3,   6,  -2,  -3,   8,  14,  15,   2,  -7,  11,  16,  0 },
+  { 2,  -1,  -6,  -5,  -2,  -2,  20,  14,  -4,   0,  -3,  25,  0 },
+  { 3,   1,  -8,  -4,   0,  -8,  22,   5,  -3,   2, -10,  29,  0 },
+  { 2,   1,  -7,  -1,   2, -11,  23,  -5,   0,   2, -10,  29,  0 },
+  { -6,  -3,   8,   9,  -4,   8,   9,   7,  14,  -2,   8,   9,  0 },
+  { 2,   1,  -4,  -7,   0,  -8,  17,  22,   1,  -1,  -4,  23,  0 },
+  { 3,   0,  -5,  -7,   0,  -7,  15,  18,  -5,   0,  -5,  27,  0 },
+  { 2,   0,   0,  -7,   1, -10,  13,  13,  -4,   2,  -7,  24,  0 },
+  { 3,   3, -13,   4,  -2,  -5,   9,  21,  25,  -2,  -3,  12,  0 },
+  { -5,  -2,   7,  -3,  -7,   9,   8,   9,  16,  -2,  15,  12,  0 },
+  { 0,  -1,   0,  -7,  -5,   4,  11,  11,   8,  -6,  12,  21,  0 },
+  { 3,  -2,  -3,  -8,  -4,  -1,  16,  15,  -2,  -3,   3,  26,  0 },
+  { 2,   1,  -5,  -4,  -1,  -8,  16,   4,  -2,   1,  -7,  33,  0 },
+  { 2,   1,  -4,  -2,   1, -10,  17,  -2,   0,   2, -11,  33,  0 },
+  { 1,  -2,   7, -15, -16,  10,   8,   8,  20,  11,  14,  11,  0 },
+  { 2,   2,   3, -13, -13,   4,   8,  12,   2,  -3,  16,  24,  0 },
+  { 1,   4,   0,  -7,  -8,  -4,   9,   9,  -2,  -2,   8,  29,  0 },
+  { 1,   1,   2,  -4,  -1,  -6,   6,   3,  -1,  -1,  -3,  30,  0 },
+  { -7,   3,   2,  10,  -2,   3,   7,  11,  19,  -7,   8,  10, 0 },
+  { 0,  -2,  -5,  -3,  -2,   4,  20,  15,  -1,  -3,  -1,  22,  0 },
+  { 3,  -1,  -8,  -4,  -1,  -4,  22,   8,  -4,   2,  -8,  28,  0 },
+  { 0,   3, -14,   3,   0,   1,  19,  17,   8,  -3,  -7,  20,  0 },
+  { 0,   2,  -1,  -8,   3,  -6,   5,  21,   1,   1,  -9,  13,  0 },
+  { -4,  -2,   8,  20,  -2,   2,   3,   5,  21,   4,   6,   1, 0 },
+  { 2,  -2,  -3,  -9,  -4,   2,  14,  16,   3,  -6,   8,  24,  0 },
+  { 2,   1,   5, -16,  -7,   2,   3,  11,  15,  -3,  11,  22,  0 },
+  { 1,   2,   3, -11,  -2,  -5,   4,   8,   9,  -3,  -2,  26,  0 },
+  { 0,  -1,  10,  -9,  -1,  -8,   2,   3,   4,   0,   0,  29,  0 },
+  { 1,   2,   0,  -5,   1,  -9,   9,   3,   0,   1,  -7,  20,  0 },
+  { -2,   8,  -6,  -4,   3,  -9,  -8,  45,  14,   2, -13,   7, 0 },
+  { 1,  -1,  16, -19,  -8,  -4,  -3,   2,  19,   0,   4,  30,  0 },
+  { 1,   1,  -3,   0,   2, -11,  15,  -5,   1,   2,  -9,  24,  0 },
+  { 0,   1,  -2,   0,   1,  -4,   4,   0,   0,   1,  -4,   7,  0 },
+  { 0,   1,   2,  -5,   1,  -6,   4,  10,  -2,   1,  -4,  10,  0 },
+  { 3,   0,  -3,  -6,  -2,  -6,  14,   8,  -1,  -1,  -3,  31,  0 },
+  { 0,   1,   0,  -2,   1,  -6,   5,   1,   0,   1,  -5,  13,  0 },
+  { 3,   1,   9, -19, -21,   9,   7,   6,  13,   5,  15,  21,  0 },
+  { 2,   4,   3, -12, -13,   1,   7,   8,   3,   0,  12,  26,  0 },
+  { 3,   1,  -8,  -2,   0,  -6,  18,   2,  -2,   3, -10,  23,  0 },
+  { 1,   1,  -4,  -1,   1,  -5,   8,   1,  -1,   2,  -5,  10,  0 },
+  { 0,   1,  -1,   0,   0,  -2,   2,   0,   0,   1,  -2,   3,  0 },
+  { 1,   1,  -2,  -7,   1,  -7,  14,  18,   0,   0,  -7,  21,  0 },
+  { 0,   1,   0,  -2,   0,  -7,   8,   1,  -2,   0,  -3,  24,  0 },
+  { 0,   1,   1,  -2,   2, -10,  10,   0,  -2,   1,  -7,  23,  0 },
+  { 0,   2,   2, -11,   2,  -4,  -3,  39,   7,   1, -10,   9,  0 },
+  { 1,   0,  13, -16,  -5,  -6,  -1,   8,   6,   0,   6,  29,  0 },
+  { 1,   3,   1,  -6,  -4,  -7,   9,   6,  -3,  -2,   3,  33,  0 },
+  { 4,   0, -17,  -1,  -1,   5,  26,   8,  -2,   3, -15,  30,  0 },
+  { 0,   1,  -2,   0,   2,  -8,  12,  -6,   1,   1,  -6,  16,  0 },
+  { 0,   0,   0,  -1,   1,  -4,   4,   0,   0,   0,  -3,  11,  0 },
+  { 0,   1,   2,  -8,   2,  -6,   5,  15,   0,   2,  -7,   9,  0 },
+  { 1,  -1,  12, -15,  -7,  -2,   3,   6,   6,  -1,   7,  30,  0 },
+};
+const int AdaptiveLoopFilter::m_classToFilterMapping[NUM_FIXED_FILTER_SETS][MAX_NUM_ALF_CLASSES] =
+{ // one row per fixed filter set, one entry per ALF class; presumably row indices into m_fixedFilterSetCoeff (all values lie in 0..63) - TODO confirm
+  { 8,   2,   2,   2,   3,   4,  53,   9,   9,  52,   4,   4,   5,   9,   2,   8,  10,   9,   1,   3,  39,  39,  10,   9,  52 },
+  { 11,  12,  13,  14,  15,  30,  11,  17,  18,  19,  16,  20,  20,   4,  53,  21,  22,  23,  14,  25,  26,  26,  27,  28,  10 },
+  { 16,  12,  31,  32,  14,  16,  30,  33,  53,  34,  35,  16,  20,   4,   7,  16,  21,  36,  18,  19,  21,  26,  37,  38,  39 },
+  { 35,  11,  13,  14,  43,  35,  16,   4,  34,  62,  35,  35,  30,  56,   7,  35,  21,  38,  24,  40,  16,  21,  48,  57,  39 },
+  { 11,  31,  32,  43,  44,  16,   4,  17,  34,  45,  30,  20,  20,   7,   5,  21,  22,  46,  40,  47,  26,  48,  63,  58,  10 },
+  { 12,  13,  50,  51,  52,  11,  17,  53,  45,   9,  30,   4,  53,  19,   0,  22,  23,  25,  43,  44,  37,  27,  28,  10,  55 },
+  { 30,  33,  62,  51,  44,  20,  41,  56,  34,  45,  20,  41,  41,  56,   5,  30,  56,  38,  40,  47,  11,  37,  42,  57,   8 },
+  { 35,  11,  23,  32,  14,  35,  20,   4,  17,  18,  21,  20,  20,  20,   4,  16,  21,  36,  46,  25,  41,  26,  48,  49,  58 },
+  { 12,  31,  59,  59,   3,  33,  33,  59,  59,  52,   4,  33,  17,  59,  55,  22,  36,  59,  59,  60,  22,  36,  59,  25,  55 },
+  { 31,  25,  15,  60,  60,  22,  17,  19,  55,  55,  20,  20,  53,  19,  55,  22,  46,  25,  43,  60,  37,  28,  10,  55,  52 },
+  { 12,  31,  32,  50,  51,  11,  33,  53,  19,  45,  16,   4,   4,  53,   5,  22,  36,  18,  25,  43,  26,  27,  27,  28,  10 },
+  { 5,   2,  44,  52,   3,   4,  53,  45,   9,   3,   4,  56,   5,   0,   2,   5,  10,  47,  52,   3,  63,  39,  10,   9,  52 },
+  { 12,  34,  44,  44,   3,  56,  56,  62,  45,   9,  56,  56,   7,   5,   0,  22,  38,  40,  47,  52,  48,  57,  39,  10,   9 },
+  { 35,  11,  23,  14,  51,  35,  20,  41,  56,  62,  16,  20,  41,  56,   7,  16,  21,  38,  24,  40,  26,  26,  42,  57,  39 },
+  { 33,  34,  51,  51,  52,  41,  41,  34,  62,   0,  41,  41,  56,   7,   5,  56,  38,  38,  40,  44,  37,  42,  57,  39,  10 },
+  { 16,  31,  32,  15,  60,  30,   4,  17,  19,  25,  22,  20,   4,  53,  19,  21,  22,  46,  25,  55,  26,  48,  63,  58,  55 },
+};
+
+void AdaptiveLoopFilter::ALFProcess(CodingStructure& cs)
+{
 
   // set clipping range
   m_clpRngs = cs.slice->getClpRngs();
@@ -81,9 +295,10 @@ void AdaptiveLoopFilter::ALFProcess( CodingStructure& cs, AlfSliceParam& alfSlic
   for( int compIdx = 0; compIdx < MAX_NUM_COMPONENT; compIdx++ )
   {
     m_ctuEnableFlag[compIdx] = cs.picture->getAlfCtuEnableFlag( compIdx );
+    m_ctuAlternative[compIdx] = cs.picture->getAlfCtuAlternativeData( compIdx );
   }
-  reconstructCoeff( alfSliceParam, CHANNEL_TYPE_LUMA );
-  reconstructCoeff( alfSliceParam, CHANNEL_TYPE_CHROMA );
+  short* alfCtuFilterIndex = nullptr;
+  uint32_t lastSliceIdx = 0xFFFFFFFF;
 
   PelUnitBuf recYuv = cs.getRecoBuf();
   m_tempBuf.copyFrom( recYuv );
@@ -93,20 +308,147 @@ void AdaptiveLoopFilter::ALFProcess( CodingStructure& cs, AlfSliceParam& alfSlic
   const PreCalcValues& pcv = *cs.pcv;
 
   int ctuIdx = 0;
+  bool clipTop = false, clipBottom = false, clipLeft = false, clipRight = false;
+  int numHorVirBndry = 0, numVerVirBndry = 0;
+  int horVirBndryPos[] = { 0, 0, 0 };
+  int verVirBndryPos[] = { 0, 0, 0 };
+
   for( int yPos = 0; yPos < pcv.lumaHeight; yPos += pcv.maxCUHeight )
   {
     for( int xPos = 0; xPos < pcv.lumaWidth; xPos += pcv.maxCUWidth )
     {
+      // get first CU in CTU
+      const CodingUnit *cu = cs.getCU( Position(xPos, yPos), CHANNEL_TYPE_LUMA );
+
+      // skip this CTU if ALF is disabled
+      if (!cu->slice->getTileGroupAlfEnabledFlag(COMPONENT_Y) && !cu->slice->getTileGroupAlfEnabledFlag(COMPONENT_Cb) && !cu->slice->getTileGroupAlfEnabledFlag(COMPONENT_Cr))
+      {
+        ctuIdx++;
+        continue;
+      }
+
+      // reload ALF APS each time the slice changes during raster scan filtering
+      if(ctuIdx == 0 || lastSliceIdx != cu->slice->getSliceID() || alfCtuFilterIndex==nullptr)
+      {
+        cs.slice = cu->slice;
+        reconstructCoeffAPSs(cs, true, cu->slice->getTileGroupAlfEnabledFlag(COMPONENT_Cb) || cu->slice->getTileGroupAlfEnabledFlag(COMPONENT_Cr), false);
+        alfCtuFilterIndex = cu->slice->getPic()->getAlfCtbFilterIndex();
+      }
+      lastSliceIdx = cu->slice->getSliceID();
+
       const int width = ( xPos + pcv.maxCUWidth > pcv.lumaWidth ) ? ( pcv.lumaWidth - xPos ) : pcv.maxCUWidth;
       const int height = ( yPos + pcv.maxCUHeight > pcv.lumaHeight ) ? ( pcv.lumaHeight - yPos ) : pcv.maxCUHeight;
+      bool ctuEnableFlag = m_ctuEnableFlag[COMPONENT_Y][ctuIdx];
+      for( int compIdx = 1; compIdx < MAX_NUM_COMPONENT; compIdx++ )
+      {
+        ctuEnableFlag |= m_ctuEnableFlag[compIdx][ctuIdx] > 0;
+      }
+      int rasterSliceAlfPad = 0;
+      if( ctuEnableFlag && isCrossedByVirtualBoundaries( cs, xPos, yPos, width, height, clipTop, clipBottom, clipLeft, clipRight, numHorVirBndry, numVerVirBndry, horVirBndryPos, verVirBndryPos, rasterSliceAlfPad ) )
+      {
+        int yStart = yPos;
+        for( int i = 0; i <= numHorVirBndry; i++ )
+        {
+          const int yEnd = i == numHorVirBndry ? yPos + height : horVirBndryPos[i];
+          const int h = yEnd - yStart;
+          const bool clipT = ( i == 0 && clipTop ) || ( i > 0 ) || ( yStart == 0 );
+          const bool clipB = ( i == numHorVirBndry && clipBottom ) || ( i < numHorVirBndry ) || ( yEnd == pcv.lumaHeight );
+          int xStart = xPos;
+          for( int j = 0; j <= numVerVirBndry; j++ )
+          {
+            const int xEnd = j == numVerVirBndry ? xPos + width : verVirBndryPos[j];
+            const int w = xEnd - xStart;
+            const bool clipL = ( j == 0 && clipLeft ) || ( j > 0 ) || ( xStart == 0 );
+            const bool clipR = ( j == numVerVirBndry && clipRight ) || ( j < numVerVirBndry ) || ( xEnd == pcv.lumaWidth );
+            const int wBuf = w + (clipL ? 0 : MAX_ALF_PADDING_SIZE) + (clipR ? 0 : MAX_ALF_PADDING_SIZE);
+            const int hBuf = h + (clipT ? 0 : MAX_ALF_PADDING_SIZE) + (clipB ? 0 : MAX_ALF_PADDING_SIZE);
+            PelUnitBuf buf = m_tempBuf2.subBuf( UnitArea( cs.area.chromaFormat, Area( 0, 0, wBuf, hBuf ) ) );
+            buf.copyFrom( tmpYuv.subBuf( UnitArea( cs.area.chromaFormat, Area( xStart - (clipL ? 0 : MAX_ALF_PADDING_SIZE), yStart - (clipT ? 0 : MAX_ALF_PADDING_SIZE), wBuf, hBuf ) ) ) );
+            // pad top-left unavailable samples for raster slice
+            if ( xStart == xPos && yStart == yPos && ( rasterSliceAlfPad & 1 ) )
+            {
+              buf.padBorderPel( MAX_ALF_PADDING_SIZE, 1 );
+            }
+
+            // pad bottom-right unavailable samples for raster slice
+            if ( xEnd == xPos + width && yEnd == yPos + height && ( rasterSliceAlfPad & 2 ) )
+            {
+              buf.padBorderPel( MAX_ALF_PADDING_SIZE, 2 );
+            }
+            buf.extendBorderPel( MAX_ALF_PADDING_SIZE );
+            buf = buf.subBuf( UnitArea ( cs.area.chromaFormat, Area( clipL ? 0 : MAX_ALF_PADDING_SIZE, clipT ? 0 : MAX_ALF_PADDING_SIZE, w, h ) ) );
+
+            if( m_ctuEnableFlag[COMPONENT_Y][ctuIdx] )
+            {
+              const Area blkSrc( 0, 0, w, h );
+              const Area blkDst( xStart, yStart, w, h );
+              deriveClassification( m_classifier, buf.get(COMPONENT_Y), blkDst, blkSrc );
+              short filterSetIndex = alfCtuFilterIndex[ctuIdx];
+              short *coeff;
+              short *clip;
+              if (filterSetIndex >= NUM_FIXED_FILTER_SETS)
+              {
+                coeff = m_coeffApsLuma[filterSetIndex - NUM_FIXED_FILTER_SETS];
+                clip = m_clippApsLuma[filterSetIndex - NUM_FIXED_FILTER_SETS];
+              }
+              else
+              {
+                coeff = m_fixedFilterSetCoeffDec[filterSetIndex];
+                clip = m_clipDefault;
+              }
+              m_filter7x7Blk(m_classifier, recYuv, buf, blkDst, blkSrc, COMPONENT_Y, coeff, clip, m_clpRngs.comp[COMPONENT_Y], cs
+                , m_alfVBLumaCTUHeight
+                , m_alfVBLumaPos
+              );
+            }
+
+            for( int compIdx = 1; compIdx < MAX_NUM_COMPONENT; compIdx++ )
+            {
+              ComponentID compID = ComponentID( compIdx );
+              const int chromaScaleX = getComponentScaleX( compID, tmpYuv.chromaFormat );
+              const int chromaScaleY = getComponentScaleY( compID, tmpYuv.chromaFormat );
+
+              if( m_ctuEnableFlag[compIdx][ctuIdx] )
+              {
+                const Area blkSrc( 0, 0, w >> chromaScaleX, h >> chromaScaleY );
+                const Area blkDst( xStart >> chromaScaleX, yStart >> chromaScaleY, w >> chromaScaleX, h >> chromaScaleY );
+                uint8_t alt_num = m_ctuAlternative[compIdx][ctuIdx];
+                m_filter5x5Blk(m_classifier, recYuv, buf, blkDst, blkSrc, compID, m_chromaCoeffFinal[alt_num], m_chromaClippFinal[alt_num], m_clpRngs.comp[compIdx], cs
+                  , m_alfVBChmaCTUHeight
+                   , m_alfVBChmaPos );
+              }
+            }
+
+            xStart = xEnd;
+          }
+
+          yStart = yEnd;
+        }
+      }
+      else
+      {
       const UnitArea area( cs.area.chromaFormat, Area( xPos, yPos, width, height ) );
       if( m_ctuEnableFlag[COMPONENT_Y][ctuIdx] )
       {
         Area blk( xPos, yPos, width, height );
-        deriveClassification( m_classifier, tmpYuv.get( COMPONENT_Y ), blk );
-        Area blkPCM(xPos, yPos, width, height);
-        resetPCMBlkClassInfo(cs, m_classifier, tmpYuv.get(COMPONENT_Y), blkPCM);
-        m_filter7x7Blk(m_classifier, recYuv, tmpYuv, blk, COMPONENT_Y, m_coeffFinal, m_clpRngs.comp[COMPONENT_Y], cs );
+        deriveClassification( m_classifier, tmpYuv.get( COMPONENT_Y ), blk, blk );
+        short filterSetIndex = alfCtuFilterIndex[ctuIdx];
+        short *coeff;
+        short *clip;
+        if (filterSetIndex >= NUM_FIXED_FILTER_SETS)
+        {
+          coeff = m_coeffApsLuma[filterSetIndex - NUM_FIXED_FILTER_SETS];
+          clip = m_clippApsLuma[filterSetIndex - NUM_FIXED_FILTER_SETS];
+        }
+        else
+        {
+          coeff = m_fixedFilterSetCoeffDec[filterSetIndex];
+          clip = m_clipDefault;
+        }
+        m_filter7x7Blk(m_classifier, recYuv, tmpYuv, blk, blk, COMPONENT_Y, coeff, clip, m_clpRngs.comp[COMPONENT_Y], cs
+          , m_alfVBLumaCTUHeight
+          , m_alfVBLumaPos
+        );
       }
 
       for( int compIdx = 1; compIdx < MAX_NUM_COMPONENT; compIdx++ )
@@ -118,71 +460,116 @@ void AdaptiveLoopFilter::ALFProcess( CodingStructure& cs, AlfSliceParam& alfSlic
         if( m_ctuEnableFlag[compIdx][ctuIdx] )
         {
           Area blk( xPos >> chromaScaleX, yPos >> chromaScaleY, width >> chromaScaleX, height >> chromaScaleY );
-
-          m_filter5x5Blk( m_classifier, recYuv, tmpYuv, blk, compID, alfSliceParam.chromaCoeff, m_clpRngs.comp[compIdx], cs );
+          uint8_t alt_num = m_ctuAlternative[compIdx][ctuIdx];
+          m_filter5x5Blk(m_classifier, recYuv, tmpYuv, blk, blk, compID, m_chromaCoeffFinal[alt_num], m_chromaClippFinal[alt_num], m_clpRngs.comp[compIdx], cs
+            , m_alfVBChmaCTUHeight
+            , m_alfVBChmaPos);
         }
       }
+      }
       ctuIdx++;
     }
   }
 }
 
-void AdaptiveLoopFilter::reconstructCoeff( AlfSliceParam& alfSliceParam, ChannelType channel, const bool bRedo )
+void AdaptiveLoopFilter::reconstructCoeffAPSs(CodingStructure& cs, bool luma, bool chroma, bool isRdo) // Rebuilds the final ALF tables from the APSs referenced by cs.slice; 'luma' / 'chroma' select which parts are processed
 {
-  int factor = ( 1 << ( m_NUM_BITS - 1 ) );
-  AlfFilterType filterType = isLuma( channel ) ? ALF_FILTER_7 : ALF_FILTER_5;
-  int numClasses = isLuma( channel ) ? MAX_NUM_ALF_CLASSES : 1;
-  int numCoeff = filterType == ALF_FILTER_5 ? 7 : 13;
-  int numCoeffMinus1 = numCoeff - 1;
-  int numFilters = isLuma( channel ) ? alfSliceParam.numLumaFilters : 1;
-  short* coeff = isLuma( channel ) ? alfSliceParam.lumaCoeff : alfSliceParam.chromaCoeff;
-
-  if( alfSliceParam.alfLumaCoeffDeltaPredictionFlag && isLuma( channel ) )
+  // luma: reconstruct every luma APS listed by the slice and cache its coefficient / clip tables
+  APS** aps = cs.slice->getAlfAPSs();
+  AlfParam alfParamTmp; // working copy: reconstructCoeff() takes a non-const reference and writes into the parameter arrays
+  APS* curAPS;
+  if (luma)
   {
-    for( int i = 1; i < numFilters; i++ )
+    for (int i = 0; i < cs.slice->getTileGroupNumAps(); i++)
     {
-      for( int j = 0; j < numCoeffMinus1; j++ )
-      {
-        coeff[i * MAX_NUM_ALF_LUMA_COEFF + j] += coeff[( i - 1 ) * MAX_NUM_ALF_LUMA_COEFF + j];
-      }
+      int apsIdx = cs.slice->getTileGroupApsIdLuma()[i]; // i-th luma APS id used by this slice
+      curAPS = aps[apsIdx];
+      CHECK(curAPS == NULL, "invalid APS");
+      alfParamTmp = curAPS->getAlfAPSParam();
+      reconstructCoeff(alfParamTmp, CHANNEL_TYPE_LUMA, isRdo, true); // fills m_coeffFinal / m_clippFinal
+      memcpy(m_coeffApsLuma[i], m_coeffFinal, sizeof(m_coeffFinal)); // stored under slot i (position in the slice's list), not under apsIdx
+      memcpy(m_clippApsLuma[i], m_clippFinal, sizeof(m_clippFinal));
     }
   }
 
-  for( int filterIdx = 0; filterIdx < numFilters; filterIdx++ )
+  // chroma: a single APS id per slice; results go to m_chromaCoeffFinal / m_chromaClippFinal
+  if (chroma)
   {
-    int sum = 0;
-    for( int i = 0; i < numCoeffMinus1; i++ )
-    {
-      sum += ( coeff[filterIdx* MAX_NUM_ALF_LUMA_COEFF + i] << 1 );
-    }
-    coeff[filterIdx* MAX_NUM_ALF_LUMA_COEFF + numCoeffMinus1] = factor - sum;
+    int apsIdxChroma = cs.slice->getTileGroupApsIdChroma();
+    curAPS = aps[apsIdxChroma]; // NOTE(review): unlike the luma path there is no NULL check before the dereference below - confirm callers guarantee a valid chroma APS
+    m_alfParamChroma = &curAPS->getAlfAPSParam(); // keeps the address of what getAlfAPSParam() returns, not a copy
+    alfParamTmp = *m_alfParamChroma;
+    reconstructCoeff(alfParamTmp, CHANNEL_TYPE_CHROMA, isRdo, true); // operates on the working copy, one pass per chroma alternative
   }
+}
 
-  if( isChroma( channel ) )
-  {
-    return;
-  }
+void AdaptiveLoopFilter::reconstructCoeff( AlfParam& alfParam, ChannelType channel, const bool isRdo, const bool isRedo ) // Expands alfParam into the per-class (luma) or per-alternative (chroma) final coefficient / clip tables; isRedo is not read in this body
+{
+  int factor = isRdo ? 0 : (1 << (m_NUM_BITS - 1)); // value written to the last tap of every filter below
+  AlfFilterType filterType = isLuma( channel ) ? ALF_FILTER_7 : ALF_FILTER_5;
+  int numClasses = isLuma( channel ) ? MAX_NUM_ALF_CLASSES : 1;
+  int numCoeff = filterType == ALF_FILTER_5 ? 7 : 13; // tap count including the last tap that is overwritten with 'factor'
+  int numCoeffMinus1 = numCoeff - 1;
+  const int numAlts = isLuma( channel ) ? 1 : alfParam.numAlternativesChroma; // luma runs the loop once; chroma once per alternative
 
-  for( int classIdx = 0; classIdx < numClasses; classIdx++ )
+  for( int altIdx = 0; altIdx < numAlts; ++ altIdx )
   {
-    int filterIdx = alfSliceParam.filterCoeffDeltaIdx[classIdx];
-    memcpy( m_coeffFinal + classIdx * MAX_NUM_ALF_LUMA_COEFF, coeff + filterIdx * MAX_NUM_ALF_LUMA_COEFF, sizeof( short ) * numCoeff );
-  }
+    int numFilters = isLuma( channel ) ? alfParam.numLumaFilters : 1;
+    short* coeff = isLuma( channel ) ? alfParam.lumaCoeff : alfParam.chromaCoeff[altIdx];
+    short* clipp = isLuma( channel ) ? alfParam.lumaClipp : alfParam.chromaClipp[altIdx];
 
-  if( bRedo && alfSliceParam.alfLumaCoeffDeltaPredictionFlag )
-  {
-    for( int i = numFilters - 1; i > 0; i-- )
+    for( int filterIdx = 0; filterIdx < numFilters; filterIdx++ )
+    {
+      coeff[filterIdx* MAX_NUM_ALF_LUMA_COEFF + numCoeffMinus1] = factor; // note: this writes into the caller's alfParam arrays, not only into the m_*Final tables
+    }
+
+    if( isChroma( channel ) )
     {
-      for( int j = 0; j < numCoeffMinus1; j++ )
+      for( int coeffIdx = 0; coeffIdx < numCoeffMinus1; ++coeffIdx )
       {
-        coeff[i * MAX_NUM_ALF_LUMA_COEFF + j] = coeff[i * MAX_NUM_ALF_LUMA_COEFF + j] - coeff[( i - 1 ) * MAX_NUM_ALF_LUMA_COEFF + j];
+        m_chromaCoeffFinal[altIdx][coeffIdx] = coeff[coeffIdx];
+#if JVET_Q0249_ALF_CHROMA_CLIPFLAG
+        int clipIdx = alfParam.nonLinearFlag[channel] ? clipp[coeffIdx] : 0;
+#else
+        int clipIdx = alfParam.nonLinearFlag[channel][altIdx] ? clipp[coeffIdx] : 0;
+#endif
+        m_chromaClippFinal[altIdx][coeffIdx] = isRdo ? clipIdx : m_alfClippingValues[channel][clipIdx]; // RDO keeps the clip index; otherwise the looked-up clipping value is stored
+      }
+      m_chromaCoeffFinal[altIdx][numCoeffMinus1] = factor;
+      m_chromaClippFinal[altIdx][numCoeffMinus1] = isRdo ? 0 : m_alfClippingValues[channel][0];
+      continue; // chroma is finished for this alternative; the class loop below is reached for luma only
+    }
+    for( int classIdx = 0; classIdx < numClasses; classIdx++ )
+    {
+      int filterIdx = alfParam.filterCoeffDeltaIdx[classIdx]; // which of the numLumaFilters filters this class uses
+
+      CHECK(!(filterIdx >= 0 && filterIdx < alfParam.numLumaFilters), "Bad coeff delta idx in ALF");
+      for (int coeffIdx = 0; coeffIdx < numCoeffMinus1; ++coeffIdx)
+      {
+        m_coeffFinal[classIdx * MAX_NUM_ALF_LUMA_COEFF + coeffIdx] = coeff[filterIdx * MAX_NUM_ALF_LUMA_COEFF + coeffIdx];
+      }
+      m_coeffFinal[classIdx* MAX_NUM_ALF_LUMA_COEFF + numCoeffMinus1] = factor;
+      m_clippFinal[classIdx* MAX_NUM_ALF_LUMA_COEFF + numCoeffMinus1] = isRdo ? 0 : m_alfClippingValues[channel][0];
+      for( int coeffIdx = 0; coeffIdx < numCoeffMinus1; ++coeffIdx )
+      {
+#if JVET_Q0249_ALF_CHROMA_CLIPFLAG
+        int clipIdx = alfParam.nonLinearFlag[channel] ? clipp[filterIdx * MAX_NUM_ALF_LUMA_COEFF + coeffIdx] : 0;
+#else
+        int clipIdx = alfParam.nonLinearFlag[channel][altIdx] ? clipp[filterIdx * MAX_NUM_ALF_LUMA_COEFF + coeffIdx] : 0;
+#endif
+        CHECK(!(clipIdx >= 0 && clipIdx < MaxAlfNumClippingValues), "Bad clip idx in ALF");
+        m_clippFinal[classIdx * MAX_NUM_ALF_LUMA_COEFF + coeffIdx] = isRdo ? clipIdx : m_alfClippingValues[channel][clipIdx]; // same index-vs-value rule as the chroma branch
       }
+      m_clippFinal[classIdx* MAX_NUM_ALF_LUMA_COEFF + numCoeffMinus1] = // NOTE(review): same element and same value as the assignment made before the clip loop - redundant
+        isRdo ? 0 :
+        m_alfClippingValues[channel][0];
     }
   }
 }
 
 void AdaptiveLoopFilter::create( const int picWidth, const int picHeight, const ChromaFormat format, const int maxCUWidth, const int maxCUHeight, const int maxCUDepth, const int inputBitDepth[MAX_NUM_CHANNEL_TYPE] )
 {
+  destroy(); // releases what a previous create() allocated; destroy() returns immediately when m_created is false
   std::memcpy( m_inputBitDepth, inputBitDepth, sizeof( m_inputBitDepth ) );
   m_picWidth = picWidth;
   m_picHeight = picHeight;
@@ -196,68 +583,87 @@ void AdaptiveLoopFilter::create( const int picWidth, const int picHeight, const
   m_numCTUsInPic = m_numCTUsInHeight * m_numCTUsInWidth;
   m_filterShapes[CHANNEL_TYPE_LUMA].push_back( AlfFilterShape( 7 ) );
   m_filterShapes[CHANNEL_TYPE_CHROMA].push_back( AlfFilterShape( 5 ) );
+  m_alfVBLumaPos = m_maxCUHeight - ALF_VB_POS_ABOVE_CTUROW_LUMA; // row inside a CTU; the classifier and filter compare it against (y & (vbCTUHeight - 1))
+  m_alfVBChmaPos = (m_maxCUHeight >> ((m_chromaFormat == CHROMA_420) ? 1 : 0)) - ALF_VB_POS_ABOVE_CTUROW_CHMA; // CTU height is halved for 4:2:0 only
 
-  m_tempBuf.destroy();
-  m_tempBuf.create( format, Area( 0, 0, picWidth, picHeight ), maxCUWidth, MAX_ALF_FILTER_LENGTH >> 1, 0, false );
+  m_alfVBLumaCTUHeight = m_maxCUHeight; // must be a power of two: deriveClassificationBlk / filterBlk CHECK this
+  m_alfVBChmaCTUHeight = (m_maxCUHeight >> ((m_chromaFormat == CHROMA_420) ? 1 : 0));
 
-  // Laplacian based activity
-  for( int i = 0; i < NUM_DIRECTIONS; i++ )
+  static_assert( AlfNumClippingValues[CHANNEL_TYPE_LUMA] > 0, "AlfNumClippingValues[CHANNEL_TYPE_LUMA] must be at least one" );
+  for( int i = 0; i < AlfNumClippingValues[CHANNEL_TYPE_LUMA]; ++i )
   {
-    if ( m_laplacian[i] == nullptr )
-    {
-      m_laplacian[i] = new int*[m_CLASSIFICATION_BLK_SIZE + 5];
+    m_alfClippingValues[CHANNEL_TYPE_LUMA][i] = (Pel)std::round( std::pow(2., double(m_inputBitDepth[CHANNEL_TYPE_LUMA] - 2.35*i)) ); // clip[i] = round(2^(bitDepth - 2.35*i))
+  }
+  static_assert( AlfNumClippingValues[CHANNEL_TYPE_CHROMA] > 0, "AlfNumClippingValues[CHANNEL_TYPE_CHROMA] must be at least one" );
+  m_alfClippingValues[CHANNEL_TYPE_CHROMA][0] = 1 << m_inputBitDepth[CHANNEL_TYPE_CHROMA]; // entry 0 is 2^bitDepth, computed with a shift instead of pow()
+  for( int i = 1; i < AlfNumClippingValues[CHANNEL_TYPE_CHROMA]; ++i )
+  {
+    m_alfClippingValues[CHANNEL_TYPE_CHROMA][i] = (Pel)std::round( std::pow(2., double(m_inputBitDepth[CHANNEL_TYPE_CHROMA] - 2.35*i)) ); // same formula as luma for i >= 1
+  }
 
-      for( int y = 0; y < m_CLASSIFICATION_BLK_SIZE + 5; y++ )
-      {
-        m_laplacian[i][y] = new int[m_CLASSIFICATION_BLK_SIZE + 5];
-      }
-    }
+  if (m_created) // NOTE(review): destroy() above leaves m_created false, so this early return looks unreachable - confirm nothing in between sets it
+  {
+    return;
   }
 
+  m_tempBuf.destroy();
+  m_tempBuf.create( format, Area( 0, 0, picWidth, picHeight ), maxCUWidth, MAX_ALF_FILTER_LENGTH >> 1, 0, false ); // picture-sized buffer
+  m_tempBuf2.destroy();
+  m_tempBuf2.create( format, Area( 0, 0, maxCUWidth + (MAX_ALF_PADDING_SIZE << 1), maxCUHeight + (MAX_ALF_PADDING_SIZE << 1) ), maxCUWidth, MAX_ALF_PADDING_SIZE, 0, false ); // one CTU plus MAX_ALF_PADDING_SIZE on each side
+
   // Classification
   if ( m_classifier == nullptr )
   {
     m_classifier = new AlfClassifier*[picHeight];
-    for( int i = 0; i < picHeight; i++ )
+    m_classifier[0] = new AlfClassifier[picWidth * picHeight]; // one contiguous allocation for the whole picture, owned through row 0
+
+    for (int i = 1; i < picHeight; i++)
     {
-      m_classifier[i] = new AlfClassifier[picWidth];
+      m_classifier[i] = m_classifier[0] + i * picWidth; // remaining rows are pointers into that single array
     }
   }
-}
 
-void AdaptiveLoopFilter::destroy()
-{
-  for( int i = 0; i < NUM_DIRECTIONS; i++ )
+  for (int filterSetIndex = 0; filterSetIndex < NUM_FIXED_FILTER_SETS; filterSetIndex++) // expand the fixed filter sets into per-class coefficient tables
   {
-    if( m_laplacian[i] )
+    for (int classIdx = 0; classIdx < MAX_NUM_ALF_CLASSES; classIdx++)
     {
-      for( int y = 0; y < m_CLASSIFICATION_BLK_SIZE + 5; y++ )
+      int fixedFilterIdx = m_classToFilterMapping[filterSetIndex][classIdx];
+      for (int i = 0; i < MAX_NUM_ALF_LUMA_COEFF - 1; i++)
       {
-        delete[] m_laplacian[i][y];
-        m_laplacian[i][y] = nullptr;
+       m_fixedFilterSetCoeffDec[filterSetIndex][classIdx * MAX_NUM_ALF_LUMA_COEFF + i] = m_fixedFilterSetCoeff[fixedFilterIdx][i];
       }
-
-      delete[] m_laplacian[i];
-      m_laplacian[i] = nullptr;
+      m_fixedFilterSetCoeffDec[filterSetIndex][classIdx * MAX_NUM_ALF_LUMA_COEFF + MAX_NUM_ALF_LUMA_COEFF - 1] = (1 << (m_NUM_BITS - 1)); // last tap: same constant reconstructCoeff() uses when isRdo is false
     }
   }
+  for (int i = 0; i < MAX_NUM_ALF_LUMA_COEFF * MAX_NUM_ALF_CLASSES; i++)
+  {
+    m_clipDefault[i] = m_alfClippingValues[CHANNEL_TYPE_LUMA][0]; // clip table ALFProcess pairs with the fixed filter sets: clipping entry 0 everywhere
+  }
+  m_created = true;
+}
 
-  if( m_classifier )
+void AdaptiveLoopFilter::destroy() // Releases what create() allocated; does nothing unless m_created is set
+{
+  if (!m_created)
   {
-    for( int i = 0; i < m_picHeight; i++ )
-    {
-      delete[] m_classifier[i];
-      m_classifier[i] = nullptr;
-    }
+    return;
+  }
 
+  if( m_classifier )
+  {
+    delete[] m_classifier[0]; // row 0 owns the single contiguous array allocated in create(); the other rows only point into it
     delete[] m_classifier;
     m_classifier = nullptr;
   }
 
   m_tempBuf.destroy();
+  m_tempBuf2.destroy();
+  m_filterShapes[CHANNEL_TYPE_LUMA].clear(); // create() push_back()s one shape per call; clearing keeps them from accumulating
+  m_filterShapes[CHANNEL_TYPE_CHROMA].clear();
+  m_created = false;
 }
 
-void AdaptiveLoopFilter::deriveClassification( AlfClassifier** classifier, const CPelBuf& srcLuma, const Area& blk )
+void AdaptiveLoopFilter::deriveClassification( AlfClassifier** classifier, const CPelBuf& srcLuma, const Area& blkDst, const Area& blk ) // Classifies 'blk' of srcLuma in tiles of at most m_CLASSIFICATION_BLK_SIZE; results are addressed relative to blkDst
 {
   int height = blk.pos().y + blk.height;
   int width = blk.pos().x + blk.width;
@@ -269,63 +675,20 @@ void AdaptiveLoopFilter::deriveClassification( AlfClassifier** classifier, const
     for( int j = blk.pos().x; j < width; j += m_CLASSIFICATION_BLK_SIZE )
     {
       int nWidth = std::min( j + m_CLASSIFICATION_BLK_SIZE, width ) - j;
-
-      m_deriveClassificationBlk( classifier, m_laplacian, srcLuma, Area( j, i, nWidth, nHeight ), m_inputBitDepth[CHANNEL_TYPE_LUMA] + 4 );
+      m_deriveClassificationBlk(classifier, m_laplacian, srcLuma, Area( j - blk.pos().x + blkDst.pos().x, i - blk.pos().y + blkDst.pos().y, nWidth, nHeight ), Area(j, i, nWidth, nHeight), m_inputBitDepth[CHANNEL_TYPE_LUMA] + 4 // first Area: tile shifted from blk's origin to blkDst's origin; second Area: tile position in srcLuma
+        , m_alfVBLumaCTUHeight // luma CTU height set in create()
+        , m_alfVBLumaPos // luma boundary row inside the CTU, set in create()
+      );
     }
   }
 }
-void AdaptiveLoopFilter::resetPCMBlkClassInfo(CodingStructure & cs,  AlfClassifier** classifier, const CPelBuf& srcLuma, const Area& blk)
-{
-  if ( !cs.sps->getPCMFilterDisableFlag() )
-  {
-    return;
-  }
-
-  int height = blk.pos().y + blk.height;
-  int width = blk.pos().x + blk.width;
-  const int clsSizeY = 4;
-  const int clsSizeX = 4;
-  int classIdx = m_ALF_UNUSED_CLASSIDX;
-  int transposeIdx = m_ALF_UNUSED_TRANSPOSIDX;
 
-  for( int i = blk.pos().y; i < height; i += m_CLASSIFICATION_BLK_SIZE )
-  {
-    int nHeight = std::min(i + m_CLASSIFICATION_BLK_SIZE, height) - i;
-
-    for( int j = blk.pos().x; j < width; j += m_CLASSIFICATION_BLK_SIZE )
-    {
-      int nWidth = std::min(j + m_CLASSIFICATION_BLK_SIZE, width) - j;
-      int posX = j;
-      int posY = i;
-
-      for( int subi = 0; subi < nHeight; subi += clsSizeY )
-      {
-        for( int subj = 0; subj < nWidth; subj += clsSizeX )
-        {
-          int yOffset = subi + posY;
-          int xOffset = subj + posX;
-          Position pos(xOffset, yOffset);
-
-          const CodingUnit* cu = cs.getCU(pos, CH_L);
-          if ( cu->ipcm )
-          {
-            AlfClassifier *cl0 = classifier[yOffset] + xOffset;
-            AlfClassifier *cl1 = classifier[yOffset + 1] + xOffset;
-            AlfClassifier *cl2 = classifier[yOffset + 2] + xOffset;
-            AlfClassifier *cl3 = classifier[yOffset + 3] + xOffset;
-            cl0[0] = cl0[1] = cl0[2] = cl0[3] =
-            cl1[0] = cl1[1] = cl1[2] = cl1[3] =
-            cl2[0] = cl2[1] = cl2[2] = cl2[3] =
-            cl3[0] = cl3[1] = cl3[2] = cl3[3] = AlfClassifier(classIdx, transposeIdx);
-          }
-        }
-      }
-    }
-  }
-}
-
-void AdaptiveLoopFilter::deriveClassificationBlk( AlfClassifier** classifier, int** laplacian[NUM_DIRECTIONS], const CPelBuf& srcLuma, const Area& blk, const int shift )
+void AdaptiveLoopFilter::deriveClassificationBlk(AlfClassifier **classifier, int **laplacian[NUM_DIRECTIONS],
+                                                 const CPelBuf &srcLuma, const Area &blkDst, const Area &blk,
+                                                 const int shift, const int vbCTUHeight, int vbPos)
 {
+  CHECK((vbCTUHeight & (vbCTUHeight - 1)) != 0, "vbCTUHeight must be a power of 2");
+
   static const int th[16] = { 0, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4 };
   const int stride = srcLuma.stride;
   const Pel* src = srcLuma.buf;
@@ -352,6 +715,15 @@ void AdaptiveLoopFilter::deriveClassificationBlk( AlfClassifier** classifier, in
     const Pel *src2 = &src[yoffset + stride];
     const Pel *src3 = &src[yoffset + stride * 2];
 
+    const int y = blkDst.pos().y - 2 + i;
+    if (y > 0 && (y & (vbCTUHeight - 1)) == vbPos - 2)
+    {
+      src3 = &src[yoffset + stride];
+    }
+    else if (y > 0 && (y & (vbCTUHeight - 1)) == vbPos)
+    {
+      src0 = &src[yoffset];
+    }
     int* pYver = laplacian[VER][i];
     int* pYhor = laplacian[HOR][i];
     int* pYdig0 = laplacian[DIAG0][i];
@@ -378,7 +750,6 @@ void AdaptiveLoopFilter::deriveClassificationBlk( AlfClassifier** classifier, in
         int jM6 = j - 6;
         int jM4 = j - 4;
         int jM2 = j - 2;
-
         pYver[jM6] += pYver[jM4] + pYver[jM2] + pYver[j];
         pYhor[jM6] += pYhor[jM4] + pYhor[jM2] + pYhor[j];
         pYdig0[jM6] += pYdig0[jM4] + pYdig0[jM2] + pYdig0[j];
@@ -415,13 +786,41 @@ void AdaptiveLoopFilter::deriveClassificationBlk( AlfClassifier** classifier, in
 
     for( int j = 0; j < blk.width; j += clsSizeX )
     {
-      int sumV = pYver[j] + pYver2[j] + pYver4[j] + pYver6[j];
-      int sumH = pYhor[j] + pYhor2[j] + pYhor4[j] + pYhor6[j];
-      int sumD0 = pYdig0[j] + pYdig02[j] + pYdig04[j] + pYdig06[j];
-      int sumD1 = pYdig1[j] + pYdig12[j] + pYdig14[j] + pYdig16[j];
+      int sumV = 0; int sumH = 0; int sumD0 = 0; int sumD1 = 0;
+      if (((i + blkDst.pos().y) % vbCTUHeight) == (vbPos - 4))
+      {
+        sumV = pYver[j] + pYver2[j] + pYver4[j];
+        sumH = pYhor[j] + pYhor2[j] + pYhor4[j];
+        sumD0 = pYdig0[j] + pYdig02[j] + pYdig04[j];
+        sumD1 = pYdig1[j] + pYdig12[j] + pYdig14[j];
+      }
+      else if (((i + blkDst.pos().y) % vbCTUHeight) == vbPos)
+      {
+        sumV = pYver2[j] + pYver4[j] + pYver6[j];
+        sumH = pYhor2[j] + pYhor4[j] + pYhor6[j];
+        sumD0 = pYdig02[j] + pYdig04[j] + pYdig06[j];
+        sumD1 = pYdig12[j] + pYdig14[j] + pYdig16[j];
+      }
+      else
+      {
+        sumV = pYver[j] + pYver2[j] + pYver4[j] + pYver6[j];
+        sumH = pYhor[j] + pYhor2[j] + pYhor4[j] + pYhor6[j];
+        sumD0 = pYdig0[j] + pYdig02[j] + pYdig04[j] + pYdig06[j];
+        sumD1 = pYdig1[j] + pYdig12[j] + pYdig14[j] + pYdig16[j];
+      }
 
       int tempAct = sumV + sumH;
-      int activity = (Pel)Clip3<int>( 0, maxActivity, ( tempAct * 64 ) >> shift );
+      int activity = 0;
+
+      const int y = (i + blkDst.pos().y) & (vbCTUHeight - 1);
+      if (y == vbPos - 4 || y == vbPos)
+      {
+        activity = (Pel)Clip3<int>(0, maxActivity, (tempAct * 96) >> shift);
+      }
+      else
+      {
+        activity = (Pel)Clip3<int>(0, maxActivity, (tempAct * 64) >> shift);
+      }
       int classIdx = th[activity];
 
       int hv1, hv0, d1, d0, hvd1, hvd0;
@@ -450,7 +849,7 @@ void AdaptiveLoopFilter::deriveClassificationBlk( AlfClassifier** classifier, in
         d0 = sumD0;
         dirTempD = 2;
       }
-      if( d1*hv0 > hv1*d0 )
+      if( (uint32_t)d1 * (uint32_t)hv0 > (uint32_t)hv1 * (uint32_t)d0 )
       {
         hvd1 = d1;
         hvd0 = d0;
@@ -483,8 +882,8 @@ void AdaptiveLoopFilter::deriveClassificationBlk( AlfClassifier** classifier, in
       static const int transposeTable[8] = { 0, 1, 0, 2, 2, 3, 1, 3 };
       int transposeIdx = transposeTable[mainDirection * 2 + ( secondaryDirection >> 1 )];
 
-      int yOffset = i + posY;
-      int xOffset = j + posX;
+      int yOffset = i + blkDst.pos().y;
+      int xOffset = j + blkDst.pos().x;
 
       AlfClassifier *cl0 = classifier[yOffset] + xOffset;
       AlfClassifier *cl1 = classifier[yOffset + 1] + xOffset;
@@ -496,17 +895,18 @@ void AdaptiveLoopFilter::deriveClassificationBlk( AlfClassifier** classifier, in
 }
 
 template<AlfFilterType filtType>
-void AdaptiveLoopFilter::filterBlk( AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, const ClpRng& clpRng, CodingStructure& cs )
+void AdaptiveLoopFilter::filterBlk(AlfClassifier **classifier, const PelUnitBuf &recDst, const CPelUnitBuf &recSrc,
+                                   const Area &blkDst, const Area &blk, const ComponentID compId,
+                                   const short *filterSet, const short *fClipSet, const ClpRng &clpRng,
+                                   CodingStructure &cs, const int vbCTUHeight, int vbPos)
 {
+  CHECK((vbCTUHeight & (vbCTUHeight - 1)) != 0, "vbCTUHeight must be a power of 2");
+
   const bool bChroma = isChroma( compId );
   if( bChroma )
   {
     CHECK( filtType != 0, "Chroma needs to have filtType == 0" );
   }
-  const SPS*     sps = cs.slice->getSPS();
-  bool isDualTree =CS::isDualITree(cs);
-  bool isPCMFilterDisabled = sps->getPCMFilterDisableFlag();
-  ChromaFormat nChromaFormat = sps->getChromaFormatIdc();
 
   const CPelBuf srcLuma = recSrc.get( compId );
   PelBuf dstLuma = recDst.get( compId );
@@ -520,12 +920,13 @@ void AdaptiveLoopFilter::filterBlk( AlfClassifier** classifier, const PelUnitBuf
   const int endWidth = blk.x + blk.width;
 
   const Pel* src = srcLuma.buf;
-  Pel* dst = dstLuma.buf + startHeight * dstStride;
+  Pel* dst = dstLuma.buf + blkDst.y * dstStride;
 
   const Pel *pImgYPad0, *pImgYPad1, *pImgYPad2, *pImgYPad3, *pImgYPad4, *pImgYPad5, *pImgYPad6;
   const Pel *pImg0, *pImg1, *pImg2, *pImg3, *pImg4, *pImg5, *pImg6;
 
-  short *coef = filterSet;
+  const short *coef = filterSet;
+  const short *clip = fClipSet;
 
   const int shift = m_NUM_BITS - 1;
 
@@ -535,7 +936,6 @@ void AdaptiveLoopFilter::filterBlk( AlfClassifier** classifier, const PelUnitBuf
   const int clsSizeY = 4;
   const int clsSizeX = 4;
 
-  bool pcmFlags2x2[4] = {0,0,0,0};
 
   CHECK( startHeight % clsSizeY, "Wrong startHeight in filtering" );
   CHECK( startWidth % clsSizeX, "Wrong startWidth in filtering" );
@@ -547,7 +947,8 @@ void AdaptiveLoopFilter::filterBlk( AlfClassifier** classifier, const PelUnitBuf
   int dstStride2 = dstStride * clsSizeY;
   int srcStride2 = srcStride * clsSizeY;
 
-  std::vector<Pel> filterCoeff( MAX_NUM_ALF_LUMA_COEFF );
+  std::array<int, MAX_NUM_ALF_LUMA_COEFF> filterCoeff;
+  std::array<int, MAX_NUM_ALF_LUMA_COEFF> filterClipp;
 
   pImgYPad0 = src + startHeight * srcStride + startWidth;
   pImgYPad1 = pImgYPad0 + srcStride;
@@ -557,14 +958,14 @@ void AdaptiveLoopFilter::filterBlk( AlfClassifier** classifier, const PelUnitBuf
   pImgYPad5 = pImgYPad3 + srcStride;
   pImgYPad6 = pImgYPad4 - srcStride;
 
-  Pel* pRec0 = dst + startWidth;
+  Pel* pRec0 = dst + blkDst.x;
   Pel* pRec1 = pRec0 + dstStride;
 
   for( int i = 0; i < endHeight - startHeight; i += clsSizeY )
   {
     if( !bChroma )
     {
-      pClass = classifier[startHeight + i] + startWidth;
+      pClass = classifier[blkDst.y + i] + blkDst.x;
     }
 
     for( int j = 0; j < endWidth - startWidth; j += clsSizeX )
@@ -573,54 +974,31 @@ void AdaptiveLoopFilter::filterBlk( AlfClassifier** classifier, const PelUnitBuf
       {
         AlfClassifier& cl = pClass[j];
         transposeIdx = cl.transposeIdx;
-        if( isPCMFilterDisabled && cl.classIdx== m_ALF_UNUSED_CLASSIDX && transposeIdx== m_ALF_UNUSED_TRANSPOSIDX )
-        {
-          continue;
-        }
         coef = filterSet + cl.classIdx * MAX_NUM_ALF_LUMA_COEFF;
-      }
-      else if( isPCMFilterDisabled )
-      {
-        int  blkX, blkY;
-        bool *flags = pcmFlags2x2;
-
-        // check which chroma 2x2 blocks use PCM
-        // chroma PCM may not be aligned with 4x4 ALF processing grid
-        for( blkY=0; blkY<4; blkY+=2 )
-        {
-          for( blkX=0; blkX<4; blkX+=2 )
-          {
-            Position pos(j+startWidth+blkX, i+startHeight+blkY);
-            CodingUnit* cu = isDualTree ? cs.getCU(pos, CH_C) : cs.getCU(recalcPosition(nChromaFormat, CH_C, CH_L, pos), CH_L);
-            *flags++ = cu->ipcm ? 1 : 0;
-          }
-        }
-
-        // skip entire 4x4 if all chroma 2x2 blocks use PCM
-        if( pcmFlags2x2[0] && pcmFlags2x2[1] && pcmFlags2x2[2] && pcmFlags2x2[3] )
-        {
-          continue;
-        }
+        clip = fClipSet + cl.classIdx * MAX_NUM_ALF_LUMA_COEFF;
       }
 
-
       if( filtType == ALF_FILTER_7 )
       {
         if( transposeIdx == 1 )
         {
           filterCoeff = { coef[9], coef[4], coef[10], coef[8], coef[1], coef[5], coef[11], coef[7], coef[3], coef[0], coef[2], coef[6], coef[12] };
+          filterClipp = { clip[9], clip[4], clip[10], clip[8], clip[1], clip[5], clip[11], clip[7], clip[3], clip[0], clip[2], clip[6], clip[12] };
         }
         else if( transposeIdx == 2 )
         {
           filterCoeff = { coef[0], coef[3], coef[2], coef[1], coef[8], coef[7], coef[6], coef[5], coef[4], coef[9], coef[10], coef[11], coef[12] };
+          filterClipp = { clip[0], clip[3], clip[2], clip[1], clip[8], clip[7], clip[6], clip[5], clip[4], clip[9], clip[10], clip[11], clip[12] };
         }
         else if( transposeIdx == 3 )
         {
           filterCoeff = { coef[9], coef[8], coef[10], coef[4], coef[3], coef[7], coef[11], coef[5], coef[1], coef[0], coef[2], coef[6], coef[12] };
+          filterClipp = { clip[9], clip[8], clip[10], clip[4], clip[3], clip[7], clip[11], clip[5], clip[1], clip[0], clip[2], clip[6], clip[12] };
         }
         else
         {
           filterCoeff = { coef[0], coef[1], coef[2], coef[3], coef[4], coef[5], coef[6], coef[7], coef[8], coef[9], coef[10], coef[11], coef[12] };
+          filterClipp = { clip[0], clip[1], clip[2], clip[3], clip[4], clip[5], clip[6], clip[7], clip[8], clip[9], clip[10], clip[11], clip[12] };
         }
       }
       else
@@ -628,18 +1006,22 @@ void AdaptiveLoopFilter::filterBlk( AlfClassifier** classifier, const PelUnitBuf
         if( transposeIdx == 1 )
         {
           filterCoeff = { coef[4], coef[1], coef[5], coef[3], coef[0], coef[2], coef[6] };
+          filterClipp = { clip[4], clip[1], clip[5], clip[3], clip[0], clip[2], clip[6] };
         }
         else if( transposeIdx == 2 )
         {
           filterCoeff = { coef[0], coef[3], coef[2], coef[1], coef[4], coef[5], coef[6] };
+          filterClipp = { clip[0], clip[3], clip[2], clip[1], clip[4], clip[5], clip[6] };
         }
         else if( transposeIdx == 3 )
         {
           filterCoeff = { coef[4], coef[3], coef[5], coef[1], coef[0], coef[2], coef[6] };
+          filterClipp = { clip[4], clip[3], clip[5], clip[1], clip[0], clip[2], clip[6] };
         }
         else
         {
           filterCoeff = { coef[0], coef[1], coef[2], coef[3], coef[4], coef[5], coef[6] };
+          filterClipp = { clip[0], clip[1], clip[2], clip[3], clip[4], clip[5], clip[6] };
         }
       }
 
@@ -655,59 +1037,79 @@ void AdaptiveLoopFilter::filterBlk( AlfClassifier** classifier, const PelUnitBuf
 
         pRec1 = pRec0 + j + ii * dstStride;
 
+        const int yVb = (blkDst.y + i + ii) & (vbCTUHeight - 1);
+        if (yVb < vbPos && (yVb >= vbPos - (bChroma ? 2 : 4)))   // above
+        {
+          pImg1 = (yVb == vbPos - 1) ? pImg0 : pImg1;
+          pImg3 = (yVb >= vbPos - 2) ? pImg1 : pImg3;
+          pImg5 = (yVb >= vbPos - 3) ? pImg3 : pImg5;
+
+          pImg2 = (yVb == vbPos - 1) ? pImg0 : pImg2;
+          pImg4 = (yVb >= vbPos - 2) ? pImg2 : pImg4;
+          pImg6 = (yVb >= vbPos - 3) ? pImg4 : pImg6;
+        }
+        else if (yVb >= vbPos && (yVb <= vbPos + (bChroma ? 1 : 3)))   // bottom
+        {
+          pImg2 = (yVb == vbPos) ? pImg0 : pImg2;
+          pImg4 = (yVb <= vbPos + 1) ? pImg2 : pImg4;
+          pImg6 = (yVb <= vbPos + 2) ? pImg4 : pImg6;
+
+          pImg1 = (yVb == vbPos) ? pImg0 : pImg1;
+          pImg3 = (yVb <= vbPos + 1) ? pImg1 : pImg3;
+          pImg5 = (yVb <= vbPos + 2) ? pImg3 : pImg5;
+        }
+#if JVET_Q0150
+        bool isNearVBabove = yVb < vbPos && (yVb >= vbPos - 1);
+        bool isNearVBbelow = yVb >= vbPos && (yVb <= vbPos);
+#endif
         for( int jj = 0; jj < clsSizeX; jj++ )
         {
 
-          // skip 2x2 PCM chroma blocks
-          if( bChroma && isPCMFilterDisabled )
-          {
-            if( pcmFlags2x2[2*(ii>>1) + (jj>>1)] )
-            {
-              pImg0++;
-              pImg1++;
-              pImg2++;
-              pImg3++;
-              pImg4++;
-              pImg5++;
-              pImg6++;
-              continue;
-            }
-          }
 
           int sum = 0;
+          const Pel curr = pImg0[+0];
           if( filtType == ALF_FILTER_7 )
           {
-            sum += filterCoeff[0] * ( pImg5[0] + pImg6[0] );
-
-            sum += filterCoeff[1] * ( pImg3[+1] + pImg4[-1] );
-            sum += filterCoeff[2] * ( pImg3[+0] + pImg4[+0] );
-            sum += filterCoeff[3] * ( pImg3[-1] + pImg4[+1] );
-
-            sum += filterCoeff[4] * ( pImg1[+2] + pImg2[-2] );
-            sum += filterCoeff[5] * ( pImg1[+1] + pImg2[-1] );
-            sum += filterCoeff[6] * ( pImg1[+0] + pImg2[+0] );
-            sum += filterCoeff[7] * ( pImg1[-1] + pImg2[+1] );
-            sum += filterCoeff[8] * ( pImg1[-2] + pImg2[+2] );
-
-            sum += filterCoeff[9] * ( pImg0[+3] + pImg0[-3] );
-            sum += filterCoeff[10] * ( pImg0[+2] + pImg0[-2] );
-            sum += filterCoeff[11] * ( pImg0[+1] + pImg0[-1] );
-            sum += filterCoeff[12] * ( pImg0[+0] );
+            sum += filterCoeff[0] * ( clipALF(filterClipp[0], curr, pImg5[+0], pImg6[+0]) );
+
+            sum += filterCoeff[1] * ( clipALF(filterClipp[1], curr, pImg3[+1], pImg4[-1]) );
+            sum += filterCoeff[2] * ( clipALF(filterClipp[2], curr, pImg3[+0], pImg4[+0]) );
+            sum += filterCoeff[3] * ( clipALF(filterClipp[3], curr, pImg3[-1], pImg4[+1]) );
+
+            sum += filterCoeff[4] * ( clipALF(filterClipp[4], curr, pImg1[+2], pImg2[-2]) );
+            sum += filterCoeff[5] * ( clipALF(filterClipp[5], curr, pImg1[+1], pImg2[-1]) );
+            sum += filterCoeff[6] * ( clipALF(filterClipp[6], curr, pImg1[+0], pImg2[+0]) );
+            sum += filterCoeff[7] * ( clipALF(filterClipp[7], curr, pImg1[-1], pImg2[+1]) );
+            sum += filterCoeff[8] * ( clipALF(filterClipp[8], curr, pImg1[-2], pImg2[+2]) );
+
+            sum += filterCoeff[9] * ( clipALF(filterClipp[9], curr, pImg0[+3], pImg0[-3]) );
+            sum += filterCoeff[10] * ( clipALF(filterClipp[10], curr, pImg0[+2], pImg0[-2]) );
+            sum += filterCoeff[11] * ( clipALF(filterClipp[11], curr, pImg0[+1], pImg0[-1]) );
           }
           else
           {
-            sum += filterCoeff[0] * ( pImg3[+0] + pImg4[+0] );
+            sum += filterCoeff[0] * ( clipALF(filterClipp[0], curr, pImg3[+0], pImg4[+0]) );
 
-            sum += filterCoeff[1] * ( pImg1[+1] + pImg2[-1] );
-            sum += filterCoeff[2] * ( pImg1[+0] + pImg2[+0] );
-            sum += filterCoeff[3] * ( pImg1[-1] + pImg2[+1] );
+            sum += filterCoeff[1] * ( clipALF(filterClipp[1], curr, pImg1[+1], pImg2[-1]) );
+            sum += filterCoeff[2] * ( clipALF(filterClipp[2], curr, pImg1[+0], pImg2[+0]) );
+            sum += filterCoeff[3] * ( clipALF(filterClipp[3], curr, pImg1[-1], pImg2[+1]) );
 
-            sum += filterCoeff[4] * ( pImg0[+2] + pImg0[-2] );
-            sum += filterCoeff[5] * ( pImg0[+1] + pImg0[-1] );
-            sum += filterCoeff[6] * ( pImg0[+0] );
+            sum += filterCoeff[4] * ( clipALF(filterClipp[4], curr, pImg0[+2], pImg0[-2]) );
+            sum += filterCoeff[5] * ( clipALF(filterClipp[5], curr, pImg0[+1], pImg0[-1]) );
           }
-
+#if JVET_Q0150
+          if (!(isNearVBabove || isNearVBbelow))
+          {
+            sum = (sum + offset) >> shift;
+          }
+          else
+          {
+            sum = (sum + offset) >> (shift + 3);
+          }
+#else
           sum = ( sum + offset ) >> shift;
+#endif
+          sum += curr;
           pRec1[jj] = ClipPel( sum, clpRng );
 
           pImg0++;
diff --git a/source/Lib/CommonLib/AdaptiveLoopFilter.h b/source/Lib/CommonLib/AdaptiveLoopFilter.h
index 92928fee096080ce0efaf7ab6c5eaef23aae135e..f93fd8e6c72fb1a6fc1b391c4da527acf33945cd 100644
--- a/source/Lib/CommonLib/AdaptiveLoopFilter.h
+++ b/source/Lib/CommonLib/AdaptiveLoopFilter.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -42,6 +42,7 @@
 
 #include "Unit.h"
 #include "UnitTools.h"
+
 struct AlfClassifier
 {
   AlfClassifier() {}
@@ -66,6 +67,14 @@ enum Direction
 class AdaptiveLoopFilter
 {
 public:
+  static inline int clipALF(const int clip, const short ref, const short val0, const short val1) // Sum of (val0 - ref) and (val1 - ref), each clamped to [-clip, +clip] before adding
+  {
+    return Clip3<int>(-clip, +clip, val0-ref) + Clip3<int>(-clip, +clip, val1-ref);
+  }
+
+  static constexpr int AlfNumClippingValues[MAX_NUM_CHANNEL_TYPE] = { 4, 4 };
+  static constexpr int MaxAlfNumClippingValues = 4;
+
   static constexpr int   m_NUM_BITS = 8;
   static constexpr int   m_CLASSIFICATION_BLK_SIZE = 32;  //non-normative, local buffer size
   static constexpr int m_ALF_UNUSED_CLASSIDX = 255;
@@ -73,24 +82,32 @@ public:
 
   AdaptiveLoopFilter();
   virtual ~AdaptiveLoopFilter() {}
-
-  void ALFProcess( CodingStructure& cs, AlfSliceParam& alfSliceParam );
-  void reconstructCoeff( AlfSliceParam& alfSliceParam, ChannelType channel, const bool bRedo = false );
+  void reconstructCoeffAPSs(CodingStructure& cs, bool luma, bool chroma, bool isRdo);
+  void reconstructCoeff(AlfParam& alfParam, ChannelType channel, const bool isRdo, const bool isRedo = false);
+  void ALFProcess(CodingStructure& cs);
   void create( const int picWidth, const int picHeight, const ChromaFormat format, const int maxCUWidth, const int maxCUHeight, const int maxCUDepth, const int inputBitDepth[MAX_NUM_CHANNEL_TYPE] );
   void destroy();
-  static void deriveClassificationBlk( AlfClassifier** classifier, int** laplacian[NUM_DIRECTIONS], const CPelBuf& srcLuma, const Area& blk, const int shift );
-  void deriveClassification( AlfClassifier** classifier, const CPelBuf& srcLuma, const Area& blk );
-  void resetPCMBlkClassInfo(CodingStructure & cs, AlfClassifier** classifier, const CPelBuf& srcLuma, const Area& blk);
+  static void deriveClassificationBlk(AlfClassifier **classifier, int **laplacian[NUM_DIRECTIONS],
+                                      const CPelBuf &srcLuma, const Area &blkDst, const Area &blk, const int shift,
+                                      const int vbCTUHeight, int vbPos);
+  void deriveClassification( AlfClassifier** classifier, const CPelBuf& srcLuma, const Area& blkDst, const Area& blk );
   template<AlfFilterType filtType>
-  static void filterBlk( AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, const ClpRng& clpRng, CodingStructure& cs );
-  inline static int getMaxGolombIdx( AlfFilterType filterType )
-  {
-    return filterType == ALF_FILTER_5 ? 2 : 3;
-  }
+  static void filterBlk(AlfClassifier **classifier, const PelUnitBuf &recDst, const CPelUnitBuf &recSrc,
+                        const Area &blkDst, const Area &blk, const ComponentID compId, const short *filterSet,
+                        const short *fClipSet, const ClpRng &clpRng, CodingStructure &cs, const int vbCTUHeight,
+                        int vbPos);
+  void (*m_deriveClassificationBlk)(AlfClassifier **classifier, int **laplacian[NUM_DIRECTIONS], const CPelBuf &srcLuma,
+                                    const Area &blkDst, const Area &blk, const int shift, const int vbCTUHeight,
+                                    int vbPos);
 
-  void( *m_deriveClassificationBlk )( AlfClassifier** classifier, int** laplacian[NUM_DIRECTIONS], const CPelBuf& srcLuma, const Area& blk, const int shift );
-  void( *m_filter5x5Blk )( AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, const ClpRng& clpRng, CodingStructure& cs );
-  void( *m_filter7x7Blk )( AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, const ClpRng& clpRng, CodingStructure& cs );
+  void (*m_filter5x5Blk)(AlfClassifier **classifier, const PelUnitBuf &recDst, const CPelUnitBuf &recSrc,
+                         const Area &blkDst, const Area &blk, const ComponentID compId, const short *filterSet,
+                         const short *fClipSet, const ClpRng &clpRng, CodingStructure &cs, const int vbCTUHeight,
+                         int vbPos);
+  void (*m_filter7x7Blk)(AlfClassifier **classifier, const PelUnitBuf &recDst, const CPelUnitBuf &recSrc,
+                         const Area &blkDst, const Area &blk, const ComponentID compId, const short *filterSet,
+                         const short *fClipSet, const ClpRng &clpRng, CodingStructure &cs, const int vbCTUHeight,
+                         int vbPos);
 
 #ifdef TARGET_SIMD_X86
   void initAdaptiveLoopFilterX86();
@@ -99,12 +116,29 @@ public:
 #endif
 
 protected:
+  bool isCrossedByVirtualBoundaries( const CodingStructure& cs, const int xPos, const int yPos, const int width, const int height, bool& clipTop, bool& clipBottom, bool& clipLeft, bool& clipRight, int& numHorVirBndry, int& numVerVirBndry, int horVirBndryPos[], int verVirBndryPos[], int& rasterSliceAlfPad );
+  static const int             m_classToFilterMapping[NUM_FIXED_FILTER_SETS][MAX_NUM_ALF_CLASSES];
+  static const int             m_fixedFilterSetCoeff[ALF_FIXED_FILTER_NUM][MAX_NUM_ALF_LUMA_COEFF];
+  short                        m_fixedFilterSetCoeffDec[NUM_FIXED_FILTER_SETS][MAX_NUM_ALF_CLASSES * MAX_NUM_ALF_LUMA_COEFF];
+  short                        m_coeffApsLuma[ALF_CTB_MAX_NUM_APS][MAX_NUM_ALF_LUMA_COEFF * MAX_NUM_ALF_CLASSES];
+  short                        m_clippApsLuma[ALF_CTB_MAX_NUM_APS][MAX_NUM_ALF_LUMA_COEFF * MAX_NUM_ALF_CLASSES];
+  short                        m_clipDefault[MAX_NUM_ALF_CLASSES * MAX_NUM_ALF_LUMA_COEFF];
+  bool                         m_created = false;
+  short                        m_chromaCoeffFinal[MAX_NUM_ALF_ALTERNATIVES_CHROMA][MAX_NUM_ALF_CHROMA_COEFF];
+  AlfParam*                    m_alfParamChroma;
+  Pel                          m_alfClippingValues[MAX_NUM_CHANNEL_TYPE][MaxAlfNumClippingValues];
   std::vector<AlfFilterShape>  m_filterShapes[MAX_NUM_CHANNEL_TYPE];
   AlfClassifier**              m_classifier;
   short                        m_coeffFinal[MAX_NUM_ALF_CLASSES * MAX_NUM_ALF_LUMA_COEFF];
+  short                        m_clippFinal[MAX_NUM_ALF_CLASSES * MAX_NUM_ALF_LUMA_COEFF];
+  short                        m_chromaClippFinal[MAX_NUM_ALF_ALTERNATIVES_CHROMA][MAX_NUM_ALF_CHROMA_COEFF];
   int**                        m_laplacian[NUM_DIRECTIONS];
-  uint8_t*                       m_ctuEnableFlag[MAX_NUM_COMPONENT];
+  int *                        m_laplacianPtr[NUM_DIRECTIONS][m_CLASSIFICATION_BLK_SIZE + 5];
+  int m_laplacianData[NUM_DIRECTIONS][m_CLASSIFICATION_BLK_SIZE + 5][m_CLASSIFICATION_BLK_SIZE + 5];
+  uint8_t*                     m_ctuEnableFlag[MAX_NUM_COMPONENT];
+  uint8_t*                     m_ctuAlternative[MAX_NUM_COMPONENT];
   PelStorage                   m_tempBuf;
+  PelStorage                   m_tempBuf2;
   int                          m_inputBitDepth[MAX_NUM_CHANNEL_TYPE];
   int                          m_picWidth;
   int                          m_picHeight;
@@ -114,6 +148,10 @@ protected:
   int                          m_numCTUsInWidth;
   int                          m_numCTUsInHeight;
   int                          m_numCTUsInPic;
+  int                          m_alfVBLumaPos;
+  int                          m_alfVBChmaPos;
+  int                          m_alfVBLumaCTUHeight;
+  int                          m_alfVBChmaCTUHeight;
   ChromaFormat                 m_chromaFormat;
   ClpRngs                      m_clpRngs;
 };
diff --git a/source/Lib/CommonLib/AffineGradientSearch.cpp b/source/Lib/CommonLib/AffineGradientSearch.cpp
index d91e938d9d18026868d9fced6d4795fc9dd834a5..90d939ac738f2a25b125f1952e300b6e4fa5f857 100644
--- a/source/Lib/CommonLib/AffineGradientSearch.cpp
+++ b/source/Lib/CommonLib/AffineGradientSearch.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/AffineGradientSearch.h b/source/Lib/CommonLib/AffineGradientSearch.h
index 40adbcdf0189c5c5d4191349a9407f3d4b5da588..380db32074491e625fce3d06ec4e02bd18e0ab9f 100644
--- a/source/Lib/CommonLib/AffineGradientSearch.h
+++ b/source/Lib/CommonLib/AffineGradientSearch.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/AlfParameters.h b/source/Lib/CommonLib/AlfParameters.h
new file mode 100644
index 0000000000000000000000000000000000000000..abaef3a4968ab6d5dd6535656592652056abe129
--- /dev/null
+++ b/source/Lib/CommonLib/AlfParameters.h
@@ -0,0 +1,236 @@
+/* The copyright in this software is being made available under the BSD
+ * License, included below. This software may be subject to other third party
+ * and contributor rights, including patent rights, and no such rights are
+ * granted under this license.
+ *
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *  * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
+ *    be used to endorse or promote products derived from this software without
+ *    specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file     AlfParameters.h
+    \brief    Define types for storing ALF parameters
+*/
+
+#ifndef __ALFPARAMETERS__
+#define __ALFPARAMETERS__
+
+#include <vector>
+#include "CommonDef.h"
+
+//! \ingroup AlfParameters
+//! \{
+
+enum AlfFilterType           // tag stored in AlfFilterShape::filterType
+{
+  ALF_FILTER_5,              // assigned by AlfFilterShape when constructed with size 5
+  ALF_FILTER_7,              // assigned by AlfFilterShape when constructed with size 7
+  ALF_NUM_OF_FILTER_TYPES    // count of the types above; AlfFilterShape also stores it for an unsupported size
+};
+
+struct AlfFilterShape   // tap layout of one ALF filter: coefficient-index pattern plus per-coefficient weights
+{
+  AlfFilterShape( int size )   // size: filter length, 5 or 7; any other value is rejected by the CHECK below
+    : filterLength( size ),
+    numCoeff( size * size / 4 + 1 ),     // 7 for size 5, 13 for size 7
+    filterSize( size * size / 2 + 1 )    // 13 for size 5, 25 for size 7 (number of entries in pattern)
+  {
+    if( size == 5 )
+    {
+      pattern = {   // coefficient index for every tap position; the layout is point-symmetric around index 6
+                 0,
+             1,  2,  3,
+         4,  5,  6,  5,  4,
+             3,  2,  1,
+                 0
+      };
+
+      weights = {   // 8 entries: 2 for indices that occur twice in pattern, 1 for the last two
+                 2,
+              2, 2, 2,
+           2, 2, 1, 1
+      };
+
+      filterType = ALF_FILTER_5;
+    }
+    else if( size == 7 )
+    {
+      pattern = {   // coefficient index for every tap position; the layout is point-symmetric around index 12
+                     0,
+                 1,  2,  3,
+             4,  5,  6,  7,  8,
+         9, 10, 11, 12, 11, 10, 9,
+             8,  7,  6,  5,  4,
+                 3,  2,  1,
+                     0
+      };
+
+      weights = {   // 14 entries: 2 for indices that occur twice in pattern, 1 for the last two
+                    2,
+                2,  2,  2,
+            2,  2,  2,  2,  2,
+        2,  2,  2,  1,  1
+      };
+
+      filterType = ALF_FILTER_7;
+    }
+    else
+    {
+      filterType = ALF_NUM_OF_FILTER_TYPES;
+      CHECK( true, "Wrong ALF filter shape" );   // CHECK reports when its condition holds; the former constant 0 could never trigger
+    }
+  }
+
+  AlfFilterType filterType;
+  int filterLength;
+  int numCoeff;      //TODO: check whether we need both numCoeff and filterSize
+  int filterSize;
+  std::vector<int> pattern;
+  std::vector<int> weights;
+};
+
+struct AlfParam   // plain container for one set of ALF parameters; copy and comparison are done field by field below
+{
+  bool                         enabledFlag[MAX_NUM_COMPONENT];                          // alf_slice_enable_flag, alf_chroma_idc
+#if JVET_Q0249_ALF_CHROMA_CLIPFLAG
+  bool                         nonLinearFlag[MAX_NUM_CHANNEL_TYPE];                     // alf_[luma/chroma]_clip_flag
+#else
+  bool                         nonLinearFlag[MAX_NUM_CHANNEL_TYPE][MAX_NUM_ALF_ALTERNATIVES_CHROMA]; // alf_[luma/chroma]_clip_flag
+#endif
+  short                        lumaCoeff[MAX_NUM_ALF_CLASSES * MAX_NUM_ALF_LUMA_COEFF]; // alf_coeff_luma_delta[i][j]
+  short                        lumaClipp[MAX_NUM_ALF_CLASSES * MAX_NUM_ALF_LUMA_COEFF]; // alf_clipp_luma_[i][j]
+  int                          numAlternativesChroma;                                                  // alf_chroma_num_alts_minus_one + 1
+  short                        chromaCoeff[MAX_NUM_ALF_ALTERNATIVES_CHROMA][MAX_NUM_ALF_CHROMA_COEFF]; // alf_coeff_chroma[i]
+  short                        chromaClipp[MAX_NUM_ALF_ALTERNATIVES_CHROMA][MAX_NUM_ALF_CHROMA_COEFF]; // alf_clipp_chroma[i]
+  short                        filterCoeffDeltaIdx[MAX_NUM_ALF_CLASSES];                // filter_coeff_delta[i]
+  bool                         alfLumaCoeffFlag[MAX_NUM_ALF_CLASSES];                   // alf_luma_coeff_flag[i]
+  int                          numLumaFilters;                                          // number_of_filters_minus1 + 1
+  bool                         alfLumaCoeffDeltaFlag;                                   // alf_luma_coeff_delta_flag
+  std::vector<AlfFilterShape>* filterShapes;                                            // not touched by reset() and not compared by operator==; copied as a pointer by operator=
+  bool                         newFilterFlag[MAX_NUM_CHANNEL_TYPE];
+
+  AlfParam()   // starts from the defaults established by reset(); filterShapes is left uninitialised here
+  {
+    reset();
+  }
+
+  void reset()   // restores defaults: flags off, coefficients zero, one luma filter, one chroma alternative
+  {
+    std::memset( enabledFlag, false, sizeof( enabledFlag ) );
+    std::memset( nonLinearFlag, false, sizeof( nonLinearFlag ) );
+    std::memset( lumaCoeff, 0, sizeof( lumaCoeff ) );
+    std::memset( lumaClipp, 0, sizeof( lumaClipp ) );
+    numAlternativesChroma = 1;
+    std::memset( chromaCoeff, 0, sizeof( chromaCoeff ) );
+    std::memset( chromaClipp, 0, sizeof( chromaClipp ) );
+    std::memset( filterCoeffDeltaIdx, 0, sizeof( filterCoeffDeltaIdx ) );
+    std::memset( alfLumaCoeffFlag, true, sizeof( alfLumaCoeffFlag ) );   // the only array whose default is "all set"
+    numLumaFilters = 1;
+    alfLumaCoeffDeltaFlag = false;
+    std::memset( newFilterFlag, 0, sizeof( newFilterFlag ) );
+  }
+
+  const AlfParam& operator = ( const AlfParam& src )   // copies every member, including the filterShapes pointer (shallow)
+  {
+    std::memcpy( enabledFlag, src.enabledFlag, sizeof( enabledFlag ) );
+    std::memcpy( nonLinearFlag, src.nonLinearFlag, sizeof( nonLinearFlag ) );
+    std::memcpy( lumaCoeff, src.lumaCoeff, sizeof( lumaCoeff ) );
+    std::memcpy( lumaClipp, src.lumaClipp, sizeof( lumaClipp ) );
+    numAlternativesChroma = src.numAlternativesChroma;
+    std::memcpy( chromaCoeff, src.chromaCoeff, sizeof( chromaCoeff ) );
+    std::memcpy( chromaClipp, src.chromaClipp, sizeof( chromaClipp ) );
+    std::memcpy( filterCoeffDeltaIdx, src.filterCoeffDeltaIdx, sizeof( filterCoeffDeltaIdx ) );
+    std::memcpy( alfLumaCoeffFlag, src.alfLumaCoeffFlag, sizeof( alfLumaCoeffFlag ) );
+    numLumaFilters = src.numLumaFilters;
+    alfLumaCoeffDeltaFlag = src.alfLumaCoeffDeltaFlag;
+    filterShapes = src.filterShapes;
+    std::memcpy(newFilterFlag, src.newFilterFlag, sizeof(newFilterFlag));
+    return *this;
+  }
+
+  bool operator==( const AlfParam& other ) const   // const-qualified so const objects can be compared; filterShapes is not part of the comparison
+  {
+    if( memcmp( enabledFlag, other.enabledFlag, sizeof( enabledFlag ) ) )
+    {
+      return false;
+    }
+    if( memcmp( nonLinearFlag, other.nonLinearFlag, sizeof( nonLinearFlag ) ) )
+    {
+      return false;
+    }
+    if( memcmp( lumaCoeff, other.lumaCoeff, sizeof( lumaCoeff ) ) )
+    {
+      return false;
+    }
+    if( memcmp( lumaClipp, other.lumaClipp, sizeof( lumaClipp ) ) )
+    {
+      return false;
+    }
+    if( memcmp( chromaCoeff, other.chromaCoeff, sizeof( chromaCoeff ) ) )
+    {
+      return false;
+    }
+    if( memcmp( chromaClipp, other.chromaClipp, sizeof( chromaClipp ) ) )
+    {
+      return false;
+    }
+    if( memcmp( filterCoeffDeltaIdx, other.filterCoeffDeltaIdx, sizeof( filterCoeffDeltaIdx ) ) )
+    {
+      return false;
+    }
+    if( memcmp( alfLumaCoeffFlag, other.alfLumaCoeffFlag, sizeof( alfLumaCoeffFlag ) ) )
+    {
+      return false;
+    }
+    if( memcmp( newFilterFlag, other.newFilterFlag, sizeof( newFilterFlag ) ) )
+    {
+      return false;
+    }
+    if( numAlternativesChroma != other.numAlternativesChroma )
+    {
+      return false;
+    }
+    if( numLumaFilters != other.numLumaFilters )
+    {
+      return false;
+    }
+    if( alfLumaCoeffDeltaFlag != other.alfLumaCoeffDeltaFlag )
+    {
+      return false;
+    }
+
+    return true;
+  }
+
+  bool operator!=( const AlfParam& other ) const   // negation of operator==
+  {
+    return !( *this == other );
+  }
+};
+
+//! \}
+
+#endif  // end of #ifndef  __ALFPARAMETERS__
diff --git a/source/Lib/CommonLib/BitStream.cpp b/source/Lib/CommonLib/BitStream.cpp
index 9d66589fc13fcc302c9833d5e76b01582724b854..58a3360b3e5f9b22af1651219ae8f8a83d7654e3 100644
--- a/source/Lib/CommonLib/BitStream.cpp
+++ b/source/Lib/CommonLib/BitStream.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/BitStream.h b/source/Lib/CommonLib/BitStream.h
index 64a9c8df612a13fd2cb806f30e5e1d8609e5c20b..bce5feadcb227b56dbcf2c757ddba14279f6ca91 100644
--- a/source/Lib/CommonLib/BitStream.h
+++ b/source/Lib/CommonLib/BitStream.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/Buffer.cpp b/source/Lib/CommonLib/Buffer.cpp
index a773bd30664c756d70014d295fbfdc7e9a0dc726..e1f967f14fef717a85f2c5904e8cc6377feb69eb 100644
--- a/source/Lib/CommonLib/Buffer.cpp
+++ b/source/Lib/CommonLib/Buffer.cpp
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2019, ITU/ISO/IEC
+* Copyright (c) 2010-2020, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -42,10 +42,33 @@
 #include "Buffer.h"
 #include "InterpolationFilter.h"
 
-#if ENABLE_SIMD_OPT_BUFFER
-#ifdef TARGET_SIMD_X86
+void applyPROFCore(Pel* dst, int dstStride, const Pel* src, int srcStride, int width, int height, const Pel* gradX, const Pel* gradY, int gradStride, const int* dMvX, const int* dMvY, int dMvStride, const bool& bi, int shiftNum, Pel offset, const ClpRng& clpRng)   // adds a clipped gradient * dMv correction to each src sample and writes it to dst; dMvStride is not used in this body
+{
+  int idx = 0;   // running index into dMvX / dMvY; advances once per sample and is never reset per row
+
+  const int dILimit = 1 << std::max<int>(clpRng.bd + 1, 13);   // magnitude bound for the correction: 2^max(bd + 1, 13)
+  for (int h = 0; h < height; h++)
+  {
+    for (int w = 0; w < width; w++)
+    {
+      int32_t dI = dMvX[idx] * gradX[w] + dMvY[idx] * gradY[w];   // per-sample correction term
+      dI = Clip3(-dILimit, dILimit - 1, dI);                      // bounds passed to Clip3: -dILimit and dILimit - 1
+      dst[w] = src[w] + dI;
+      if (!bi)   // only when bi is false: round, shift down and clip to clpRng here
+      {
+        dst[w] = (dst[w] + offset) >> shiftNum;
+        dst[w] = ClipPel(dst[w], clpRng);
+      }
+
+      idx++;
+    }
+    gradX += gradStride;   // advance all four row pointers to the next line
+    gradY += gradStride;
+    dst += dstStride;
+    src += srcStride;
+  }
+}
 
-#include "CommonDefX86.h"
 
 template< typename T >
 void addAvgCore( const T* src1, int src1Stride, const T* src2, int src2Stride, T* dest, int dstStride, int width, int height, int rshift, int offset, const ClpRng& clpRng )
@@ -71,19 +94,15 @@ void addBIOAvgCore(const Pel* src0, int src0Stride, const Pel* src1, int src1Str
     for (int x = 0; x < width; x += 4)
     {
       b = tmpx * (gradX0[x] - gradX1[x]) + tmpy * (gradY0[x] - gradY1[x]);
-      b = ((b + 1) >> 1);
       dst[x] = ClipPel((int16_t)rightShift((src0[x] + src1[x] + b + offset), shift), clpRng);
 
       b = tmpx * (gradX0[x + 1] - gradX1[x + 1]) + tmpy * (gradY0[x + 1] - gradY1[x + 1]);
-      b = ((b + 1) >> 1);
       dst[x + 1] = ClipPel((int16_t)rightShift((src0[x + 1] + src1[x + 1] + b + offset), shift), clpRng);
 
       b = tmpx * (gradX0[x + 2] - gradX1[x + 2]) + tmpy * (gradY0[x + 2] - gradY1[x + 2]);
-      b = ((b + 1) >> 1);
       dst[x + 2] = ClipPel((int16_t)rightShift((src0[x + 2] + src1[x + 2] + b + offset), shift), clpRng);
 
       b = tmpx * (gradX0[x + 3] - gradX1[x + 3]) + tmpy * (gradY0[x + 3] - gradY1[x + 3]);
-      b = ((b + 1) >> 1);
       dst[x + 3] = ClipPel((int16_t)rightShift((src0[x + 3] + src1[x + 3] + b + offset), shift), clpRng);
     }
     dst += dstStride;       src0 += src0Stride;     src1 += src1Stride;
@@ -91,25 +110,28 @@ void addBIOAvgCore(const Pel* src0, int src0Stride, const Pel* src1, int src1Str
   }
 }
 
+template<bool PAD = true>
 void gradFilterCore(Pel* pSrc, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY, const int bitDepth)
 {
   Pel* srcTmp = pSrc + srcStride + 1;
   Pel* gradXTmp = gradX + gradStride + 1;
   Pel* gradYTmp = gradY + gradStride + 1;
-  int  shift1 = std::max<int>(2, (IF_INTERNAL_PREC - bitDepth));
+  int  shift1 = 6;
 
   for (int y = 0; y < (height - 2 * BIO_EXTEND_SIZE); y++)
   {
     for (int x = 0; x < (width - 2 * BIO_EXTEND_SIZE); x++)
     {
-      gradYTmp[x] = (srcTmp[x + srcStride] - srcTmp[x - srcStride]) >> shift1;
-      gradXTmp[x] = (srcTmp[x + 1] - srcTmp[x - 1]) >> shift1;
+      gradYTmp[x] = ( srcTmp[x + srcStride] >> shift1 ) - ( srcTmp[x - srcStride] >> shift1 );
+      gradXTmp[x] = ( srcTmp[x + 1] >> shift1 ) - ( srcTmp[x - 1] >> shift1 );
     }
     gradXTmp += gradStride;
     gradYTmp += gradStride;
     srcTmp += srcStride;
   }
 
+  if (PAD)
+  {
   gradXTmp = gradX + gradStride + 1;
   gradYTmp = gradY + gradStride + 1;
   for (int y = 0; y < (height - 2 * BIO_EXTEND_SIZE); y++)
@@ -129,39 +151,38 @@ void gradFilterCore(Pel* pSrc, int srcStride, int width, int height, int gradStr
   ::memcpy(gradXTmp + (height - 2 * BIO_EXTEND_SIZE)*gradStride, gradXTmp + (height - 2 * BIO_EXTEND_SIZE - 1)*gradStride, sizeof(Pel)*(width));
   ::memcpy(gradYTmp - gradStride, gradYTmp, sizeof(Pel)*(width));
   ::memcpy(gradYTmp + (height - 2 * BIO_EXTEND_SIZE)*gradStride, gradYTmp + (height - 2 * BIO_EXTEND_SIZE - 1)*gradStride, sizeof(Pel)*(width));
+  }
 }
 
-void calcBIOParCore(const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel* gradY1, int* dotProductTemp1, int* dotProductTemp2, int* dotProductTemp3, int* dotProductTemp5, int* dotProductTemp6, const int src0Stride, const int src1Stride, const int gradStride, const int widthG, const int heightG, const int bitDepth)
+void calcBIOSumsCore(const Pel* srcY0Tmp, const Pel* srcY1Tmp, Pel* gradX0, Pel* gradX1, Pel* gradY0, Pel* gradY1, int xu, int yu, const int src0Stride, const int src1Stride, const int widthG, const int bitDepth, int* sumAbsGX, int* sumAbsGY, int* sumDIX, int* sumDIY, int* sumSignGY_GX)   // accumulates five sums over a fixed 6x6 window into the int pointed to by each sum* argument; xu, yu and bitDepth are not used in this body
 {
-  int shift4 = std::min<int>(8, (bitDepth - 4));
-  int shift5 = std::min<int>(5, (bitDepth - 7));
-  for (int y = 0; y < heightG; y++)
+  int shift4 = 4;   // right shift applied to each source sample before differencing
+  int shift5 = 1;   // right shift applied to the summed gradients
+
+  for (int y = 0; y < 6; y++)   // window height is hard-coded to 6 rows
   {
-    for (int x = 0; x < widthG; x++)
+    for (int x = 0; x < 6; x++)   // window width is hard-coded to 6 samples
     {
-      int temp = (srcY0Temp[x] >> shift4) - (srcY1Temp[x] >> shift4);
-      int tempX = (gradX0[x] + gradX1[x]) >> shift5;
-      int tempY = (gradY0[x] + gradY1[x]) >> shift5;
-      dotProductTemp1[x] = tempX * tempX;
-      dotProductTemp2[x] = tempX * tempY;
-      dotProductTemp3[x] = -tempX * temp;
-      dotProductTemp5[x] = tempY * tempY;
-      dotProductTemp6[x] = -tempY * temp;
+      int tmpGX = (gradX0[x] + gradX1[x]) >> shift5;
+      int tmpGY = (gradY0[x] + gradY1[x]) >> shift5;
+      int tmpDI = (int)((srcY1Tmp[x] >> shift4) - (srcY0Tmp[x] >> shift4));   // difference taken as src1 minus src0
+      *sumAbsGX += (tmpGX < 0 ? -tmpGX : tmpGX);                             // |tmpGX|
+      *sumAbsGY += (tmpGY < 0 ? -tmpGY : tmpGY);                             // |tmpGY|
+      *sumDIX += (tmpGX < 0 ? -tmpDI : (tmpGX == 0 ? 0 : tmpDI));            // sign(tmpGX) * tmpDI
+      *sumDIY += (tmpGY < 0 ? -tmpDI : (tmpGY == 0 ? 0 : tmpDI));            // sign(tmpGY) * tmpDI
+      *sumSignGY_GX += (tmpGY < 0 ? -tmpGX : (tmpGY == 0 ? 0 : tmpGX));      // sign(tmpGY) * tmpGX
+
     }
-    srcY0Temp += src0Stride;
-    srcY1Temp += src1Stride;
-    gradX0 += gradStride;
-    gradX1 += gradStride;
-    gradY0 += gradStride;
-    gradY1 += gradStride;
-    dotProductTemp1 += widthG;
-    dotProductTemp2 += widthG;
-    dotProductTemp3 += widthG;
-    dotProductTemp5 += widthG;
-    dotProductTemp6 += widthG;
+    srcY1Tmp += src1Stride;
+    srcY0Tmp += src0Stride;
+    gradX0 += widthG;   // the gradient buffers are stepped by widthG per row, not by a separate gradient stride
+    gradX1 += widthG;
+    gradY0 += widthG;
+    gradY1 += widthG;
   }
 }
 
+
 void calcBlkGradientCore(int sx, int sy, int     *arraysGx2, int     *arraysGxGy, int     *arraysGxdI, int     *arraysGy2, int     *arraysGydI, int     &sGx2, int     &sGy2, int     &sGxGy, int     &sGxdI, int     &sGydI, int width, int height, int unitSize)
 {
   int     *Gx2 = arraysGx2;
@@ -195,12 +216,12 @@ void calcBlkGradientCore(int sx, int sy, int     *arraysGx2, int     *arraysGxGy
   }
 }
 
-#if ENABLE_SIMD_OPT_GBI
-void removeWeightHighFreq(int16_t* dst, int dstStride, const int16_t* src, int srcStride, int width, int height, int shift, int gbiWeight)
+#if ENABLE_SIMD_OPT_BCW
+void removeWeightHighFreq(int16_t* dst, int dstStride, const int16_t* src, int srcStride, int width, int height, int shift, int bcwWeight)
 {
-  int normalizer = ((1 << 16) + (gbiWeight > 0 ? (gbiWeight >> 1) : -(gbiWeight >> 1))) / gbiWeight;
-  int weight0 = normalizer << g_GbiLog2WeightBase;
-  int weight1 = (g_GbiWeightBase - gbiWeight)*normalizer;
+  int normalizer = ((1 << 16) + (bcwWeight > 0 ? (bcwWeight >> 1) : -(bcwWeight >> 1))) / bcwWeight;
+  int weight0 = normalizer << g_BcwLog2WeightBase;
+  int weight1 = (g_BcwWeightBase - bcwWeight)*normalizer;
 #define REM_HF_INC  \
   src += srcStride; \
   dst += dstStride; \
@@ -273,25 +294,24 @@ PelBufferOps::PelBufferOps()
 
   addBIOAvg4      = addBIOAvgCore;
   bioGradFilter   = gradFilterCore;
-  calcBIOPar      = calcBIOParCore;
-  calcBlkGradient = calcBlkGradientCore;
+  calcBIOSums = calcBIOSumsCore;
 
   copyBuffer = copyBufferCore;
   padding = paddingCore;
-#if ENABLE_SIMD_OPT_GBI
+#if ENABLE_SIMD_OPT_BCW
   removeWeightHighFreq8 = removeWeightHighFreq;
   removeWeightHighFreq4 = removeWeightHighFreq;
   removeHighFreq8 = removeHighFreq;
   removeHighFreq4 = removeHighFreq;
 #endif
 
+  profGradFilter = gradFilterCore <false>;
+  applyPROF      = applyPROFCore;
+  roundIntVector = nullptr;
 }
 
 PelBufferOps g_pelBufOP = PelBufferOps();
 
-#endif
-#endif
-
 void copyBufferCore(Pel *src, int srcStride, Pel *dst, int dstStride, int width, int height)
 {
   int numBytes = width * sizeof(Pel);
@@ -327,11 +347,11 @@ void paddingCore(Pel *ptr, int stride, int width, int height, int padSize)
   }
 }
 template<>
-void AreaBuf<Pel>::addWeightedAvg(const AreaBuf<const Pel> &other1, const AreaBuf<const Pel> &other2, const ClpRng& clpRng, const int8_t gbiIdx)
+void AreaBuf<Pel>::addWeightedAvg(const AreaBuf<const Pel> &other1, const AreaBuf<const Pel> &other2, const ClpRng& clpRng, const int8_t bcwIdx)
 {
-  const int8_t w0 = getGbiWeight(gbiIdx, REF_PIC_LIST_0);
-  const int8_t w1 = getGbiWeight(gbiIdx, REF_PIC_LIST_1);
-  const int8_t log2WeightBase = g_GbiLog2WeightBase;
+  const int8_t w0 = getBcwWeight(bcwIdx, REF_PIC_LIST_0);
+  const int8_t w1 = getBcwWeight(bcwIdx, REF_PIC_LIST_1);
+  const int8_t log2WeightBase = g_BcwLog2WeightBase;
 
   const Pel* src0 = other1.buf;
   const Pel* src2 = other2.buf;
@@ -407,6 +427,7 @@ void AreaBuf<Pel>::scaleSignal(const int scale, const bool dir, const ClpRng& cl
     {
       for (unsigned x = 0; x < width; x++)
       {
+        src[x] = (Pel)Clip3((Pel)(-maxAbsclipBD - 1), (Pel)maxAbsclipBD, src[x]);
         sign = src[x] >= 0 ? 1 : -1;
         absval = sign * src[x];
         int val = sign * ((absval * scale + (1 << (CSCALE_FP_PREC - 1))) >> CSCALE_FP_PREC);
@@ -757,3 +778,74 @@ const CPelUnitBuf PelStorage::getBuf( const UnitArea &unit ) const
   return ( chromaFormat == CHROMA_400 ) ? CPelUnitBuf( chromaFormat, getBuf( unit.Y() ) ) : CPelUnitBuf( chromaFormat, getBuf( unit.Y() ), getBuf( unit.Cb() ), getBuf( unit.Cr() ) );
 }
 
+template<>
+void UnitBuf<Pel>::colorSpaceConvert(const UnitBuf<Pel> &other, const bool forward)   // reads the three planes of *this, writes the converted planes to other; forward selects the direction
+{
+  const Pel* pOrg0 = bufs[COMPONENT_Y].buf;
+  const Pel* pOrg1 = bufs[COMPONENT_Cb].buf;
+  const Pel* pOrg2 = bufs[COMPONENT_Cr].buf;
+  const int  strideOrg = bufs[COMPONENT_Y].stride;   // one stride serves all source planes (enforced by the first CHECK)
+
+  Pel* pDst0 = other.bufs[COMPONENT_Y].buf;
+  Pel* pDst1 = other.bufs[COMPONENT_Cb].buf;
+  Pel* pDst2 = other.bufs[COMPONENT_Cr].buf;
+  const int strideDst = other.bufs[COMPONENT_Y].stride;   // one stride serves all destination planes (second CHECK)
+
+  int width = bufs[COMPONENT_Y].width;
+  int height = bufs[COMPONENT_Y].height;
+  int r, g, b;
+  int y0, cg, co;
+
+  CHECK(bufs[COMPONENT_Y].stride != bufs[COMPONENT_Cb].stride || bufs[COMPONENT_Y].stride != bufs[COMPONENT_Cr].stride, "unequal stride for 444 content");
+  CHECK(other.bufs[COMPONENT_Y].stride != other.bufs[COMPONENT_Cb].stride || other.bufs[COMPONENT_Y].stride != other.bufs[COMPONENT_Cr].stride, "unequal stride for 444 content");
+  CHECK(bufs[COMPONENT_Y].width != other.bufs[COMPONENT_Y].width || bufs[COMPONENT_Y].height != other.bufs[COMPONENT_Y].height, "unequal block size");
+
+  if (forward)
+  {
+    for (int y = 0; y < height; y++)
+    {
+      for (int x = 0; x < width; x++)
+      {
+        r = pOrg2[x];   // plane 2 is read as r, plane 0 as g, plane 1 as b
+        g = pOrg0[x];
+        b = pOrg1[x];
+
+        y0 = (g << 1) + r + b;   // unrounded sums stay in int so they are not narrowed to Pel before the rounding shift
+        cg = (g << 1) - r - b;
+        co = ((r - b) << 1);
+        pDst0[x] = (y0 + 2) >> 2;   // round to nearest, divide by 4
+        pDst1[x] = (cg + 2) >> 2;
+        pDst2[x] = (co + 2) >> 2;
+      }
+      pOrg0 += strideOrg;
+      pOrg1 += strideOrg;
+      pOrg2 += strideOrg;
+      pDst0 += strideDst;
+      pDst1 += strideDst;
+      pDst2 += strideDst;
+    }
+  }
+  else
+  {
+    for (int y = 0; y < height; y++)
+    {
+      for (int x = 0; x < width; x++)
+      {
+        y0 = pOrg0[x];   // all three inputs are read before any output is written
+        cg = pOrg1[x];
+        co = pOrg2[x];
+
+        pDst0[x] = (y0 + cg);
+        pDst1[x] = (y0 - cg - co);
+        pDst2[x] = (y0 - cg + co);
+      }
+
+      pOrg0 += strideOrg;
+      pOrg1 += strideOrg;
+      pOrg2 += strideOrg;
+      pDst0 += strideDst;
+      pDst1 += strideDst;
+      pDst2 += strideDst;
+    }
+  }
+}
diff --git a/source/Lib/CommonLib/Buffer.h b/source/Lib/CommonLib/Buffer.h
index 4d34cc3d5e873e3854981dec450c733481c40a59..9b461389d48aa12dc181aa7d4dac732a7caca5c2 100644
--- a/source/Lib/CommonLib/Buffer.h
+++ b/source/Lib/CommonLib/Buffer.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -51,16 +51,15 @@
 // AreaBuf struct
 // ---------------------------------------------------------------------------
 
-#if ENABLE_SIMD_OPT_BUFFER
-#ifdef TARGET_SIMD_X86
-
 struct PelBufferOps
 {
   PelBufferOps();
 
+#if ENABLE_SIMD_OPT_BUFFER && defined(TARGET_SIMD_X86)
   void initPelBufOpsX86();
   template<X86_VEXT vext>
   void _initPelBufOpsX86();
+#endif
 
   void ( *addAvg4 )       ( const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, int width, int height,            int shift, int offset, const ClpRng& clpRng );
   void ( *addAvg8 )       ( const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, int width, int height,            int shift, int offset, const ClpRng& clpRng );
@@ -71,23 +70,23 @@ struct PelBufferOps
   void(*addBIOAvg4)    (const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, const Pel *gradX0, const Pel *gradX1, const Pel *gradY0, const Pel*gradY1, int gradStride, int width, int height, int tmpx, int tmpy, int shift, int offset, const ClpRng& clpRng);
   void(*bioGradFilter) (Pel* pSrc, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY, const int bitDepth);
   void(*calcBIOPar)    (const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel* gradY1, int* dotProductTemp1, int* dotProductTemp2, int* dotProductTemp3, int* dotProductTemp5, int* dotProductTemp6, const int src0Stride, const int src1Stride, const int gradStride, const int widthG, const int heightG, const int bitDepth);
+  void(*calcBIOSums)   (const Pel* srcY0Tmp, const Pel* srcY1Tmp, Pel* gradX0, Pel* gradX1, Pel* gradY0, Pel* gradY1, int xu, int yu, const int src0Stride, const int src1Stride, const int widthG, const int bitDepth, int* sumAbsGX, int* sumAbsGY, int* sumDIX, int* sumDIY, int* sumSignGY_GX);
   void(*calcBlkGradient)(int sx, int sy, int    *arraysGx2, int     *arraysGxGy, int     *arraysGxdI, int     *arraysGy2, int     *arraysGydI, int     &sGx2, int     &sGy2, int     &sGxGy, int     &sGxdI, int     &sGydI, int width, int height, int unitSize);
   void(*copyBuffer)(Pel *src, int srcStride, Pel *dst, int dstStride, int width, int height);
   void(*padding)(Pel *dst, int stride, int width, int height, int padSize);
-#if ENABLE_SIMD_OPT_GBI
-  void ( *removeWeightHighFreq8)  ( Pel* src0, int src0Stride, const Pel* src1, int src1Stride, int width, int height, int shift, int gbiWeight);
-  void ( *removeWeightHighFreq4)  ( Pel* src0, int src0Stride, const Pel* src1, int src1Stride, int width, int height, int shift, int gbiWeight);
+#if ENABLE_SIMD_OPT_BCW
+  void ( *removeWeightHighFreq8)  ( Pel* src0, int src0Stride, const Pel* src1, int src1Stride, int width, int height, int shift, int bcwWeight);
+  void ( *removeWeightHighFreq4)  ( Pel* src0, int src0Stride, const Pel* src1, int src1Stride, int width, int height, int shift, int bcwWeight);
   void ( *removeHighFreq8)        ( Pel* src0, int src0Stride, const Pel* src1, int src1Stride, int width, int height);
   void ( *removeHighFreq4)        ( Pel* src0, int src0Stride, const Pel* src1, int src1Stride, int width, int height);
 #endif
+  void (*profGradFilter) (Pel* pSrc, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY, const int bitDepth);
+  void (*applyPROF)      (Pel* dst, int dstStride, const Pel* src, int srcStride, int width, int height, const Pel* gradX, const Pel* gradY, int gradStride, const int* dMvX, const int* dMvY, int dMvStride, const bool& bi, int shiftNum, Pel offset, const ClpRng& clpRng);
+  void (*roundIntVector) (int* v, int size, unsigned int nShift, const int dmvLimit);
 };
 
 extern PelBufferOps g_pelBufOP;
 
-#endif
-#endif
-
-
 void paddingCore(Pel *ptr, int stride, int width, int height, int padSize);
 void copyBufferCore(Pel *src, int srcStride, Pel *Dst, int dstStride, int width, int height);
 
@@ -118,8 +117,10 @@ struct AreaBuf : public Size
   void subtract             ( const AreaBuf<const T> &other );
   void extendSingleBorderPel();
   void extendBorderPel      (  unsigned margin );
-  void addWeightedAvg       ( const AreaBuf<const T> &other1, const AreaBuf<const T> &other2, const ClpRng& clpRng, const int8_t gbiIdx);
-  void removeWeightHighFreq ( const AreaBuf<T>& other, const bool bClip, const ClpRng& clpRng, const int8_t iGbiWeight);
+  void extendBorderPel(unsigned marginX, unsigned marginY);
+  void padBorderPel         ( unsigned marginX, unsigned marginY, int dir );
+  void addWeightedAvg       ( const AreaBuf<const T> &other1, const AreaBuf<const T> &other2, const ClpRng& clpRng, const int8_t bcwIdx);
+  void removeWeightHighFreq ( const AreaBuf<T>& other, const bool bClip, const ClpRng& clpRng, const int8_t iBcwWeight);
   void addAvg               ( const AreaBuf<const T> &other1, const AreaBuf<const T> &other2, const ClpRng& clpRng );
   void removeHighFreq       ( const AreaBuf<T>& other, const bool bClip, const ClpRng& clpRng);
   void updateHistogram      ( std::vector<int32_t>& hist ) const;
@@ -165,6 +166,11 @@ typedef AreaBuf<const TCoeff> CCoeffBuf;
 typedef AreaBuf<      MotionInfo>  MotionBuf;
 typedef AreaBuf<const MotionInfo> CMotionBuf;
 
+typedef AreaBuf<      TCoeff>  PLTescapeBuf;
+typedef AreaBuf<const TCoeff> CPLTescapeBuf;
+
+typedef AreaBuf<      bool>  PLTtypeBuf;
+typedef AreaBuf<const bool> CPLTtypeBuf;
 
 #define SIZE_AWARE_PER_EL_OP( OP, INC )                     \
 if( ( width & 7 ) == 0 )                                    \
@@ -360,6 +366,7 @@ void AreaBuf<T>::subtract( const AreaBuf<const T> &other )
 #undef SUBS_INC
 }
 
+
 template<typename T>
 void AreaBuf<T>::copyClip( const AreaBuf<const T> &src, const ClpRng& clpRng )
 {
@@ -407,10 +414,10 @@ template<>
 void AreaBuf<Pel>::toLast( const ClpRng& clpRng );
 
 template<typename T>
-void AreaBuf<T>::removeWeightHighFreq(const AreaBuf<T>& other, const bool bClip, const ClpRng& clpRng, const int8_t gbiWeight)
+void AreaBuf<T>::removeWeightHighFreq(const AreaBuf<T>& other, const bool bClip, const ClpRng& clpRng, const int8_t bcwWeight)
 {
-  const int8_t gbiWeightOther = g_GbiWeightBase - gbiWeight;
-  const int8_t log2WeightBase = g_GbiLog2WeightBase;
+  const int8_t bcwWeightOther = g_BcwWeightBase - bcwWeight;
+  const int8_t log2WeightBase = g_BcwLog2WeightBase;
 
   const Pel* src = other.buf;
   const int  srcStride = other.stride;
@@ -418,22 +425,22 @@ void AreaBuf<T>::removeWeightHighFreq(const AreaBuf<T>& other, const bool bClip,
   Pel* dst = buf;
   const int  dstStride = stride;
 
-#if ENABLE_SIMD_OPT_GBI
+#if ENABLE_SIMD_OPT_BCW
   if(!bClip)
   {
     if(!(width & 7))
-      g_pelBufOP.removeWeightHighFreq8(dst, dstStride, src, srcStride, width, height, 16, gbiWeight);
+      g_pelBufOP.removeWeightHighFreq8(dst, dstStride, src, srcStride, width, height, 16, bcwWeight);
     else if(!(width & 3))
-      g_pelBufOP.removeWeightHighFreq4(dst, dstStride, src, srcStride, width, height, 16, gbiWeight);
+      g_pelBufOP.removeWeightHighFreq4(dst, dstStride, src, srcStride, width, height, 16, bcwWeight);
     else
       CHECK(true, "Not supported");
   }
   else
   {
 #endif
-    int normalizer = ((1 << 16) + (gbiWeight > 0 ? (gbiWeight >> 1) : -(gbiWeight >> 1))) / gbiWeight;
+    int normalizer = ((1 << 16) + (bcwWeight > 0 ? (bcwWeight >> 1) : -(bcwWeight >> 1))) / bcwWeight;
     int weight0 = normalizer << log2WeightBase;
-    int weight1 = gbiWeightOther * normalizer;
+    int weight1 = bcwWeightOther * normalizer;
 #define REM_HF_INC  \
   src += srcStride; \
   dst += dstStride; \
@@ -453,7 +460,7 @@ void AreaBuf<T>::removeWeightHighFreq(const AreaBuf<T>& other, const bool bClip,
 #undef REM_HF_INC
 #undef REM_HF_OP
 #undef REM_HF_OP_CLIP
-#if ENABLE_SIMD_OPT_GBI
+#if ENABLE_SIMD_OPT_BCW
   }
 #endif
 }
@@ -467,7 +474,7 @@ void AreaBuf<T>::removeHighFreq( const AreaBuf<T>& other, const bool bClip, cons
         T*  dst       = buf;
   const int dstStride = stride;
 
-#if ENABLE_SIMD_OPT_GBI
+#if ENABLE_SIMD_OPT_BCW
   if (!bClip)
   {
     if(!(width & 7))
@@ -501,7 +508,7 @@ void AreaBuf<T>::removeHighFreq( const AreaBuf<T>& other, const bool bClip, cons
 #undef REM_HF_OP
 #undef REM_HF_OP_CLIP
 
-#if ENABLE_SIMD_OPT_GBI
+#if ENABLE_SIMD_OPT_BCW
   }
 #endif
 }
@@ -520,6 +527,82 @@ void AreaBuf<T>::updateHistogram( std::vector<int32_t>& hist ) const
   }
 }
 
+template<typename T>
+void AreaBuf<T>::extendBorderPel(unsigned marginX, unsigned marginY)
+{
+  T* p = buf;
+  int h = height;
+  int w = width;
+  int s = stride;
+  // Replicates the border samples outwards in place: marginX columns left/right, marginY rows above/below.
+  CHECK((w + 2 * marginX) > s, "Size of buffer too small to extend");
+  // do left and right margins: each row's first/last sample is copied into the marginX positions beside it
+  for (int y = 0; y < h; y++)
+  {
+    for (int x = 0; x < marginX; x++)
+    {
+      *(p - marginX + x) = p[0];
+      p[w + x] = p[w - 1];
+    }
+    p += s;
+  }
+  // NOTE(review): only the stride is checked above; room for marginY rows above/below is presumably ensured by the caller
+  // p is now at (0, height), i.e. one row below the bottom-left sample of the area
+  p -= (s + marginX);
+  // p is now at (-marginX, height-1)
+  for (int y = 0; y < marginY; y++)
+  {
+    ::memcpy(p + (y + 1) * s, p, sizeof(T) * (w + (marginX << 1)));   // copy the bottom row, side margins included, into row height+y
+  }
+
+  // p is still (-marginX, height-1)
+  p -= ((h - 1) * s);
+  // p is now (-marginX, 0)
+  for (int y = 0; y < marginY; y++)
+  {
+    ::memcpy(p - (y + 1) * s, p, sizeof(T) * (w + (marginX << 1)));   // copy the top row, side margins included, into row -(y+1)
+  }
+}
+
+template<typename T>
+void AreaBuf<T>::padBorderPel( unsigned marginX, unsigned marginY, int dir )
+{
+  // Overwrites a marginX x marginY corner block inside this buffer with one sample per row.
+  //   dir == 1: top-left block, each row filled from the sample at column marginX
+  //   dir == 2: bottom-right block, each row filled from the sample just left of the block
+  // Any other dir value leaves the samples untouched.
+  const int rowStride = stride;
+  const int numRows   = height;
+  const int numCols   = width;
+
+  CHECK( numCols > rowStride, "Size of buffer too small to extend" );
+
+  if( dir == 1 )
+  {
+    // start at the buffer origin
+    T* row = buf;
+    for( unsigned y = 0; y < marginY; y++, row += rowStride )
+    {
+      for( unsigned x = 0; x < marginX; x++ )
+      {
+        row[x] = row[marginX];
+      }
+    }
+  }
+  else if( dir == 2 )
+  {
+    // start at column (width - marginX) of row (height - marginY)
+    T* row = buf + rowStride * ( numRows - marginY ) + numCols - marginX;
+    for( unsigned y = 0; y < marginY; y++, row += rowStride )
+    {
+      for( unsigned x = 0; x < marginX; x++ )
+      {
+        row[x] = row[-1];
+      }
+    }
+  }
+}
+
 template<typename T>
 void AreaBuf<T>::extendBorderPel( unsigned margin )
 {
@@ -680,20 +763,23 @@ struct UnitBuf
   const AreaBuf<T>& Cr() const { return bufs[2]; }
 
   void fill                 ( const T &val );
-  void copyFrom             ( const UnitBuf<const T> &other );
+  void copyFrom             ( const UnitBuf<const T> &other, const bool lumaOnly = false, const bool chromaOnly = false );
   void reconstruct          ( const UnitBuf<const T> &pred, const UnitBuf<const T> &resi, const ClpRngs& clpRngs );
-  void copyClip             ( const UnitBuf<const T> &src, const ClpRngs& clpRngs );
+  void copyClip             ( const UnitBuf<const T> &src, const ClpRngs& clpRngs, const bool lumaOnly = false, const bool chromaOnly = false );
   void subtract             ( const UnitBuf<const T> &other );
-  void addWeightedAvg       ( const UnitBuf<const T> &other1, const UnitBuf<const T> &other2, const ClpRngs& clpRngs, const uint8_t gbiIdx = GBI_DEFAULT, const bool chromaOnly = false, const bool lumaOnly = false);
+  void addWeightedAvg       ( const UnitBuf<const T> &other1, const UnitBuf<const T> &other2, const ClpRngs& clpRngs, const uint8_t bcwIdx = BCW_DEFAULT, const bool chromaOnly = false, const bool lumaOnly = false);
   void addAvg               ( const UnitBuf<const T> &other1, const UnitBuf<const T> &other2, const ClpRngs& clpRngs, const bool chromaOnly = false, const bool lumaOnly = false);
   void extendSingleBorderPel();
+  void extendBorderPel(unsigned marginX, unsigned marginY);
+  void padBorderPel         ( unsigned margin, int dir );
   void extendBorderPel      ( unsigned margin );
   void removeHighFreq       ( const UnitBuf<T>& other, const bool bClip, const ClpRngs& clpRngs
-                            , const int8_t gbiWeight = g_GbiWeights[GBI_DEFAULT]
+                            , const int8_t bcwWeight = g_BcwWeights[BCW_DEFAULT]
                             );
 
         UnitBuf<      T> subBuf (const UnitArea& subArea);
   const UnitBuf<const T> subBuf (const UnitArea& subArea) const;
+  void colorSpaceConvert(const UnitBuf<T> &other, const bool forward);
 };
 
 typedef UnitBuf<      Pel>  PelUnitBuf;
@@ -712,11 +798,14 @@ void UnitBuf<T>::fill( const T &val )
 }
 
 template<typename T>
-void UnitBuf<T>::copyFrom( const UnitBuf<const T> &other )
+void UnitBuf<T>::copyFrom(const UnitBuf<const T> &other, const bool lumaOnly, const bool chromaOnly )
 {
   CHECK( chromaFormat != other.chromaFormat, "Incompatible formats" );
 
-  for( unsigned i = 0; i < bufs.size(); i++ )
+  CHECK( lumaOnly && chromaOnly, "Not allowed to have both lumaOnly and chromaOnly selected" );
+  const size_t compStart = chromaOnly ? 1 : 0;
+  const size_t compEnd   = lumaOnly ? 1 : (unsigned) bufs.size();
+  for( size_t i = compStart; i < compEnd; i++ )
   {
     bufs[i].copyFrom( other.bufs[i] );
   }
@@ -736,11 +825,14 @@ void UnitBuf<T>::subtract( const UnitBuf<const T> &other )
 }
 
 template<typename T>
-void UnitBuf<T>::copyClip(const UnitBuf<const T> &src, const ClpRngs& clpRngs)
+void UnitBuf<T>::copyClip(const UnitBuf<const T> &src, const ClpRngs &clpRngs, const bool lumaOnly, const bool chromaOnly )
 {
   CHECK( chromaFormat != src.chromaFormat, "Incompatible formats" );
 
-  for( unsigned i = 0; i < bufs.size(); i++ )
+  CHECK( lumaOnly && chromaOnly, "Not allowed to have both lumaOnly and chromaOnly selected" );
+  const size_t compStart = chromaOnly ? 1 : 0;
+  const size_t compEnd   = lumaOnly ? 1 : bufs.size();
+  for( size_t i = compStart; i < compEnd; i++ )
   {
     bufs[i].copyClip( src.bufs[i], clpRngs.comp[i] );
   }
@@ -760,7 +852,7 @@ void UnitBuf<T>::reconstruct(const UnitBuf<const T> &pred, const UnitBuf<const T
 }
 
 template<typename T>
-void UnitBuf<T>::addWeightedAvg(const UnitBuf<const T> &other1, const UnitBuf<const T> &other2, const ClpRngs& clpRngs, const uint8_t gbiIdx /* = GBI_DEFAULT */, const bool chromaOnly /* = false */, const bool lumaOnly /* = false */)
+void UnitBuf<T>::addWeightedAvg(const UnitBuf<const T> &other1, const UnitBuf<const T> &other2, const ClpRngs& clpRngs, const uint8_t bcwIdx /* = BCW_DEFAULT */, const bool chromaOnly /* = false */, const bool lumaOnly /* = false */)
 {
   const size_t istart = chromaOnly ? 1 : 0;
   const size_t iend = lumaOnly ? 1 : bufs.size();
@@ -769,7 +861,7 @@ void UnitBuf<T>::addWeightedAvg(const UnitBuf<const T> &other1, const UnitBuf<co
 
   for(size_t i = istart; i < iend; i++)
   {
-    bufs[i].addWeightedAvg(other1.bufs[i], other2.bufs[i], clpRngs.comp[i], gbiIdx);
+    bufs[i].addWeightedAvg(other1.bufs[i], other2.bufs[i], clpRngs.comp[i], bcwIdx);
   }
 }
 
@@ -787,6 +879,15 @@ void UnitBuf<T>::addAvg(const UnitBuf<const T> &other1, const UnitBuf<const T> &
   }
 }
 
+template<typename T>
+void UnitBuf<T>::colorSpaceConvert(const UnitBuf<T> &other, const bool forward)
+{
+  THROW("Type not supported");   // generic version: unconditionally reports an error via THROW
+}
+// Explicit specialization for Pel: only declared at this point; the definition is not part of this hunk.
+template<>
+void UnitBuf<Pel>::colorSpaceConvert(const UnitBuf<Pel> &other, const bool forward);
+
 template<typename T>
 void UnitBuf<T>::extendSingleBorderPel()
 {
@@ -796,6 +897,24 @@ void UnitBuf<T>::extendSingleBorderPel()
   }
 }
 
+template<typename T>
+void UnitBuf<T>::extendBorderPel(unsigned marginX, unsigned marginY)
+{
+  // Extend every component buffer, right-shifting each margin by that component's X / Y scale.
+  for (unsigned compIdx = 0; compIdx < bufs.size(); ++compIdx)
+    bufs[compIdx].extendBorderPel(marginX >> getComponentScaleX(ComponentID(compIdx), chromaFormat),
+                                  marginY >> getComponentScaleY(ComponentID(compIdx), chromaFormat));
+}
+
+template<typename T>
+void UnitBuf<T>::padBorderPel( unsigned margin, int dir )
+{
+  // The single margin serves both axes; it is right-shifted by the component's X and Y scale respectively.
+  for( unsigned compIdx = 0; compIdx < bufs.size(); ++compIdx )
+    bufs[compIdx].padBorderPel( margin >> getComponentScaleX( ComponentID( compIdx ), chromaFormat ),
+                                margin >> getComponentScaleY( ComponentID( compIdx ), chromaFormat ), dir );
+}
+
 template<typename T>
 void UnitBuf<T>::extendBorderPel( unsigned margin )
 {
@@ -807,12 +926,12 @@ void UnitBuf<T>::extendBorderPel( unsigned margin )
 
 template<typename T>
 void UnitBuf<T>::removeHighFreq( const UnitBuf<T>& other, const bool bClip, const ClpRngs& clpRngs
-                               , const int8_t gbiWeight
+                               , const int8_t bcwWeight
                                )
 {
-  if(gbiWeight != g_GbiWeights[GBI_DEFAULT])
+  if(bcwWeight != g_BcwWeights[BCW_DEFAULT])
   {
-    bufs[0].removeWeightHighFreq(other.bufs[0], bClip, clpRngs.comp[0], gbiWeight);
+    bufs[0].removeWeightHighFreq(other.bufs[0], bClip, clpRngs.comp[0], bcwWeight);
     return;
   }
   bufs[0].removeHighFreq(other.bufs[0], bClip, clpRngs.comp[0]);
@@ -885,5 +1004,25 @@ private:
   Pel *m_origin[MAX_NUM_COMPONENT];
 };
 
+struct CompStorage : public PelBuf   // PelBuf view backed by memory this object allocates and frees itself
+{
+  CompStorage () : m_memory( nullptr ) {}
+  ~CompStorage() { delete [] m_memory; }   // delete[] of a null pointer is a no-op, no guard needed
+  // NOTE(review): copying is still implicit; a copy of a created object would free the same memory twice.
+  void create( const Size& size )
+  {
+    CHECK( m_memory, "Trying to re-create an already initialized buffer" );
+    m_memory = new Pel [ size.area() ];
+    *static_cast<PelBuf*>(this) = PelBuf( m_memory, size );   // point the inherited view at the new storage
+  }
+  void destroy()
+  {
+    delete [] m_memory;
+    m_memory = buf = nullptr;   // also clear the inherited pointer so it cannot dangle after the free
+  }
+  bool valid() const { return m_memory != nullptr; }   // const so it can be queried through const references
+private:
+  Pel* m_memory;
+};
 
 #endif
diff --git a/source/Lib/CommonLib/CMakeLists.txt b/source/Lib/CommonLib/CMakeLists.txt
index 06cb4088c813a94e400fb9379486eb14fc5050f1..b12307342f0d454321099a2e3c532f01921a3ff1 100644
--- a/source/Lib/CommonLib/CMakeLists.txt
+++ b/source/Lib/CommonLib/CMakeLists.txt
@@ -51,6 +51,10 @@ if( EXTENSION_360_VIDEO )
   target_compile_definitions( ${LIB_NAME} PUBLIC EXTENSION_360_VIDEO=1 )
 endif()
 
+if( EXTENSION_HDRTOOLS )
+  target_compile_definitions( ${LIB_NAME} PUBLIC EXTENSION_HDRTOOLS=1 )
+endif()
+
 if( SET_ENABLE_TRACING )
   if( ENABLE_TRACING )
     target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_TRACING=1 )
diff --git a/source/Lib/CommonLib/CacheModel.h b/source/Lib/CommonLib/CacheModel.h
index 094390a6379e761613b236b089e6072a80b84c2a..1150c24e0262c4a7be046b6f648de21c92d3867b 100644
--- a/source/Lib/CommonLib/CacheModel.h
+++ b/source/Lib/CommonLib/CacheModel.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/ChromaFormat.cpp b/source/Lib/CommonLib/ChromaFormat.cpp
index 4bccf4336aeb17fb1f5e34c0da2191ad762e4dbc..9a56d89e33ad88e808a2ab60ba34b7845e2bad69 100644
--- a/source/Lib/CommonLib/ChromaFormat.cpp
+++ b/source/Lib/CommonLib/ChromaFormat.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/ChromaFormat.h b/source/Lib/CommonLib/ChromaFormat.h
index 2922914b4d3ab54838f1b976b3400c90b0992c86..14bc517bd07346aef7bbae95b5fb89884c9d9c8a 100644
--- a/source/Lib/CommonLib/ChromaFormat.h
+++ b/source/Lib/CommonLib/ChromaFormat.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -106,37 +106,23 @@ static inline uint64_t getTotalFracBits(const uint32_t width, const uint32_t hei
 //Intra prediction  ====================================================================================================
 //======================================================================================================================
 
-static inline bool filterIntraReferenceSamples (const ChannelType chType, const ChromaFormat chFmt, const bool intraReferenceSmoothingDisabled)
-{
-  return (!intraReferenceSmoothingDisabled) && (isLuma(chType) || (chFmt == CHROMA_444));
-}
-
-
 //------------------------------------------------
 
 static inline int getTransformShift(const int channelBitDepth, const Size size, const int maxLog2TrDynamicRange)
 {
-  return maxLog2TrDynamicRange - channelBitDepth - ( ( g_aucLog2[size.width] + g_aucLog2[size.height] ) >> 1 );
+  return maxLog2TrDynamicRange - channelBitDepth - ( ( floorLog2(size.width) + floorLog2(size.height) ) >> 1 );
 }
 
 
 //------------------------------------------------
 
-static inline int getScaledChromaQP(int unscaledChromaQP, const ChromaFormat chFmt)
-{
-  return g_aucChromaScale[chFmt][Clip3(0, (chromaQPMappingTableSize - 1), unscaledChromaQP)];
-}
-
-
-#if HEVC_USE_SCALING_LISTS
 //======================================================================================================================
 //Scaling lists  =======================================================================================================
 //======================================================================================================================
 
 static inline int getScalingListType(const PredMode predMode, const ComponentID compID)
 {
-  return ((predMode != MODE_INTER) ? 0 : MAX_NUM_COMPONENT) + MAP_CHROMA(compID);
+  return ((predMode == MODE_INTRA) ? 0 : MAX_NUM_COMPONENT) + MAP_CHROMA(compID);
 }
-#endif
 
 #endif
diff --git a/source/Lib/CommonLib/CodingStatistics.h b/source/Lib/CommonLib/CodingStatistics.h
index 1a47050e37f336b7209d64809ef3940f6111d3c0..375ed6ec212e215c9472c0daca7185bde3b8a6f7 100644
--- a/source/Lib/CommonLib/CodingStatistics.h
+++ b/source/Lib/CommonLib/CodingStatistics.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -58,6 +58,7 @@ enum CodingStatisticsType
   STATS__CABAC_BITS__MERGE_INDEX,
   STATS__CABAC_BITS__MVP_IDX,
   STATS__CABAC_BITS__SPLIT_FLAG,
+  STATS__CABAC_BITS__MODE_CONSTRAINT_FLAG,
   STATS__CABAC_BITS__PART_SIZE,
   STATS__CABAC_BITS__PRED_MODE,
   STATS__CABAC_BITS__INTRA_DIR_ANG,
@@ -76,6 +77,7 @@ enum CodingStatisticsType
   STATS__CABAC_BITS__CHROMA_QP_ADJUSTMENT,
   STATS__CABAC_BITS__QT_CBF,
   STATS__CABAC_BITS__CROSS_COMPONENT_PREDICTION,
+  STATS__CABAC_BITS__JOINT_CB_CR,
   STATS__CABAC_BITS__MTS_FLAGS,
   STATS__CABAC_BITS__LAST_SIG_X_Y,
   STATS__CABAC_BITS__SIG_COEFF_GROUP_FLAG,
@@ -85,12 +87,19 @@ enum CodingStatisticsType
   STATS__CABAC_BITS__GT2_FLAG,
   STATS__CABAC_BITS__SIGN_BIT,
   STATS__CABAC_BITS__ESCAPE_BITS,
+#if TR_ONLY_COEFF_STATS
+  STATS__CABAC_BITS__SIG_COEFF_MAP_FLAG_TS,
+  STATS__CABAC_BITS__PAR_FLAG_TS,
+  STATS__CABAC_BITS__GT1_FLAG_TS,
+  STATS__CABAC_BITS__GT2_FLAG_TS,
+  STATS__CABAC_BITS__SIGN_BIT_TS,
+  STATS__CABAC_BITS__ESCAPE_BITS_TS,
+#endif
   STATS__CABAC_BITS__SAO,
+  STATS__CABAC_BITS__LFNST,
   STATS__CABAC_BITS__ALF,
   STATS__CABAC_TRM_BITS,
   STATS__CABAC_FIXED_BITS,
-  STATS__CABAC_PCM_ALIGN_BITS,
-  STATS__CABAC_PCM_CODE_BITS,
   STATS__BYTE_ALIGNMENT_BITS,
   STATS__TRAILING_BITS,
   STATS__EXPLICIT_RDPCM_BITS,
@@ -100,16 +109,18 @@ enum CodingStatisticsType
   STATS__CABAC_BITS__OTHER,
   STATS__CABAC_BITS__INVALID,
   STATS__CABAC_BITS__IMV_FLAG,
-  STATS__CABAC_BITS__GBI_IDX,
+  STATS__CABAC_BITS__BCW_IDX,
   STATS__CABAC_BITS__SBT_MODE,
   STATS__CABAC_BITS__MH_INTRA_FLAG,
   STATS__CABAC_BITS__TRIANGLE_FLAG,
   STATS__CABAC_BITS__TRIANGLE_INDEX,
   STATS__CABAC_BITS__MULTI_REF_LINE,
   STATS__CABAC_BITS__SYMMVD_FLAG,
+  STATS__CABAC_BITS__BDPCM_MODE,
   STATS__TOOL_TOTAL_FRAME,// This is a special case and is not included in the report.
   STATS__TOOL_AFF,
   STATS__TOOL_EMT,
+  STATS__TOOL_LFNST,
   STATS__TOOL_TOTAL,
   STATS__NUM_STATS
 };
@@ -139,6 +150,7 @@ static inline const char* getName(CodingStatisticsType name)
     "CABAC_BITS__MERGE_INDEX",
     "CABAC_BITS__MVP_IDX",
     "CABAC_BITS__SPLIT_FLAG",
+    "CABAC_BITS__MODE_CONSTRAINT_FLAG",
     "CABAC_BITS__PART_SIZE",
     "CABAC_BITS__PRED_MODE",
     "CABAC_BITS__INTRA_DIR_ANG",
@@ -157,6 +169,7 @@ static inline const char* getName(CodingStatisticsType name)
     "CABAC_BITS__CHROMA_QP_ADJUSTMENT",
     "CABAC_BITS__QT_CBF",
     "CABAC_BITS__CROSS_COMPONENT_PREDICTION",
+    "CABAC_BITS__JOINT_CB_CR",
     "CABAC_BITS__MTS_FLAGS",
     "CABAC_BITS__LAST_SIG_X_Y",
     "CABAC_BITS__SIG_COEFF_GROUP_FLAG",
@@ -166,12 +179,19 @@ static inline const char* getName(CodingStatisticsType name)
     "CABAC_BITS__GT2_FLAG",
     "CABAC_BITS__SIGN_BIT",
     "CABAC_BITS__ESCAPE_BITS",
+#if TR_ONLY_COEFF_STATS
+    "CABAC_BITS__SIG_COEFF_MAP_FLAG_TS",
+    "CABAC_BITS__PAR_FLAG_TS",
+    "CABAC_BITS__GT1_FLAG_TS",
+    "CABAC_BITS__GT2_FLAG_TS",
+    "CABAC_BITS__SIGN_BIT_TS",
+    "CABAC_BITS__ESCAPE_BITS_TS",
+#endif
     "CABAC_BITS__SAO",
+    "CABAC_BITS__LFNST",
     "CABAC_BITS__ALF",
     "CABAC_TRM_BITS",
     "CABAC_FIXED_BITS",
-    "CABAC_PCM_ALIGN_BITS",
-    "CABAC_PCM_CODE_BITS",
     "BYTE_ALIGNMENT_BITS",
     "TRAILING_BITS",
     "EXPLICIT_RDPCM_BITS",
@@ -181,16 +201,18 @@ static inline const char* getName(CodingStatisticsType name)
     "CABAC_BITS__OTHER",
     "CABAC_BITS__INVALID",
     "CABAC_BITS__IMV_FLAG",
-    "CABAC_BITS__GBI_IDX",
+    "CABAC_BITS__BCW_IDX",
     "CABAC_BITS__SBT_MODE",
     "CABAC_BITS__MH_INTRA_FLAG",
     "CABAC_BITS__TRIANGLE_FLAG",
     "CABAC_BITS__TRIANGLE_INDEX",
     "CABAC_BITS__MULTI_REF_LINE",
     "CABAC_BITS__SYMMVD_FLAG",
+    "CABAC_BITS__BDPCM_MODE",
     "TOOL_FRAME",
     "TOOL_AFFINE",
     "TOOL_EMT",
+    "TOOL_LFNST",
     "TOOL_TOTAL"
   };
   CHECK( STATS__NUM_STATS != sizeof( statNames ) / sizeof( char* ) || name >= STATS__NUM_STATS, "stats out of range" );
@@ -301,6 +323,13 @@ public:
     {
       bits += src.bits; count += src.count; sum += src.sum; classCount += src.classCount; return *this;
     }
+
+#if RExt__DECODER_DEBUG_TOOL_MAX_FRAME_STATS
+    SStat &operator-=(const SStat &src)   // member-wise subtraction, counterpart of operator+= above
+    {
+      bits -= src.bits; count -= src.count; sum -= src.sum; classCount -= src.classCount; return *this;
+    }
+#endif
   };
 
   struct StatTool
@@ -319,12 +348,45 @@ public:
     }
   };
 
+#if RExt__DECODER_DEBUG_TOOL_MAX_FRAME_STATS
+  struct SStat_max
+  {
+    // CABAC/EP statistic pairs. Of these, the summary output prints max_* ("CABAC MAX FRAME stat"),
+    // trf_* ("CABAC MAX FRAME TRF stat") and acc_trf_* ("CABAC Accumulated TRF stat").
+    SStat max_CABAC_state,      max_EP_state;
+    SStat trf_CABAC_state,      trf_EP_state;
+    SStat acc_trf_CABAC_state,  acc_trf_EP_state;
+    SStat prev_CABAC_state,     prev_EP_state;
+    SStat prev_trf_CABAC_state, prev_trf_EP_state;
+
+    void clear()
+    {
+      // Resets all ten members through their own clear().
+      SStat *const allStats[] =
+      {
+        &max_CABAC_state,      &max_EP_state,
+        &trf_CABAC_state,      &trf_EP_state,
+        &acc_trf_CABAC_state,  &acc_trf_EP_state,
+        &prev_CABAC_state,     &prev_EP_state,
+        &prev_trf_CABAC_state, &prev_trf_EP_state
+      };
+      for( SStat *stat : allStats )
+      {
+        stat->clear();
+      }
+    }
+  };
+#endif
+
   class CodingStatisticsData
   {
   private:
     SStat statistics         [STATS__NUM_STATS + 1][CODING_STATS_NUM_SUBCLASSES];
     SStat statistics_ep      [STATS__NUM_STATS + 1][CODING_STATS_NUM_SUBCLASSES];
     StatTool statistics_tool [STATS__NUM_STATS + 1][CODING_STATS_NUM_SUBCLASSES];
+#if RExt__DECODER_DEBUG_TOOL_MAX_FRAME_STATS
+    SStat_max                    statistics_max;
+#endif
     std::map<std::string, SStat> mappings_ep;
     friend class CodingStatistics;
   };
@@ -421,6 +483,10 @@ private:
     int64_t classCounts[STATS__NUM_STATS];
     std::fill_n( classCounts, ( size_t ) STATS__NUM_STATS, 0 );
 
+#if RExt__DECODER_DEBUG_TOOL_MAX_FRAME_STATS
+    SStat_max &max = GetStatisticMax();
+#endif
+
     int64_t cr = 0; // CABAC remainder, which is added to "STATS__CABAC_INITIALISATION"
     {
       int64_t totalCABACbits = 0, roundedCABACbits = 0;
@@ -486,6 +552,9 @@ private:
           if( i == STATS__CABAC_INITIALISATION && sCABACorig.bits != 0 )
           {
             thisCABACbits += cr;
+#if EPBINCOUNT_FIX
+            sCABACorig.count = 0;
+#endif
             cr = 0;
           }
           sCABAC.bits       = thisCABACbits;
@@ -493,6 +562,12 @@ private:
           sCABAC.sum        = sCABACorig.sum;
           sCABAC.classCount = classCounts[i];
         }
+#if EPBINCOUNT_FIX
+        if (i == STATS__BYTE_ALIGNMENT_BITS || i == STATS__TRAILING_BITS || i == STATS__NAL_UNIT_HEADER_BITS || i == STATS__EMULATION_PREVENTION_3_BYTES)
+        {
+          sEP.count = 0;
+        }
+#endif
         uint32_t wIdx = CodingStatisticsClassType::GetSubClassWidth( c );
         uint32_t hIdx = CodingStatisticsClassType::GetSubClassHeight( c );
         OutputLine( pName, ':', wIdx, hIdx, CodingStatisticsClassType::GetSubClassString( c ), sCABAC, sEP );
@@ -514,6 +589,18 @@ private:
       {
         cabacSubTotal.classCount = classCounts[i];
         OutputLine( pName, '~', "~~ST~~", "~~ST~~", "~~ST~~", cabacSubTotal, epSubTotal );
+
+#if RExt__DECODER_DEBUG_TOOL_MAX_FRAME_STATS
+        // For TRF
+        if ((i == STATS__CABAC_BITS__SIG_COEFF_MAP_FLAG) || (i == STATS__CABAC_BITS__PAR_FLAG)
+            || (i == STATS__CABAC_BITS__GT1_FLAG) || (i == STATS__CABAC_BITS__GT2_FLAG)
+            || (i == STATS__CABAC_BITS__ESCAPE_BITS))
+        {
+          max.acc_trf_CABAC_state += cabacSubTotal;
+          max.acc_trf_EP_state += epSubTotal;
+        }
+#endif
+
       }
       if( i == STATS__NAL_UNIT_TOTAL_BODY )
       {
@@ -596,6 +683,12 @@ private:
     OutputDashedLine( "GRAND TOTAL" );
     epTotalBits += cavlcTotalBits;
     OutputLine      ( "TOTAL",                  '~', "~~GT~~", "~~GT~~", "~~GT~~", cabacTotalBits, epTotalBits );
+#if RExt__DECODER_DEBUG_TOOL_MAX_FRAME_STATS
+    OutputDashedLine("");
+    OutputLine("CABAC MAX FRAME stat", '~', "~~ST~~", "~~ST~~", "~~ST~~", max.max_CABAC_state, max.max_EP_state);
+    OutputLine("CABAC MAX FRAME TRF stat", '~', "~~ST~~", "~~ST~~", "~~ST~~", max.trf_CABAC_state, max.trf_EP_state);
+    OutputLine("CABAC Accumulated TRF stat", '~', "~~ST~~", "~~ST~~", "~~ST~~", max.acc_trf_CABAC_state, max.acc_trf_EP_state);
+#endif
   }
 
   void OutputToolStats()
@@ -704,6 +797,10 @@ public:
 
   static StatTool &GetStatisticTool ( const CodingStatisticsClassType &stat ) { return GetSingletonInstance().data.statistics_tool[stat.type][stat.subClass]; }
 
+#if RExt__DECODER_DEBUG_TOOL_MAX_FRAME_STATS
+  static SStat_max &GetStatisticMax() { return GetSingletonInstance().data.statistics_max; }   // mutable reference to the singleton's statistics_max record
+#endif
+
   static int getNumOnes( int bins )
   {
     CHECK( bins < 0, "Bins should not be nagative" );
@@ -722,7 +819,11 @@ public:
     CHECK( stat.type == STATS__CABAC_BITS__INVALID, "Should never be used." );
     SStat &s = GetStatisticEP( stat );
     s.bits  += numBits;
+#if EPBINCOUNT_FIX
+    s.count += numBits;
+#else
     s.count++;
+#endif
     s.sum   += getNumOnes( value );
   }
 
@@ -730,7 +831,11 @@ public:
   {
     SStat &s = GetStatisticEP( str );
     s.bits  += numBits;
+#if EPBINCOUNT_FIX
+    s.count += numBits;
+#else
     s.count++;
+#endif
     s.sum   += getNumOnes( value );
   }
 
@@ -738,7 +843,11 @@ public:
   {
     SStat &s = GetStatisticEP( pKey );
     s.bits  += numBits;
+#if EPBINCOUNT_FIX
+    s.count += numBits;
+#else
     s.count++;
+#endif
     s.sum   += getNumOnes( value );
   }
 
@@ -768,6 +877,132 @@ public:
     s.count++;
     s.sum   += val;
   }
+
+#if RExt__DECODER_DEBUG_TOOL_MAX_FRAME_STATS
+  static void UpdateMaxStat(CodingStatisticsData *data)
+  { // Recomputes the CABAC/EP totals from *data and keeps, in the SStat_max record, the largest increase seen between two calls.
+    SStat_max &   max = GetStatisticMax();
+    const int64_t es = CODINGSTATISTICS_ENTROPYSCALE;
+
+    int64_t countTotal = 0;
+    int64_t classCounts[STATS__NUM_STATS];
+    std::fill_n(classCounts, (size_t) STATS__NUM_STATS, 0);
+
+    int64_t cr = 0;   // CABAC remainder, which is added to "STATS__CABAC_INITIALISATION"
+    // Pass 1: per class, sum bits and counts over all subclasses and write the class count back into *data.
+    int64_t totalCABACbits = 0, roundedCABACbits = 0;
+    for (int i = STATS__NAL_UNIT_PACKING; i < STATS__NUM_STATS; i++)
+    {
+      int64_t classCount = 0;
+
+      for (uint32_t c = 0; c < CODING_STATS_NUM_SUBCLASSES; c++)
+      {
+        totalCABACbits += data->statistics[i][c].bits;
+        roundedCABACbits += data->statistics[i][c].bits / es;
+        classCount += data->statistics[i][c].count;
+      }
+
+      for (uint32_t c = 0; c < CODING_STATS_NUM_SUBCLASSES; c++)
+      {
+        data->statistics[i][c].classCount = classCount;
+      }
+
+      classCounts[i] = classCount;
+      countTotal += classCount;
+    }
+    int64_t remainder = totalCABACbits - roundedCABACbits * es;   // what the per-entry integer divisions by es dropped
+    cr = (remainder + es / 2) / es;                               // rounded to the nearest whole multiple of es
+
+    classCounts[0] = countTotal;
+
+    SStat cabacTotalBits, epTotalBits, cabacTrfTotalBits, epTrfTotalBits;
+
+    cabacTotalBits.classCount = countTotal;
+    epTotalBits.classCount = countTotal;
+    cabacTrfTotalBits.classCount = countTotal;
+    epTrfTotalBits.classCount    = countTotal;
+
+    // Calculate the actual bin and bit count
+    for (int i = 0; i < STATS__NUM_STATS; i++)
+    {
+      for (uint32_t c = 0; c < CODING_STATS_NUM_SUBCLASSES; c++)
+      {
+        SStat &sCABACorig = data->statistics[i][c];
+        SStat &sEP = data->statistics_ep[i][c];
+
+        if (sCABACorig.bits == 0 && sEP.bits == 0)
+        {
+          continue;
+        }
+        // sCABAC is a rescaled copy; sCABACorig and sEP are references, so the count resets below modify *data itself.
+        SStat sCABAC;
+        {
+          int64_t thisCABACbits = sCABACorig.bits / es;
+          if (i == STATS__CABAC_INITIALISATION && sCABACorig.bits != 0)
+          {
+            thisCABACbits += cr;
+#if EPBINCOUNT_FIX
+            sCABACorig.count = 0;
+#endif
+            cr = 0;   // the remainder is added to one entry only
+          }
+          sCABAC.bits = thisCABACbits;
+          sCABAC.count = sCABACorig.count;
+          sCABAC.sum = sCABACorig.sum;
+          sCABAC.classCount = classCounts[i];
+        }
+#if EPBINCOUNT_FIX
+        if ( i == STATS__BYTE_ALIGNMENT_BITS || i == STATS__TRAILING_BITS || i == STATS__NAL_UNIT_HEADER_BITS || i == STATS__EMULATION_PREVENTION_3_BYTES )
+        {
+          sEP.count = 0;
+        }
+#endif
+
+        if( i != STATS__NAL_UNIT_TOTAL_BODY )
+        {
+          cabacTotalBits += sCABAC;
+          epTotalBits += sEP;
+
+         // For TRF
+         if ((i == STATS__CABAC_BITS__SIG_COEFF_MAP_FLAG) || (i == STATS__CABAC_BITS__PAR_FLAG)
+             || (i == STATS__CABAC_BITS__GT1_FLAG) || (i == STATS__CABAC_BITS__GT2_FLAG)
+             || (i == STATS__CABAC_BITS__ESCAPE_BITS))
+         {
+           cabacTrfTotalBits += sCABAC;
+           epTrfTotalBits += sEP;
+         }
+        }
+      }
+    }
+    // Increase since the previous call: current totals minus the totals stored at the end of that call.
+    SStat delta_CABAC = cabacTotalBits;
+    SStat delta_EP = epTotalBits;
+    SStat delta_trf_CABAC = cabacTrfTotalBits;
+    SStat delta_trf_EP = epTrfTotalBits;
+
+    delta_CABAC -= max.prev_CABAC_state;
+    delta_EP -= max.prev_EP_state;
+
+    delta_trf_CABAC -= max.prev_trf_CABAC_state;
+    delta_trf_EP -= max.prev_trf_EP_state;
+    int64_t max_frame_bins = EPBIN_WEIGHT_FACTOR * max.max_CABAC_state.count + max.max_EP_state.count;
+    int64_t cur_frame_bins = EPBIN_WEIGHT_FACTOR * delta_CABAC.count + delta_EP.count;
+    // Keep this call's deltas when its weighted count exceeds the stored maximum (presumably one call per frame - confirm at the call site).
+    if (cur_frame_bins > max_frame_bins)
+    {
+      max.max_CABAC_state = delta_CABAC;
+      max.max_EP_state = delta_EP;
+      max.trf_CABAC_state = delta_trf_CABAC;
+      max.trf_EP_state = delta_trf_EP;
+    }
+    // Store the current totals as the baseline for the next call.
+    max.prev_CABAC_state = cabacTotalBits;
+    max.prev_EP_state = epTotalBits;
+
+    max.prev_trf_CABAC_state = cabacTrfTotalBits;
+    max.prev_trf_EP_state = epTrfTotalBits;
+  }
+#endif
 };
 
 #endif
diff --git a/source/Lib/CommonLib/CodingStructure.cpp b/source/Lib/CommonLib/CodingStructure.cpp
index d45ca4e86b9575af64fe7806abd2d7738d985259..336daef4fdb05d365d34eff86c583b47ac09e3c8 100644
--- a/source/Lib/CommonLib/CodingStructure.cpp
+++ b/source/Lib/CommonLib/CodingStructure.cpp
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2019, ITU/ISO/IEC
+* Copyright (c) 2010-2020, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -67,15 +67,23 @@ CodingStructure::CodingStructure(CUCache& cuCache, PUCache& puCache, TUCache& tu
   , m_cuCache ( cuCache )
   , m_puCache ( puCache )
   , m_tuCache ( tuCache )
+  , bestParent ( nullptr )
+  , tmpColorSpaceCost(MAX_DOUBLE)
+  , firstColorSpaceSelected(true)
+  , resetIBCBuffer (false)
 {
   for( uint32_t i = 0; i < MAX_NUM_COMPONENT; i++ )
   {
     m_coeffs[ i ] = nullptr;
     m_pcmbuf[ i ] = nullptr;
-
     m_offsets[ i ] = 0;
   }
 
+  for (uint32_t i = 0; i < MAX_NUM_CHANNEL_TYPE; i++)
+  {
+    m_runType[i] = nullptr;
+  }
+
   for( uint32_t i = 0; i < MAX_NUM_CHANNEL_TYPE; i++ )
   {
     m_cuIdx   [ i ] = nullptr;
@@ -86,7 +94,11 @@ CodingStructure::CodingStructure(CUCache& cuCache, PUCache& puCache, TUCache& tu
 
   m_motionBuf     = nullptr;
   features.resize( NUM_ENC_FEATURES );
-
+  treeType = TREE_D;
+  modeType = MODE_TYPE_ALL;
+  tmpColorSpaceIntraCost[0] = MAX_DOUBLE;
+  tmpColorSpaceIntraCost[1] = MAX_DOUBLE;
+  firstColorSpaceTestOnly = false;
 }
 
 void CodingStructure::destroy()
@@ -183,14 +195,96 @@ void CodingStructure::setDecomp(const UnitArea &_area, const bool _isCoded /*= t
   }
 }
 
+const int CodingStructure::signalModeCons( const PartSplit split, Partitioner &partitioner, const ModeType modeTypeParent ) const
+{
+  // Returns LDT_MODE_TYPE_INHERIT, LDT_MODE_TYPE_INFER or LDT_MODE_TYPE_SIGNAL for the parts produced by applying 'split' to the current area.
+  const auto &curr = partitioner.currArea();
+  if( CS::isDualITree( *this ) || modeTypeParent != MODE_TYPE_ALL || curr.chromaFormat == CHROMA_444 || curr.chromaFormat == CHROMA_400 )
+  {
+    return LDT_MODE_TYPE_INHERIT;
+  }
+  const bool splitsInto3or4 = split == CU_QUAD_SPLIT || split == CU_TRIH_SPLIT || split == CU_TRIV_SPLIT;   // smallest part is a quarter of the area
+  const bool splitsInto2    = split == CU_VERT_SPLIT || split == CU_HORZ_SPLIT;                             // smallest part is half of the area
+  const int  minLumaArea    = int( curr.lumaSize().area() ) >> ( splitsInto3or4 ? 2 : ( splitsInto2 ? 1 : 0 ) );
+  const int  minChromaBlock = minLumaArea >> ( getChannelTypeScaleX( CHANNEL_TYPE_CHROMA, curr.chromaFormat ) + getChannelTypeScaleY( CHANNEL_TYPE_CHROMA, curr.chromaFormat ) );
+  const bool is2xNChroma    = ( curr.chromaSize().width == 4 && split == CU_VERT_SPLIT ) || ( curr.chromaSize().width == 8 && split == CU_TRIV_SPLIT );
+  if( minChromaBlock >= 16 && !is2xNChroma ) return LDT_MODE_TYPE_INHERIT;
+  if( minLumaArea < 32 || slice->isIntra() ) return LDT_MODE_TYPE_INFER;
+  return LDT_MODE_TYPE_SIGNAL;
+}
+
+void CodingStructure::clearCuPuTuIdxMap( const UnitArea &_area, uint32_t numCu, uint32_t numPu, uint32_t numTu, uint32_t* pOffset )
+{
+  // Zeroes the CU/PU/TU index maps over '_area' (clipped to the picture), hands the units beyond the first
+  // numCu/numPu/numTu back to their caches, and reloads the first three m_offsets entries from pOffset.
+  UnitArea       clipped = clipArea( _area, *picture );
+  const uint32_t chCount = ::getNumberValidChannels( _area.chromaFormat );
+  for( uint32_t ch = 0; ch < chCount; ++ch )
+  {
+    const CompArea  &selfComp = area.blocks[ch];
+    const CompArea  &clipComp = clipped.blocks[ch];
+    const UnitScale &scl      = unitScale[clipComp.compID];
+    const Area       selfArea = scl.scale( selfComp );
+    const Area       target   = scl.scale( clipComp );
+    const size_t     first    = rsAddr( target.pos(), selfArea.pos(), selfArea.width );
+
+    // the three maps are cleared over the same window: same start address, stride and size
+    AreaBuf<uint32_t>( m_cuIdx[ch] + first, selfArea.width, target.size() ).fill( 0 );
+    AreaBuf<uint32_t>( m_puIdx[ch] + first, selfArea.width, target.size() ).fill( 0 );
+    AreaBuf<uint32_t>( m_tuIdx[ch] + first, selfArea.width, target.size() ).fill( 0 );
+  }
+
+  // pop the surplus TUs, then PUs, then CUs (newest first) and return each to its cache
+  while( m_numTUs > numTu )
+  {
+    m_tuCache.cache( tus.back() );
+    tus.pop_back();
+    m_numTUs--;
+  }
+  while( m_numPUs > numPu )
+  {
+    m_puCache.cache( pus.back() );
+    pus.pop_back();
+    m_numPUs--;
+  }
+  while( m_numCUs > numCu )
+  {
+    m_cuCache.cache( cus.back() );
+    cus.pop_back();
+    m_numCUs--;
+  }
+
+  // reload the buffer offsets handed in by the caller
+  for( int comp = 0; comp < 3; comp++ )
+  {
+    m_offsets[comp] = pOffset[comp];
+  }
+}
+
+CodingUnit* CodingStructure::getLumaCU( const Position &pos )
+{ // Returns the luma CU registered at 'pos' in this structure's own index map (the parent is never consulted), or nullptr if there is none.
+  const ChannelType effChType = CHANNEL_TYPE_LUMA;
+  const CompArea &_blk = area.blocks[effChType];
+  CHECK( !_blk.contains( pos ), "must contain the pos" );
 
+  const unsigned idx = m_cuIdx[effChType][rsAddr( pos, _blk.pos(), _blk.width, unitScale[effChType] )];
+  // a map value of 0 means no CU is stored at this position; otherwise it is the index into cus plus one
+  if( idx != 0 ) return cus[idx - 1];
+  else           return nullptr;
+}
 
 CodingUnit* CodingStructure::getCU( const Position &pos, const ChannelType effChType )
 {
   const CompArea &_blk = area.blocks[effChType];
 
-  if( !_blk.contains( pos ) )
+  if( !_blk.contains( pos ) || (treeType == TREE_C && effChType == CHANNEL_TYPE_LUMA) )
   {
+    //keep this check, which is helpful to identify bugs
+    if( treeType == TREE_C && effChType == CHANNEL_TYPE_LUMA )
+    {
+      CHECK( parent == nullptr, "parent shall be valid; consider using function getLumaCU()" );
+      CHECK( parent->treeType != TREE_D, "wrong parent treeType " );
+    }
     if( parent ) return parent->getCU( pos, effChType );
     else         return nullptr;
   }
@@ -207,8 +301,13 @@ const CodingUnit* CodingStructure::getCU( const Position &pos, const ChannelType
 {
   const CompArea &_blk = area.blocks[effChType];
 
-  if( !_blk.contains( pos ) )
+  if( !_blk.contains( pos ) || (treeType == TREE_C && effChType == CHANNEL_TYPE_LUMA) )
   {
+    if( treeType == TREE_C && effChType == CHANNEL_TYPE_LUMA )
+    {
+      CHECK( parent == nullptr, "parent shall be valid; consider using function getLumaCU()" );
+      CHECK( parent->treeType != TREE_D, "wrong parent treeType" );
+    }
     if( parent ) return parent->getCU( pos, effChType );
     else         return nullptr;
   }
@@ -286,9 +385,11 @@ TransformUnit* CodingStructure::getTU( const Position &pos, const ChannelType ef
           }
           else
           {
-            while( pos != tus[idx - 1 + extraIdx]->blocks[getFirstComponentOfChannel( effChType )].pos() )
+            while( !tus[idx - 1 + extraIdx]->blocks[getFirstComponentOfChannel( effChType )].contains( pos ) )
             {
               extraIdx++;
+              CHECK( tus[idx - 1 + extraIdx]->cu->treeType == TREE_C, "tu searched by position points to a chroma tree CU" );
+              CHECK( extraIdx > 3, "extraIdx > 3" );
             }
           }
         }
@@ -327,9 +428,11 @@ const TransformUnit * CodingStructure::getTU( const Position &pos, const Channel
           }
           else
           {
-            while( pos != tus[idx - 1 + extraIdx]->blocks[effChType].pos() )
+            while ( !tus[idx - 1 + extraIdx]->blocks[getFirstComponentOfChannel( effChType )].contains(pos) )
             {
               extraIdx++;
+              CHECK( tus[idx - 1 + extraIdx]->cu->treeType == TREE_C, "tu searched by position points to a chroma tree CU" );
+              CHECK( extraIdx > 3, "extraIdx > 3" );
             }
           }
         }
@@ -355,13 +458,15 @@ CodingUnit& CodingStructure::addCU( const UnitArea &unit, const ChannelType chTy
   cu->firstTU   = nullptr;
   cu->lastTU    = nullptr;
   cu->chType    = chType;
+  cu->treeType = treeType;
+  cu->modeType = modeType;
 
   CodingUnit *prevCU = m_numCUs > 0 ? cus.back() : nullptr;
 
   if( prevCU )
   {
     prevCU->next = cu;
-#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM
+#if ENABLE_SPLIT_PARALLELISM
 
     CHECK( prevCU->cacheId != cu->cacheId, "Inconsintent cacheId between previous and current CU" );
 #endif
@@ -405,7 +510,7 @@ PredictionUnit& CodingStructure::addPU( const UnitArea &unit, const ChannelType
   pu->cs     = this;
   pu->cu     = m_isTuEnc ? cus[0] : getCU( unit.blocks[chType].pos(), chType );
   pu->chType = chType;
-#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM
+#if ENABLE_SPLIT_PARALLELISM
 
   CHECK( pu->cacheId != pu->cu->cacheId, "Inconsintent cacheId between the PU and assigned CU" );
   CHECK( pu->cu->firstPU != nullptr, "Without an RQT the firstPU should be null" );
@@ -416,7 +521,7 @@ PredictionUnit& CodingStructure::addPU( const UnitArea &unit, const ChannelType
   if( prevPU && prevPU->cu == pu->cu )
   {
     prevPU->next = pu;
-#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM
+#if ENABLE_SPLIT_PARALLELISM
 
     CHECK( prevPU->cacheId != pu->cacheId, "Inconsintent cacheId between previous and current PU" );
 #endif
@@ -466,7 +571,7 @@ TransformUnit& CodingStructure::addTU( const UnitArea &unit, const ChannelType c
   tu->cs     = this;
   tu->cu     = m_isTuEnc ? cus[0] : getCU( unit.blocks[chType].pos(), chType );
   tu->chType = chType;
-#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM
+#if ENABLE_SPLIT_PARALLELISM
 
   if( tu->cu )
     CHECK( tu->cacheId != tu->cu->cacheId, "Inconsintent cacheId between the TU and assigned CU" );
@@ -479,7 +584,7 @@ TransformUnit& CodingStructure::addTU( const UnitArea &unit, const ChannelType c
   {
     prevTU->next = tu;
     tu->prev     = prevTU;
-#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM
+#if ENABLE_SPLIT_PARALLELISM
 
     CHECK( prevTU->cacheId != tu->cacheId, "Inconsintent cacheId between previous and current TU" );
 #endif
@@ -501,12 +606,13 @@ TransformUnit& CodingStructure::addTU( const UnitArea &unit, const ChannelType c
 
   TCoeff *coeffs[5] = { nullptr, nullptr, nullptr, nullptr, nullptr };
   Pel    *pcmbuf[5] = { nullptr, nullptr, nullptr, nullptr, nullptr };
+  bool   *runType[5]   = { nullptr, nullptr, nullptr, nullptr, nullptr };
 
   uint32_t numCh = ::getNumberValidComponents( area.chromaFormat );
 
-  for( uint32_t i = 0; i < numCh; i++ )
+  for (uint32_t i = 0; i < numCh; i++)
   {
-    if( !tu->blocks[i].valid() )
+    if (!tu->blocks[i].valid())
     {
       continue;
     }
@@ -514,35 +620,39 @@ TransformUnit& CodingStructure::addTU( const UnitArea &unit, const ChannelType c
     if (i < ::getNumberValidChannels(area.chromaFormat))
     {
       const CompArea &_selfBlk = area.blocks[i];
-      const CompArea     &_blk = tu-> blocks[i];
+      const CompArea     &_blk = tu->blocks[i];
 
-      bool isIspTu = tu->cu != nullptr && tu->cu->ispMode && isLuma( _blk.compID );
+      bool isIspTu = tu->cu != nullptr && tu->cu->ispMode && isLuma(_blk.compID);
 
       bool isFirstIspTu = false;
-      if( isIspTu )
+      if (isIspTu)
       {
-        isFirstIspTu = CU::isISPFirst( *tu->cu, _blk, getFirstComponentOfChannel( ChannelType( i ) ) );
+        isFirstIspTu = CU::isISPFirst(*tu->cu, _blk, getFirstComponentOfChannel(ChannelType(i)));
       }
-      if( !isIspTu || isFirstIspTu )
+      if (!isIspTu || isFirstIspTu)
       {
         const UnitScale& scale = unitScale[_blk.compID];
 
-        const Area scaledSelf  = scale.scale( _selfBlk );
-        const Area scaledBlk   = isIspTu ? scale.scale( tu->cu->blocks[i] ) : scale.scale( _blk );
-        unsigned *idxPtr       = m_tuIdx[i] + rsAddr( scaledBlk.pos(), scaledSelf.pos(), scaledSelf.width );
-        CHECK( *idxPtr, "Overwriting a pre-existing value, should be '0'!" );
-        AreaBuf<uint32_t>( idxPtr, scaledSelf.width, scaledBlk.size() ).fill( idx );
+        const Area scaledSelf = scale.scale(_selfBlk);
+        const Area scaledBlk = isIspTu ? scale.scale(tu->cu->blocks[i]) : scale.scale(_blk);
+        unsigned *idxPtr = m_tuIdx[i] + rsAddr(scaledBlk.pos(), scaledSelf.pos(), scaledSelf.width);
+        CHECK(*idxPtr, "Overwriting a pre-existing value, should be '0'!");
+        AreaBuf<uint32_t>(idxPtr, scaledSelf.width, scaledBlk.size()).fill(idx);
       }
     }
 
     coeffs[i] = m_coeffs[i] + m_offsets[i];
     pcmbuf[i] = m_pcmbuf[i] + m_offsets[i];
 
+    if (i < MAX_NUM_CHANNEL_TYPE)
+    {
+      if (m_runType[i] != nullptr) runType[i] = m_runType[i] + m_offsets[i];
+    }
+
     unsigned areaSize = tu->blocks[i].area();
     m_offsets[i] += areaSize;
   }
-
-  tu->init( coeffs, pcmbuf );
+  tu->init(coeffs, pcmbuf, runType);
 
   return *tu;
 }
@@ -551,8 +661,41 @@ CUTraverser CodingStructure::traverseCUs( const UnitArea& unit, const ChannelTyp
 {
   CodingUnit* firstCU = getCU( isLuma( effChType ) ? unit.lumaPos() : unit.chromaPos(), effChType );
   CodingUnit* lastCU = firstCU;
-
+  if( !CS::isDualITree( *this ) ) //for a more generalized separate tree
+  {
+    bool bContinue = true;
+    CodingUnit* currCU = firstCU;
+    while( bContinue )
+    {
+      if( currCU == nullptr )
+      {
+        bContinue = false;
+        lastCU = currCU;
+      }
+      else if( currCU->chType != effChType )
+      {
+        lastCU = currCU;
+        currCU = currCU->next;
+      }
+      else
+      {
+        if( unit.contains( *currCU ) )
+        {
+          lastCU = currCU;
+          currCU = currCU->next;
+        }
+        else
+        {
+          bContinue = false;
+          lastCU = currCU;
+        }
+      }
+    }
+  }
+  else
+  {
   do { } while( lastCU && ( lastCU = lastCU->next ) && unit.contains( *lastCU ) );
+  }
 
   return CUTraverser( firstCU, lastCU );
 }
@@ -621,9 +764,9 @@ void CodingStructure::allocateVectorsAtPicLevel()
 
 
 
-void CodingStructure::create(const ChromaFormat &_chromaFormat, const Area& _area, const bool isTopLayer)
+void CodingStructure::create(const ChromaFormat &_chromaFormat, const Area& _area, const bool isTopLayer, const bool isPLTused)
 {
-  createInternals( UnitArea( _chromaFormat, _area ), isTopLayer );
+  createInternals(UnitArea(_chromaFormat, _area), isTopLayer, isPLTused);
 
   if( isTopLayer ) return;
 
@@ -633,9 +776,9 @@ void CodingStructure::create(const ChromaFormat &_chromaFormat, const Area& _are
   m_orgr.create( area );
 }
 
-void CodingStructure::create(const UnitArea& _unit, const bool isTopLayer)
+void CodingStructure::create(const UnitArea& _unit, const bool isTopLayer, const bool isPLTused)
 {
-  createInternals( _unit, isTopLayer );
+  createInternals(_unit, isTopLayer, isPLTused);
 
   if( isTopLayer ) return;
 
@@ -645,7 +788,7 @@ void CodingStructure::create(const UnitArea& _unit, const bool isTopLayer)
   m_orgr.create( area );
 }
 
-void CodingStructure::createInternals( const UnitArea& _unit, const bool isTopLayer )
+void CodingStructure::createInternals(const UnitArea& _unit, const bool isTopLayer, const bool isPLTused)
 {
   area = _unit;
 
@@ -673,7 +816,7 @@ void CodingStructure::createInternals( const UnitArea& _unit, const bool isTopLa
     m_offsets[i] = 0;
   }
 
-  if( !isTopLayer ) createCoeffs();
+  if( !isTopLayer ) createCoeffs(isPLTused);
 
   unsigned _lumaAreaScaled = g_miScaling.scale( area.lumaSize() ).area();
   m_motionBuf       = new MotionInfo[_lumaAreaScaled];
@@ -705,6 +848,66 @@ void CodingStructure::addMiToLut(static_vector<MotionInfo, MAX_NUM_HMVP_CANDS> &
   lut.push_back(mi);
 }
 
+void CodingStructure::resetPrevPLT(PLTBuf& prevPLT)
+{
+  for( int chType = 0; chType < MAX_NUM_CHANNEL_TYPE; ++chType )   // set the stored size of every channel type to zero
+  {
+    prevPLT.curPLTSize[chType] = 0;
+  }
+  // zero the first MAXPLTPREDSIZE entries of every component's table
+  for( int compID = 0; compID < MAX_NUM_COMPONENT; ++compID )
+  {
+    memset( prevPLT.curPLT[compID], 0, sizeof( Pel ) * MAXPLTPREDSIZE );
+  }
+}
+
+void CodingStructure::reorderPrevPLT(PLTBuf& prevPLT, uint8_t curPLTSize[MAX_NUM_CHANNEL_TYPE], Pel curPLT[MAX_NUM_COMPONENT][MAXPLTSIZE], bool reuseflag[MAX_NUM_CHANNEL_TYPE][MAXPLTPREDSIZE], uint32_t compBegin, uint32_t numComp, bool jointPLT)
+{ // Rebuilds prevPLT as: the entries of curPLT first, followed by the old prevPLT entries whose reuseflag is false.
+  Pel stuffedPLT[MAX_NUM_COMPONENT][MAXPLTPREDSIZE];   // merged tables under construction
+  uint8_t tempCurPLTsize[MAX_NUM_CHANNEL_TYPE];        // copy of curPLTSize, indexed by comID
+  uint8_t stuffPLTsize[MAX_NUM_COMPONENT];             // number of old entries appended so far
+  // Step 1: seed the merged tables with the current entries of each component in [compBegin, compBegin + numComp).
+  for (int i = compBegin; i < (compBegin + numComp); i++)
+  {
+    ComponentID comID = jointPLT ? (ComponentID)compBegin : ((i > 0) ? COMPONENT_Cb : COMPONENT_Y);   // slot used for the size and reuse-flag arrays
+    tempCurPLTsize[comID] = curPLTSize[comID];
+    stuffPLTsize[i] = 0;
+    memcpy(stuffedPLT[i], curPLT[i], curPLTSize[comID] * sizeof(Pel));
+  }
+  // Step 2: append old entries that were not reused, stopping once a table holds MAXPLTPREDSIZE entries.
+  for (int ch = compBegin; ch < (compBegin + numComp); ch++)
+  {
+    ComponentID comID = jointPLT ? (ComponentID)compBegin : ((ch > 0) ? COMPONENT_Cb : COMPONENT_Y);
+    if (ch > 1) break;   // component 2 is filled together with component 1 in the else branch below
+    for (int i = 0; i < prevPLT.curPLTSize[comID]; i++)
+    {
+      if (tempCurPLTsize[comID] + stuffPLTsize[ch] >= MAXPLTPREDSIZE)
+        break;
+
+      if (!reuseflag[comID][i])
+      {
+        if (ch == COMPONENT_Y)
+        {
+          stuffedPLT[0][tempCurPLTsize[comID] + stuffPLTsize[ch]] = prevPLT.curPLT[0][i];
+        }
+        else
+        {
+          stuffedPLT[1][tempCurPLTsize[comID] + stuffPLTsize[ch]] = prevPLT.curPLT[1][i];
+          stuffedPLT[2][tempCurPLTsize[comID] + stuffPLTsize[ch]] = prevPLT.curPLT[2][i];
+        }
+        stuffPLTsize[ch]++;
+      }
+    }
+  }
+  // Step 3: write the merged sizes and tables back into prevPLT.
+  for (int i = compBegin; i < (compBegin + numComp); i++)
+  {
+    ComponentID comID = jointPLT ? (ComponentID)compBegin : ((i > 0) ? COMPONENT_Cb : COMPONENT_Y);
+    prevPLT.curPLTSize[comID] = curPLTSize[comID] + stuffPLTsize[comID];   // appended count is read at index comID, not i
+    memcpy(prevPLT.curPLT[i], stuffedPLT[i], prevPLT.curPLTSize[comID] * sizeof(Pel));
+  }
+}
+
 void CodingStructure::rebindPicBufs()
 {
   CHECK( parent, "rebindPicBufs can only be used for the top level CodingStructure" );
@@ -722,7 +925,7 @@ void CodingStructure::rebindPicBufs()
   }
 }
 
-void CodingStructure::createCoeffs()
+void CodingStructure::createCoeffs(const bool isPLTused)
 {
   const unsigned numCh = getNumberValidComponents( area.chromaFormat );
 
@@ -733,6 +936,16 @@ void CodingStructure::createCoeffs()
     m_coeffs[i] = _area > 0 ? ( TCoeff* ) xMalloc( TCoeff, _area ) : nullptr;
     m_pcmbuf[i] = _area > 0 ? ( Pel*    ) xMalloc( Pel,    _area ) : nullptr;
   }
+
+  if (isPLTused)
+  {
+    for (unsigned i = 0; i < numCh - 1; i++)
+    {
+      unsigned _area = area.blocks[i].area();
+
+      m_runType[i] = _area > 0 ? (bool*)xMalloc(bool, _area) : nullptr;
+    }
+  }
 }
 
 void CodingStructure::destroyCoeffs()
@@ -742,6 +955,11 @@ void CodingStructure::destroyCoeffs()
     if( m_coeffs[i] ) { xFree( m_coeffs[i] ); m_coeffs[i] = nullptr; }
     if( m_pcmbuf[i] ) { xFree( m_pcmbuf[i] ); m_pcmbuf[i] = nullptr; }
   }
+
+  for (uint32_t i = 0; i < MAX_NUM_CHANNEL_TYPE; i++)
+  {
+    if (m_runType[i]) { xFree(m_runType[i]);   m_runType[i] = nullptr; }
+  }
 }
 
 void CodingStructure::initSubStructure( CodingStructure& subStruct, const ChannelType _chType, const UnitArea &subArea, const bool &isTuEnc )
@@ -768,11 +986,14 @@ void CodingStructure::initSubStructure( CodingStructure& subStruct, const Channe
   subStruct.picture   = picture;
 
   subStruct.sps       = sps;
-#if HEVC_VPS
   subStruct.vps       = vps;
-#endif
   subStruct.pps       = pps;
-  subStruct.aps       = aps;
+  subStruct.picHeader = picHeader;
+  memcpy(subStruct.alfApss, alfApss, sizeof(alfApss));
+
+  subStruct.lmcsAps = lmcsAps;
+  subStruct.scalinglistAps = scalinglistAps;
+
   subStruct.slice     = slice;
   subStruct.baseQP    = baseQP;
   subStruct.prevQP[_chType]
@@ -783,7 +1004,12 @@ void CodingStructure::initSubStructure( CodingStructure& subStruct, const Channe
 
   subStruct.motionLut = motionLut;
 
-  subStruct.initStructData( currQP[_chType], isLossless );
+  subStruct.prevPLT = prevPLT;
+
+  subStruct.treeType  = treeType;
+  subStruct.modeType  = modeType;
+
+  subStruct.initStructData( currQP[_chType] );
 
   if( isTuEnc )
   {
@@ -844,75 +1070,8 @@ void CodingStructure::useSubStructure( const CodingStructure& subStruct, const C
 
     motionLut = subStruct.motionLut;
   }
-#if ENABLE_WPP_PARALLELISM
-
-  if( nullptr == parent )
-  {
-#pragma omp critical
-    {
-      fracBits += subStruct.fracBits;
-      dist     += subStruct.dist;
-      cost     += subStruct.cost;
-      costDbOffset += subStruct.costDbOffset;
-      if( parent )
-      {
-        // allow this to be false at the top level
-        CHECKD( !area.contains( subArea ), "Trying to use a sub-structure not contained in self" );
-      }
-
-      // copy the CUs over
-      if( subStruct.m_isTuEnc )
-      {
-        // don't copy if the substruct was created for encoding of the TUs
-      }
-      else
-      {
-        for( const auto &pcu : subStruct.cus )
-        {
-          // add an analogue CU into own CU store
-          const UnitArea &cuPatch = *pcu;
-
-          CodingUnit &cu = addCU( cuPatch, chType );
-
-          // copy the CU info from subPatch
-          cu = *pcu;
-        }
-      }
-
-      // copy the PUs over
-      if( subStruct.m_isTuEnc )
-      {
-        // don't copy if the substruct was created for encoding of the TUs
-      }
-      else
-      {
-        for( const auto &ppu : subStruct.pus )
-        {
-          // add an analogue PU into own PU store
-          const UnitArea &puPatch = *ppu;
+  prevPLT = subStruct.prevPLT;
 
-          PredictionUnit &pu = addPU( puPatch, chType );
-
-          // copy the PU info from subPatch
-          pu = *ppu;
-        }
-      }
-      // copy the TUs over
-      for( const auto &ptu : subStruct.tus )
-      {
-        // add an analogue TU into own TU store
-        const UnitArea &tuPatch = *ptu;
-
-        TransformUnit &tu = addTU( tuPatch, chType );
-
-        // copy the TU info from subPatch
-        tu = *ptu;
-      }
-    }
-
-    return;
-  }
-#endif
 
   fracBits += subStruct.fracBits;
   dist     += subStruct.dist;
@@ -935,8 +1094,7 @@ void CodingStructure::useSubStructure( const CodingStructure& subStruct, const C
     {
       // add an analogue CU into own CU store
       const UnitArea &cuPatch = *pcu;
-
-      CodingUnit &cu = addCU( cuPatch, chType );
+      CodingUnit &cu = addCU( cuPatch, pcu->chType );
 
       // copy the CU info from subPatch
       cu = *pcu;
@@ -954,8 +1112,7 @@ void CodingStructure::useSubStructure( const CodingStructure& subStruct, const C
     {
       // add an analogue PU into own PU store
       const UnitArea &puPatch = *ppu;
-
-      PredictionUnit &pu = addPU( puPatch, chType );
+      PredictionUnit &pu = addPU( puPatch, ppu->chType );
 
       // copy the PU info from subPatch
       pu = *ppu;
@@ -966,8 +1123,7 @@ void CodingStructure::useSubStructure( const CodingStructure& subStruct, const C
   {
     // add an analogue TU into own TU store
     const UnitArea &tuPatch = *ptu;
-
-    TransformUnit &tu = addTU( tuPatch, chType );
+    TransformUnit &tu = addTU( tuPatch, ptu->chType );
 
     // copy the TU info from subPatch
     tu = *ptu;
@@ -1026,6 +1182,7 @@ void CodingStructure::copyStructure( const CodingStructure& other, const Channel
 
     motionLut = other.motionLut;
   }
+  prevPLT = other.prevPLT;
 
   if( copyTUs )
   {
@@ -1079,7 +1236,7 @@ void CodingStructure::copyStructure( const CodingStructure& other, const Channel
   }
 }
 
-void CodingStructure::initStructData( const int &QP, const bool &_isLosses, const bool &skipMotBuf )
+void CodingStructure::initStructData( const int &QP, const bool &skipMotBuf )
 {
   clearPUs();
   clearTUs();
@@ -1088,7 +1245,6 @@ void CodingStructure::initStructData( const int &QP, const bool &_isLosses, cons
   if( QP < MAX_INT )
   {
     currQP[0] = currQP[1] = QP;
-    isLossless            = _isLosses;
   }
 
   if (!skipMotBuf && (!parent || ((!slice->isIntra() || slice->getSPS()->getIBCFlag()) && !m_isTuEnc)))
@@ -1338,15 +1494,14 @@ const CPelUnitBuf CodingStructure::getBuf( const UnitArea &unit, const PictureTy
 const CodingUnit* CodingStructure::getCURestricted( const Position &pos, const CodingUnit& curCu, const ChannelType _chType ) const
 {
   const CodingUnit* cu = getCU( pos, _chType );
-#if HEVC_TILES_WPP
   // exists       same slice and tile                  cu precedes curCu in encoding order
   //                                                  (thus, is either from parent CS in RD-search or its index is lower)
-  if( cu && CU::isSameSliceAndTile( *cu, curCu ) && ( cu->cs != curCu.cs || cu->idx <= curCu.idx ) )
-#else
-  // exists       same slice                          cu precedes curCu in encoding order
-  //                                                  (thus, is either from parent CS in RD-search or its index is lower)
-  if(cu && CU::isSameSlice(*cu, curCu) && (cu->cs != curCu.cs || cu->idx <= curCu.idx))
-#endif
+  const bool wavefrontsEnabled = curCu.slice->getPPS()->getEntropyCodingSyncEnabledFlag();
+  int ctuSizeBit = floorLog2(curCu.cs->sps->getMaxCUWidth());
+  int xNbY  = pos.x << getChannelTypeScaleX( _chType, curCu.chromaFormat );
+  int xCurr = curCu.blocks[_chType].x << getChannelTypeScaleX( _chType, curCu.chromaFormat );
+  bool addCheck = (wavefrontsEnabled && (xNbY >> ctuSizeBit) >= (xCurr >> ctuSizeBit) + 1 ) ? false : true;
+  if( cu && CU::isSameSliceAndTile( *cu, curCu ) && ( cu->cs != curCu.cs || cu->idx <= curCu.idx ) && addCheck)
   {
     return cu;
   }
@@ -1356,32 +1511,28 @@ const CodingUnit* CodingStructure::getCURestricted( const Position &pos, const C
   }
 }
 
-#if HEVC_TILES_WPP
-const CodingUnit* CodingStructure::getCURestricted( const Position &pos, const unsigned curSliceIdx, const unsigned curTileIdx, const ChannelType _chType ) const
+const CodingUnit* CodingStructure::getCURestricted( const Position &pos, const Position curPos, const unsigned curSliceIdx, const unsigned curTileIdx, const ChannelType _chType ) const
 {
   const CodingUnit* cu = getCU( pos, _chType );
-  return ( cu && cu->slice->getIndependentSliceIdx() == curSliceIdx && cu->tileIdx == curTileIdx ) ? cu : nullptr;
-}
-#else
-const CodingUnit* CodingStructure::getCURestricted(const Position &pos, const unsigned curSliceIdx, const ChannelType _chType) const
-{
-  const CodingUnit* cu = getCU(pos, _chType);
-  return (cu && cu->slice->getIndependentSliceIdx() == curSliceIdx ) ? cu : nullptr;
+  const bool wavefrontsEnabled = this->slice->getPPS()->getEntropyCodingSyncEnabledFlag();
+  int ctuSizeBit = floorLog2(this->sps->getMaxCUWidth());
+  int xNbY  = pos.x << getChannelTypeScaleX( _chType, this->area.chromaFormat );
+  int xCurr = curPos.x << getChannelTypeScaleX( _chType, this->area.chromaFormat );
+  bool addCheck = (wavefrontsEnabled && (xNbY >> ctuSizeBit) >= (xCurr >> ctuSizeBit) + 1 ) ? false : true;
+  return ( cu && cu->slice->getIndependentSliceIdx() == curSliceIdx && cu->tileIdx == curTileIdx && addCheck ) ? cu : nullptr;
 }
-#endif
 
 const PredictionUnit* CodingStructure::getPURestricted( const Position &pos, const PredictionUnit& curPu, const ChannelType _chType ) const
 {
   const PredictionUnit* pu = getPU( pos, _chType );
-#if HEVC_TILES_WPP
   // exists       same slice and tile                  pu precedes curPu in encoding order
   //                                                  (thus, is either from parent CS in RD-search or its index is lower)
-  if( pu && CU::isSameSliceAndTile( *pu->cu, *curPu.cu ) && ( pu->cs != curPu.cs || pu->idx <= curPu.idx ) )
-#else
-  // exists       same slice                           pu precedes curPu in encoding order
-  //                                                  (thus, is either from parent CS in RD-search or its index is lower)
-  if(pu && CU::isSameSlice(*pu->cu, *curPu.cu) && (pu->cs != curPu.cs || pu->idx <= curPu.idx))
-#endif
+  const bool wavefrontsEnabled = curPu.cu->slice->getPPS()->getEntropyCodingSyncEnabledFlag();
+  int ctuSizeBit = floorLog2(curPu.cs->sps->getMaxCUWidth());
+  int xNbY  = pos.x << getChannelTypeScaleX( _chType, curPu.chromaFormat );
+  int xCurr = curPu.blocks[_chType].x << getChannelTypeScaleX( _chType, curPu.chromaFormat );
+  bool addCheck = (wavefrontsEnabled && (xNbY >> ctuSizeBit) >= (xCurr >> ctuSizeBit) + 1 ) ? false : true;
+  if( pu && CU::isSameSliceAndTile( *pu->cu, *curPu.cu ) && ( pu->cs != curPu.cs || pu->idx <= curPu.idx ) && addCheck )
   {
     return pu;
   }
@@ -1394,15 +1545,14 @@ const PredictionUnit* CodingStructure::getPURestricted( const Position &pos, con
 const TransformUnit* CodingStructure::getTURestricted( const Position &pos, const TransformUnit& curTu, const ChannelType _chType ) const
 {
   const TransformUnit* tu = getTU( pos, _chType );
-#if HEVC_TILES_WPP
   // exists       same slice and tile                  tu precedes curTu in encoding order
   //                                                  (thus, is either from parent CS in RD-search or its index is lower)
-  if( tu && CU::isSameSliceAndTile( *tu->cu, *curTu.cu ) && ( tu->cs != curTu.cs || tu->idx <= curTu.idx ) )
-#else
-  // exists       same slice                           tu precedes curTu in encoding order
-  //                                                  (thus, is either from parent CS in RD-search or its index is lower)
-  if(tu && CU::isSameSlice(*tu->cu, *curTu.cu) && (tu->cs != curTu.cs || tu->idx <= curTu.idx))
-#endif
+  const bool wavefrontsEnabled = curTu.cu->slice->getPPS()->getEntropyCodingSyncEnabledFlag();
+  int ctuSizeBit = floorLog2(curTu.cs->sps->getMaxCUWidth());
+  int xNbY  = pos.x << getChannelTypeScaleX( _chType, curTu.chromaFormat );
+  int xCurr = curTu.blocks[_chType].x << getChannelTypeScaleX( _chType, curTu.chromaFormat );
+  bool addCheck = (wavefrontsEnabled && (xNbY >> ctuSizeBit) >= (xCurr >> ctuSizeBit) + 1 ) ? false : true;
+  if( tu && CU::isSameSliceAndTile( *tu->cu, *curTu.cu ) && ( tu->cs != curTu.cs || tu->idx <= curTu.idx ) && addCheck )
   {
     return tu;
   }
@@ -1412,36 +1562,3 @@ const TransformUnit* CodingStructure::getTURestricted( const Position &pos, cons
   }
 }
 
-IbcLumaCoverage CodingStructure::getIbcLumaCoverage(const CompArea& chromaArea) const
-{
-  CHECK(chType != CHANNEL_TYPE_CHROMA, "Error");
-
-  const unsigned int unitAreaSubBlock = MIN_PU_SIZE * MIN_PU_SIZE;
-  CompArea lumaArea = CompArea(COMPONENT_Y, chromaArea.chromaFormat, chromaArea.lumaPos(), recalcSize(chromaArea.chromaFormat, CHANNEL_TYPE_CHROMA, CHANNEL_TYPE_LUMA, chromaArea.size()));
-  lumaArea = clipArea(lumaArea, picture->block(COMPONENT_Y));
-  const unsigned int fullArea = lumaArea.area();
-  unsigned int ibcArea = 0;
-  for (SizeType y = 0; y < lumaArea.height; y += MIN_PU_SIZE)
-  {
-    for (SizeType x = 0; x < lumaArea.width; x += MIN_PU_SIZE)
-    {
-      Position pos = lumaArea.offset(x, y);
-      if (picture->cs->getMotionInfo(pos).isInter) // need to change if inter slice allows dualtree
-      {
-        ibcArea += unitAreaSubBlock;
-      }
-    }
-  }
-
-  IbcLumaCoverage coverage = IBC_LUMA_COVERAGE_FULL;
-  if (ibcArea == 0)
-  {
-    coverage = IBC_LUMA_COVERAGE_NONE;
-  }
-  else if (ibcArea < fullArea)
-  {
-    coverage = IBC_LUMA_COVERAGE_PARTIAL;
-  }
-
-  return coverage;
-}
diff --git a/source/Lib/CommonLib/CodingStructure.h b/source/Lib/CommonLib/CodingStructure.h
index 99556f67dccb85c6a7167cac72e6fb1c3e162d34..317e330ba33e86f62432527e1ab5182a0c0ad072 100644
--- a/source/Lib/CommonLib/CodingStructure.h
+++ b/source/Lib/CommonLib/CodingStructure.h
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2019, ITU/ISO/IEC
+* Copyright (c) 2010-2020, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -57,15 +57,11 @@ enum PictureType
   PIC_PREDICTION,
   PIC_RESIDUAL,
   PIC_ORG_RESI,
+  PIC_RECON_WRAP,
+  PIC_ORIGINAL_INPUT,
+  PIC_TRUE_ORIGINAL_INPUT,
   NUM_PIC_TYPES
 };
-enum IbcLumaCoverage
-{
-  IBC_LUMA_COVERAGE_FULL = 0,
-  IBC_LUMA_COVERAGE_PARTIAL,
-  IBC_LUMA_COVERAGE_NONE,
-  NUM_IBC_LUMA_COVERAGE,
-};
 extern XUCache g_globalUnitCache;
 
 // ---------------------------------------------------------------------------
@@ -84,31 +80,31 @@ public:
   Slice           *slice;
 
   UnitScale        unitScale[MAX_NUM_COMPONENT];
-  ChannelType chType;
 
   int         baseQP;
   int         prevQP[MAX_NUM_CHANNEL_TYPE];
   int         currQP[MAX_NUM_CHANNEL_TYPE];
   int         chromaQpAdj;
-  Position    sharedBndPos;
-  Size        sharedBndSize;
-  bool        isLossless;
   const SPS *sps;
   const PPS *pps;
-  APS *      aps;
-#if HEVC_VPS
+  PicHeader *picHeader;
+  APS*       alfApss[ALF_CTB_MAX_NUM_APS];
+  APS *      lmcsAps;
+  APS *      scalinglistAps;
   const VPS *vps;
-#endif
   const PreCalcValues* pcv;
 
   CodingStructure(CUCache&, PUCache&, TUCache&);
-  void create( const UnitArea &_unit, const bool isTopLayer );
-  void create( const ChromaFormat &_chromaFormat, const Area& _area, const bool isTopLayer );
+
+  void create(const UnitArea &_unit, const bool isTopLayer, const bool isPLTused);
+  void create(const ChromaFormat &_chromaFormat, const Area& _area, const bool isTopLayer, const bool isPLTused);
+
   void destroy();
   void releaseIntermediateData();
 
   void rebindPicBufs();
-  void createCoeffs();
+
+  void createCoeffs(const bool isPLTused);
   void destroyCoeffs();
 
   void allocateVectorsAtPicLevel();
@@ -127,6 +123,7 @@ public:
   const TransformUnit  *getTU(const Position &pos, const ChannelType _chType, const int subTuIdx = -1) const;
 
   CodingUnit     *getCU(const Position &pos, const ChannelType _chType);
+  CodingUnit     *getLumaCU( const Position &pos );
   PredictionUnit *getPU(const Position &pos, const ChannelType _chType);
   TransformUnit  *getTU(const Position &pos, const ChannelType _chType, const int subTuIdx = -1);
 
@@ -138,11 +135,7 @@ public:
   PredictionUnit *getPU(const ChannelType &_chType ) { return getPU(area.blocks[_chType].pos(), _chType); }
   TransformUnit  *getTU(const ChannelType &_chType ) { return getTU(area.blocks[_chType].pos(), _chType); }
 
-#if HEVC_TILES_WPP
-  const CodingUnit     *getCURestricted(const Position &pos, const unsigned curSliceIdx, const unsigned curTileIdx, const ChannelType _chType) const;
-#else
-  const CodingUnit     *getCURestricted(const Position &pos, const unsigned curSliceIdx,                            const ChannelType _chType) const;
-#endif
+  const CodingUnit     *getCURestricted(const Position &pos, const Position curPos, const unsigned curSliceIdx, const unsigned curTileIdx, const ChannelType _chType) const;
   const CodingUnit     *getCURestricted(const Position &pos, const CodingUnit& curCu,                               const ChannelType _chType) const;
   const PredictionUnit *getPURestricted(const Position &pos, const PredictionUnit& curPu,                           const ChannelType _chType) const;
   const TransformUnit  *getTURestricted(const Position &pos, const TransformUnit& curTu,                            const ChannelType _chType) const;
@@ -158,7 +151,6 @@ public:
   cCUTraverser    traverseCUs(const UnitArea& _unit, const ChannelType _chType) const;
   cPUTraverser    traversePUs(const UnitArea& _unit, const ChannelType _chType) const;
   cTUTraverser    traverseTUs(const UnitArea& _unit, const ChannelType _chType) const;
-  IbcLumaCoverage getIbcLumaCoverage(const CompArea& chromaArea) const;
   // ---------------------------------------------------------------------------
   // encoding search utilities
   // ---------------------------------------------------------------------------
@@ -172,8 +164,10 @@ public:
   uint64_t      fracBits;
   Distortion  dist;
   Distortion  interHad;
+  TreeType    treeType; // the partitioner is not available deep inside TU and CU coding (e.g., addCU()), so a separate variable is needed to indicate the treeType
+  ModeType    modeType;
 
-  void initStructData  (const int &QP = MAX_INT, const bool &_isLosses = false, const bool &skipMotBuf = false);
+  void initStructData  (const int &QP = MAX_INT, const bool &skipMotBuf = false);
   void initSubStructure(      CodingStructure& cs, const ChannelType chType, const UnitArea &subArea, const bool &isTuEnc);
 
   void copyStructure   (const CodingStructure& cs, const ChannelType chType, const bool copyTUs = false, const bool copyRecoBuffer = false);
@@ -183,10 +177,17 @@ public:
   void clearTUs();
   void clearPUs();
   void clearCUs();
+  const int signalModeCons( const PartSplit split, Partitioner &partitioner, const ModeType modeTypeParent ) const;
+  void clearCuPuTuIdxMap  ( const UnitArea &_area, uint32_t numCu, uint32_t numPu, uint32_t numTu, uint32_t* pOffset );
+  void getNumCuPuTuOffset ( uint32_t* pArray )
+  {
+    pArray[0] = m_numCUs;     pArray[1] = m_numPUs;     pArray[2] = m_numTUs;
+    pArray[3] = m_offsets[0]; pArray[4] = m_offsets[1]; pArray[5] = m_offsets[2];
+  }
 
 
 private:
-  void createInternals(const UnitArea& _unit, const bool isTopLayer);
+  void createInternals(const UnitArea& _unit, const bool isTopLayer, const bool isPLTused);
 
 public:
 
@@ -198,6 +199,10 @@ public:
 
   void addMiToLut(static_vector<MotionInfo, MAX_NUM_HMVP_CANDS>& lut, const MotionInfo &mi);
 
+  PLTBuf prevPLT;
+  void resetPrevPLT(PLTBuf& prevPLT);
+  void reorderPrevPLT(PLTBuf& prevPLT, uint8_t curPLTSize[MAX_NUM_CHANNEL_TYPE], Pel curPLT[MAX_NUM_COMPONENT][MAXPLTSIZE], bool reuseflag[MAX_NUM_CHANNEL_TYPE][MAXPLTPREDSIZE], uint32_t compBegin, uint32_t numComp, bool jointPLT);
+
 private:
 
   // needed for TU encoding
@@ -225,12 +230,18 @@ private:
 
   TCoeff *m_coeffs [ MAX_NUM_COMPONENT ];
   Pel    *m_pcmbuf [ MAX_NUM_COMPONENT ];
-
+  bool   *m_runType[ MAX_NUM_CHANNEL_TYPE ];
   int     m_offsets[ MAX_NUM_COMPONENT ];
 
   MotionInfo *m_motionBuf;
 
 public:
+  CodingStructure *bestParent;
+  double        tmpColorSpaceCost;
+  bool          firstColorSpaceSelected;
+  double        tmpColorSpaceIntraCost[2];
+  bool          firstColorSpaceTestOnly;
+  bool resetIBCBuffer;
 
   MotionBuf getMotionBuf( const     Area& _area );
   MotionBuf getMotionBuf( const UnitArea& _area ) { return getMotionBuf( _area.Y() ); }
@@ -262,6 +273,7 @@ public:
   const CPelBuf       getRecoBuf(const CompArea &blk) const;
          PelUnitBuf   getRecoBuf(const UnitArea &unit);
   const CPelUnitBuf   getRecoBuf(const UnitArea &unit) const;
+         PelUnitBuf&  getRecoBufRef() { return m_reco; }
 
          PelBuf       getOrgResiBuf(const CompArea &blk);
   const CPelBuf       getOrgResiBuf(const CompArea &blk) const;
@@ -314,8 +326,5 @@ private:
 
 static inline uint32_t getNumberValidTBlocks(const PreCalcValues& pcv) { return (pcv.chrFormat==CHROMA_400) ? 1 : ( pcv.multiBlock422 ? MAX_NUM_TBLOCKS : MAX_NUM_COMPONENT ); }
 
-inline unsigned toWSizeIdx( const CodingStructure* cs ) { return gp_sizeIdxInfo->idxFrom( cs->area.lwidth() ); }
-inline unsigned toHSizeIdx( const CodingStructure* cs ) { return gp_sizeIdxInfo->idxFrom( cs->area.lheight() ); }
-
 #endif
 
diff --git a/source/Lib/CommonLib/Common.h b/source/Lib/CommonLib/Common.h
index 81c92278b2bd60a59c558424bcdbedf0f1defb5b..9d30b8393ea28a4b619d2c3b9d3d2d256b33bcd3 100644
--- a/source/Lib/CommonLib/Common.h
+++ b/source/Lib/CommonLib/Common.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/CommonDef.h b/source/Lib/CommonLib/CommonDef.h
index 6d37b49542cebf6ca0ff73f6b19249b37067d5fb..f24085c106c07b6c6028cd2bd81a9fcccf32d1f8 100644
--- a/source/Lib/CommonLib/CommonDef.h
+++ b/source/Lib/CommonLib/CommonDef.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -58,6 +58,8 @@
 #if _MSC_VER < 1900
 #error "MS Visual Studio version not supported. Please upgrade to Visual Studio 2015 or higher (or use other compilers)"
 #endif
+
+#include <intrin.h>
 #endif
 
 //! \ingroup CommonLib
@@ -143,11 +145,13 @@ static const int MAX_NUM_REF =                                     16; ///< max.
 static const int MAX_QP =                                          63;
 static const int NOT_VALID =                                       -1;
 
+
 static const int AMVP_MAX_NUM_CANDS =                               2; ///< AMVP: advanced motion vector prediction - max number of final candidates
 static const int AMVP_MAX_NUM_CANDS_MEM =                           3; ///< AMVP: advanced motion vector prediction - max number of candidates
 static const int AMVP_DECIMATION_FACTOR =                           2;
 static const int MRG_MAX_NUM_CANDS =                                6; ///< MERGE
 static const int AFFINE_MRG_MAX_NUM_CANDS =                         5; ///< AFFINE MERGE
+static const int IBC_MRG_MAX_NUM_CANDS =                            6; ///< IBC MERGE
 
 static const int MAX_TLAYER =                                       7; ///< Explicit temporal layer QP offset - max number of temporal layer
 
@@ -164,15 +168,33 @@ static const int MAX_NUM_PICS_IN_SOP =                           1024;
 static const int MAX_NESTING_NUM_OPS =                           1024;
 static const int MAX_NESTING_NUM_LAYER =                           64;
 
-#if HEVC_VPS
 static const int MAX_VPS_NUM_HRD_PARAMETERS =                       1;
-static const int MAX_VPS_OP_SETS_PLUS1 =                         1024;
-static const int MAX_VPS_NUH_RESERVED_ZERO_LAYER_ID_PLUS1 =         1;
-#endif
-
+static const int MAX_VPS_LAYERS =                                  64;
+static const int MAX_VPS_SUBLAYERS =                                7;
+static const int MAX_NUM_REF_LAYERS =                               7;
+static const int MAX_NUM_OLSS =                                   256;
+static const int MAX_VPS_OLS_MODE_IDC =                             2;
 static const int MAXIMUM_INTRA_FILTERED_WIDTH =                    16;
 static const int MAXIMUM_INTRA_FILTERED_HEIGHT =                   16;
 
+static const int MIP_MAX_WIDTH =                                   MAX_TB_SIZEY;
+static const int MIP_MAX_HEIGHT =                                  MAX_TB_SIZEY;
+
+static const int MAX_NUM_ALF_ALTERNATIVES_CHROMA =                  8;
+static const int MAX_NUM_ALF_CLASSES         =                     25;
+static const int MAX_NUM_ALF_LUMA_COEFF      =                     13;
+static const int MAX_NUM_ALF_CHROMA_COEFF    =                      7;
+static const int MAX_ALF_FILTER_LENGTH       =                      7;
+static const int MAX_NUM_ALF_COEFF           =                     MAX_ALF_FILTER_LENGTH * MAX_ALF_FILTER_LENGTH / 2 + 1;
+static const int MAX_ALF_PADDING_SIZE        =                      4;
+
+static const int ALF_FIXED_FILTER_NUM        =                     64;
+static const int ALF_CTB_MAX_NUM_APS         =                      8;
+static const int NUM_FIXED_FILTER_SETS       =                     16;
+static const int NUM_TOTAL_FILTER_SETS       =                     NUM_FIXED_FILTER_SETS + ALF_CTB_MAX_NUM_APS;
+
+
+static const int MAX_BDOF_APPLICATION_REGION =                     16;
 
 static const int MAX_CPB_CNT =                                     32; ///< Upper bound of (cpb_cnt_minus1 + 1)
 static const int MAX_NUM_LAYER_IDS =                               64;
@@ -182,31 +204,37 @@ static const int CU_DQP_EG_k =                                      0; ///< expg
 
 static const int SBH_THRESHOLD =                                    4; ///< value of the fixed SBH controlling threshold
 
-static const int C1FLAG_NUMBER =                                    8; ///< maximum number of largerThan1 flag coded in one chunk: 16 in HM5
-static const int C2FLAG_NUMBER =                                    1; ///< maximum number of largerThan2 flag coded in one chunk: 16 in HM5
-
 static const int MAX_NUM_VPS =                                     16;
+static const int MAX_NUM_DPS =                                     16;
 static const int MAX_NUM_SPS =                                     16;
 static const int MAX_NUM_PPS =                                     64;
 static const int MAX_NUM_APS =                                     32;  //Currently APS ID has 5 bits
+static const int NUM_APS_TYPE_LEN =                                 3;  //Currently APS Type has 3 bits
+static const int MAX_NUM_APS_TYPE =                                 8;  //Currently APS Type has 3 bits, so there are at most 8 types
 
+static const int MAX_TILE_COLS =                                   20;  ///< Maximum number of tile columns
+static const int MAX_TILE_ROWS =                                   22;  ///< Maximum number of tile rows
+static const int MAX_TILES =            MAX_TILE_COLS * MAX_TILE_ROWS;  ///< Maximum number of tiles
+static const int MAX_SLICES =                                     600;  ///< Maximum number of slices per picture
 static const int MLS_GRP_NUM =                                   1024; ///< Max number of coefficient groups, max(16, 256)
 
 static const int MLS_CG_SIZE =                                      4; ///< Coefficient group size of 4x4; = MLS_CG_LOG2_WIDTH + MLS_CG_LOG2_HEIGHT
 
-static const int ADJ_QUANT_SHIFT =                                  7;
-static const int ADJ_DEQUANT_SHIFT =            ( ADJ_QUANT_SHIFT + 1 );
 
 static const int RVM_VCEGAM10_M =                                   4;
 
 static const int MAX_REF_LINE_IDX =                                 3; //highest refLine offset in the list
 static const int MRL_NUM_REF_LINES =                                3; //number of candidates in the array
-static const int MULTI_REF_LINE_IDX[4] =               { 0, 1, 3, 0 };
+static const int MULTI_REF_LINE_IDX[4] =               { 0, 1, 2, 0 };
+
+static const int PRED_REG_MIN_WIDTH =                               4;  // Minimum prediction region width for ISP subblocks
 
 static const int NUM_LUMA_MODE =                                   67; ///< Planar + DC + 65 directional mode (4*16 + 1)
 static const int NUM_LMC_MODE =                                    1 + 2; ///< LMC + MDLM_T + MDLM_L
 static const int NUM_INTRA_MODE = (NUM_LUMA_MODE + NUM_LMC_MODE);
 
+static const int NUM_EXT_LUMA_MODE =                               28;
+
 static const int NUM_DIR =           (((NUM_LUMA_MODE - 3) >> 2) + 1);
 static const int PLANAR_IDX =                                       0; ///< index for intra PLANAR mode
 static const int DC_IDX =                                           1; ///< index for intra DC     mode
@@ -214,6 +242,7 @@ static const int HOR_IDX =                    (1 * (NUM_DIR - 1) + 2); ///< inde
 static const int DIA_IDX =                    (2 * (NUM_DIR - 1) + 2); ///< index for intra DIAGONAL   mode
 static const int VER_IDX =                    (3 * (NUM_DIR - 1) + 2); ///< index for intra VERTICAL   mode
 static const int VDIA_IDX =                   (4 * (NUM_DIR - 1) + 2); ///< index for intra VDIAGONAL  mode
+static const int BDPCM_IDX =                  (5 * (NUM_DIR - 1) + 2); ///< index for intra BDPCM mode
 static const int NOMODE_IDX =                               MAX_UCHAR; ///< indicating uninitialized elements
 
 static const int NUM_CHROMA_MODE = (5 + NUM_LMC_MODE); ///< total number of chroma modes
@@ -222,21 +251,21 @@ static const int MDLM_L_IDX =                          LM_CHROMA_IDX + 1; ///< M
 static const int MDLM_T_IDX =                          LM_CHROMA_IDX + 2; ///< MDLM_T
 static const int DM_CHROMA_IDX =                       NUM_INTRA_MODE; ///< chroma mode index for derived from luma intra mode
 
-static const uint8_t INTER_MODE_IDX =                               255; ///< index for inter modes
-
 static const uint32_t  NUM_TRAFO_MODES_MTS =                            6; ///< Max Intra CU size applying EMT, supported values: 8, 16, 32, 64, 128
 static const uint32_t  MTS_INTRA_MAX_CU_SIZE =                         32; ///< Max Intra CU size applying EMT, supported values: 8, 16, 32, 64, 128
 static const uint32_t  MTS_INTER_MAX_CU_SIZE =                         32; ///< Max Inter CU size applying EMT, supported values: 8, 16, 32, 64, 128
 static const int NUM_MOST_PROBABLE_MODES = 6;
 static const int LM_SYMBOL_NUM = (1 + NUM_LMC_MODE);
 
-static const int FAST_UDI_MAX_RDMODE_NUM =              NUM_LUMA_MODE; ///< maximum number of RD comparison in fast-UDI estimation loop
+static const int MAX_NUM_MIP_MODE =                                32; ///< maximum number of MIP pred. modes
+static const int FAST_UDI_MAX_RDMODE_NUM = (NUM_LUMA_MODE + MAX_NUM_MIP_MODE); ///< maximum number of RD comparison in fast-UDI estimation loop
 
-static const int MDCS_ANGLE_LIMIT =                                 9; ///< 0 = Horizontal/vertical only, 1 = Horizontal/vertical +/- 1, 2 = Horizontal/vertical +/- 2 etc...
+static const int MAX_LFNST_COEF_NUM =                              16;
 
-static const int MDCS_MAXIMUM_WIDTH =                               8; ///< (measured in pixels) TUs with width greater than this can only use diagonal scan
-static const int MDCS_MAXIMUM_HEIGHT =                              8; ///< (measured in pixels) TUs with height greater than this can only use diagonal scan
+static const int LFNST_LAST_SIG_LUMA =                              1;
+static const int LFNST_LAST_SIG_CHROMA =                            1;
 
+static const int NUM_LFNST_NUM_PER_SET =                            3;
 
 static const int LOG2_MAX_NUM_COLUMNS_MINUS1 =                      7;
 static const int LOG2_MAX_NUM_ROWS_MINUS1 =                         7;
@@ -250,18 +279,20 @@ static const int LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS_SIGNAL = 1 << MV
 static const int LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS = 1 << MV_FRACTIONAL_BITS_INTERNAL;
 static const int CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS = 1 << (MV_FRACTIONAL_BITS_INTERNAL + 1);
 
+static const int MAX_NUM_SUB_PICS =                               255;
 static const int MAX_NUM_LONG_TERM_REF_PICS =                      33;
 static const int NUM_LONG_TERM_REF_PIC_SPS =                        0;
 
 
 static const int MAX_QP_OFFSET_LIST_SIZE =                          6; ///< Maximum size of QP offset list is 6 entries
+static const int MAX_NUM_CQP_MAPPING_TABLES =                       3; ///< Maximum number of chroma QP mapping tables (Cb, Cr and joint Cb-Cr)
+static const int MIN_QP_VALUE_FOR_16_BIT   =                      -48; ///< Minimum value for QP (-6*(bitdepth - 8)) for bit depth 16; the actual minimum QP value is bit depth dependent
+static const int MAX_NUM_QP_VALUES =    MAX_QP + 1 - MIN_QP_VALUE_FOR_16_BIT; ///< Maximum number of QP values possible - bit depth dependent
 
 // Cost mode support
 static const int LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP =      0; ///< QP to use for lossless coding.
 static const int LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP_PRIME =4; ///< QP' to use for mixed_lossy_lossless coding.
 
-static const int CR_FROM_CB_REG_COST_SHIFT                        = 9;
-
 static const int RExt__GOLOMB_RICE_ADAPTATION_STATISTICS_SETS =     4;
 
 static const int RExt__PREDICTION_WEIGHTING_ANALYSIS_DC_PRECISION = 0; ///< Additional fixed bit precision used during encoder-side weighting prediction analysis. Currently only used when high_precision_prediction_weighting_flag is set, for backwards compatibility reasons.
@@ -273,7 +304,8 @@ static const int MAX_CU_SIZE =                        1<<MAX_CU_DEPTH;
 static const int MIN_CU_LOG2 =                                      2;
 static const int MIN_PU_SIZE =                                      4;
 static const int MAX_NUM_PARTS_IN_CTU =                         ( ( MAX_CU_SIZE * MAX_CU_SIZE ) >> ( MIN_CU_LOG2 << 1 ) );
-static const int MAX_LOG2_DIFF_CU_TR_SIZE =                         2;
+static const int MAX_NUM_TUS =                                     16; ///< Maximum number of TUs within one CU. When max TB size is 32x32, up to 16 TUs within one CU (128x128) are supported
+static const int MAX_LOG2_DIFF_CU_TR_SIZE =                         3;
 static const int MAX_CU_TILING_PARTITIONS = 1 << ( MAX_LOG2_DIFF_CU_TR_SIZE << 1 );
 
 static const int JVET_C0024_ZERO_OUT_TH =                          32;
@@ -283,9 +315,12 @@ static const int SCALING_LIST_REM_NUM =                             6;
 
 static const int QUANT_SHIFT =                                     14; ///< Q(4) = 2^14
 static const int IQUANT_SHIFT =                                     6;
-static const int SCALE_BITS =                                      15; ///< Precision for fractional bit estimates
 
-static const int SCALING_LIST_NUM = MAX_NUM_COMPONENT * NUMBER_OF_PREDICTION_MODES; ///< list number for quantization matrix
+static constexpr int    SCALE_BITS      = 15;   // Precision for fractional bit estimates
+static constexpr double FRAC_BITS_SCALE = 1.0 / (1 << SCALE_BITS);
+
+static constexpr int SCALING_LIST_PRED_MODES = 2;
+static const int SCALING_LIST_NUM = MAX_NUM_COMPONENT * SCALING_LIST_PRED_MODES; ///< list number for quantization matrix
 
 static const int SCALING_LIST_START_VALUE =                         8; ///< start value for dpcm mode
 static const int MAX_MATRIX_COEF_NUM =                             64; ///< max coefficient number for quantization matrix
@@ -294,13 +329,10 @@ static const int SCALING_LIST_BITS =                                8; ///< bit
 static const int LOG2_SCALING_LIST_NEUTRAL_VALUE =                  4; ///< log2 of the value that, when used in a scaling list, has no effect on quantisation
 static const int SCALING_LIST_DC =                                 16; ///< default DC value
 
-static const int CONTEXT_STATE_BITS =                               6;
 static const int LAST_SIGNIFICANT_GROUPS =                         14;
-static const int MAX_GR_ORDER_RESIDUAL =                           10;
 
 static const int AFFINE_MIN_BLOCK_SIZE =                            4; ///< Minimum affine MC block size
 
-
 static const int MMVD_REFINE_STEP =                                 8; ///< max number of distance step
 static const int MMVD_MAX_REFINE_NUM =                              (MMVD_REFINE_STEP * 4); ///< max number of candidate from a base candidate
 static const int MMVD_BASE_MV_NUM =                                 2; ///< max number of base candidate
@@ -308,20 +340,23 @@ static const int MMVD_ADD_NUM =                                     (MMVD_MAX_RE
 static const int MMVD_MRG_MAX_RD_NUM =                              MRG_MAX_NUM_CANDS;
 static const int MMVD_MRG_MAX_RD_BUF_NUM =                          (MMVD_MRG_MAX_RD_NUM + 1);///< increase buffer size by 1
 
-static const int MAX_NUM_REG_BINS_4x4SUBBLOCK =                    32; ///< max number of context-coded bins (incl. gt2 bins) per 4x4 subblock
-static const int MAX_NUM_GT2_BINS_4x4SUBBLOCK =                     4; ///< max number of gt2 bins per 4x4 subblock
-static const int MAX_NUM_REG_BINS_2x2SUBBLOCK =                     8; ///< max number of context-coded bins (incl. gt2 bins) per 2x2 subblock (chroma)
-static const int MAX_NUM_GT2_BINS_2x2SUBBLOCK =                     2; ///< max number of gt2 bins per 2x2 subblock (chroma)
+static const int MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT_LUMA =      28;
+static const int MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT_CHROMA =    28;
 
 static const int BIO_EXTEND_SIZE              =                     1;
 static const int BIO_TEMP_BUFFER_SIZE         =                     (MAX_CU_SIZE + 2 * BIO_EXTEND_SIZE) * (MAX_CU_SIZE + 2 * BIO_EXTEND_SIZE);
 
-static const int GBI_NUM =                                          5; ///< the number of weight options
-static const int GBI_DEFAULT =                                      ((uint8_t)(GBI_NUM >> 1)); ///< Default weighting index representing for w=0.5
-static const int GBI_SIZE_CONSTRAINT =                            256; ///< disabling GBi if cu size is smaller than 256
+static const int PROF_BORDER_EXT_W            =                     1;
+static const int PROF_BORDER_EXT_H            =                     1;
+static const int BCW_NUM =                                          5; ///< the number of weight options
+static const int BCW_DEFAULT =                                      ((uint8_t)(BCW_NUM >> 1)); ///< Default weighting index representing for w=0.5
+static const int BCW_SIZE_CONSTRAINT =                            256; ///< disabling Bcw if cu size is smaller than 256
 static const int MAX_NUM_HMVP_CANDS =                              (MRG_MAX_NUM_CANDS-1); ///< maximum number of HMVP candidates to be stored and used in merge list
 static const int MAX_NUM_HMVP_AVMPCANDS =                          4; ///< maximum number of HMVP candidates to be used in AMVP list
 
+static const int ALF_VB_POS_ABOVE_CTUROW_LUMA = 4;
+static const int ALF_VB_POS_ABOVE_CTUROW_CHMA = 2;
+
 #if W0038_DB_OPT
 static const int MAX_ENCODER_DEBLOCKING_QUALITY_LAYERS =           8 ;
 #endif
@@ -341,25 +376,20 @@ static const int DMVR_NUM_ITERATION = 2;
 //for I slice luma CTB configuration para.
 static const int    MAX_BT_DEPTH  =                                 4;      ///<  <=7
 static const int    MAX_BT_SIZE   =                                32;      ///<  [1<<MIN_QT_SIZE, 1<<CTU_LOG2]
-static const int    MIN_BT_SIZE   =                                 4;      ///<  can be set down to 1<<MIN_CU_LOG2
 
 static const int    MAX_TT_SIZE   =                                32;      ///<  [1<<MIN_QT_SIZE, 1<<CTU_LOG2]
 static const int    MAX_TT_SIZE_C =                                32;      ///<  [1<<MIN_QT_SIZE, 1<<CTU_LOG2]
-static const int    MIN_TT_SIZE   =                                 4;      ///<  can be set down to 1<<MIN_CU_LOG2
-static const int    MIN_TT_SIZE_C =                                 4;      ///<  can be set down to 1<<MIN_CU_LOG2
                                                                             //for P/B slice CTU config. para.
 static const int    MAX_BT_DEPTH_INTER =                            4;      ///< <=7
 static const int    MAX_BT_SIZE_INTER  =                          128;      ///< for initialization, [1<<MIN_BT_SIZE_INTER, 1<<CTU_LOG2]
-static const int    MIN_BT_SIZE_INTER  =                            4;      ///<
 
                                                                             //for I slice chroma CTB configuration para. (in luma samples)
 static const int    MAX_BT_DEPTH_C      =                           0;      ///< <=7
 static const int    MAX_BT_SIZE_C       =                          64;      ///< [1<<MIN_QT_SIZE_C, 1<<CTU_LOG2], in luma samples
-static const int    MIN_BT_SIZE_C       =                           4;      ///< can be set down to 4, in luma samples
 
 static const int    MAX_TT_SIZE_INTER  =                           64;      ///< for initialization, [1<<MIN_CU_LOG2, 64]
-static const int    MIN_TT_SIZE_INTER  =                            4;      ///<
-
+static const int    MIN_DUALTREE_CHROMA_WIDTH  =                    4;
+static const int    MIN_DUALTREE_CHROMA_SIZE   =                   16;
 static const SplitSeries SPLIT_BITS         =                       5;
 static const SplitSeries SPLIT_DMULT        =                       5;
 static const SplitSeries SPLIT_MASK         =                      31;      ///< = (1 << SPLIT_BITS) - 1
@@ -382,11 +412,6 @@ static const int MAX_TESTED_QPs =   ( 1 + 1 + ( MAX_DELTA_QP << 1 ) );      ///<
 
 static const int COM16_C806_TRANS_PREC =                            0;
 
-static const int NUM_MERGE_IDX_EXT_CTX =                            5;
-static const unsigned E0104_ALF_MAX_TEMPLAYERID =                  5;       // define to zero to switch of  code
-static const unsigned C806_ALF_TEMPPRED_NUM =                      6;
-
-
 static const int NTAPS_LUMA               =                         8; ///< Number of taps for luma
 static const int NTAPS_CHROMA             =                         4; ///< Number of taps for chroma
 #if LUMA_ADAPTIVE_DEBLOCKING_FILTER_QP_OFFSET
@@ -396,9 +421,9 @@ static const int MAX_LADF_INTERVALS       =                         5; /// max n
 static const int NTAPS_BILINEAR           =                         2; ///< Number of taps for bilinear filter
 
 static const int ATMVP_SUB_BLOCK_SIZE =                             3; ///< sub-block size for ATMVP
-static const int TRIANGLE_MAX_NUM_UNI_CANDS =                       5;
+static const int TRIANGLE_MAX_NUM_UNI_CANDS =                       6;
 static const int TRIANGLE_MAX_NUM_CANDS_MEM =                       7;
-static const int TRIANGLE_MAX_NUM_CANDS =                          40;
+static const int TRIANGLE_MAX_NUM_CANDS = TRIANGLE_MAX_NUM_UNI_CANDS * (TRIANGLE_MAX_NUM_UNI_CANDS - 1) * 2;
 static const int TRIANGLE_MAX_NUM_SATD_CANDS =                      3;
 static const int TRIANGLE_MIN_SIZE =                            8 * 8;
 
@@ -406,6 +431,11 @@ static const int SBT_MAX_SIZE =                                    64; ///< maxi
 static const int SBT_NUM_SL =                                      10; ///< maximum number of historical PU decision saved for a CU
 static const int SBT_NUM_RDO =                                      2; ///< maximum number of SBT mode tried for a PU
 
+static const int NUM_INTER_CU_INFO_SAVE =                           8; ///< maximum number of inter cu information saved for fast algorithm
+static const int LDT_MODE_TYPE_INHERIT =                            0; ///< No need to signal mode_constraint_flag, and the modeType of the region is inherited from its parent node
+static const int LDT_MODE_TYPE_INFER =                              1; ///< No need to signal mode_constraint_flag, and the modeType of the region is inferred as MODE_TYPE_INTRA
+static const int LDT_MODE_TYPE_SIGNAL =                             2; ///< Need to signal mode_constraint_flag, and the modeType of the region is determined by the flag
+
 static const int IBC_MAX_CAND_SIZE = 16; // max block size for ibc search
 static const int IBC_NUM_CANDIDATES = 64; ///< Maximum number of candidates to store/test
 static const int CHROMA_REFINEMENT_CANDIDATES = 8; /// 8 candidates BV to choose from
@@ -417,10 +447,41 @@ static constexpr int MV_MANTISSA_BITCOUNT    = 6;
 static constexpr int MV_MANTISSA_UPPER_LIMIT = ((1 << (MV_MANTISSA_BITCOUNT - 1)) - 1);
 static constexpr int MV_MANTISSA_LIMIT       = (1 << (MV_MANTISSA_BITCOUNT - 1));
 static constexpr int MV_EXPONENT_MASK        = ((1 << MV_EXPONENT_BITCOUNT) - 1);
+
+static constexpr int MV_BITS =                                   18; ///< bit width from which MV_MAX / MV_MIN are derived
+static constexpr int MV_MAX =              (1 << (MV_BITS - 1)) - 1; ///< largest value of a signed MV_BITS-bit integer (2^17 - 1)
+static constexpr int MV_MIN =                 -(1 << (MV_BITS - 1)); ///< smallest value of a signed MV_BITS-bit integer (-2^17)
+
+static const int MVD_MAX =                            (1 << 17) - 1; ///< 2^17 - 1, numerically equal to MV_MAX
+static const int MVD_MIN =                               -(1 << 17); ///< -2^17, numerically equal to MV_MIN
+
 static const int PIC_ANALYZE_CW_BINS =                           32;
 static const int PIC_CODE_CW_BINS =                              16;
-static const int FP_PREC =                                       14;
+static const int LMCS_SEG_NUM =                                  32;
+static const int FP_PREC =                                       11;
 static const int CSCALE_FP_PREC =                                11;
+static const int  NEIG_NUM_LOG  =                                 6;
+static const int  NEIG_NUM =                      1 << NEIG_NUM_LOG;
+static const int LOG2_PALETTE_CG_SIZE =                           4;
+static const int RUN_IDX_THRE =                                   4;
+static const int MAX_CU_BLKSIZE_PLT =                            64;
+static const int NUM_TRELLIS_STATE =                              3;
+static const double ENC_CHROMA_WEIGHTING =                      0.8;
+static const int MAXPLTPREDSIZE = 63;
+static const int MAXPLTSIZE = 31;
+static const double PLT_CHROMA_WEIGHTING =                      0.8;
+static const int PLT_ENCBITDEPTH = 8;
+static const int PLT_FAST_RATIO = 100;
+#if RExt__DECODER_DEBUG_TOOL_MAX_FRAME_STATS
+static const int  EPBIN_WEIGHT_FACTOR =                           4;
+#endif
+static const int ENC_PPS_ID_RPR =                                 3;
+static const int SCALE_RATIO_BITS =                              14;
+static const int MAX_SCALING_RATIO =                              2;  // max downsampling ratio for RPR
+static const std::pair<int, int> SCALE_1X = std::pair<int, int>( 1 << SCALE_RATIO_BITS, 1 << SCALE_RATIO_BITS );  // scale ratio 1x
+static const int DELTA_QP_FOR_Y_Cg =                             -5;
+static const int DELTA_QP_FOR_Co =                               -3;
+
 // ====================================================================================================================
 // Macro functions
 // ====================================================================================================================
@@ -566,10 +627,16 @@ static inline int floorLog2(uint32_t x)
 {
   if (x == 0)
   {
+    // note: ceilLog2() expects -1 as return value
     return -1;
   }
 #ifdef __GNUC__
   return 31 - __builtin_clz(x);
+#else
+#ifdef _MSC_VER
+  unsigned long r = 0;
+  _BitScanReverse(&r, x);
+  return r;
 #else
   int result = 0;
   if (x & 0xffff0000)
@@ -599,8 +666,15 @@ static inline int floorLog2(uint32_t x)
   }
   return result;
 #endif
+#endif
 }
 
+static inline int ceilLog2(uint32_t x) ///< ceil(log2(x)) for x > 0; returns -1 for x == 0
+{
+  return (x==0) ? -1 : floorLog2(x - 1) + 1; // for x == 1, floorLog2(0) yields -1, so the result is 0
+}
+
+
 //CASE-BREAK for breakpoints
 #if defined ( _MSC_VER ) && defined ( _DEBUG )
 #define _CASE(_x) if(_x)
@@ -616,7 +690,7 @@ static inline int floorLog2(uint32_t x)
 #define _UNIT_AREA_AT(_a,_x,_y,_w,_h)
 #endif
 
-#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM
+#if ENABLE_SPLIT_PARALLELISM
 #include <omp.h>
 
 #define PARL_PARAM(DEF) , DEF
diff --git a/source/Lib/CommonLib/ContextModelling.cpp b/source/Lib/CommonLib/ContextModelling.cpp
index 6e64f1b2f727f234d743dbe65ea691f1eb553004..dd8f0d97f69cfb07adc86d77787a93377a91f2b2 100644
--- a/source/Lib/CommonLib/ContextModelling.cpp
+++ b/source/Lib/CommonLib/ContextModelling.cpp
@@ -41,35 +41,25 @@
 #include "Picture.h"
 
 
-#if HEVC_USE_SIGN_HIDING
-CoeffCodingContext::CoeffCodingContext(const TransformUnit& tu, ComponentID component, bool signHide)
-#else
-CoeffCodingContext::CoeffCodingContext(const TransformUnit& tu, ComponentID component )
-#endif
+CoeffCodingContext::CoeffCodingContext( const TransformUnit& tu, ComponentID component, bool signHide, bool bdpcm )
   : m_compID                    (component)
   , m_chType                    (toChannelType(m_compID))
   , m_width                     (tu.block(m_compID).width)
   , m_height                    (tu.block(m_compID).height)
-  , m_log2CGWidth               ( g_log2SbbSize[m_chType][ g_aucLog2[m_width] ][ g_aucLog2[m_height] ][0] )
-  , m_log2CGHeight              ( g_log2SbbSize[m_chType][ g_aucLog2[m_width] ][ g_aucLog2[m_height] ][1] )
+  , m_log2CGWidth               ( g_log2SbbSize[ floorLog2(m_width) ][ floorLog2(m_height) ][0] )
+  , m_log2CGHeight              ( g_log2SbbSize[ floorLog2(m_width) ][ floorLog2(m_height) ][1] )
   , m_log2CGSize                (m_log2CGWidth + m_log2CGHeight)
   , m_widthInGroups(std::min<unsigned>(JVET_C0024_ZERO_OUT_TH, m_width) >> m_log2CGWidth)
   , m_heightInGroups(std::min<unsigned>(JVET_C0024_ZERO_OUT_TH, m_height) >> m_log2CGHeight)
-  , m_log2BlockWidth            (g_aucLog2[m_width])
-  , m_log2BlockHeight           (g_aucLog2[m_height])
+  , m_log2BlockWidth            ((unsigned)floorLog2(m_width))
+  , m_log2BlockHeight           ((unsigned)floorLog2(m_height))
   , m_maxNumCoeff               (m_width * m_height)
-#if HEVC_USE_SIGN_HIDING
   , m_signHiding                (signHide)
-#endif
   , m_extendedPrecision         (tu.cs->sps->getSpsRangeExtension().getExtendedPrecisionProcessingFlag())
   , m_maxLog2TrDynamicRange     (tu.cs->sps->getMaxLog2TrDynamicRange(m_chType))
-#if HEVC_USE_MDCS
-  , m_scanType                  (CoeffScanType(TU::getCoefScanIdx( tu, m_compID)))
-#else
   , m_scanType                  (SCAN_DIAG)
-#endif
-  , m_scan                      (g_scanOrder     [m_chType][SCAN_GROUPED_4x4][m_scanType][gp_sizeIdxInfo->idxFrom(m_width        )][gp_sizeIdxInfo->idxFrom(m_height        )])
-  , m_scanCG                    (g_scanOrder     [m_chType][SCAN_UNGROUPED  ][m_scanType][gp_sizeIdxInfo->idxFrom(m_widthInGroups)][gp_sizeIdxInfo->idxFrom(m_heightInGroups)])
+  , m_scan                      (g_scanOrder     [SCAN_GROUPED_4x4][m_scanType][gp_sizeIdxInfo->idxFrom(m_width        )][gp_sizeIdxInfo->idxFrom(m_height        )])
+  , m_scanCG                    (g_scanOrder     [SCAN_UNGROUPED  ][m_scanType][gp_sizeIdxInfo->idxFrom(m_widthInGroups)][gp_sizeIdxInfo->idxFrom(m_heightInGroups)])
   , m_CtxSetLastX               (Ctx::LastX[m_chType])
   , m_CtxSetLastY               (Ctx::LastY[m_chType])
   , m_maxLastPosX(g_uiGroupIdx[std::min<unsigned>(JVET_C0024_ZERO_OUT_TH, m_width) - 1])
@@ -78,7 +68,11 @@ CoeffCodingContext::CoeffCodingContext(const TransformUnit& tu, ComponentID comp
   , m_lastOffsetY               (0)
   , m_lastShiftX                (0)
   , m_lastShiftY                (0)
-  , m_TrafoBypass               (tu.cs->sps->getSpsRangeExtension().getTransformSkipContextEnabledFlag() &&  (tu.cu->transQuantBypass || tu.mtsIdx==1))
+#if JVET_P0058_CHROMA_TS
+  , m_TrafoBypass               (tu.cs->sps->getSpsRangeExtension().getTransformSkipContextEnabledFlag() && (tu.cu->transQuantBypass || tu.mtsIdx[m_compID] == MTS_SKIP))
+#else
+  , m_TrafoBypass               (tu.cs->sps->getSpsRangeExtension().getTransformSkipContextEnabledFlag() &&  (tu.cu->transQuantBypass || tu.mtsIdx==MTS_SKIP))
+#endif
   , m_scanPosLast               (-1)
   , m_subSetId                  (-1)
   , m_subSetPos                 (-1)
@@ -92,27 +86,22 @@ CoeffCodingContext::CoeffCodingContext(const TransformUnit& tu, ComponentID comp
   , m_sigFlagCtxSet             { Ctx::SigFlag[m_chType], Ctx::SigFlag[m_chType+2], Ctx::SigFlag[m_chType+4] }
   , m_parFlagCtxSet             ( Ctx::ParFlag[m_chType] )
   , m_gtxFlagCtxSet             { Ctx::GtxFlag[m_chType], Ctx::GtxFlag[m_chType+2] }
+  , m_sigGroupCtxIdTS           (-1)
+  , m_tsSigFlagCtxSet           ( Ctx::TsSigFlag )
+  , m_tsParFlagCtxSet           ( Ctx::TsParFlag )
+  , m_tsGtxFlagCtxSet           ( Ctx::TsGtxFlag )
+  , m_tsLrg1FlagCtxSet          (Ctx::TsLrg1Flag)
+  , m_tsSignFlagCtxSet          (Ctx::TsResidualSign)
   , m_sigCoeffGroupFlag         ()
+  , m_bdpcm                     (bdpcm)
 {
   // LOGTODO
   unsigned log2sizeX = m_log2BlockWidth;
   unsigned log2sizeY = m_log2BlockHeight;
-#if HEVC_USE_MDCS
-  if (m_scanType == SCAN_VER)
-  {
-    std::swap(log2sizeX, log2sizeY);
-    std::swap(const_cast<unsigned&>(m_maxLastPosX), const_cast<unsigned&>(m_maxLastPosY));
-  }
-#endif
   if (m_chType == CHANNEL_TYPE_CHROMA)
   {
-#if HEVC_USE_MDCS
-    const_cast<int&>(m_lastShiftX) = Clip3( 0, 2, int( ( m_scanType == SCAN_VER ? m_height : m_width  ) >> 3) );
-    const_cast<int&>(m_lastShiftY) = Clip3( 0, 2, int( ( m_scanType == SCAN_VER ? m_width  : m_height ) >> 3) );
-#else
     const_cast<int&>(m_lastShiftX) = Clip3( 0, 2, int( m_width  >> 3) );
     const_cast<int&>(m_lastShiftY) = Clip3( 0, 2, int( m_height >> 3) );
-#endif
   }
   else
   {
@@ -141,33 +130,38 @@ void CoeffCodingContext::initSubblock( int SubsetId, bool sigGroupFlag )
   unsigned  sigRight  = unsigned( ( CGPosX + 1 ) < m_widthInGroups  ? m_sigCoeffGroupFlag[ m_subSetPos + 1               ] : false );
   unsigned  sigLower  = unsigned( ( CGPosY + 1 ) < m_heightInGroups ? m_sigCoeffGroupFlag[ m_subSetPos + m_widthInGroups ] : false );
   m_sigGroupCtxId     = Ctx::SigCoeffGroup[m_chType]( sigRight | sigLower );
+  unsigned  sigLeft   = unsigned( CGPosX > 0 ? m_sigCoeffGroupFlag[m_subSetPos - 1              ] : false );
+  unsigned  sigAbove  = unsigned( CGPosY > 0 ? m_sigCoeffGroupFlag[m_subSetPos - m_widthInGroups] : false );
+  m_sigGroupCtxIdTS   = Ctx::TsSigCoeffGroup( sigLeft  + sigAbove );
 }
 
 
+unsigned DeriveCtx::CtxModeConsFlag( const CodingStructure& cs, Partitioner& partitioner ) // returns 1 if the left or above CU has predMode == MODE_INTRA, otherwise 0
+{
+  assert( partitioner.chType == CHANNEL_TYPE_LUMA ); // the derivation is only written for the luma channel
+  const Position pos = partitioner.currArea().blocks[partitioner.chType]; // top-left position of the current area in that channel
+  const unsigned curSliceIdx = cs.slice->getIndependentSliceIdx();
+  const unsigned curTileIdx = cs.picture->brickMap->getBrickIdxRsMap( partitioner.currArea().lumaPos() ); // despite the name, this value comes from the brick map
 
+  const CodingUnit* cuLeft = cs.getCURestricted( pos.offset( -1, 0 ), pos, curSliceIdx, curTileIdx, partitioner.chType );  // CU one sample to the left
+  const CodingUnit* cuAbove = cs.getCURestricted( pos.offset( 0, -1 ), pos, curSliceIdx, curTileIdx, partitioner.chType ); // CU one sample above
+  // A null neighbour pointer counts as "not intra"; presumably null means the neighbour is unavailable (confirm in getCURestricted).
+  unsigned ctxId = ((cuAbove && cuAbove->predMode == MODE_INTRA) || (cuLeft && cuLeft->predMode == MODE_INTRA)) ? 1 : 0;
+  return ctxId;
+}
 
 
 void DeriveCtx::CtxSplit( const CodingStructure& cs, Partitioner& partitioner, unsigned& ctxSpl, unsigned& ctxQt, unsigned& ctxHv, unsigned& ctxHorBt, unsigned& ctxVerBt, bool* _canSplit /*= nullptr */ )
 {
   const Position pos         = partitioner.currArea().blocks[partitioner.chType];
   const unsigned curSliceIdx = cs.slice->getIndependentSliceIdx();
-#if HEVC_TILES_WPP
-  const unsigned curTileIdx  = cs.picture->tileMap->getTileIdxMap( partitioner.currArea().lumaPos() );
-#endif
+  const unsigned curTileIdx  = cs.picture->brickMap->getBrickIdxRsMap( partitioner.currArea().lumaPos() );
 
   // get left depth
-#if HEVC_TILES_WPP
-  const CodingUnit* cuLeft = cs.getCURestricted( pos.offset( -1, 0 ), curSliceIdx, curTileIdx, partitioner.chType );
-#else
-  const CodingUnit* cuLeft = cs.getCURestricted( pos.offset( -1, 0 ), curSliceIdx, partitioner.chType );
-#endif
+  const CodingUnit* cuLeft = cs.getCURestricted( pos.offset( -1, 0 ), pos, curSliceIdx, curTileIdx, partitioner.chType );
 
   // get above depth
-#if HEVC_TILES_WPP
-  const CodingUnit* cuAbove = cs.getCURestricted( pos.offset( 0, -1 ), curSliceIdx, curTileIdx, partitioner.chType );
-#else
-  const CodingUnit* cuAbove = cs.getCURestricted( pos.offset( 0, -1 ), curSliceIdx, partitioner.chType );
-#endif
+  const CodingUnit* cuAbove = cs.getCURestricted( pos.offset( 0, -1 ), pos, curSliceIdx, curTileIdx, partitioner.chType );
 
   bool canSplit[6];
 
@@ -255,29 +249,22 @@ void DeriveCtx::CtxSplit( const CodingStructure& cs, Partitioner& partitioner, u
   ctxVerBt = ( partitioner.currMtDepth <= 1 ? 3 : 2 );
 }
 
-unsigned DeriveCtx::CtxQtCbf( const ComponentID compID, const unsigned trDepth, const bool prevCbCbf, const int ispIdx )
+unsigned DeriveCtx::CtxQtCbf( const ComponentID compID, const bool prevCbf, const int ispIdx ) // context index for a CBF bin; no longer depends on the transform depth
 {
   if( ispIdx && isLuma( compID ) )
   {
-    return 2 + (int)prevCbCbf;
+    return 2 + (int)prevCbf; // luma with non-zero ispIdx: index 2 or 3, selected by prevCbf
   }
   if( compID == COMPONENT_Cr )
   {
-    return ( prevCbCbf ? 1 : 0 );
-  }
-  if( isChroma( compID ) )
-  {
-    return trDepth;
-  }
-  else
-  {
-    return ( trDepth == 0 ? 1 : 0 );
+    return ( prevCbf ? 1 : 0 ); // Cr: index 1 when prevCbf is set, else 0
   }
+  return 0; // every remaining component / ispIdx combination uses index 0
 }
 
 unsigned DeriveCtx::CtxInterDir( const PredictionUnit& pu )
 {
-  return Clip3( 0, 3, 7 - ( ( g_aucLog2[pu.lumaSize().width] + g_aucLog2[pu.lumaSize().height] + 1 ) >> 1 ) );    // VG-ASYMM DONE
+  return ( 7 - ((floorLog2(pu.lumaSize().width) + floorLog2(pu.lumaSize().height) + 1) >> 1) ); // 7 minus the rounded-up mean of log2(width) and log2(height); the result is not clipped
 }
 
 unsigned DeriveCtx::CtxAffineFlag( const CodingUnit& cu )
@@ -310,35 +297,6 @@ unsigned DeriveCtx::CtxSkipFlag( const CodingUnit& cu )
 }
 
 
-unsigned DeriveCtx::CtxIMVFlag( const CodingUnit& cu )
-{
-  const CodingStructure *cs = cu.cs;
-  unsigned ctxId = 0;
-
-  // Get BCBP of left PU
-  const CodingUnit *cuLeft = cs->getCURestricted( cu.lumaPos().offset( -1, 0 ), cu, CH_L );
-  ctxId = ( cuLeft && cuLeft->imv ) ? 1 : 0;
-
-  // Get BCBP of above PU
-  const CodingUnit *cuAbove = cs->getCURestricted( cu.lumaPos().offset( 0, -1 ), cu, CH_L );
-  ctxId += ( cuAbove && cuAbove->imv ) ? 1 : 0;
-
-  return ctxId;
-}
-
-unsigned DeriveCtx::CtxTriangleFlag( const CodingUnit& cu )
-{
-  const CodingStructure *cs = cu.cs;
-  unsigned ctxId = 0;
-
-  const CodingUnit *cuLeft = cs->getCURestricted( cu.lumaPos().offset( -1, 0 ), cu, CH_L );
-  ctxId = ( cuLeft && cuLeft->triangle ) ? 1 : 0;
-
-  const CodingUnit *cuAbove = cs->getCURestricted( cu.lumaPos().offset( 0, -1 ), cu, CH_L );
-  ctxId += ( cuAbove && cuAbove->triangle ) ? 1 : 0;
-
-  return ctxId;
-}
 
 unsigned DeriveCtx::CtxPredModeFlag( const CodingUnit& cu )
 {
@@ -354,10 +312,11 @@ unsigned DeriveCtx::CtxIBCFlag(const CodingUnit& cu)
 {
   const CodingStructure *cs = cu.cs;
   unsigned ctxId = 0;
-  const CodingUnit *cuLeft = cs->getCURestricted(cu.lumaPos().offset(-1, 0), cu, CH_L);
+  const Position pos = cu.chType == CHANNEL_TYPE_CHROMA ? cu.chromaPos() : cu.lumaPos();
+  const CodingUnit *cuLeft = cs->getCURestricted(pos.offset(-1, 0), cu, cu.chType);
   ctxId += (cuLeft && CU::isIBC(*cuLeft)) ? 1 : 0;
 
-  const CodingUnit *cuAbove = cs->getCURestricted(cu.lumaPos().offset(0, -1), cu, CH_L);
+  const CodingUnit *cuAbove = cs->getCURestricted(pos.offset(0, -1), cu, cu.chType);
   ctxId += (cuAbove && CU::isIBC(*cuAbove)) ? 1 : 0;
   return ctxId;
 }
@@ -365,10 +324,11 @@ unsigned DeriveCtx::CtxIBCFlag(const CodingUnit& cu)
 void MergeCtx::setMergeInfo( PredictionUnit& pu, int candIdx )
 {
   CHECK( candIdx >= numValidMergeCand, "Merge candidate does not exist" );
-
+  pu.regularMergeFlag        = !(pu.mhIntraFlag || pu.cu->triangle);
   pu.mergeFlag               = true;
   pu.mmvdMergeFlag = false;
   pu.interDir                = interDirNeighbours[candIdx];
+  pu.cu->imv = (!pu.cu->triangle && useAltHpelIf[candIdx]) ? IMV_HPEL : 0;
   pu.mergeIdx                = candIdx;
   pu.mergeType               = mrgTypeNeighbours[candIdx];
   pu.mv     [REF_PIC_LIST_0] = mvFieldNeighbours[(candIdx << 1) + 0].mv;
@@ -385,6 +345,7 @@ void MergeCtx::setMergeInfo( PredictionUnit& pu, int candIdx )
   {
     pu.bv = pu.mv[REF_PIC_LIST_0];
     pu.bv.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_INT); // used for only integer resolution
+    pu.cu->imv = pu.cu->imv == IMV_HPEL ? 0 : pu.cu->imv;
   }
   pu.cu->GBiIdx = ( interDirNeighbours[candIdx] == 3 ) ? GBiIdx[candIdx] : GBI_DEFAULT;
 
@@ -447,10 +408,9 @@ void MergeCtx::setMmvdMergeCandiInfo(PredictionUnit& pu, int candIdx)
     {
       const int scale = PU::getDistScaleFactor(currPoc, poc0, currPoc, poc1);
       tempMv[1] = tempMv[0];
-#if MMVD_LTRP
-      const bool bIsL0RefLongTerm = slice.getRefPic(REF_PIC_LIST_0, refList0)->longTerm;
-      const bool bIsL1RefLongTerm = slice.getRefPic(REF_PIC_LIST_1, refList1)->longTerm;
-      if (bIsL0RefLongTerm || bIsL1RefLongTerm)
+      const bool isL0RefLongTerm = slice.getRefPic(REF_PIC_LIST_0, refList0)->longTerm;
+      const bool isL1RefLongTerm = slice.getRefPic(REF_PIC_LIST_1, refList1)->longTerm;
+      if (isL0RefLongTerm || isL1RefLongTerm)
       {
         if ((poc1 - currPoc)*(poc0 - currPoc) > 0)
         {
@@ -462,16 +422,14 @@ void MergeCtx::setMmvdMergeCandiInfo(PredictionUnit& pu, int candIdx)
         }
       }
       else
-#endif
       tempMv[0] = tempMv[1].scaleMv(scale);
     }
     else
     {
       const int scale = PU::getDistScaleFactor(currPoc, poc1, currPoc, poc0);
-#if MMVD_LTRP
-      const bool bIsL0RefLongTerm = slice.getRefPic(REF_PIC_LIST_0, refList0)->longTerm;
-      const bool bIsL1RefLongTerm = slice.getRefPic(REF_PIC_LIST_1, refList1)->longTerm;
-      if (bIsL0RefLongTerm || bIsL1RefLongTerm)
+      const bool isL0RefLongTerm = slice.getRefPic(REF_PIC_LIST_0, refList0)->longTerm;
+      const bool isL1RefLongTerm = slice.getRefPic(REF_PIC_LIST_1, refList1)->longTerm;
+      if (isL0RefLongTerm || isL1RefLongTerm)
       {
         if ((poc1 - currPoc)*(poc0 - currPoc) > 0)
         {
@@ -483,7 +441,6 @@ void MergeCtx::setMmvdMergeCandiInfo(PredictionUnit& pu, int candIdx)
         }
       }
       else
-#endif
       tempMv[1] = tempMv[0].scaleMv(scale);
     }
 
@@ -545,6 +502,7 @@ void MergeCtx::setMmvdMergeCandiInfo(PredictionUnit& pu, int candIdx)
   pu.mmvdMergeFlag = true;
   pu.mmvdMergeIdx = candIdx;
   pu.mergeFlag = true;
+  pu.regularMergeFlag = true;
   pu.mergeIdx = candIdx;
   pu.mergeType = MRG_TYPE_DEFAULT_N;
   pu.mvd[REF_PIC_LIST_0] = Mv();
@@ -553,8 +511,50 @@ void MergeCtx::setMmvdMergeCandiInfo(PredictionUnit& pu, int candIdx)
   pu.mvpIdx[REF_PIC_LIST_1] = NOT_VALID;
   pu.mvpNum[REF_PIC_LIST_0] = NOT_VALID;
   pu.mvpNum[REF_PIC_LIST_1] = NOT_VALID;
+  pu.cu->imv = mmvdUseAltHpelIf[fPosBaseIdx] ? IMV_HPEL : 0;
 
   pu.cu->GBiIdx = (interDirNeighbours[fPosBaseIdx] == 3) ? GBiIdx[fPosBaseIdx] : GBI_DEFAULT;
 
+  for (int refList = 0; refList < 2; refList++)
+  {
+    if (pu.refIdx[refList] >= 0)
+    {
+      pu.mv[refList].clipToStorageBitDepth();
+    }
+  }
+
+
   PU::restrictBiPredMergeCandsOne(pu);
 }
+
+unsigned DeriveCtx::CtxMipFlag( const CodingUnit& cu ) // returns 0..2 (number of left/above neighbours with mipFlag set) or 3 for elongated blocks
+{
+  const CodingStructure *cs = cu.cs;
+  unsigned ctxId = 0;
+  // Left luma neighbour: contributes 1 when the pointer is non-null and its mipFlag is set.
+  const CodingUnit *cuLeft = cs->getCURestricted( cu.lumaPos().offset( -1, 0 ), cu, CH_L );
+  ctxId = (cuLeft && cuLeft->mipFlag) ? 1 : 0;
+  // Above luma neighbour: same rule, added on top of the left contribution.
+  const CodingUnit *cuAbove = cs->getCURestricted( cu.lumaPos().offset( 0, -1 ), cu, CH_L );
+  ctxId += (cuAbove && cuAbove->mipFlag) ? 1 : 0;
+  // When one luma dimension is more than twice the other, index 3 is used regardless of the neighbours.
+  ctxId  = (cu.lwidth() > 2*cu.lheight() || cu.lheight() > 2*cu.lwidth()) ? 3 : ctxId;
+
+  return ctxId;
+}
+
+#if JVET_P0077_LINE_CG_PALETTE
+unsigned DeriveCtx::CtxPltCopyFlag( const unsigned prevRunType, const unsigned dist ) // looks up a context index by distance, with the index clamped to RUN_IDX_THRE
+{
+  uint8_t *ucCtxLut = (prevRunType == PLT_RUN_INDEX) ? g_paletteRunLeftLut : g_paletteRunTopLut; // table choice depends on the previous run type
+  if ( dist <= RUN_IDX_THRE )
+  {
+     return ucCtxLut[dist]; // distance is within the table: use it directly
+  }
+  else
+  { // larger distances all share the entry at RUN_IDX_THRE
+    return ucCtxLut[RUN_IDX_THRE];
+  }
+}
+#endif
+
diff --git a/source/Lib/CommonLib/ContextModelling.h b/source/Lib/CommonLib/ContextModelling.h
index ea58093d5ba0b6ec9b246dfa93f22fb8dbc7754a..ef1df8aed4967d581822b1729a85e5ddab2f853e 100644
--- a/source/Lib/CommonLib/ContextModelling.h
+++ b/source/Lib/CommonLib/ContextModelling.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -51,16 +51,16 @@
 struct CoeffCodingContext
 {
 public:
-#if HEVC_USE_SIGN_HIDING
-  CoeffCodingContext( const TransformUnit& tu, ComponentID component, bool signHide);
-#else
-  CoeffCodingContext( const TransformUnit& tu, ComponentID component );
-#endif
+  CoeffCodingContext( const TransformUnit& tu, ComponentID component, bool signHide, bool bdpcm = false );
 public:
   void  initSubblock     ( int SubsetId, bool sigGroupFlag = false );
 public:
   void  resetSigGroup   ()                      { m_sigCoeffGroupFlag.reset( m_subSetPos ); }
   void  setSigGroup     ()                      { m_sigCoeffGroupFlag.set( m_subSetPos ); }
+  bool  noneSigGroup    ()                      { return m_sigCoeffGroupFlag.none(); }
+  int   lastSubSet      ()                      { return ( maxNumCoeff() - 1 ) >> log2CGSize(); }
+  bool  isLastSubSet    ()                      { return lastSubSet() == m_subSetId; }
+  bool  only1stSigGroup ()                      { return m_sigCoeffGroupFlag.count()-m_sigCoeffGroupFlag[lastSubSet()]==0; }
   void  setScanPosLast  ( int       posLast )   { m_scanPosLast = posLast; }
 public:
   ComponentID     compID          ()                        const { return m_compID; }
@@ -83,11 +83,9 @@ public:
   bool            isNotFirst      ()                        const { return ( m_subSetId != 0 ); }
   bool            isSigGroup(int scanPosCG) const { return m_sigCoeffGroupFlag[m_scanCG[scanPosCG].idx]; }
   bool            isSigGroup      ()                        const { return m_sigCoeffGroupFlag[ m_subSetPos ]; }
-#if HEVC_USE_SIGN_HIDING
   bool            signHiding      ()                        const { return m_signHiding; }
   bool            hideSign        ( int       posFirst,
                                     int       posLast   )   const { return ( m_signHiding && ( posLast - posFirst >= SBH_THRESHOLD ) ); }
-#endif
   CoeffScanType   scanType        ()                        const { return m_scanType; }
   unsigned        blockPos(int scanPos) const { return m_scan[scanPos].idx; }
   unsigned        posX(int scanPos) const { return m_scan[scanPos].x; }
@@ -96,7 +94,13 @@ public:
   unsigned        maxLastPosY     ()                        const { return m_maxLastPosY; }
   unsigned        lastXCtxId      ( unsigned  posLastX  )   const { return m_CtxSetLastX( m_lastOffsetX + ( posLastX >> m_lastShiftX ) ); }
   unsigned        lastYCtxId      ( unsigned  posLastY  )   const { return m_CtxSetLastY( m_lastOffsetY + ( posLastY >> m_lastShiftY ) ); }
-  unsigned        sigGroupCtxId   ()                        const { return m_sigGroupCtxId; }
+  int             numCtxBins      ()                        const { return   m_remainingContextBins;      }
+  void            setNumCtxBins   ( int n )                       {          m_remainingContextBins  = n; }
+  unsigned        sigGroupCtxId   ( bool ts = false     )   const { return ts ? m_sigGroupCtxIdTS : m_sigGroupCtxId; }
+  bool            bdpcm           ()                        const { return m_bdpcm; }
+
+  void            decimateNumCtxBins(int n) { m_remainingContextBins -= n; }
+  void            increaseNumCtxBins(int n) { m_remainingContextBins += n; }
 
   unsigned sigCtxIdAbs( int scanPos, const TCoeff* coeff, const int state )
   {
@@ -128,11 +132,15 @@ public:
       }
     }
 #undef UPDATE
-    int ctxOfs = std::min( sumAbs, 5 ) + ( diag < 2 ? 6 : 0 );
+
+
+    int ctxOfs = std::min((sumAbs+1)>>1, 3) + ( diag < 2 ? 4 : 0 );
+
     if( m_chType == CHANNEL_TYPE_LUMA )
     {
-      ctxOfs += diag < 5 ? 6 : 0;
+      ctxOfs += diag < 5 ? 4 : 0;
     }
+
     m_tmplCpDiag = diag;
     m_tmplCpSum1 = sumAbs - numPos;
     return m_sigFlagCtxSet[std::max( 0, state-1 )]( ctxOfs );
@@ -152,8 +160,7 @@ public:
   unsigned parityCtxIdAbs   ( uint8_t offset )  const { return m_parFlagCtxSet   ( offset ); }
   unsigned greater1CtxIdAbs ( uint8_t offset )  const { return m_gtxFlagCtxSet[1]( offset ); }
   unsigned greater2CtxIdAbs ( uint8_t offset )  const { return m_gtxFlagCtxSet[0]( offset ); }
-
-  unsigned templateAbsSum( int scanPos, const TCoeff* coeff )
+  unsigned templateAbsSum( int scanPos, const TCoeff* coeff, int baseLevel )
   {
     const uint32_t  posY  = m_scan[scanPos].y;
     const uint32_t  posX  = m_scan[scanPos].x;
@@ -179,9 +186,170 @@ public:
         sum += abs(pData[m_width << 1]);
       }
     }
-    return std::min(sum, 31);
+    return std::max(std::min(sum - 5 * baseLevel, 31), 0);
+  }
+
+  unsigned sigCtxIdAbsTS( int scanPos, const TCoeff* coeff ) // picks an entry of m_tsSigFlagCtxSet from the count (0..2) of non-zero left/above neighbours
+  {
+    const uint32_t  posY   = m_scan[scanPos].y;
+    const uint32_t  posX   = m_scan[scanPos].x;
+    const TCoeff*   posC   = coeff + posX + posY * m_width; // current position; coeff is addressed row by row with stride m_width
+    int             numPos = 0;                             // number of non-zero neighbours found so far
+#define UPDATE(x) {int a=abs(x);numPos+=!!a;}
+    if( posX > 0 )
+    {
+      UPDATE( posC[-1] );             // value one column to the left
+    }
+    if( posY > 0 )
+    {
+      UPDATE( posC[-(int)m_width] );  // value one row above
+    }
+#undef UPDATE
+
+    return m_tsSigFlagCtxSet( numPos );
+  }
+
+  unsigned parityCtxIdAbsTS   ()                  const { return m_tsParFlagCtxSet(      0 ); } // always entry 0 of m_tsParFlagCtxSet
+  unsigned greaterXCtxIdAbsTS ( uint8_t offset )  const { return m_tsGtxFlagCtxSet( offset ); } // entry 'offset' of m_tsGtxFlagCtxSet
+
+  unsigned lrg1CtxIdAbsTS(int scanPos, const TCoeff* coeff, int bdpcm) // picks an entry of m_tsLrg1FlagCtxSet: index 3 when bdpcm is set, else the count (0..2) of non-zero left/above neighbours
+  {
+    const uint32_t  posY = m_scan[scanPos].y;
+    const uint32_t  posX = m_scan[scanPos].x;
+    const TCoeff*   posC = coeff + posX + posY * m_width; // current position; coeff is addressed row by row with stride m_width
+
+    int             numPos = 0;
+#define UPDATE(x) {int a=abs(x);numPos+=!!a;}
+
+    if (bdpcm)
+    {
+      numPos = 3; // fixed index, the neighbours are not inspected
+    }
+    else
+    {
+      if (posX > 0)
+      {
+        UPDATE(posC[-1]);             // value one column to the left
+      }
+      if (posY > 0)
+      {
+        UPDATE(posC[-(int)m_width]);  // value one row above
+      }
+    }
+
+#undef UPDATE
+    return m_tsLrg1FlagCtxSet(numPos);
+  }
+
+  unsigned signCtxIdAbsTS(int scanPos, const TCoeff* coeff, int bdpcm) // picks an entry (0..5) of m_tsSignFlagCtxSet from the signs of the two neighbouring values
+  {
+    const uint32_t  posY = m_scan[scanPos].y;
+    const uint32_t  posX = m_scan[scanPos].x;
+    const TCoeff*   pData = coeff + posX + posY * m_width;
+    // NOTE: despite their names these hold the values at x-1 and y-1; each stays 0 when posX / posY is 0.
+    int rightSign = 0, belowSign = 0;
+    unsigned signCtx = 0;
+
+    if (posX > 0)
+    {
+      rightSign = pData[-1];
+    }
+    if (posY > 0)
+    {
+      belowSign = pData[-(int)m_width];
+    }
+    // Opposite signs are detected by comparison: multiplying the two values could overflow int (undefined behaviour).
+    if ((rightSign == 0 && belowSign == 0) || (rightSign < 0 && belowSign > 0) || (rightSign > 0 && belowSign < 0))
+    {
+      signCtx = 0; // both neighbours zero, or both non-zero with opposite signs
+    }
+    else if (rightSign >= 0 && belowSign >= 0)
+    {
+      signCtx = 1; // no negative neighbour, at least one positive
+    }
+    else
+    {
+      signCtx = 2; // no positive neighbour, at least one negative
+    }
+    if (bdpcm)
+    {
+      signCtx += 3; // a non-zero bdpcm selects entries 3..5 instead of 0..2
+    }
+    return m_tsSignFlagCtxSet(signCtx);
+  }
+
+  void neighTS(int &rightPixel, int &belowPixel, int scanPos, const TCoeff* coeff) // fetches the two neighbouring values of scanPos into the output references
+  {
+    const uint32_t  posY = m_scan[scanPos].y;
+    const uint32_t  posX = m_scan[scanPos].x;
+    const TCoeff*   data = coeff + posX + posY * m_width; // current position; coeff is addressed row by row with stride m_width
+
+    rightPixel = belowPixel = 0; // default when the neighbour would lie before column 0 / row 0
+
+    if (posX > 0)
+    {
+      rightPixel = data[-1];            // NOTE: despite the name, this is the value at x-1
+    }
+    if (posY > 0)
+    {
+      belowPixel = data[-(int)m_width]; // NOTE: despite the name, this is the value at y-1
+    }
+  }
+
+  int deriveModCoeff(int rightPixel, int belowPixel, int absCoeff, int bdpcm = 0) // remaps an absolute level relative to pred1 = max(|rightPixel|, |belowPixel|)
+  {
+    // Zero stays zero; with bdpcm != 0 the level is returned unchanged.
+    if (absCoeff == 0)
+      return 0;
+    int pred1, absBelow = abs(belowPixel), absRight = abs(rightPixel);
+
+    int absCoeffMod = absCoeff;
+
+    if (bdpcm == 0)
+    {
+      pred1 = std::max(absBelow, absRight);
+
+      if (absCoeff == pred1)
+      {
+        absCoeffMod = 1; // a level equal to the predictor is mapped to 1
+      }
+      else
+      {
+        absCoeffMod = absCoeff < pred1 ? absCoeff + 1 : absCoeff; // levels below the predictor shift up by one, larger ones are kept
+      }
+    }
+
+    return(absCoeffMod);
   }
 
+  int decDeriveModCoeff(int rightPixel, int belowPixel, int absCoeff) // undoes the remapping done by deriveModCoeff() in its bdpcm == 0 branch
+  {
+    // Zero stays zero.
+    if (absCoeff == 0)
+      return 0;
+
+    int pred1, absBelow = abs(belowPixel), absRight = abs(rightPixel);
+    pred1 = std::max(absBelow, absRight); // same predictor as in deriveModCoeff()
+
+    int absCoeffMod;
+
+    if (absCoeff == 1 && pred1 > 0)
+    {
+      absCoeffMod = pred1; // the value 1 stands for "equal to the predictor"
+    }
+    else
+    {
+      absCoeffMod = absCoeff - (absCoeff <= pred1); // values up to the predictor were shifted up by one; shift them back
+    }
+    return(absCoeffMod);
+  }
+
+  unsigned templateAbsSumTS( int scanPos, const TCoeff* coeff ) // both arguments are ignored
+  {
+    return 1; // constant result, independent of the neighbourhood
+  }
+
+  int                       regBinLimit;
 
 private:
   // constant
@@ -197,9 +365,7 @@ private:
   const unsigned            m_log2BlockWidth;
   const unsigned            m_log2BlockHeight;
   const unsigned            m_maxNumCoeff;
-#if HEVC_USE_SIGN_HIDING
   const bool                m_signHiding;
-#endif
   const bool                m_extendedPrecision;
   const int                 m_maxLog2TrDynamicRange;
   CoeffScanType             m_scanType;
@@ -228,7 +394,15 @@ private:
   CtxSet                    m_sigFlagCtxSet[3];
   CtxSet                    m_parFlagCtxSet;
   CtxSet                    m_gtxFlagCtxSet[2];
+  unsigned                  m_sigGroupCtxIdTS;
+  CtxSet                    m_tsSigFlagCtxSet;
+  CtxSet                    m_tsParFlagCtxSet;
+  CtxSet                    m_tsGtxFlagCtxSet;
+  CtxSet                    m_tsLrg1FlagCtxSet;
+  CtxSet                    m_tsSignFlagCtxSet;
+  int                       m_remainingContextBins;
   std::bitset<MLS_GRP_NUM>  m_sigCoeffGroupFlag;
+  const bool                m_bdpcm;
 };
 
 
@@ -236,18 +410,31 @@ class CUCtx
 {
 public:
   CUCtx()              : isDQPCoded(false), isChromaQpAdjCoded(false),
-                         qgStart(false),
-                         numNonZeroCoeffNonTs(0) {}
+                         qgStart(false), qp(0)  // qp gets a defined value here so it is never read uninitialised
+                         {
+                           violatesLfnstConstrained[CHANNEL_TYPE_LUMA  ] = false;
+                           violatesLfnstConstrained[CHANNEL_TYPE_CHROMA] = false;
+                           lfnstLastScanPos                              = false;
+                           violatesMtsCoeffConstraint                    = false;
+                         }
   CUCtx(int _qp)       : isDQPCoded(false), isChromaQpAdjCoded(false),
                          qgStart(false),
-                         numNonZeroCoeffNonTs(0), qp(_qp) {}
+                         qp(_qp)  // the int argument is converted to the int8_t member
+                         {
+                           violatesLfnstConstrained[CHANNEL_TYPE_LUMA  ] = false;
+                           violatesLfnstConstrained[CHANNEL_TYPE_CHROMA] = false;
+                           lfnstLastScanPos                              = false;
+                           violatesMtsCoeffConstraint                    = false;
+                         }
   ~CUCtx() {}
 public:
   bool      isDQPCoded;
   bool      isChromaQpAdjCoded;
   bool      qgStart;
-  uint32_t      numNonZeroCoeffNonTs;
-  int8_t     qp;                   // used as a previous(last) QP and for QP prediction
+  bool      lfnstLastScanPos;
+  int8_t    qp;                   // used as a previous(last) QP and for QP prediction
+  bool      violatesLfnstConstrained[MAX_NUM_CHANNEL_TYPE];
+  bool      violatesMtsCoeffConstraint;
 };
 
 class MergeCtx
@@ -257,7 +444,7 @@ public:
   ~MergeCtx() {}
 public:
   MvField       mvFieldNeighbours [ MRG_MAX_NUM_CANDS << 1 ]; // double length for mv of both lists
-  uint8_t       GBiIdx            [ MRG_MAX_NUM_CANDS      ];
+  uint8_t       BcwIdx            [ MRG_MAX_NUM_CANDS      ];
   unsigned char interDirNeighbours[ MRG_MAX_NUM_CANDS      ];
   MergeType     mrgTypeNeighbours [ MRG_MAX_NUM_CANDS      ];
   int           numValidMergeCand;
@@ -267,6 +454,8 @@ public:
   MotionBuf     subPuMvpExtMiBuf;
   MvField mmvdBaseMv[MMVD_BASE_MV_NUM][2];
   void setMmvdMergeCandiInfo(PredictionUnit& pu, int candIdx);
+  bool          mmvdUseAltHpelIf  [ MMVD_BASE_MV_NUM ];
+  bool          useAltHpelIf      [ MRG_MAX_NUM_CANDS ];
   void setMergeInfo( PredictionUnit& pu, int candIdx );
 };
 
@@ -279,7 +468,7 @@ public:
   MvField       mvFieldNeighbours[AFFINE_MRG_MAX_NUM_CANDS << 1][3]; // double length for mv of both lists
   unsigned char interDirNeighbours[AFFINE_MRG_MAX_NUM_CANDS];
   EAffineModel  affineType[AFFINE_MRG_MAX_NUM_CANDS];
-  uint8_t       GBiIdx[AFFINE_MRG_MAX_NUM_CANDS];
+  uint8_t       BcwIdx[AFFINE_MRG_MAX_NUM_CANDS];
   int           numValidMergeCand;
   int           maxNumMergeCand;
 
@@ -291,14 +480,15 @@ public:
 namespace DeriveCtx
 {
 void     CtxSplit     ( const CodingStructure& cs, Partitioner& partitioner, unsigned& ctxSpl, unsigned& ctxQt, unsigned& ctxHv, unsigned& ctxHorBt, unsigned& ctxVerBt, bool* canSplit = nullptr );
-unsigned CtxQtCbf     ( const ComponentID compID, const unsigned trDepth, const bool prevCbCbf = false, const int ispIdx = 0 );
+unsigned CtxModeConsFlag( const CodingStructure& cs, Partitioner& partitioner );
+unsigned CtxQtCbf     ( const ComponentID compID, const bool prevCbf = false, const int ispIdx = 0 );
 unsigned CtxInterDir  ( const PredictionUnit& pu );
 unsigned CtxSkipFlag  ( const CodingUnit& cu );
-unsigned CtxIMVFlag   ( const CodingUnit& cu );
 unsigned CtxAffineFlag( const CodingUnit& cu );
-unsigned CtxTriangleFlag( const CodingUnit& cu );
 unsigned CtxPredModeFlag( const CodingUnit& cu );
 unsigned CtxIBCFlag(const CodingUnit& cu);
+unsigned CtxMipFlag   ( const CodingUnit& cu );
+unsigned CtxPltCopyFlag( const unsigned prevRunType, const unsigned dist );
 }
 
 #endif // __CONTEXTMODELLING__
diff --git a/source/Lib/CommonLib/Contexts.cpp b/source/Lib/CommonLib/Contexts.cpp
index dedb921d422e4b6c6e0f9be24b40316af664802d..749b8673911c7b3ddbd2404daa1bcc53481c306c 100644
--- a/source/Lib/CommonLib/Contexts.cpp
+++ b/source/Lib/CommonLib/Contexts.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -120,24 +120,13 @@ const BinFracBits ProbModelTables::m_binFracBits[256] = {
   { { 0x28beb, 0x0057e } }, { { 0x2a658, 0x004c0 } }, { { 0x2c531, 0x00403 } }, { { 0x2ea40, 0x00346 } },
   { { 0x318a9, 0x0028b } }, { { 0x356cb, 0x001d0 } }, { { 0x3b520, 0x00116 } }, { { 0x48000, 0x0005c } },
 };
-
-const uint16_t ProbModelTables::m_inistateToCount[128] = {
-  614,   647,   681,   718,   756,   797,   839,   884,   932,   982,   1034,  1089,  1148,  1209,  1274,  1342,
-  1414,  1490,  1569,  1653,  1742,  1835,  1933,  2037,  2146,  2261,  2382,  2509,  2643,  2785,  2934,  3091,
-  3256,  3430,  3614,  3807,  4011,  4225,  4452,  4690,  4941,  5205,  5483,  5777,  6086,  6412,  6755,  7116,
-  7497,  7898,  8320,  8766,  9235,  9729,  10249, 10798, 11375, 11984, 12625, 13300, 14012, 14762, 15551, 16384,
-  16384, 17216, 18005, 18755, 19467, 20142, 20783, 21392, 21969, 22518, 23038, 23532, 24001, 24447, 24869, 25270,
-  25651, 26012, 26355, 26681, 26990, 27284, 27562, 27826, 28077, 28315, 28542, 28756, 28960, 29153, 29337, 29511,
-  29676, 29833, 29982, 30124, 30258, 30385, 30506, 30621, 30730, 30834, 30932, 31025, 31114, 31198, 31277, 31353,
-  31425, 31493, 31558, 31619, 31678, 31733, 31785, 31835, 31883, 31928, 31970, 32011, 32049, 32086, 32120, 32153
-};
-
 void BinProbModel_Std::init( int qp, int initId )
 {
-  int slope     = ( ( initId >>  4 )  * 5 ) - 45;
-  int offset    = ( ( initId  & 15 ) << 3 ) - 16;
-  int inistate  = ( ( slope   * qp ) >> 4 ) + offset;
-  const int p1 = m_inistateToCount[inistate < 0 ? 0 : inistate > 127 ? 127 : inistate];
+  int slope = (initId >> 3) - 4;  // slope comes from initId bits 3 and up, biased by -4
+  int offset = ((initId & 7) * 18) + 1;  // low three bits of initId pick the offset: 1, 19, ..., 127
+  int inistate = ((slope   * (qp - 16)) >> 1) + offset;  // linear in qp; equals offset at qp == 16
+  int state_clip = inistate < 1 ? 1 : inistate > 127 ? 127 : inistate;  // clamp to [1, 127]
+  const int p1 = (state_clip << 8);  // scaled by 256 before being masked into both state entries
   m_state[0]   = p1 & MASK_0;
   m_state[1]   = p1 & MASK_1;
 }
@@ -192,592 +181,731 @@ CtxSet ContextSetCfg::addCtxSet( std::initializer_list<std::initializer_list<uin
 }
 
 
-
-#define CNU 154 // dummy initialization value for unused context models 'Context model Not Used'
+#define CNU 35 // dummy initialization value for unused context models 'Context model Not Used'
 std::vector<std::vector<uint8_t>> ContextSetCfg::sm_InitTables(NUMBER_OF_SLICE_TYPES + 1);
 
 // clang-format off
 const CtxSet ContextSetCfg::SplitFlag = ContextSetCfg::addCtxSet
 ({
-  // |-------- do split ctx -------------------|
-  { 122, 124, 141, 108, 125, 156, 138, 126, 143, },
-  { 93, 139, 171, 124, 125, 141, 139, 141, 158, },
-  { 138, 154, 172, 124, 140, 142, 154, 127, 175, },
-  { 9, 13, 8, 8, 13, 12, 5, 10, 12, },
+  {  18,  27,  15,  18,  28,  30,  19,   7,  23, },
+  {  11,  35,  53,  12,   6,  30,  13,  15,  31, },
+  {  19,  28,  38,  27,  29,  38,  28,  38,  31, },
+  {  12,  13,   8,   8,  13,  12,   5,   9,   9, },
 });
 
 const CtxSet ContextSetCfg::SplitQtFlag = ContextSetCfg::addCtxSet
 ({
-  { 138, 140, 142, 136, 138, 140, },
-  { 139, 126, 142, 107, 138, 125, },
-  { 139, 125, 127, 136, 153, 126, },
-  { 0, 8, 8, 12, 12, 8, },
+  {  26,  36,  38,  33,  34,  21, },
+  {  20,  14,  23,  18,  19,   6, },
+  {  27,   6,  15,  25,  19,  22, },
+  {   0,   8,   8,  12,  12,   9, },
 });
 
 const CtxSet ContextSetCfg::SplitHvFlag = ContextSetCfg::addCtxSet
 ({
-  { 154, 168, 155, 139, 155, },
-  { 169, 168, 170, 153, 170, },
-  { 154, 168, 140, 153, 169, },
-  { 10, 9, 9, 8, 8, },
+  {  43,  42,  37,  35,  44, },
+  {  36,  35,  37,  27,  52, },
+  {  43,  42,  29,  27,  44, },
+  {   9,   8,   9,   8,   8, },
 });
 
 const CtxSet ContextSetCfg::Split12Flag = ContextSetCfg::addCtxSet
 ({
-  { 154, 140, 154, 140, },
-  { 169, 155, 154, 140, },
-  { 154, 170, 154, 170, },
-  { 12, 12, 12, 12, },
+  {  28,  29,  28,  29, },
+  {  43,  37,  21,  22, },
+  {  51,  45,  36,  45, },
+  {  12,  13,  12,  13, },
+});
+
+const CtxSet ContextSetCfg::ModeConsFlag = ContextSetCfg::addCtxSet
+({
+  {  25,  20, },
+  {  25,  12, },
+  { CNU, CNU, },
+  {   1,   0, },
 });
 
 const CtxSet ContextSetCfg::SkipFlag = ContextSetCfg::addCtxSet
 ({
-  { 197, 214, 216, },
-  { 197, 198, 185, },
-  { 40, 138, 154, },
-  { 5, 8, 8, },
+  {  57,  60,  53, },
+  {  57,  59,  45, },
+  {   0,  26,  28, },
+  {   5,   4,   8, },
 });
 
 const CtxSet ContextSetCfg::MergeFlag = ContextSetCfg::addCtxSet
 ({
-  { 111, },
-  { 111, },
-  { 153, },
-  { 5, },
+  {   6, },
+  {   6, },
+  {  26, },
+  {   4, },
+});
+
+const CtxSet ContextSetCfg::RegularMergeFlag = ContextSetCfg::addCtxSet
+({
+  {  46,  15, },
+  {  38,   7, },
+  { CNU, CNU, },
+  {   5,   5, },
 });
 
 const CtxSet ContextSetCfg::MergeIdx = ContextSetCfg::addCtxSet
 ({
-  { 138, },
-  { 154, },
-  { 153, },
-  { 8, },
+  {  33, },
+  {  35, },
+  {  34, },
+  {   4, },
 });
 
 const CtxSet ContextSetCfg::MmvdFlag = ContextSetCfg::addCtxSet
 ({
-  { 120, },
-  { 122, },
+  {  40, },
+  {  26, },
   { CNU, },
-  { 8, },
+  {   4, },
 });
 
 const CtxSet ContextSetCfg::MmvdMergeIdx = ContextSetCfg::addCtxSet
 ({
-  { 154, },
-  { 154, },
+  {  43, },
+  {  43, },
   { CNU, },
-  { 10, },
+  {  10, },
 });
 
 const CtxSet ContextSetCfg::MmvdStepMvpIdx = ContextSetCfg::addCtxSet
 ({
-  { 213, },
-  { 244, },
+  {  51, },
+  {  60, },
   { CNU, },
-  { 1, },
-});
-
-const CtxSet ContextSetCfg::PartSize = ContextSetCfg::addCtxSet
-({
-  {  CNU, CNU, CNU, CNU,},
-  {  CNU, CNU, CNU, CNU,},
-  {  CNU, CNU, CNU, CNU,},
-  { DWS, DWS, DWS, DWS, }
+  {   0, },
 });
 
 const CtxSet ContextSetCfg::PredMode = ContextSetCfg::addCtxSet
 ({
-  { 192, 168, },
-  { 165, 139, },
+  {  40,  35, },
+  {  40,  35, },
   { CNU, CNU, },
-  { 5, 2, },
+  {   5,   1, },
 });
 
 const CtxSet ContextSetCfg::MultiRefLineIdx = ContextSetCfg::addCtxSet
 ({
-  { 90, 212, CNU, },
-  { 118, 212, CNU, },
-  { 119, 169, CNU, },
-  { 8, 8, DWS, },
+  {  25,  58, },
+  {  25,  50, },
+  {  25,  59, },
+  {   6,   8, },
 });
 
 const CtxSet ContextSetCfg::IntraLumaMpmFlag = ContextSetCfg::addCtxSet
 ({
-  { 154, },
-  { 154, },
-  { 170, },
-  { 6, },
+  {  29, },
+  {  36, },
+  {  45, },
+  {   6, },
+});
+
+const CtxSet ContextSetCfg::IntraLumaPlanarFlag = ContextSetCfg::addCtxSet
+({
+  {  13,   6, },
+  {  12,  20, },
+  {  13,  28, },
+  {   1,   5, },
+});
+
+const CtxSet ContextSetCfg::CclmModeFlag = ContextSetCfg::addCtxSet
+({
+  {  26, },
+  {  34, },
+  {  59, },
+  {   4, },
+});
+
+const CtxSet ContextSetCfg::CclmModeIdx = ContextSetCfg::addCtxSet
+({
+  {  27, },
+  {  27, },
+  {  27, },
+  {   9, },
 });
 
 const CtxSet ContextSetCfg::IntraChromaPredMode = ContextSetCfg::addCtxSet
 ({
-  {  137, 139, 140,},
-  {  138, 139, 169,},
-  {  154, 139, 154,},
-  {    5,   8,   9,},
+  {  25, },
+  {  18, },
+  {  34, },
+  {   5, },
+});
+
+const CtxSet ContextSetCfg::MipFlag = ContextSetCfg::addCtxSet
+({
+  {  56,  57,  50,  26, },
+  {  41,  57,  58,  26, },
+  {  33,  49,  42,  25, },
+  {   9,  10,   9,   6, },
 });
 
 const CtxSet ContextSetCfg::DeltaQP = ContextSetCfg::addCtxSet
 ({
-  {  154, 154, 154,},
-  {  154, 154, 154,},
-  {  154, 154, 154,},
-  { DWS, DWS, DWS, }
+  { CNU, CNU, },
+  { CNU, CNU, },
+  { CNU, CNU, },
+  { DWS, DWS, },
 });
 
 const CtxSet ContextSetCfg::InterDir = ContextSetCfg::addCtxSet
 ({
-  { 111, 125, 110, 94, 192, },
-  { 126, 111, 110, 94, 208, },
-  { CNU, CNU, CNU, CNU, CNU, },
-  { 0, 0, 4, 5, 0, },
+  {  14,   6,   5,   4,   3,  40, },
+  {   7,   6,   5,   4,  11,  40, },
+  { CNU, CNU, CNU, CNU, CNU, CNU, },
+  {   0,   0,   1,   4,   1,   0, },
 });
 
 const CtxSet ContextSetCfg::RefPic = ContextSetCfg::addCtxSet
 ({
-  { 125, 139, },
-  { 138, 168, },
+  {  20,  20, },
+  {  27,  35, },
   { CNU, CNU, },
-  { 4, 5, },
+  {   0,   4, },
+});
+
+const CtxSet ContextSetCfg::SubblockMergeFlag = ContextSetCfg::addCtxSet
+({
+  {  40,  51,  45, },
+  {  48,  57,  44, },
+  { CNU, CNU, CNU, },
+  {   4,   4,   4, },
 });
 
 const CtxSet ContextSetCfg::AffineFlag = ContextSetCfg::addCtxSet
 ({
-  { 179, 169, 171, },
-  { 180, 168, 155, },
+  {  12,  13,   6, },
+  {  12,  13,   6, },
   { CNU, CNU, CNU, },
-  { 8, 5, 4, },
+  {   4,   0,   0, },
 });
 
 const CtxSet ContextSetCfg::AffineType = ContextSetCfg::addCtxSet
 ({
-  { 138, },
-  { 153, },
+  {  35, },
+  {  35, },
   { CNU, },
-  { 4, },
+  {   4, },
 });
 
 const CtxSet ContextSetCfg::AffMergeIdx = ContextSetCfg::addCtxSet
-( {
-  { 109, },
-  { 95, },
+({
+  {   4, },
+  {   5, },
   { CNU, },
-  { 0, },
-} );
+  {   0, },
+});
 
-const CtxSet ContextSetCfg::GBiIdx = ContextSetCfg::addCtxSet
+const CtxSet ContextSetCfg::BcwIdx = ContextSetCfg::addCtxSet
 ({
-  // 4 ctx for 1st bin; 1 ctx for each of rest bins
-  { 228, CNU, CNU, CNU, 125, 155, 175, },
-  { 242, CNU, CNU, CNU, 154, 170, 237, },
-  { CNU, CNU, CNU, CNU, CNU, CNU, CNU, },
-  { 4, DWS, DWS, DWS, 4, 0, 0, },
+  {   5, },
+  {   4, },
+  { CNU, },
+  {   0, },
 });
 
 const CtxSet ContextSetCfg::Mvd = ContextSetCfg::addCtxSet
 ({
-  { 169, 183, },
-  { 155, 154, },
-  { 141, 156, },
-  { 9, 5, },
+  {  51,  58, },
+  {  44,  43, },
+  {  14,  45, },
+  {   9,   5, },
+});
+
+const CtxSet ContextSetCfg::BDPCMMode = ContextSetCfg::addCtxSet
+({
+  {  19,  28, },
+  {  40,  36, },
+  {  19,  35, },
+  {   4,   4, },
 });
 
 const CtxSet ContextSetCfg::QtRootCbf = ContextSetCfg::addCtxSet
 ({
-  { 109, },
-  { 95, },
-  { 110, },
-  { 4, },
+  {  12, },
+  {   5, },
+  {   6, },
+  {   4, },
 });
 
+const CtxSet ContextSetCfg::ACTFlag = ContextSetCfg::addCtxSet
+({  // one context per row; the first three rows hold the CNU placeholder
+  { CNU, },
+  { CNU, },
+  { CNU, },
+  { DWS, },
+});
+
 const CtxSet ContextSetCfg::QtCbf[] =
 {
   ContextSetCfg::addCtxSet
   ({
-    { 141, 127, 139, 140, },
-    { 142, 127, 139, 140, },
-    { CNU, 111, 124, 111, },
-    { 1, 5, 9, 8, },
+    {  15,  13,   5,  14, },
+    {  23,   4,  20,   7, },
+    {   7,  19,   5,   7, },
+    {   5,   1,   8,   9, },
   }),
   ContextSetCfg::addCtxSet
   ({
-    { 163, 154, CNU, CNU, CNU, },
-    { 164, 154, CNU, CNU, CNU, },
-    { 109, CNU, CNU, CNU, CNU, },
-    { 5, 8, DWS, DWS, DWS, },
+    {  25, CNU, },
+    {  25, CNU, },
+    {   4, CNU, },
+    {   5, DWS, },
   }),
   ContextSetCfg::addCtxSet
   ({
-    { 161, 154, },
-    { 192, 154, },
-    { 151, 155, },
-    { 5, 5, },
-  }),
+    {   9,  44, CNU, },
+    {  25,  29, CNU, },
+    {  33,  28, CNU, },
+    {   2,   1, DWS, },
+  })
 };
 
 const CtxSet ContextSetCfg::SigCoeffGroup[] =
 {
   ContextSetCfg::addCtxSet
   ({
-    { 105, 155, },
-    { 106, 156, },
-    { 107, 158, },
-    { 8, 5, },
+    {  25,  45, },
+    {  25,  30, },
+    {  18,  31, },
+    {   8,   5, },
   }),
   ContextSetCfg::addCtxSet
   ({
-    { 91, 155, },
-    { 90, 141, },
-    { 76, 127, },
-    { 5, 8, },
-  }),
-  ContextSetCfg::addCtxSet
-  ({
-    { CNU, CNU, },
-    { CNU, CNU, },
-    { CNU, CNU, },
-    { DWS, DWS, }
-  }),
-  ContextSetCfg::addCtxSet
-  ({
-    { CNU, CNU, },
-    { CNU, CNU, },
-    { CNU, CNU, },
-    { DWS, DWS, }
-  }),
+    {  25,  45, },
+    {  25,  52, },
+    {  25,   7, },
+    {   5,   8, },
+  })
 };
 
 const CtxSet ContextSetCfg::SigFlag[] =
 {
   ContextSetCfg::addCtxSet
   ({
-    { 88, 166, 152, 182, 168, 154, 0, 167, 182, 168, 183, 155, 193, 213, 183, 183, 169, 185, },
-    { 132, 152, 167, 168, 183, 140, 177, 182, 168, 154, 169, 155, 180, 213, 183, 169, 184, 156, },
-    { 89, 138, 153, 139, 154, 140, 134, 139, 139, 140, 140, 141, 137, 170, 169, 170, 141, 157, },
-    { 12, 9, 9, 9, 9, 10, 9, 9, 9, 9, 9, 9, 8, 8, 8, 8, 8, 9, },
+    {  17,  41,  49,  36,   1,  49,  50,  37,  48,  51,  58,  45, },
+    {  17,  41,  42,  29,  25,  49,  43,  37,  33,  51,  51,  30, },
+    {  25,  19,  28,  14,  25,  20,  29,  30,  19,  37,  30,  38, },
+    {  12,   9,   9,  10,   9,   9,   9,  10,   8,   8,   8,  10, },
   }),
   ContextSetCfg::addCtxSet
   ({
-    { 72, 167, 153, 168, 154, 155, 180, 199, 183, 199, 199, 186, },
-    { 133, 138, 153, 139, 154, 140, 181, 229, 169, 229, 170, 157, },
-    { 43, 153, 168, 169, 154, 155, 152, 215, 155, 201, 171, 143, },
-    { 9, 9, 12, 9, 13, 13, 5, 5, 8, 8, 8, 9, },
+    {   9,  49,  42,  21,  48,  59,  59,  53, },
+    {  17,  19,  20,  29,  41,  59,  60,  38, },
+    {  25,  27,  28,  37,  49,  53,  53,  46, },
+    {   9,   9,   9,  13,   4,   5,   8,   9, },
   }),
   ContextSetCfg::addCtxSet
   ({
-    { 152, 156, 201, 186, 186, 187, 182, 248, 188, 232, 188, 205, 182, 223, 223, 223, 223, 223, },
-    { 123, 142, 157, 172, 172, 218, 138, 249, 248, 248, 219, 223, 139, 223, 223, 223, 223, 223, },
-    { 93, 142, 157, 143, 188, 175, 138, 238, 205, 238, 253, 237, 139, 223, 223, 223, 223, 253, },
-    { 9, 12, 8, 8, 8, 8, 8, 8, 8, 8, 8, 5, 8, 0, 0, 0, 0, 0, },
+    {  26,  45,  53,  46,  49,  54,  61,  39,  42,  39,  39,  39, },
+    {  19,  38,  38,  46,  34,  54,  54,  39,   6,  39,  39,  39, },
+    {  11,  38,  46,  54,  27,  39,  39,  39,  36,  39,  39,  39, },
+    {   9,  13,   8,   8,   8,   8,   8,   5,   8,   0,   0,   0, },
   }),
   ContextSetCfg::addCtxSet
   ({
-    { 182, 171, 143, 158, 172, 189, 183, 223, 223, 223, 223, 223, },
-    { 168, 156, 173, 216, 172, 219, 169, 223, 223, 223, 223, 223, },
-    { 152, 173, 157, 187, 204, 253, 170, 223, 223, 223, 223, 223, },
-    { 8, 9, 12, 8, 8, 8, 4, 0, 2, 2, 2, 2, },
+    {  34,  45,  38,  31,  58,  39,  39,  39, },
+    {  35,  45,  53,  54,  44,  39,  39,  39, },
+    {  19,  46,  38,  39,  52,  39,  39,  39, },
+    {   8,  12,   8,   8,   4,   0,   0,   0, },
   }),
   ContextSetCfg::addCtxSet
   ({
-    { 123, 173, 223, 191, 232, 251, 212, 223, 223, 236, 206, 223, 192, 223, 223, 223, 223, 223, },
-    { 123, 175, 223, 175, 218, 223, 138, 223, 223, 223, 222, 223, 196, 223, 223, 223, 223, 223, },
-    { 107, 174, 223, 238, 251, 223, 63, 223, 223, 238, 223, 238, 12, 223, 223, 223, 223, 223, },
-    { 8, 8, 4, 8, 8, 8, 8, 0, 0, 4, 8, 5, 4, 2, 2, 2, 2, 1, },
+    {  19,  54,  39,  39,  50,  39,  39,  39,   0,  39,  39,  39, },
+    {  19,  39,  54,  39,  19,  39,  39,  39,  56,  39,  39,  39, },
+    {  18,  39,  39,  39,  19,  39,  39,  39,   0,  39,  39,  39, },
+    {   8,   8,   8,   8,   8,   0,   4,   4,   0,   0,   0,   0, },
   }),
   ContextSetCfg::addCtxSet
   ({
-    { 167, 201, 223, 248, 219, 223, 181, 223, 223, 223, 223, 223, },
-    { 167, 171, 223, 175, 248, 223, 152, 223, 223, 223, 223, 223, },
-    { 166, 234, 223, 236, 248, 223, 108, 223, 223, 223, 223, 223, },
-    { 8, 8, 5, 8, 8, 8, 5, 1, 2, 2, 2, 2, },
-  }),
+    {  34,  38,  54,  39,  41,  39,  39,  39, },
+    {  34,  38,  62,  39,  26,  39,  39,  39, },
+    {  26,  39,  39,  39,  19,  39,  39,  39, },
+    {   8,   8,   8,   8,   4,   0,   0,   0, },
+  })
 };
 
-
 const CtxSet ContextSetCfg::ParFlag[] =
 {
   ContextSetCfg::addCtxSet
   ({
-    { 121, 105, 136, 152, 138, 183, 90, 122, 167, 153, 168, 135, 152, 153, 168, 139, 151, 153, 139, 168, 154, },
-    { 121, 119, 136, 137, 138, 153, 104, 122, 138, 153, 139, 106, 138, 153, 168, 139, 137, 153, 168, 139, 139, },
-    { 121, 135, 137, 152, 138, 153, 91, 137, 138, 153, 139, 151, 138, 153, 139, 139, 138, 168, 139, 154, 139, },
-    { 8, 9, 12, 13, 13, 13, 10, 13, 13, 13, 13, 13, 13, 13, 13, 13, 10, 13, 13, 13, 13, },
+    {  33,  40,  25,  41,  26,  42,  25,  33,  26,  34,  27,  25,  41,  42,  42,  35,  33,  27,  35,  42,  43, },
+    {  18,  17,  33,  18,  34,  42,  25,  33,  26,  42,  27,  25,  34,  42,  42,  35,  26,  27,  42,  20,  20, },
+    {  33,  25,  18,  26,  34,  27,  25,  26,  19,  42,  35,  33,  19,  27,  35,  35,  34,  42,  20,  43,  20, },
+    {   8,   9,  12,  13,  13,  13,  10,  13,  13,  13,  13,  13,  13,  13,  13,  13,  10,  13,  13,  13,  13, },
   }),
   ContextSetCfg::addCtxSet
   ({
-    { 151, 120, 152, 138, 153, 153, 136, 168, 154, 168, 154, },
-    { 135, 120, 137, 138, 138, 153, 136, 153, 168, 139, 154, },
-    { 136, 135, 152, 153, 138, 153, 136, 168, 154, 139, 154, },
-    { 8, 10, 12, 12, 13, 13, 10, 10, 13, 13, 13, },
-  }),
+    {  33,  25,  26,  19,  19,  27,  33,  42,  43,  35,  43, },
+    {  25,  25,  26,  11,  19,  27,  33,  42,  50,  20,  43, },
+    {  33,  25,  26,  42,  19,  27,  26,  50,  35,  20,  43, },
+    {   9,  13,  12,  12,  13,  13,  13,  13,  13,  13,  13, },
+  })
 };
 
 const CtxSet ContextSetCfg::GtxFlag[] =
 {
   ContextSetCfg::addCtxSet
   ({
-    { 31, 73, 118, 75, 152, 109, 42, 44, 105, 107, 109, 0, 119, 136, 152, 124, 118, 136, 138, 153, 140, },
-    { 14, 116, 86, 119, 106, 152, 0, 72, 120, 151, 138, 116, 90, 107, 152, 153, 104, 107, 123, 153, 154, },
-    { 90, 72, 119, 135, 137, 138, 43, 60, 106, 137, 109, 58, 106, 108, 109, 124, 121, 138, 139, 154, 155, },
-    { 4, 1, 8, 8, 4, 2, 5, 9, 9, 8, 9, 9, 9, 9, 8, 9, 9, 8, 9, 8, 8, },
+    {  25,   0,   0,  17,  25,  18,   0,   9,  25,  33,  19,   0,  25,  33,  26,  20,  25,  33,  34,  35,  29, },
+    {  17,   0,   1,  17,  25,  18,   0,   9,  25,  33,  34,   9,  25,  18,  26,  20,  25,  18,  19,  27,  21, },
+    {  25,   1,  40,  25,  33,  11,  17,  25,  25,  18,   4,  17,  33,  26,  19,   5,  33,  19,  20,  28,  22, },
+    {   1,   5,   9,   9,   9,   6,   5,   9,  10,  10,   9,   9,   9,   9,   9,   9,   6,   8,   9,   9,   9, },
   }),
   ContextSetCfg::addCtxSet
   ({
-    { 119, 101, 134, 151, 107, 123, 118, 122, 124, 140, 155, },
-    { 117, 0, 90, 106, 92, 93, 147, 136, 138, 154, 140, },
-    { 194, 40, 120, 122, 122, 138, 103, 121, 153, 154, 155, },
-    { 2, 5, 8, 8, 8, 6, 6, 8, 8, 8, 7, },
+    {  25,   1,  40,  33,  26,   4,  25,  33,  27,  36,  37, },
+    {  17,   9,  25,  10,   3,   4,  17,  33,  19,  28,  29, },
+    {  48,   9,  25,  18,  26,  35,  25,  26,  35,  28,  37, },
+    {   1,   5,   8,   8,   8,   6,   6,   9,   8,   8,  10, },
   }),
   ContextSetCfg::addCtxSet
   ({
-    { 43, 177, 181, 168, 154, 170, 133, 167, 139, 154, 155, 164, 153, 154, 169, 155, 181, 183, 169, 185, 186, },
-    { 101, 133, 137, 153, 139, 140, 134, 138, 139, 154, 155, 136, 153, 154, 140, 170, 138, 154, 155, 170, 186, },
-    { 134, 120, 123, 153, 139, 140, 92, 124, 154, 125, 111, 138, 154, 140, 155, 141, 154, 140, 185, 171, 143, },
-    { 8, 5, 9, 9, 12, 9, 9, 10, 13, 12, 10, 9, 10, 10, 10, 10, 8, 9, 8, 8, 10, },
+    {   0,   0,  33,  34,  35,  36,  25,  34,  35,  28,  29,  40,  42,  43,  36,  30,  56,  43,  44,  45,  38, },
+    {   0,  17,  26,  19,  20,  21,  25,  34,  20,  28,  29,  33,  27,  28,  29,  22,  34,  28,  44,  37,  38, },
+    {  25,  25,  11,  27,  20,  21,  18,  12,  28,  21,  22,  34,  28,  29,  29,  30,  28,  29,  45,  30,  23, },
+    {   9,   5,  10,  13,  13,  10,   9,  10,  13,  13,  13,   9,  10,  10,  10,  13,   8,   9,   9,  10,  13, },
   }),
   ContextSetCfg::addCtxSet
   ({
-    { 0, 178, 153, 154, 140, 140, 196, 170, 186, 157, 188, },
-    { 0, 135, 153, 139, 125, 140, 182, 155, 156, 142, 159, },
-    { 163, 136, 153, 154, 125, 140, 183, 170, 201, 187, 174, },
-    { 6, 9, 10, 12, 12, 10, 5, 9, 8, 8, 9, },
-  }),
+    {   0,  40,  42,  20,  21,  29,  57,  52,  53,  38,  46, },
+    {   0,  25,  27,  20,  13,   6,  57,  52,  30,  38,  31, },
+    {  40,  33,  27,  28,  21,  37,  51,  37,  53,  38,  46, },
+    {   9,   9,  10,  12,  12,  10,   5,   9,   9,   9,  12, },
+  })
 };
 
 const CtxSet ContextSetCfg::LastX[] =
 {
   ContextSetCfg::addCtxSet
   ({
-    { 111, 111, 110, 111, 111, 139, 111, 126, 111, 139, 126, 126, 111, 111, 169, 154, 111, 110, 110, 139, CNU, CNU, CNU, CNU, CNU, },
-    { 125, 110, 109, 125, 125, 123, 111, 111, 95, 123, 126, 111, 110, 95, 169, 154, 140, 139, 139, 138, CNU, CNU, CNU, CNU, CNU, },
-    { 125, 140, 124, 111, 111, 109, 111, 126, 125, 123, 111, 141, 111, 125, 79, 155, 142, 170, 140, 183, CNU, CNU, CNU, CNU, CNU, },
-    { 8, 5, 5, 5, 4, 4, 5, 4, 4, 0, 5, 1, 0, 0, 0, 1, 1, 0, 0, 0, DWS, DWS, DWS, DWS, DWS, },
+    {  14,   6,   5,   7,  14,   4,   7,   7,   6,  12,  29,   7,   6,   6,  20,  28,   7,  13,  13,  20, },
+    {   6,  13,  12,   6,   6,   4,  14,  14,   5,  12,  29,  14,  13,   5,  36,  28,  14,  13,  20,  19, },
+    {  13,   5,   4,   6,  14,   4,   6,  14,  21,  11,  14,   7,  14,  13,  11,  21,  37,  37,  21,  50, },
+    {   8,   5,   4,   5,   4,   4,   5,   4,   1,   0,   4,   1,   0,   0,   0,   1,   1,   0,   0,   0, },
   }),
   ContextSetCfg::addCtxSet
   ({
-    { 122, 124, 63, CNU, },
-    { 138, 123, 92, CNU, },
-    { 138, 108, 47, CNU, },
-    { 2, 1, 1, DWS, },
-  }),
+    {  11,   5,   3, },
+    {  12,   4,  18, },
+    {  12,   4,   3, },
+    {   2,   1,   1, },
+  })
 };
 
 const CtxSet ContextSetCfg::LastY[] =
 {
   ContextSetCfg::addCtxSet
   ({
-    { 125, 125, 139, 125, 111, 139, 111, 111, 110, 110, 140, 126, 125, 125, 140, 139, 111, 110, 124, 181, CNU, CNU, CNU, CNU, CNU, },
-    { 95, 95, 109, 110, 110, 108, 125, 111, 124, 123, 140, 111, 110, 124, 139, 125, 126, 110, 124, 182, CNU, CNU, CNU, CNU, CNU, },
-    { 110, 110, 109, 125, 111, 123, 111, 126, 95, 108, 111, 127, 111, 95, 78, 169, 157, 141, 125, 138, CNU, CNU, CNU, CNU, CNU, },
-    { 8, 5, 8, 5, 5, 4, 5, 5, 4, 0, 5, 5, 1, 0, 0, 1, 4, 1, 0, 0, DWS, DWS, DWS, DWS, DWS, },
+    {  13,   5,   5,   6,   6,  12,  14,   6,   5,   5,  14,   7,   5,  12,  21,  13,   7,  13,  12,  41, },
+    {   5,   5,  12,   6,   6,  19,   6,  14,   5,  19,  29,   7,  13,   5,  36,  21,   7,  13,   5,  27, },
+    {  13,   5,   4,   6,   6,  11,  14,  14,   5,  11,  14,  22,  14,  12,   3,  21,  37,  52,  28,  34, },
+    {   8,   5,   8,   5,   5,   4,   5,   5,   4,   0,   5,   5,   1,   0,   0,   1,   4,   0,   0,   0, },
   }),
   ContextSetCfg::addCtxSet
   ({
-    { 122, 124, 123, CNU, },
-    { 108, 123, 121, CNU, },
-    { 123, 123, 91, CNU, },
-    { 2, 2, 2, DWS, },
-  }),
+    {  11,   5,  19, },
+    {  11,   4,  18, },
+    {  12,   4,   3, },
+    {   6,   2,   2, },
+  })
 };
 
-
 const CtxSet ContextSetCfg::MVPIdx = ContextSetCfg::addCtxSet
 ({
-  { 153, },
-  { 168, },
-  { 168, },
-  { 10, },
+  {  34, },
+  {  34, },
+  {  42, },
+  {  12, },
 });
 
 const CtxSet ContextSetCfg::SmvdFlag = ContextSetCfg::addCtxSet
-( {
-  { 154, },
-  { 125, },
+({
+  {  50, },
+  {  28, },
   { CNU, },
-  { 8, },
-} );
+  {   5, },
+});
 
 const CtxSet ContextSetCfg::SaoMergeFlag = ContextSetCfg::addCtxSet
 ({
-  { 47, },
-  { 244, },
-  { 199, },
-  { 0, },
+  {  10, },
+  {  60, },
+  {  52, },
+  {   0, },
 });
 
 const CtxSet ContextSetCfg::SaoTypeIdx = ContextSetCfg::addCtxSet
 ({
-  { 47, },
-  { 95, },
-  { 95, },
-  { 0, },
+  {  10, },
+  {   5, },
+  {   5, },
+  {   0, },
 });
 
+const CtxSet ContextSetCfg::LFNSTIdx = ContextSetCfg::addCtxSet
+({
+  {  52,  37,  33, },
+  {  45,  45,  18, },
+  { CNU,  52,  33, },
+  {   9,   9,   5, },
+});
 
-const CtxSet ContextSetCfg::TransquantBypassFlag = ContextSetCfg::addCtxSet
+const CtxSet ContextSetCfg::PLTFlag = ContextSetCfg::addCtxSet
 ({
-  {  154,},
-  {  154,},
-  {  154,},
-  { DWS, }
+  { CNU, },
+  { CNU, },
+  { CNU, },
+  { DWS, },
+});
+
+const CtxSet ContextSetCfg::RotationFlag = ContextSetCfg::addCtxSet
+({
+  { CNU, },
+  { CNU, },
+  { CNU, },
+  { DWS, },
+});
+
+const CtxSet ContextSetCfg::RunTypeFlag = ContextSetCfg::addCtxSet
+({
+  { CNU, },
+  { CNU, },
+  { CNU, },
+  { DWS, },
+});
+
+const CtxSet ContextSetCfg::IdxRunModel = ContextSetCfg::addCtxSet
+({
+  { CNU, CNU, CNU, CNU, CNU, },
+  { CNU, CNU, CNU, CNU, CNU, },
+  { CNU, CNU, CNU, CNU, CNU, },
+  { DWS, DWS, DWS, DWS, DWS, },
+});
+
+const CtxSet ContextSetCfg::CopyRunModel = ContextSetCfg::addCtxSet
+({
+  { CNU, CNU, CNU, },
+  { CNU, CNU, CNU, },
+  { CNU, CNU, CNU, },
+  { DWS, DWS, DWS, },
 });
 
 const CtxSet ContextSetCfg::RdpcmFlag = ContextSetCfg::addCtxSet
 ({
-  {  139, 139,},
-  {  139, 139,},
-  {  CNU, CNU,},
-  { DWS, DWS, }
+  { CNU, CNU, },
+  { CNU, CNU, },
+  { CNU, CNU, },
+  { DWS, DWS, },
 });
 
 const CtxSet ContextSetCfg::RdpcmDir = ContextSetCfg::addCtxSet
 ({
-  {  139, 139,},
-  {  139, 139,},
-  {  CNU, CNU,},
-  { DWS, DWS, }
+  { CNU, CNU, },
+  { CNU, CNU, },
+  { CNU, CNU, },
+  { DWS, DWS, },
+});
+
+const CtxSet ContextSetCfg::TransformSkipFlag = ContextSetCfg::addCtxSet
+({  // two contexts per row
+  {  25,  17, },
+  {  25,  17, },
+  {  25,   1, },
+  {   1,   1, },
 });
 
-const CtxSet ContextSetCfg::MTSIndex = ContextSetCfg::addCtxSet
+const CtxSet ContextSetCfg::MTSIdx = ContextSetCfg::addCtxSet
 ({
-  { CNU, 155, 155, 140, 140, CNU, 216, 153, 153, 0, CNU, },
-  { CNU, 155, 155, 140, 140, CNU, 233, 167, 153, 0, CNU, },
-  { CNU, CNU, 140, 140, 140, CNU, 219, 138, 153, 0, CNU, },
-  { DWS, 8, 8, 8, 8, DWS, 4, 8, 9, 3, DWS, },
+  {  37,  25,  27,   0, },  // four contexts per row
+  {  30,  40,  27,   0, },
+  {  13,   0,  35,   0, },
+  {   8,   0,   9,   0, },
 });
 
 const CtxSet ContextSetCfg::ISPMode = ContextSetCfg::addCtxSet
 ({
-  { 152, 154, },
-  { 166, 154, },
-  { 152, 154, },
-  { 8, 5, },
+  {  33,  43, },
+  {  33,  36, },
+  {  33,  43, },
+  {   9,   2, },
 });
 
 const CtxSet ContextSetCfg::SbtFlag = ContextSetCfg::addCtxSet
-( {
-  { 168, 183, },
-  { 197, 183, },
+({
+  {  49,  50, },
+  {  49,  50, },
   { CNU, CNU, },
-  { 4, 8, },
-} );
+  {   1,   5, },
+});
 
 const CtxSet ContextSetCfg::SbtQuadFlag = ContextSetCfg::addCtxSet
-( {
-  { 168, },
-  { 168, },
+({
+  {  42, },
+  {  42, },
   { CNU, },
-  { 9, },
-} );
+  {  10, },
+});
 
 const CtxSet ContextSetCfg::SbtHorFlag = ContextSetCfg::addCtxSet
-( {
-  { 139, 154, 139, },
-  { 139, 154, 139, },
+({
+  {  35,  51,  27, },
+  {  20,  43,  12, },
   { CNU, CNU, CNU, },
-  { 8, 5, 4, },
-} );
+  {   8,   4,   1, },
+});
 
 const CtxSet ContextSetCfg::SbtPosFlag = ContextSetCfg::addCtxSet
-( {
-  { 154, },
-  { 154, },
+({
+  {  28, },
+  {  28, },
   { CNU, },
-  { 13, },
-} );
+  {  13, },
+});
 
 const CtxSet ContextSetCfg::CrossCompPred = ContextSetCfg::addCtxSet
 ({
-  {  154, 154, 154, 154, 154, 154, 154, 154, 154, 154,},
-  {  154, 154, 154, 154, 154, 154, 154, 154, 154, 154,},
-  {  154, 154, 154, 154, 154, 154, 154, 154, 154, 154,},
-  { DWS, DWS, DWS, DWS, DWS, DWS, DWS, DWS, DWS, DWS, }
+  { CNU, CNU, CNU, CNU, CNU, CNU, CNU, CNU, CNU, CNU, },
+  { CNU, CNU, CNU, CNU, CNU, CNU, CNU, CNU, CNU, CNU, },
+  { CNU, CNU, CNU, CNU, CNU, CNU, CNU, CNU, CNU, CNU, },
+  { DWS, DWS, DWS, DWS, DWS, DWS, DWS, DWS, DWS, DWS, },
 });
 
 const CtxSet ContextSetCfg::ChromaQpAdjFlag = ContextSetCfg::addCtxSet
 ({
-  {  154,},
-  {  154,},
-  {  154,},
-  { DWS, }
+  { CNU, },
+  { CNU, },
+  { CNU, },
+  { DWS, },
 });
 
 const CtxSet ContextSetCfg::ChromaQpAdjIdc = ContextSetCfg::addCtxSet
 ({
-  {  154,},
-  {  154,},
-  {  154,},
-  { DWS, }
+  { CNU, },
+  { CNU, },
+  { CNU, },
+  { DWS, },
 });
 
 const CtxSet ContextSetCfg::ImvFlag = ContextSetCfg::addCtxSet
 ({
-  { 212, 199, 215, 180, 183, 242, },
-  { 213, 229, 244, 166, 198, 244, },
-  { CNU, CNU, CNU, 152, CNU, CNU, },
-  { 1, 4, 4, 5, 1, 0, },
+  {  51,  33,  50,  60,  45, },
+  {  59,  48,  58,  60,  60, },
+  { CNU,  34, CNU, CNU, CNU, },
+  {   0,   5,   1,   0,   4, },
 });
 
-const CtxSet ContextSetCfg::ctbAlfFlag =
-{
-  ContextSetCfg::addCtxSet
-  ( {
-    { 154, 186, 174, 183, 233, 250, 168, 248, 250, },
-    { 139, 186, 203, 183, 247, 249, 183, 232, 249, },
-    { 219, 236, 238, 232, 249, 235, 246, 234, 251, },
-    { 0, 0, 4, 0, 0, 1, 0, 0, 1, },
-  } )
-};
-
-const CtxSet ContextSetCfg::MHIntraFlag = ContextSetCfg::addCtxSet
+const CtxSet ContextSetCfg::ctbAlfFlag = ContextSetCfg::addCtxSet
 ({
-  { 225, },
-  { 197, },
-  { CNU, },
-  { 1, },
+  {  26,  52,  46,  18,  61,  54,  18,  61,  54, },
+  {   6,  23,  46,  12,  61,  54,   5,  46,  54, },
+  {  39,  39,  39,  62,  39,  39,  31,  39,  39, },
+  {   0,   0,   0,   0,   0,   0,   0,   0,   0, },
 });
 
-const CtxSet ContextSetCfg::MHIntraPredMode = ContextSetCfg::addCtxSet
+const CtxSet ContextSetCfg::ctbAlfAlternative = ContextSetCfg::addCtxSet
 ({
-  { 156, CNU, CNU, CNU, },
-  { 156, CNU, CNU, CNU, },
-  { CNU, CNU, CNU, CNU, },
-  { 9, DWS, DWS, DWS, },
+  {  11,  11, },
+  {  20,  12, },
+  {  28,  28, },
+  {   0,   0, },
 });
 
-const CtxSet ContextSetCfg::TriangleFlag = ContextSetCfg::addCtxSet
+
+const CtxSet ContextSetCfg::AlfUseTemporalFilt = ContextSetCfg::addCtxSet
 ({
-  { 149, 123, 123, },
-  { 151, 152, 138, },
-  { CNU, CNU, CNU, },
-  { 8, 12, 9, },
+  {  46, },
+  {  53, },
+  {  46, },
+  {   0, },
 });
 
-const CtxSet ContextSetCfg::TriangleIdx = ContextSetCfg::addCtxSet
+const CtxSet ContextSetCfg::CiipFlag = ContextSetCfg::addCtxSet
 ({
+  {  50, },
+  {  50, },
   { CNU, },
-  { CNU, },
-  { CNU, },
-  { DWS, },
+  {   1, },
 });
-// clang-format on
 
 const CtxSet ContextSetCfg::IBCFlag = ContextSetCfg::addCtxSet
 ({
-  { 0, 154, 141, },
-  { 0, 153, 140, },
-  { 132, 153, 125, },
-  { 5, 5, 8, },
+  {   0,  43,  45, },
+  {   0,  42,  37, },
+  {  17,  42,  36, },
+  {   1,   5,   8, },
 });
 
+const CtxSet ContextSetCfg::JointCbCrFlag = ContextSetCfg::addCtxSet
+({
+  {  43,  51,  45, },
+  {  35,  44,  45, },
+  {  35,  29,  51, },
+  {   1,   1,   0, },
+});
+
+const CtxSet ContextSetCfg::TsSigCoeffGroup = ContextSetCfg::addCtxSet
+({
+  {  18,  35,  37, },
+  {  18,  12,  29, },
+  {  18,  20,  38, },
+  {   5,   8,   8, },
+});
+
+const CtxSet ContextSetCfg::TsSigFlag = ContextSetCfg::addCtxSet
+({
+  {  25,  50,  37, },
+  {  40,  35,  44, },
+  {  25,  28,  38, },
+  {  13,  13,   8, },
+});
+
+const CtxSet ContextSetCfg::TsParFlag = ContextSetCfg::addCtxSet
+({
+  {  11, },
+  {   3, },
+  {  11, },
+  {   6, },
+});
+
+const CtxSet ContextSetCfg::TsGtxFlag = ContextSetCfg::addCtxSet
+({
+  { CNU,  10,   4,   4,   5, },
+  { CNU,   2,   3,   3,  11, },
+  { CNU,  10,   3,   3,   3, },
+  { DWS,   1,   1,   1,   1, },
+});
+
+const CtxSet ContextSetCfg::TsLrg1Flag = ContextSetCfg::addCtxSet
+({
+  {  19,  11,   4,   6, },
+  {  18,  11,   4,  28, },
+  {  11,   5,   5,   6, },
+  {   4,   2,   1,   6, },
+});
+
+const CtxSet ContextSetCfg::TsResidualSign = ContextSetCfg::addCtxSet
+({
+  {  28,  25,  53,  28,  33,  30, },
+  {   5,  10,  53,  35,  25,  53, },
+  {  20,  17,  46,  20,  25,  46, },
+  {   1,   4,   4,   8,   8,   8, },
+});
+// clang-format on
+
 const unsigned ContextSetCfg::NumberOfContexts = (unsigned)ContextSetCfg::sm_InitTables[0].size();
 
 
 // combined sets
+const CtxSet ContextSetCfg::Palette = { ContextSetCfg::RotationFlag, ContextSetCfg::RunTypeFlag, ContextSetCfg::IdxRunModel, ContextSetCfg::CopyRunModel };
 const CtxSet ContextSetCfg::Sao = { ContextSetCfg::SaoMergeFlag, ContextSetCfg::SaoTypeIdx };
 
-
+const CtxSet ContextSetCfg::Alf = { ContextSetCfg::ctbAlfFlag, ContextSetCfg::ctbAlfAlternative, ContextSetCfg::AlfUseTemporalFilt };
 
 template <class BinProbModel>
 CtxStore<BinProbModel>::CtxStore()
diff --git a/source/Lib/CommonLib/Contexts.h b/source/Lib/CommonLib/Contexts.h
index e52842dead764732f268c30aadecc35b3fe2b18d..e831206ce0d8cb2ab264fcfc706bff726236383e 100644
--- a/source/Lib/CommonLib/Contexts.h
+++ b/source/Lib/CommonLib/Contexts.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -67,7 +67,6 @@ class ProbModelTables
 {
 protected:
   static const BinFracBits m_binFracBits[256];
-  static const uint16_t    m_inistateToCount[128];
   static const uint8_t      m_RenormTable_32  [ 32];          // Std         MP   MPI
 };
 
@@ -199,37 +198,57 @@ public:
   static const CtxSet   SplitQtFlag;
   static const CtxSet   SplitHvFlag;
   static const CtxSet   Split12Flag;
+  static const CtxSet   ModeConsFlag;
   static const CtxSet   SkipFlag;
   static const CtxSet   MergeFlag;
+  static const CtxSet   RegularMergeFlag;
   static const CtxSet   MergeIdx;
-  static const CtxSet   PartSize;
   static const CtxSet   PredMode;
   static const CtxSet   MultiRefLineIdx;
   static const CtxSet   IntraLumaMpmFlag;
+  static const CtxSet   IntraLumaPlanarFlag;
+  static const CtxSet   CclmModeFlag;
+  static const CtxSet   CclmModeIdx;
   static const CtxSet   IntraChromaPredMode;
+  static const CtxSet   MipFlag;
   static const CtxSet   DeltaQP;
   static const CtxSet   InterDir;
   static const CtxSet   RefPic;
   static const CtxSet   MmvdFlag;
   static const CtxSet   MmvdMergeIdx;
   static const CtxSet   MmvdStepMvpIdx;
+  static const CtxSet   SubblockMergeFlag;
   static const CtxSet   AffineFlag;
   static const CtxSet   AffineType;
   static const CtxSet   AffMergeIdx;
   static const CtxSet   Mvd;
+  static const CtxSet   BDPCMMode;
   static const CtxSet   QtRootCbf;
+  static const CtxSet   ACTFlag;
   static const CtxSet   QtCbf           [3];    // [ channel ]
-  static const CtxSet   SigCoeffGroup   [4];    // [ ChannelType ]
+  static const CtxSet   SigCoeffGroup   [2];    // [ ChannelType ]
   static const CtxSet   LastX           [2];    // [ ChannelType ]
   static const CtxSet   LastY           [2];    // [ ChannelType ]
   static const CtxSet   SigFlag         [6];    // [ ChannelType + State ]
   static const CtxSet   ParFlag         [2];    // [ ChannelType ]
   static const CtxSet   GtxFlag         [4];    // [ ChannelType + x ]
+  static const CtxSet   TsSigCoeffGroup;
+  static const CtxSet   TsSigFlag;
+  static const CtxSet   TsParFlag;
+  static const CtxSet   TsGtxFlag;
+  static const CtxSet   TsLrg1Flag;
+  static const CtxSet   TsResidualSign;
   static const CtxSet   MVPIdx;
   static const CtxSet   SaoMergeFlag;
   static const CtxSet   SaoTypeIdx;
-  static const CtxSet   MTSIndex;
-  static const CtxSet   TransquantBypassFlag;
+  static const CtxSet   TransformSkipFlag;
+  static const CtxSet   MTSIdx;
+  static const CtxSet   LFNSTIdx;
+  static const CtxSet   PLTFlag;
+  static const CtxSet   RotationFlag;
+  static const CtxSet   RunTypeFlag;
+  static const CtxSet   IdxRunModel;
+  static const CtxSet   CopyRunModel;
   static const CtxSet   RdpcmFlag;
   static const CtxSet   RdpcmDir;
   static const CtxSet   SbtFlag;
@@ -240,21 +259,23 @@ public:
   static const CtxSet   ChromaQpAdjFlag;
   static const CtxSet   ChromaQpAdjIdc;
   static const CtxSet   ImvFlag;
-  static const CtxSet   GBiIdx;
+  static const CtxSet   BcwIdx;
   static const CtxSet   ctbAlfFlag;
-  static const CtxSet   MHIntraFlag;
-  static const CtxSet   MHIntraPredMode;
-  static const CtxSet   TriangleFlag;
-  static const CtxSet   TriangleIdx;
+  static const CtxSet   ctbAlfAlternative;
+  static const CtxSet   AlfUseTemporalFilt;
+  static const CtxSet   CiipFlag;
   static const CtxSet   SmvdFlag;
   static const CtxSet   IBCFlag;
   static const CtxSet   ISPMode;
+  static const CtxSet   JointCbCrFlag;
   static const unsigned NumberOfContexts;
 
   // combined sets for less complex copying
   // NOTE: The contained CtxSet's should directly follow each other in the initalization list;
   //       otherwise, you will copy more elements than you want !!!
   static const CtxSet   Sao;
+  static const CtxSet   Alf;
+  static const CtxSet   Palette;
 
 public:
   static const std::vector<uint8_t>&  getInitTable( unsigned initId );
@@ -419,7 +440,7 @@ private:
   CtxStore<BinProbModel_Std>    m_CtxStore_Std;
 protected:
   unsigned                      m_GRAdaptStats[RExt__GOLOMB_RICE_ADAPTATION_STATISTICS_SETS];
-#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM
+#if ENABLE_SPLIT_PARALLELISM
 
 public:
   int64_t cacheId;
diff --git a/source/Lib/CommonLib/CrossCompPrediction.cpp b/source/Lib/CommonLib/CrossCompPrediction.cpp
index ea637e7befec8514e47051d0cbe75e09baceb419..95d99cfc3c2fadfff5c0edc769090c708ddd0397 100644
--- a/source/Lib/CommonLib/CrossCompPrediction.cpp
+++ b/source/Lib/CommonLib/CrossCompPrediction.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/CrossCompPrediction.h b/source/Lib/CommonLib/CrossCompPrediction.h
index 5e35a662a6769747827c2a01469d3897c773b16b..f4bb776bd363db942a802d6bdb59c12d87813750 100644
--- a/source/Lib/CommonLib/CrossCompPrediction.h
+++ b/source/Lib/CommonLib/CrossCompPrediction.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/DepQuant.cpp b/source/Lib/CommonLib/DepQuant.cpp
index 8d46daf033252a13f098cbfa59a39d2e13198be8..af4a5757a7efbcb4a6074b813f4be1df3184acf1 100644
--- a/source/Lib/CommonLib/DepQuant.cpp
+++ b/source/Lib/CommonLib/DepQuant.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -89,6 +89,10 @@ namespace DQIntern
     int           nextSbbBelow;
     int           posX;
     int           posY;
+    ChannelType   chType;
+    int           sbtInfo;
+    int           tuWidth;
+    int           tuHeight;
   };
 
   class Rom;
@@ -128,19 +132,19 @@ namespace DQIntern
     Rom() : m_scansInitialized(false) {}
     ~Rom() { xUninitScanArrays(); }
     void                init        ()                       { xInitScanArrays(); }
-    const NbInfoSbb*    getNbInfoSbb( int hd, int vd, int ch ) const { return m_scanId2NbInfoSbbArray[hd][vd][ch]; }
-    const NbInfoOut*    getNbInfoOut( int hd, int vd, int ch ) const { return m_scanId2NbInfoOutArray[hd][vd][ch]; }
+    const NbInfoSbb*    getNbInfoSbb( int hd, int vd ) const { return m_scanId2NbInfoSbbArray[hd][vd]; }
+    const NbInfoOut*    getNbInfoOut( int hd, int vd ) const { return m_scanId2NbInfoOutArray[hd][vd]; }
     const TUParameters* getTUPars   ( const CompArea& area, const ComponentID compID ) const
     {
-      return m_tuParameters[g_aucLog2[area.width]][g_aucLog2[area.height]][toChannelType(compID)];
+      return m_tuParameters[floorLog2(area.width)][floorLog2(area.height)][toChannelType(compID)];
     }
   private:
     void  xInitScanArrays   ();
     void  xUninitScanArrays ();
   private:
     bool          m_scansInitialized;
-    NbInfoSbb*    m_scanId2NbInfoSbbArray[ MAX_CU_DEPTH+1 ][ MAX_CU_DEPTH+1 ][ MAX_NUM_CHANNEL_TYPE ];
-    NbInfoOut*    m_scanId2NbInfoOutArray[ MAX_CU_DEPTH+1 ][ MAX_CU_DEPTH+1 ][ MAX_NUM_CHANNEL_TYPE ];
+    NbInfoSbb*    m_scanId2NbInfoSbbArray[ MAX_CU_DEPTH+1 ][ MAX_CU_DEPTH+1 ];
+    NbInfoOut*    m_scanId2NbInfoOutArray[ MAX_CU_DEPTH+1 ][ MAX_CU_DEPTH+1 ];
     TUParameters* m_tuParameters         [ MAX_CU_DEPTH+1 ][ MAX_CU_DEPTH+1 ][ MAX_NUM_CHANNEL_TYPE ];
   };
 
@@ -157,8 +161,6 @@ namespace DQIntern
     uint32_t raster2id[ MAX_CU_SIZE * MAX_CU_SIZE ];
     ::memset(raster2id, 0, sizeof(raster2id));
 
-    for( int ch = 0; ch < MAX_NUM_CHANNEL_TYPE; ch++ )
-    {
     for( int hd = 0; hd <= MAX_CU_DEPTH; hd++ )
     {
       for( int vd = 0; vd <= MAX_CU_DEPTH; vd++ )
@@ -169,17 +171,17 @@ namespace DQIntern
         }
         const uint32_t      blockWidth    = (1 << hd);
         const uint32_t      blockHeight   = (1 << vd);
-        const uint32_t      log2CGWidth   = g_log2SbbSize[ch][hd][vd][0];
-        const uint32_t      log2CGHeight  = g_log2SbbSize[ch][hd][vd][1];
+        const uint32_t      log2CGWidth   = g_log2SbbSize[hd][vd][0];
+        const uint32_t      log2CGHeight  = g_log2SbbSize[hd][vd][1];
         const uint32_t      groupWidth    = 1 << log2CGWidth;
         const uint32_t      groupHeight   = 1 << log2CGHeight;
         const uint32_t      groupSize     = groupWidth * groupHeight;
         const CoeffScanType scanType      = SCAN_DIAG;
         const SizeType      blkWidthIdx   = gp_sizeIdxInfo->idxFrom( blockWidth  );
         const SizeType      blkHeightIdx  = gp_sizeIdxInfo->idxFrom( blockHeight );
-        const ScanElement * scanId2RP     = g_scanOrder[ch][SCAN_GROUPED_4x4][scanType][blkWidthIdx][blkHeightIdx];
-        NbInfoSbb*&         sId2NbSbb     = m_scanId2NbInfoSbbArray[hd][vd][ch];
-        NbInfoOut*&         sId2NbOut     = m_scanId2NbInfoOutArray[hd][vd][ch];
+        const ScanElement * scanId2RP     = g_scanOrder[SCAN_GROUPED_4x4][scanType][blkWidthIdx][blkHeightIdx];
+        NbInfoSbb*&         sId2NbSbb     = m_scanId2NbInfoSbbArray[hd][vd];
+        NbInfoOut*&         sId2NbOut     = m_scanId2NbInfoOutArray[hd][vd];
         // consider only non-zero-out region
         const uint32_t      blkWidthNZOut = std::min<unsigned>( JVET_C0024_ZERO_OUT_TH, blockWidth  );
         const uint32_t      blkHeightNZOut= std::min<unsigned>( JVET_C0024_ZERO_OUT_TH, blockHeight );
@@ -289,10 +291,12 @@ namespace DQIntern
           nbOut.maxDist -= scanId;
         }
 
-        m_tuParameters[hd][vd][ch] = new TUParameters( *this, blockWidth, blockHeight, ChannelType(ch) );
+        for( int chId = 0; chId < MAX_NUM_CHANNEL_TYPE; chId++ )
+        {
+          m_tuParameters[hd][vd][chId] = new TUParameters( *this, blockWidth, blockHeight, ChannelType(chId) );
+        }
       }
     }
-    }
     m_scansInitialized = true;
   }
 
@@ -306,19 +310,19 @@ namespace DQIntern
     {
       for( int vd = 0; vd <= MAX_CU_DEPTH; vd++ )
       {
-        for( int ch = 0; ch < 2; ch++ )
+        NbInfoSbb*& sId2NbSbb = m_scanId2NbInfoSbbArray[hd][vd];
+        NbInfoOut*& sId2NbOut = m_scanId2NbInfoOutArray[hd][vd];
+        if( sId2NbSbb )
         {
-          NbInfoSbb*&     sId2NbSbb = m_scanId2NbInfoSbbArray[hd][vd][ch];
-          NbInfoOut*&     sId2NbOut = m_scanId2NbInfoOutArray[hd][vd][ch];
-          TUParameters*&  tuPars    = m_tuParameters         [hd][vd][ch];
-          if( sId2NbSbb )
-          {
-            delete [] sId2NbSbb;
-          }
-          if( sId2NbOut )
-          {
-            delete [] sId2NbOut;
-          }
+          delete [] sId2NbSbb;
+        }
+        if( sId2NbOut )
+        {
+          delete [] sId2NbOut;
+        }
+        for( int chId = 0; chId < MAX_NUM_CHANNEL_TYPE; chId++ )
+        {
+          TUParameters*& tuPars = m_tuParameters[hd][vd][chId];
           if( tuPars )
           {
             delete tuPars;
@@ -341,30 +345,25 @@ namespace DQIntern
     const uint32_t nonzeroWidth  = std::min<uint32_t>(JVET_C0024_ZERO_OUT_TH, m_width);
     const uint32_t nonzeroHeight = std::min<uint32_t>(JVET_C0024_ZERO_OUT_TH, m_height);
     m_numCoeff                   = nonzeroWidth * nonzeroHeight;
-    m_log2SbbWidth        = g_log2SbbSize[m_chType][ g_aucLog2[m_width] ][ g_aucLog2[m_height] ][0];
-    m_log2SbbHeight       = g_log2SbbSize[m_chType][ g_aucLog2[m_width] ][ g_aucLog2[m_height] ][1];
+    const int log2W       = floorLog2( m_width  );
+    const int log2H       = floorLog2( m_height );
+    m_log2SbbWidth        = g_log2SbbSize[ log2W ][ log2H ][0];
+    m_log2SbbHeight       = g_log2SbbSize[ log2W ][ log2H ][1];
     m_log2SbbSize         = m_log2SbbWidth + m_log2SbbHeight;
     m_sbbSize             = ( 1 << m_log2SbbSize );
     m_sbbMask             = m_sbbSize - 1;
     m_widthInSbb  = nonzeroWidth >> m_log2SbbWidth;
     m_heightInSbb = nonzeroHeight >> m_log2SbbHeight;
     m_numSbb              = m_widthInSbb * m_heightInSbb;
-#if HEVC_USE_MDCS
-#error "MDCS is not supported" // use different function...
-    //  m_scanType            = CoeffScanType( TU::getCoefScanIdx( tu, m_compID ) );
-#else
     m_scanType            = SCAN_DIAG;
-#endif
     SizeType        hsbb  = gp_sizeIdxInfo->idxFrom( m_widthInSbb  );
     SizeType        vsbb  = gp_sizeIdxInfo->idxFrom( m_heightInSbb );
     SizeType        hsId  = gp_sizeIdxInfo->idxFrom( m_width  );
     SizeType        vsId  = gp_sizeIdxInfo->idxFrom( m_height );
-    m_scanSbbId2SbbPos    = g_scanOrder     [ chType ][ SCAN_UNGROUPED   ][ m_scanType ][ hsbb ][ vsbb ];
-    m_scanId2BlkPos       = g_scanOrder     [ chType ][ SCAN_GROUPED_4x4 ][ m_scanType ][ hsId ][ vsId ];
-    int log2W             = g_aucLog2[ m_width  ];
-    int log2H             = g_aucLog2[ m_height ];
-    m_scanId2NbInfoSbb    = rom.getNbInfoSbb( log2W, log2H, chType );
-    m_scanId2NbInfoOut    = rom.getNbInfoOut( log2W, log2H, chType );
+    m_scanSbbId2SbbPos    = g_scanOrder     [ SCAN_UNGROUPED   ][ m_scanType ][ hsbb ][ vsbb ];
+    m_scanId2BlkPos       = g_scanOrder     [ SCAN_GROUPED_4x4 ][ m_scanType ][ hsId ][ vsId ];
+    m_scanId2NbInfoSbb    = rom.getNbInfoSbb( log2W, log2H );
+    m_scanId2NbInfoOut    = rom.getNbInfoOut( log2W, log2H );
     m_scanInfo            = new ScanInfo[ m_numCoeff ];
     for( int scanIdx = 0; scanIdx < m_numCoeff; scanIdx++ )
     {
@@ -375,6 +374,9 @@ namespace DQIntern
 
   void TUParameters::xSetScanInfo( ScanInfo& scanInfo, int scanIdx )
   {
+    scanInfo.chType     = m_chType;
+    scanInfo.tuWidth    = m_width;
+    scanInfo.tuHeight   = m_height;
     scanInfo.sbbSize    = m_sbbSize;
     scanInfo.numSbb     = m_numSbb;
     scanInfo.scanIdx    = scanIdx;
@@ -395,12 +397,12 @@ namespace DQIntern
       const int diag        = m_scanId2BlkPos[nextScanIdx].x + m_scanId2BlkPos[nextScanIdx].y;
       if( m_chType == CHANNEL_TYPE_LUMA )
       {
-        scanInfo.sigCtxOffsetNext = ( diag < 2 ? 12 : diag < 5 ?  6 : 0 );
+        scanInfo.sigCtxOffsetNext = ( diag < 2 ? 8 : diag < 5 ?  4 : 0 );
         scanInfo.gtxCtxOffsetNext = ( diag < 1 ? 16 : diag < 3 ? 11 : diag < 10 ? 6 : 1 );
       }
       else
       {
-        scanInfo.sigCtxOffsetNext = ( diag < 2 ? 6 : 0 );
+        scanInfo.sigCtxOffsetNext = ( diag < 2 ? 4 : 0 );
         scanInfo.gtxCtxOffsetNext = ( diag < 1 ? 6 : 1 );
       }
       scanInfo.nextInsidePos      = nextScanIdx & m_sbbMask;
@@ -446,7 +448,7 @@ namespace DQIntern
     static const unsigned sm_numCtxSetsSig    = 3;
     static const unsigned sm_numCtxSetsGtx    = 2;
     static const unsigned sm_maxNumSigSbbCtx  = 2;
-    static const unsigned sm_maxNumSigCtx     = 18;
+    static const unsigned sm_maxNumSigCtx     = 12;
     static const unsigned sm_maxNumGtxCtx     = 21;
 
   private:
@@ -486,7 +488,7 @@ namespace DQIntern
       {
         bool rootCbfSoFar = false;
         bool isLastSubPartition = CU::isISPLast(*tu.cu, tu.Y(), compID);
-        uint32_t nTus = tu.cu->ispMode == HOR_INTRA_SUBPARTITIONS ? tu.cu->lheight() >> g_aucLog2[tu.lheight()] : tu.cu->lwidth() >> g_aucLog2[tu.lwidth()];
+        uint32_t nTus = tu.cu->ispMode == HOR_INTRA_SUBPARTITIONS ? tu.cu->lheight() >> floorLog2(tu.lheight()) : tu.cu->lwidth() >> floorLog2(tu.lwidth());
         if( isLastSubPartition )
         {
           TransformUnit* tuPointer = tu.cu->firstTU;
@@ -504,11 +506,11 @@ namespace DQIntern
         {
           prevLumaCbf = TU::getPrevTuCbfAtDepth(tu, compID, tu.depth);
         }
-        bits = fracBitsAccess.getFracBitsArray(Ctx::QtCbf[compID](DeriveCtx::CtxQtCbf(compID, tu.depth, prevLumaCbf, true)));
+        bits = fracBitsAccess.getFracBitsArray(Ctx::QtCbf[compID](DeriveCtx::CtxQtCbf(compID, prevLumaCbf, true)));
       }
       else
       {
-        bits = fracBitsAccess.getFracBitsArray(Ctx::QtCbf[compID](DeriveCtx::CtxQtCbf(compID, tu.depth, tu.cbf[COMPONENT_Cb])));
+        bits = fracBitsAccess.getFracBitsArray(Ctx::QtCbf[compID](DeriveCtx::CtxQtCbf(compID, tu.cbf[COMPONENT_Cb])));
       }
       cbfDeltaBits = lastCbfIsInferred ? 0 : int32_t(bits.intBits[1]) - int32_t(bits.intBits[0]);
     }
@@ -520,12 +522,8 @@ namespace DQIntern
       int32_t             bitOffset   = ( xy ? cbfDeltaBits : 0 );
       int32_t*            lastBits    = ( xy ? m_lastBitsY : m_lastBitsX );
       const unsigned      size        = ( xy ? tuPars.m_height : tuPars.m_width );
-      const unsigned      log2Size    = g_aucNextLog2[ size ];
-#if HEVC_USE_MDCS
-      const bool          useYCtx     = ( m_scanType == SCAN_VER ? ( xy == 0 ) : ( xy != 0 ) );
-#else
+      const unsigned      log2Size    = ceilLog2( size );
       const bool          useYCtx     = ( xy != 0 );
-#endif
       const CtxSet&       ctxSetLast  = ( useYCtx ? Ctx::LastY : Ctx::LastX )[ chType ];
       const unsigned      lastShift   = ( compID == COMPONENT_Y ? (log2Size+1)>>2 : Clip3<unsigned>(0,2,size>>3) );
       const unsigned      lastOffset  = ( compID == COMPONENT_Y ? ( prefixCtx[log2Size] ) : 0 );
@@ -560,7 +558,7 @@ namespace DQIntern
     {
       BinFracBits*    bits    = m_sigFracBits [ ctxSetId ];
       const CtxSet&   ctxSet  = Ctx::SigFlag  [ chType + 2*ctxSetId ];
-      const unsigned  numCtx  = ( chType == CHANNEL_TYPE_LUMA ? 18 : 12 );
+      const unsigned  numCtx  = ( chType == CHANNEL_TYPE_LUMA ? 12 : 8 );
       for( unsigned ctxId = 0; ctxId < numCtx; ctxId++ )
       {
         bits[ ctxId ] = fracBitsAccess.getFracBitsArray( ctxSet( ctxId ) );
@@ -629,14 +627,13 @@ namespace DQIntern
   {
   public:
     Quantizer() {}
-
-    void  dequantBlock  ( const TransformUnit& tu, const ComponentID compID, const QpParam& cQP, CoeffBuf& recCoeff   ) const;
-    void  initQuantBlock( const TransformUnit& tu, const ComponentID compID, const QpParam& cQP, const double lambda  );
-
-    inline void   preQuantCoeff(const TCoeff absCoeff, PQData *pqData) const;
+    void  dequantBlock         ( const TransformUnit& tu, const ComponentID compID, const QpParam& cQP, CoeffBuf& recCoeff, bool enableScalingLists, int* piDequantCoef ) const;
+    void  initQuantBlock       ( const TransformUnit& tu, const ComponentID compID, const QpParam& cQP, const double lambda, int gValue );
+    inline void   preQuantCoeff( const TCoeff absCoeff, PQData *pqData, int quanCoeff ) const;
     inline TCoeff getLastThreshold() const { return m_thresLast; }
     inline TCoeff getSSbbThreshold() const { return m_thresSSbb; }
 
+    inline int64_t getQScale()       const { return m_QScale; }
   private:
     // quantization
     int               m_QShift;
@@ -666,15 +663,11 @@ namespace DQIntern
     }
     return y;
   }
-
-  void Quantizer::initQuantBlock( const TransformUnit& tu, const ComponentID compID, const QpParam& cQP, const double lambda )
+  void Quantizer::initQuantBlock(const TransformUnit& tu, const ComponentID compID, const QpParam& cQP, const double lambda, int gValue = -1)
   {
-#if HEVC_USE_SCALING_LISTS
-    CHECK ( tu.cs->sps->getScalingListFlag(), "Scaling lists not supported" );
-#endif
     CHECKD( lambda <= 0.0, "Lambda must be greater than 0" );
 
-    const int         qpDQ                  = cQP.Qp + 1;
+    const int         qpDQ                  = cQP.Qp(tu.mtsIdx[compID] == MTS_SKIP) + 1;
     const int         qpPer                 = qpDQ / 6;
     const int         qpRem                 = qpDQ - 6 * qpPer;
     const SPS&        sps                   = *tu.cs->sps;
@@ -683,33 +676,22 @@ namespace DQIntern
     const int         channelBitDepth       = sps.getBitDepth( chType );
     const int         maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange( chType );
     const int         nomTransformShift     = getTransformShift( channelBitDepth, area.size(), maxLog2TrDynamicRange );
-    const bool        clipTransformShift    = ( tu.mtsIdx==1 && sps.getSpsRangeExtension().getExtendedPrecisionProcessingFlag() );
-    const int         transformShift        = ( clipTransformShift ? std::max<int>( 0, nomTransformShift ) : nomTransformShift );
-
+    const bool        clipTransformShift    = ( tu.mtsIdx[compID] == MTS_SKIP && sps.getSpsRangeExtension().getExtendedPrecisionProcessingFlag());
+    const bool    needsSqrt2ScaleAdjustment = TU::needsSqrt2Scale(tu, compID);
+    const int         transformShift        = ( clipTransformShift ? std::max<int>( 0, nomTransformShift ) : nomTransformShift ) + (needsSqrt2ScaleAdjustment?-1:0);
     // quant parameters
     m_QShift                    = QUANT_SHIFT  - 1 + qpPer + transformShift;
     m_QAdd                      = -( ( 3 << m_QShift ) >> 1 );
-#if HM_QTBT_AS_IN_JEM_QUANT
-    Intermediate_Int  invShift  = IQUANT_SHIFT + 1 - qpPer - transformShift + ( TU::needsBlockSizeTrafoScale( tu, compID ) ? ADJ_DEQUANT_SHIFT : 0 );
-    m_QScale                    = ( TU::needsSqrt2Scale( tu, compID ) ? ( g_quantScales[ qpRem ] * 181 ) >> 7 : g_quantScales[ qpRem ] );
-#else
     Intermediate_Int  invShift  = IQUANT_SHIFT + 1 - qpPer - transformShift;
-    m_QScale                    = g_quantScales   [ qpRem ];
-#endif
+    m_QScale                    = g_quantScales[needsSqrt2ScaleAdjustment?1:0][ qpRem ];
     const unsigned    qIdxBD    = std::min<unsigned>( maxLog2TrDynamicRange + 1, 8*sizeof(Intermediate_Int) + invShift - IQUANT_SHIFT - 1 );
     m_maxQIdx                   = ( 1 << (qIdxBD-1) ) - 4;
-    m_thresLast                 = TCoeff( ( int64_t(3) << m_QShift ) / ( 4 * m_QScale ) );
-    m_thresSSbb                 = TCoeff( ( int64_t(3) << m_QShift ) / ( 4 * m_QScale ) );
-
+    m_thresLast                 = TCoeff((int64_t(4) << m_QShift));
+    m_thresSSbb                 = TCoeff((int64_t(3) << m_QShift));
     // distortion calculation parameters
-    const int64_t qScale        = g_quantScales[ qpRem ];
-#if HM_QTBT_AS_IN_JEM_QUANT
+    const int64_t qScale        = (gValue==-1) ? m_QScale : gValue;
     const int nomDShift =
-      SCALE_BITS - 2 * (nomTransformShift + DISTORTION_PRECISION_ADJUSTMENT(channelBitDepth)) + m_QShift;
-#else
-    const int nomDShift = SCALE_BITS - 2 * (nomTransformShift + DISTORTION_PRECISION_ADJUSTMENT(channelBitDepth))
-                          + m_QShift + (TU::needsQP3Offset(tu, compID) ? 1 : 0);
-#endif
+      SCALE_BITS - 2 * (nomTransformShift + DISTORTION_PRECISION_ADJUSTMENT(channelBitDepth)) + m_QShift + (needsSqrt2ScaleAdjustment ? 1 : 0);
     const double  qScale2       = double( qScale * qScale );
     const double  nomDistFactor = ( nomDShift < 0 ? 1.0/(double(int64_t(1)<<(-nomDShift))*qScale2*lambda) : double(int64_t(1)<<nomDShift)/(qScale2*lambda) );
     const int64_t pow2dfShift   = (int64_t)( nomDistFactor * qScale2 ) + 1;
@@ -720,23 +702,16 @@ namespace DQIntern
     m_DistOrgFact               = (int64_t)( nomDistFactor * double(int64_t(1)<<(m_DistShift+1       )) + .5 );
   }
 
-  void Quantizer::dequantBlock( const TransformUnit& tu, const ComponentID compID, const QpParam& cQP, CoeffBuf& recCoeff ) const
+  void Quantizer::dequantBlock( const TransformUnit& tu, const ComponentID compID, const QpParam& cQP, CoeffBuf& recCoeff, bool enableScalingLists, int* piDequantCoef) const
   {
-#if HEVC_USE_SCALING_LISTS
-    CHECK ( tu.cs->sps->getScalingListFlag(), "Scaling lists not supported" );
-#endif
 
     //----- set basic parameters -----
     const CompArea&     area      = tu.blocks[ compID ];
     const int           numCoeff  = area.area();
     const SizeType      hsId      = gp_sizeIdxInfo->idxFrom( area.width  );
     const SizeType      vsId      = gp_sizeIdxInfo->idxFrom( area.height );
-#if HEVC_USE_MDCS
-    const CoeffScanType scanType  = CoeffScanType( TU::getCoefScanIdx( tu, compID ) );
-#else
     const CoeffScanType scanType  = SCAN_DIAG;
-#endif
-    const ScanElement *scan = g_scanOrder[toChannelType(compID)][SCAN_GROUPED_4x4][scanType][hsId][vsId];
+    const ScanElement *scan       = g_scanOrder[SCAN_GROUPED_4x4][scanType][hsId][vsId];
     const TCoeff*       qCoeff    = tu.getCoeffs( compID ).buf;
           TCoeff*       tCoeff    = recCoeff.buf;
 
@@ -757,7 +732,7 @@ namespace DQIntern
     }
 
     //----- set dequant parameters -----
-    const int         qpDQ                  = cQP.Qp + 1;
+    const int         qpDQ                  = cQP.Qp(tu.mtsIdx[compID] == MTS_SKIP) + 1;
     const int         qpPer                 = qpDQ / 6;
     const int         qpRem                 = qpDQ - 6 * qpPer;
     const SPS&        sps                   = *tu.cs->sps;
@@ -767,22 +742,12 @@ namespace DQIntern
     const TCoeff      minTCoeff             = -( 1 << maxLog2TrDynamicRange );
     const TCoeff      maxTCoeff             =  ( 1 << maxLog2TrDynamicRange ) - 1;
     const int         nomTransformShift     = getTransformShift( channelBitDepth, area.size(), maxLog2TrDynamicRange );
-    const bool        clipTransformShift    = ( tu.mtsIdx==1 && sps.getSpsRangeExtension().getExtendedPrecisionProcessingFlag() );
-    const int         transformShift        = ( clipTransformShift ? std::max<int>( 0, nomTransformShift ) : nomTransformShift );
-#if HM_QTBT_AS_IN_JEM_QUANT
-    Intermediate_Int  shift                 = IQUANT_SHIFT + 1 - qpPer - transformShift + ( TU::needsBlockSizeTrafoScale( tu, compID ) ? ADJ_DEQUANT_SHIFT : 0 );
-    Intermediate_Int  invQScale             = g_invQuantScales[ qpRem ] * ( TU::needsSqrt2Scale( tu, compID ) ? 181 : 1 );
-#else
-    Intermediate_Int  shift                 = IQUANT_SHIFT + 1 - qpPer - transformShift;
-    Intermediate_Int  invQScale             = g_invQuantScales[ qpRem ];
-#endif
-    if( shift < 0 )
-    {
-      invQScale <<= -shift;
-      shift       = 0;
-    }
-    Intermediate_Int  add       = ( 1 << shift ) >> 1;
-
+    const bool        clipTransformShift    = ( tu.mtsIdx[compID] == MTS_SKIP && sps.getSpsRangeExtension().getExtendedPrecisionProcessingFlag());
+    const bool    needsSqrt2ScaleAdjustment = TU::needsSqrt2Scale(tu, compID);
+    const int         transformShift        = ( clipTransformShift ? std::max<int>( 0, nomTransformShift ) : nomTransformShift ) + (needsSqrt2ScaleAdjustment?-1:0);
+    Intermediate_Int  shift                 = IQUANT_SHIFT + 1 - qpPer - transformShift + (enableScalingLists ? LOG2_SCALING_LIST_NEUTRAL_VALUE : 0);
+    Intermediate_Int  invQScale             = g_invQuantScales[needsSqrt2ScaleAdjustment?1:0][ qpRem ];
+    Intermediate_Int  add = (shift < 0) ? 0 : ((1 << shift) >> 1);
     //----- dequant coefficients -----
     for( int state = 0, scanIdx = lastScanIdx; scanIdx >= 0; scanIdx-- )
     {
@@ -790,17 +755,23 @@ namespace DQIntern
       const TCoeff&   level     = qCoeff[ rasterPos ];
       if( level )
       {
+        if (enableScalingLists)
+          invQScale = piDequantCoef[rasterPos];//scalingfactor*levelScale
+        if (shift < 0 && (enableScalingLists || scanIdx == lastScanIdx))
+        {
+          invQScale <<= -shift;
+        }
         Intermediate_Int  qIdx      = ( level << 1 ) + ( level > 0 ? -(state>>1) : (state>>1) );
-        Intermediate_Int  nomTCoeff = ( qIdx * invQScale + add ) >> shift;
-        tCoeff[ rasterPos ]         = (TCoeff)Clip3<Intermediate_Int>( minTCoeff, maxTCoeff, nomTCoeff );
+        int64_t  nomTCoeff          = ((int64_t)qIdx * (int64_t)invQScale + add) >> ((shift < 0) ? 0 : shift);
+        tCoeff[rasterPos]           = (TCoeff)Clip3<int64_t>(minTCoeff, maxTCoeff, nomTCoeff);
       }
       state = ( 32040 >> ((state<<2)+((level&1)<<1)) ) & 3;   // the 16-bit value "32040" represent the state transition table
     }
   }
 
-  inline void Quantizer::preQuantCoeff(const TCoeff absCoeff, PQData *pqData) const
+  inline void Quantizer::preQuantCoeff(const TCoeff absCoeff, PQData *pqData, int quanCoeff) const
   {
-    int64_t scaledOrg = int64_t( absCoeff ) * m_QScale;
+    int64_t scaledOrg = int64_t( absCoeff ) * quanCoeff;
     TCoeff  qIdx      = std::max<TCoeff>( 1, std::min<TCoeff>( m_maxQIdx, TCoeff( ( scaledOrg + m_QAdd ) >> m_QShift ) ) );
     int64_t scaledAdd = qIdx * m_DistStepAdd - scaledOrg * m_DistOrgFact;
     PQData& pq_a      = pqData[ qIdx & 3 ];
@@ -875,10 +846,10 @@ namespace DQIntern
 #define RICEMAX 32
   const int32_t g_goRiceBits[4][RICEMAX] =
   {
-      { 32768,	65536,	98304,	131072,	163840,	196608,	262144,	262144,	327680,	327680,	327680,	327680,	393216,	393216,	393216,	393216,	393216,	393216,	393216,	393216,	458752,	458752,	458752,	458752,	458752,	458752,	458752,	458752,	458752,	458752,	458752,	458752},
-      { 65536,	65536,	98304,	98304,	131072,	131072,	163840,	163840,	196608,	196608,	229376,	229376,	294912,	294912,	294912,	294912,	360448,	360448,	360448,	360448,	360448,	360448,	360448,	360448,	425984,	425984,	425984,	425984,	425984,	425984,	425984,	425984},
-      { 98304,	98304,	98304,	98304,	131072,	131072,	131072,	131072,	163840,	163840,	163840,	163840,	196608,	196608,	196608,	196608,	229376,	229376,	229376,	229376,	262144,	262144,	262144,	262144,	327680,	327680,	327680,	327680,	327680,	327680,	327680,	327680},
-      { 131072,	131072,	131072,	131072,	131072,	131072,	131072,	131072,	163840,	163840,	163840,	163840,	163840,	163840,	163840,	163840,	196608,	196608,	196608,	196608,	196608,	196608,	196608,	196608,	229376,	229376,	229376,	229376,	229376,	229376,	229376,	229376}
+    { 32768,  65536,  98304, 131072, 163840, 196608, 262144, 262144, 327680, 327680, 327680, 327680, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752},  // row selected when m_goRicePar == 0 (see State::checkRdCosts); column index is clamped to RICEMAX-1 by the caller
+    { 65536,  65536,  98304,  98304, 131072, 131072, 163840, 163840, 196608, 196608, 229376, 229376, 294912, 294912, 294912, 294912, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984},  // row selected when m_goRicePar == 1
+    { 98304,  98304,  98304,  98304, 131072, 131072, 131072, 131072, 163840, 163840, 163840, 163840, 196608, 196608, 196608, 196608, 229376, 229376, 229376, 229376, 262144, 262144, 262144, 262144, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680},  // row selected when m_goRicePar == 2
+    {131072, 131072, 131072, 131072, 131072, 131072, 131072, 131072, 163840, 163840, 163840, 163840, 163840, 163840, 163840, 163840, 196608, 196608, 196608, 196608, 196608, 196608, 196608, 196608, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376}  // row selected when m_goRicePar == 3
   };
 
   class State
@@ -903,77 +874,76 @@ namespace DQIntern
       m_goRicePar     = 0;
       m_goRiceZero    = 0;
     }
-
-    void checkRdCosts( const ScanPosType spt, const PQData& pqDataA, const PQData& pqDataB, Decision& decisionA, Decision& decisionB) const
+    void checkRdCosts( const ScanPosType spt, const PQData& pqDataA, const PQData& pqDataB, Decision& decisionA, Decision& decisionB ) const
     {
       const int32_t*  goRiceTab = g_goRiceBits[m_goRicePar];
       int64_t         rdCostA   = m_rdCost + pqDataA.deltaDist;
       int64_t         rdCostB   = m_rdCost + pqDataB.deltaDist;
       int64_t         rdCostZ   = m_rdCost;
-      if( m_remRegBins >= 4 )
-      {
-        if( pqDataA.absLevel < 4 )
-          rdCostA += m_coeffFracBits.bits[pqDataA.absLevel];
-        else
+        if( m_remRegBins >= 4 )
         {
-          const unsigned value = (pqDataA.absLevel - 4) >> 1;
-          rdCostA += m_coeffFracBits.bits[pqDataA.absLevel - (value << 1)] + goRiceTab[value<RICEMAX ? value : RICEMAX-1];
+          if( pqDataA.absLevel < 4 )
+            rdCostA += m_coeffFracBits.bits[ pqDataA.absLevel ];
+          else
+          {
+            const unsigned value = ( pqDataA.absLevel - 4 ) >> 1;
+            rdCostA += m_coeffFracBits.bits[ pqDataA.absLevel - ( value << 1 ) ] + goRiceTab[ value < RICEMAX ? value : RICEMAX - 1 ];
+          }
+          if( pqDataB.absLevel < 4 )
+            rdCostB += m_coeffFracBits.bits[ pqDataB.absLevel ];
+          else
+          {
+            const unsigned value = ( pqDataB.absLevel - 4 ) >> 1;
+            rdCostB += m_coeffFracBits.bits[ pqDataB.absLevel - ( value << 1 ) ] + goRiceTab[ value < RICEMAX ? value : RICEMAX - 1 ];
+          }
+          if( spt == SCAN_ISCSBB )
+          {
+            rdCostA += m_sigFracBits.intBits[ 1 ];
+            rdCostB += m_sigFracBits.intBits[ 1 ];
+            rdCostZ += m_sigFracBits.intBits[ 0 ];
+          }
+          else if( spt == SCAN_SOCSBB )
+          {
+            rdCostA += m_sbbFracBits.intBits[ 1 ] + m_sigFracBits.intBits[ 1 ];
+            rdCostB += m_sbbFracBits.intBits[ 1 ] + m_sigFracBits.intBits[ 1 ];
+            rdCostZ += m_sbbFracBits.intBits[ 1 ] + m_sigFracBits.intBits[ 0 ];
+          }
+          else if( m_numSigSbb )
+          {
+            rdCostA += m_sigFracBits.intBits[ 1 ];
+            rdCostB += m_sigFracBits.intBits[ 1 ];
+            rdCostZ += m_sigFracBits.intBits[ 0 ];
+          }
+          else
+          {
+            rdCostZ = decisionA.rdCost;
+          }
         }
-        if( pqDataB.absLevel < 4 )
-          rdCostB += m_coeffFracBits.bits[pqDataB.absLevel];
         else
         {
-          const unsigned value = (pqDataB.absLevel - 4) >> 1;
-          rdCostB += m_coeffFracBits.bits[pqDataB.absLevel - (value << 1)] + goRiceTab[value<RICEMAX ? value : RICEMAX-1];
+          rdCostA += ( 1 << SCALE_BITS ) + goRiceTab[ pqDataA.absLevel <= m_goRiceZero ? pqDataA.absLevel - 1 : ( pqDataA.absLevel < RICEMAX ? pqDataA.absLevel : RICEMAX - 1 ) ];
+          rdCostB += ( 1 << SCALE_BITS ) + goRiceTab[ pqDataB.absLevel <= m_goRiceZero ? pqDataB.absLevel - 1 : ( pqDataB.absLevel < RICEMAX ? pqDataB.absLevel : RICEMAX - 1 ) ];
+          rdCostZ += goRiceTab[ m_goRiceZero ];
         }
-        if( spt == SCAN_ISCSBB )
+        if( rdCostA < decisionA.rdCost )
         {
-          rdCostA += m_sigFracBits.intBits[1];
-          rdCostB += m_sigFracBits.intBits[1];
-          rdCostZ += m_sigFracBits.intBits[0];
+          decisionA.rdCost = rdCostA;
+          decisionA.absLevel = pqDataA.absLevel;
+          decisionA.prevId = m_stateId;
         }
-        else if( spt == SCAN_SOCSBB )
+        if( rdCostZ < decisionA.rdCost )
         {
-          rdCostA += m_sbbFracBits.intBits[1] + m_sigFracBits.intBits[1];
-          rdCostB += m_sbbFracBits.intBits[1] + m_sigFracBits.intBits[1];
-          rdCostZ += m_sbbFracBits.intBits[1] + m_sigFracBits.intBits[0];
+          decisionA.rdCost = rdCostZ;
+          decisionA.absLevel = 0;
+          decisionA.prevId = m_stateId;
         }
-        else if( m_numSigSbb )
-        {
-          rdCostA += m_sigFracBits.intBits[1];
-          rdCostB += m_sigFracBits.intBits[1];
-          rdCostZ += m_sigFracBits.intBits[0];
-        }
-        else
+        if( rdCostB < decisionB.rdCost )
         {
-          rdCostZ = decisionA.rdCost;
+          decisionB.rdCost = rdCostB;
+          decisionB.absLevel = pqDataB.absLevel;
+          decisionB.prevId = m_stateId;
         }
       }
-      else
-      {
-        rdCostA += (1 << SCALE_BITS) + goRiceTab[pqDataA.absLevel <= m_goRiceZero ? pqDataA.absLevel - 1 : (pqDataA.absLevel<RICEMAX ? pqDataA.absLevel : RICEMAX-1)];
-        rdCostB += (1 << SCALE_BITS) + goRiceTab[pqDataB.absLevel <= m_goRiceZero ? pqDataB.absLevel - 1 : (pqDataB.absLevel<RICEMAX ? pqDataB.absLevel : RICEMAX-1)];
-        rdCostZ += goRiceTab[m_goRiceZero];
-      }
-      if( rdCostA < decisionA.rdCost )
-      {
-        decisionA.rdCost   = rdCostA;
-        decisionA.absLevel = pqDataA.absLevel;
-        decisionA.prevId   = m_stateId;
-      }
-      if( rdCostZ < decisionA.rdCost )
-      {
-        decisionA.rdCost   = rdCostZ;
-        decisionA.absLevel = 0;
-        decisionA.prevId   = m_stateId;
-      }
-      if( rdCostB < decisionB.rdCost )
-      {
-        decisionB.rdCost   = rdCostB;
-        decisionB.absLevel = pqDataB.absLevel;
-        decisionB.prevId   = m_stateId;
-      }
-    }
 
     inline void checkRdCostStart(int32_t lastOffset, const PQData &pqData, Decision &decision) const
     {
@@ -1018,7 +988,7 @@ namespace DQIntern
     int64_t                   m_rdCost;
     uint16_t                  m_absLevelsAndCtxInit[24];  // 16x8bit for abs levels + 16x16bit for ctx init id
     int8_t                    m_numSigSbb;
-    int8_t                    m_remRegBins;
+    int                       m_remRegBins;
     int8_t                    m_refSbbCtxId;
     BinFracBits               m_sbbFracBits;
     BinFracBits               m_sigFracBits;
@@ -1028,8 +998,10 @@ namespace DQIntern
     const int8_t              m_stateId;
     const BinFracBits*const   m_sigFracBitsArray;
     const CoeffFracBits*const m_gtxFracBitsArray;
-    const uint32_t*const      m_goRiceZeroArray;
     CommonCtx&                m_commonCtx;
+  public:
+    unsigned                  effWidth;
+    unsigned                  effHeight;
   };
 
 
@@ -1038,7 +1010,6 @@ namespace DQIntern
     , m_stateId         ( stateId )
     , m_sigFracBitsArray( rateEst.sigFlagBits(stateId) )
     , m_gtxFracBitsArray( rateEst.gtxFracBits(stateId) )
-    , m_goRiceZeroArray ( g_auiGoRicePosCoeff0[std::max(0,stateId-1)] )
     , m_commonCtx       ( commonCtx )
   {
   }
@@ -1059,11 +1030,6 @@ namespace DQIntern
         m_goRicePar             = prvState->m_goRicePar;
         if( m_remRegBins >= 4 )
         {
-          TCoeff rem = (decision.absLevel - 4) >> 1;
-          if( m_goRicePar < 3 && rem > (3<<m_goRicePar)-1 )
-          {
-            m_goRicePar++;
-          }
           m_remRegBins -= (decision.absLevel < 2 ? decision.absLevel : 3);
         }
         ::memcpy( m_absLevelsAndCtxInit, prvState->m_absLevelsAndCtxInit, 48*sizeof(uint8_t) );
@@ -1072,15 +1038,8 @@ namespace DQIntern
       {
         m_numSigSbb     =  1;
         m_refSbbCtxId   = -1;
-        if ( scanInfo.sbbSize == 4 )
-        {
-          m_remRegBins = MAX_NUM_REG_BINS_2x2SUBBLOCK - (decision.absLevel < 2 ? decision.absLevel : 3);
-        }
-        else
-        {
-          m_remRegBins = MAX_NUM_REG_BINS_4x4SUBBLOCK - (decision.absLevel < 2 ? decision.absLevel : 3);
-        }
-        m_goRicePar     = ( ((decision.absLevel - 4) >> 1) > (3<<0)-1 ? 1 : 0 );
+        int ctxBinSampleRatio = (scanInfo.chType == CHANNEL_TYPE_LUMA) ? MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT_LUMA : MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT_CHROMA;
+        m_remRegBins = (effWidth * effHeight *ctxBinSampleRatio) / 16 - (decision.absLevel < 2 ? decision.absLevel : 3);
         ::memset( m_absLevelsAndCtxInit, 0, 48*sizeof(uint8_t) );
       }
 
@@ -1125,8 +1084,44 @@ namespace DQIntern
         }
 #undef UPDATE
         TCoeff sumGt1 = sumAbs1 - sumNum;
-        m_sigFracBits = m_sigFracBitsArray[scanInfo.sigCtxOffsetNext + (sumAbs1 < 5 ? sumAbs1 : 5)];
+        m_sigFracBits = m_sigFracBitsArray[scanInfo.sigCtxOffsetNext + std::min( (sumAbs1+1)>>1, 3 )];
         m_coeffFracBits = m_gtxFracBitsArray[scanInfo.gtxCtxOffsetNext + (sumGt1 < 4 ? sumGt1 : 4)];
+
+        TCoeff  sumAbs = m_absLevelsAndCtxInit[8 + scanInfo.nextInsidePos] >> 8;
+#define UPDATE(k) {TCoeff t=levels[scanInfo.nextNbInfoSbb.inPos[k]]; sumAbs+=t; }
+        if (numIPos == 1)
+        {
+          UPDATE(0);
+        }
+        else if (numIPos == 2)
+        {
+          UPDATE(0);
+          UPDATE(1);
+        }
+        else if (numIPos == 3)
+        {
+          UPDATE(0);
+          UPDATE(1);
+          UPDATE(2);
+        }
+        else if (numIPos == 4)
+        {
+          UPDATE(0);
+          UPDATE(1);
+          UPDATE(2);
+          UPDATE(3);
+        }
+        else if (numIPos == 5)
+        {
+          UPDATE(0);
+          UPDATE(1);
+          UPDATE(2);
+          UPDATE(3);
+          UPDATE(4);
+        }
+#undef UPDATE
+        int sumAll = std::max(std::min(31, (int)sumAbs - 4 * 5), 0);
+        m_goRicePar = g_auiGoRiceParsCoeff[sumAll];
       }
       else
       {
@@ -1165,7 +1160,7 @@ namespace DQIntern
 #undef UPDATE
         sumAbs = std::min<TCoeff>(31, sumAbs);
         m_goRicePar = g_auiGoRiceParsCoeff[sumAbs];
-        m_goRiceZero = m_goRiceZeroArray[sumAbs];
+        m_goRiceZero = g_auiGoRicePosCoeff0(m_stateId, m_goRicePar);
       }
     }
   }
@@ -1203,7 +1198,7 @@ namespace DQIntern
       TCoeff  sumNum  =   tinit        & 7;
       TCoeff  sumAbs1 = ( tinit >> 3 ) & 31;
       TCoeff  sumGt1  = sumAbs1        - sumNum;
-      m_sigFracBits   = m_sigFracBitsArray[ scanInfo.sigCtxOffsetNext + ( sumAbs1 < 5 ? sumAbs1 : 5 ) ];
+      m_sigFracBits   = m_sigFracBitsArray[ scanInfo.sigCtxOffsetNext + std::min( (sumAbs1+1)>>1, 3 ) ];
       m_coeffFracBits = m_gtxFracBitsArray[ scanInfo.gtxCtxOffsetNext + ( sumGt1  < 4 ? sumGt1  : 4 ) ];
     }
   }
@@ -1228,13 +1223,14 @@ namespace DQIntern
 
     const int       sigNSbb   = ( ( scanInfo.nextSbbRight ? sbbFlags[ scanInfo.nextSbbRight ] : false ) || ( scanInfo.nextSbbBelow ? sbbFlags[ scanInfo.nextSbbBelow ] : false ) ? 1 : 0 );
     currState.m_numSigSbb     = 0;
-    if (scanInfo.sbbSize == 4)
+    if (prevState)
     {
-      currState.m_remRegBins  = MAX_NUM_REG_BINS_2x2SUBBLOCK;
+      currState.m_remRegBins = prevState->m_remRegBins;
     }
     else
     {
-      currState.m_remRegBins  = MAX_NUM_REG_BINS_4x4SUBBLOCK;
+      int ctxBinSampleRatio = (scanInfo.chType == CHANNEL_TYPE_LUMA) ? MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT_LUMA : MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT_CHROMA;
+      currState.m_remRegBins = (currState.effWidth * currState.effHeight *ctxBinSampleRatio) / 16;
     }
     currState.m_goRicePar     = 0;
     currState.m_refSbbCtxId   = currState.m_stateId;
@@ -1291,12 +1287,12 @@ namespace DQIntern
   public:
     DepQuant();
 
-    void    quant   ( TransformUnit& tu, const CCoeffBuf& srcCoeff, const ComponentID compID, const QpParam& cQP, const double lambda, const Ctx& ctx, TCoeff& absSum );
-    void    dequant ( const TransformUnit& tu,  CoeffBuf& recCoeff, const ComponentID compID, const QpParam& cQP )  const;
+    void    quant   ( TransformUnit& tu, const CCoeffBuf& srcCoeff, const ComponentID compID, const QpParam& cQP, const double lambda, const Ctx& ctx, TCoeff& absSum, bool enableScalingLists, int* quantCoeff );
+    void    dequant ( const TransformUnit& tu, CoeffBuf& recCoeff, const ComponentID compID, const QpParam& cQP, bool enableScalingLists, int* quantCoeff );
 
   private:
-    void    xDecideAndUpdate  ( const TCoeff absCoeff, const ScanInfo& scanInfo, bool zeroOut );
-    void    xDecide           ( const ScanPosType spt, const TCoeff absCoeff, const int lastOffset, Decision* decisions, bool zeroOut );
+    void    xDecideAndUpdate  ( const TCoeff absCoeff, const ScanInfo& scanInfo, bool zeroOut, int quantCoeff);
+    void    xDecide           ( const ScanPosType spt, const TCoeff absCoeff, const int lastOffset, Decision* decisions, bool zeroOut, int quantCoeff );
 
   private:
     CommonCtx   m_commonCtx;
@@ -1323,9 +1319,9 @@ namespace DQIntern
 #undef TINIT
 
 
-  void DepQuant::dequant( const TransformUnit& tu,  CoeffBuf& recCoeff, const ComponentID compID, const QpParam& cQP ) const
+  void DepQuant::dequant( const TransformUnit& tu,  CoeffBuf& recCoeff, const ComponentID compID, const QpParam& cQP, bool enableScalingLists, int* piDequantCoef )
   {
-    m_quant.dequantBlock( tu, compID, cQP, recCoeff );
+    m_quant.dequantBlock( tu, compID, cQP, recCoeff, enableScalingLists, piDequantCoef );
   }
 
 
@@ -1334,7 +1330,7 @@ namespace DQIntern
 #undef  DINIT
 
 
-  void DepQuant::xDecide( const ScanPosType spt, const TCoeff absCoeff, const int lastOffset, Decision* decisions, bool zeroOut)
+  void DepQuant::xDecide( const ScanPosType spt, const TCoeff absCoeff, const int lastOffset, Decision* decisions, bool zeroOut, int quanCoeff)
   {
     ::memcpy( decisions, startDec, 8*sizeof(Decision) );
 
@@ -1351,29 +1347,30 @@ namespace DQIntern
     }
 
     PQData  pqData[4];
-    m_quant.preQuantCoeff( absCoeff, pqData );
+    m_quant.preQuantCoeff( absCoeff, pqData, quanCoeff );
     m_prevStates[0].checkRdCosts( spt, pqData[0], pqData[2], decisions[0], decisions[2]);
     m_prevStates[1].checkRdCosts( spt, pqData[0], pqData[2], decisions[2], decisions[0]);
     m_prevStates[2].checkRdCosts( spt, pqData[3], pqData[1], decisions[1], decisions[3]);
     m_prevStates[3].checkRdCosts( spt, pqData[3], pqData[1], decisions[3], decisions[1]);
     if( spt==SCAN_EOCSBB )
     {
-      m_skipStates[0].checkRdCostSkipSbb( decisions[0] );
-      m_skipStates[1].checkRdCostSkipSbb( decisions[1] );
-      m_skipStates[2].checkRdCostSkipSbb( decisions[2] );
-      m_skipStates[3].checkRdCostSkipSbb( decisions[3] );
+        m_skipStates[0].checkRdCostSkipSbb( decisions[0] );
+        m_skipStates[1].checkRdCostSkipSbb( decisions[1] );
+        m_skipStates[2].checkRdCostSkipSbb( decisions[2] );
+        m_skipStates[3].checkRdCostSkipSbb( decisions[3] );
     }
+
     m_startState.checkRdCostStart( lastOffset, pqData[0], decisions[0] );
     m_startState.checkRdCostStart( lastOffset, pqData[2], decisions[2] );
   }
 
-  void DepQuant::xDecideAndUpdate( const TCoeff absCoeff, const ScanInfo& scanInfo, bool zeroOut )
+  void DepQuant::xDecideAndUpdate( const TCoeff absCoeff, const ScanInfo& scanInfo, bool zeroOut, int quantCoeff )
   {
     Decision* decisions = m_trellis[ scanInfo.scanIdx ];
 
     std::swap( m_prevStates, m_currStates );
 
-    xDecide( scanInfo.spt, absCoeff, lastOffset(scanInfo.scanIdx), decisions, zeroOut );
+    xDecide( scanInfo.spt, absCoeff, lastOffset(scanInfo.scanIdx), decisions, zeroOut, quantCoeff );
 
     if( scanInfo.scanIdx )
     {
@@ -1436,7 +1433,7 @@ namespace DQIntern
   }
 
 
-  void DepQuant::quant( TransformUnit& tu, const CCoeffBuf& srcCoeff, const ComponentID compID, const QpParam& cQP, const double lambda, const Ctx& ctx, TCoeff& absSum )
+  void DepQuant::quant( TransformUnit& tu, const CCoeffBuf& srcCoeff, const ComponentID compID, const QpParam& cQP, const double lambda, const Ctx& ctx, TCoeff& absSum, bool enableScalingLists, int* quantCoeff )
   {
     CHECKD( tu.cs->sps->getSpsRangeExtension().getExtendedPrecisionProcessingFlag(), "ext precision is not supported" );
 
@@ -1449,12 +1446,43 @@ namespace DQIntern
     ::memset( tu.getCoeffs( compID ).buf, 0x00, numCoeff*sizeof(TCoeff) );
     absSum          = 0;
 
+    const CompArea& area     = tu.blocks[ compID ];
+    const uint32_t  width    = area.width;
+    const uint32_t  height   = area.height;
+    const uint32_t  lfnstIdx = tu.cu->lfnstIdx;
+    //===== scaling matrix ====
+    //const int         qpDQ = cQP.Qp + 1;
+    //const int         qpPer = qpDQ / 6;
+    //const int         qpRem = qpDQ - 6 * qpPer;
+
+    //TCoeff thresTmp = thres;
+    bool zeroOut = false;
+    bool zeroOutforThres = false;
+    int effWidth = tuPars.m_width, effHeight = tuPars.m_height;
+    if( ( tu.mtsIdx[compID] > MTS_SKIP || (tu.cs->sps->getUseMTS() && tu.cu->sbtInfo != 0 && tuPars.m_height <= 32 && tuPars.m_width <= 32)) && compID == COMPONENT_Y)
+    {
+      effHeight = (tuPars.m_height == 32) ? 16 : tuPars.m_height;
+      effWidth = (tuPars.m_width == 32) ? 16 : tuPars.m_width;
+      zeroOut = (effHeight < tuPars.m_height || effWidth < tuPars.m_width);
+    }
+    zeroOutforThres = zeroOut || (32 < tuPars.m_height || 32 < tuPars.m_width);
     //===== find first test position =====
-    int   firstTestPos = numCoeff - 1;
+    int firstTestPos = numCoeff - 1;
+    if (lfnstIdx > 0 && tu.mtsIdx[compID] != MTS_SKIP && width >= 4 && height >= 4)
+    {
+      firstTestPos = ( ( width == 4 && height == 4 ) || ( width == 8 && height == 8 ) )  ? 7 : 15 ;
+    }
+    const TCoeff defaultQuantisationCoefficient = (TCoeff)m_quant.getQScale();
     const TCoeff thres = m_quant.getLastThreshold();
     for( ; firstTestPos >= 0; firstTestPos-- )
     {
-      if (abs(tCoeff[tuPars.m_scanId2BlkPos[firstTestPos].idx]) > thres)
+      if (zeroOutforThres && (tuPars.m_scanId2BlkPos[firstTestPos].x >= ((tuPars.m_width == 32 && zeroOut) ? 16 : 32)
+                           || tuPars.m_scanId2BlkPos[firstTestPos].y >= ((tuPars.m_height == 32 && zeroOut) ? 16 : 32)))
+        continue;
+      TCoeff thresTmp = (enableScalingLists) ? TCoeff(thres / (4 * quantCoeff[tuPars.m_scanId2BlkPos[firstTestPos].idx]))
+                                             : TCoeff(thres / (4 * defaultQuantisationCoefficient));
+
+      if (abs(tCoeff[tuPars.m_scanId2BlkPos[firstTestPos].idx]) > thresTmp)
       {
         break;
       }
@@ -1473,20 +1501,28 @@ namespace DQIntern
     }
     m_startState.init();
 
-    int effWidth = tuPars.m_width, effHeight = tuPars.m_height;
-    bool zeroOut = false;
-    if( ( tu.mtsIdx > 1 || ( tu.cu->sbtInfo != 0 && tuPars.m_height <= 32 && tuPars.m_width <= 32 ) ) && !tu.cu->transQuantBypass && compID == COMPONENT_Y )
+
+    int effectWidth = std::min(32, effWidth);
+    int effectHeight = std::min(32, effHeight);
+    for (int k = 0; k < 12; k++)
     {
-      effHeight = ( tuPars.m_height == 32 ) ? 16 : tuPars.m_height;
-      effWidth = ( tuPars.m_width == 32 ) ? 16 : tuPars.m_width;
-      zeroOut  = ( effHeight < tuPars.m_height || effWidth < tuPars.m_width );
+      m_allStates[k].effWidth = effectWidth;
+      m_allStates[k].effHeight = effectHeight;
     }
+    m_startState.effWidth = effectWidth;
+    m_startState.effHeight = effectHeight;
 
     //===== populate trellis =====
     for( int scanIdx = firstTestPos; scanIdx >= 0; scanIdx-- )
     {
       const ScanInfo& scanInfo = tuPars.m_scanInfo[ scanIdx ];
-      xDecideAndUpdate( abs( tCoeff[ scanInfo.rasterPos ] ), scanInfo, zeroOut && ( scanInfo.posX >= effWidth || scanInfo.posY >= effHeight ) );
+      if (enableScalingLists)
+      {
+        m_quant.initQuantBlock(tu, compID, cQP, lambda, quantCoeff[scanInfo.rasterPos]);
+        xDecideAndUpdate( abs( tCoeff[scanInfo.rasterPos]), scanInfo, (zeroOut && (scanInfo.posX >= effWidth || scanInfo.posY >= effHeight)), quantCoeff[scanInfo.rasterPos] );
+      }
+      else
+        xDecideAndUpdate( abs( tCoeff[scanInfo.rasterPos]), scanInfo, (zeroOut && (scanInfo.posX >= effWidth || scanInfo.posY >= effHeight)), defaultQuantisationCoefficient );
     }
 
     //===== find best path =====
@@ -1537,9 +1573,22 @@ DepQuant::~DepQuant()
 
 void DepQuant::quant( TransformUnit &tu, const ComponentID &compID, const CCoeffBuf &pSrc, TCoeff &uiAbsSum, const QpParam &cQP, const Ctx& ctx )
 {
-  if( tu.cs->slice->getDepQuantEnabledFlag() )
+  if ( tu.cs->picHeader->getDepQuantEnabledFlag() && (tu.mtsIdx[compID] != MTS_SKIP) )
   {
-    static_cast<DQIntern::DepQuant*>(p)->quant( tu, pSrc, compID, cQP, Quant::m_dLambda, ctx, uiAbsSum );
+    //===== scaling matrix ====
+    const int         qpDQ            = cQP.Qp(tu.mtsIdx[compID] == MTS_SKIP) + 1;
+    const int         qpPer           = qpDQ / 6;
+    const int         qpRem           = qpDQ - 6 * qpPer;
+    const CompArea    &rect           = tu.blocks[compID];
+    const int         width           = rect.width;
+    const int         height          = rect.height;
+    uint32_t          scalingListType = getScalingListType(tu.cu->predMode, compID);
+    CHECK(scalingListType >= SCALING_LIST_NUM, "Invalid scaling list");
+    const uint32_t    log2TrWidth     = floorLog2(width);
+    const uint32_t    log2TrHeight    = floorLog2(height);
+    const bool        disableSMForLFNST = tu.cs->picHeader->getScalingListPresentFlag() ? tu.cs->picHeader->getScalingListAPS()->getScalingList().getDisableScalingMatrixForLfnstBlks() : false;
+    const bool        enableScalingLists = getUseScalingList(width, height, (tu.mtsIdx[compID] == MTS_SKIP), tu.cu->lfnstIdx > 0, disableSMForLFNST);
+    static_cast<DQIntern::DepQuant*>(p)->quant( tu, pSrc, compID, cQP, Quant::m_dLambda, ctx, uiAbsSum, enableScalingLists, Quant::getQuantCoeff(scalingListType, qpRem, log2TrWidth, log2TrHeight) );
   }
   else
   {
@@ -1549,9 +1598,21 @@ void DepQuant::quant( TransformUnit &tu, const ComponentID &compID, const CCoeff
 
 void DepQuant::dequant( const TransformUnit &tu, CoeffBuf &dstCoeff, const ComponentID &compID, const QpParam &cQP )
 {
-  if( tu.cs->slice->getDepQuantEnabledFlag() )
+  if( tu.cs->picHeader->getDepQuantEnabledFlag() && (tu.mtsIdx[compID] != MTS_SKIP))
   {
-    static_cast<DQIntern::DepQuant*>(p)->dequant( tu, dstCoeff, compID, cQP );
+    const int         qpDQ            = cQP.Qp(tu.mtsIdx[compID] == MTS_SKIP) + 1;
+    const int         qpPer           = qpDQ / 6;
+    const int         qpRem           = qpDQ - 6 * qpPer;
+    const CompArea    &rect           = tu.blocks[compID];
+    const int         width           = rect.width;
+    const int         height          = rect.height;
+    uint32_t          scalingListType = getScalingListType(tu.cu->predMode, compID);
+    CHECK(scalingListType >= SCALING_LIST_NUM, "Invalid scaling list");
+    const uint32_t    log2TrWidth  = floorLog2(width);
+    const uint32_t    log2TrHeight = floorLog2(height);
+    const bool disableSMForLFNST = tu.cs->picHeader->getScalingListPresentFlag() ? tu.cs->picHeader->getScalingListAPS()->getScalingList().getDisableScalingMatrixForLfnstBlks() : false;
+    const bool enableScalingLists = getUseScalingList(width, height, (tu.mtsIdx[compID] == MTS_SKIP), tu.cu->lfnstIdx > 0, disableSMForLFNST);
+    static_cast<DQIntern::DepQuant*>(p)->dequant( tu, dstCoeff, compID, cQP, enableScalingLists, Quant::getDequantCoeff(scalingListType, qpRem, log2TrWidth, log2TrHeight) );
   }
   else
   {
diff --git a/source/Lib/CommonLib/DepQuant.h b/source/Lib/CommonLib/DepQuant.h
index 5a26b46da88ee3a424d62cd500ecd91325fe36d6..eb2685a2e02adfb9277abb74d81e36b0a77f0fed 100644
--- a/source/Lib/CommonLib/DepQuant.h
+++ b/source/Lib/CommonLib/DepQuant.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/HRD.cpp b/source/Lib/CommonLib/HRD.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e21e5178d58f02b2c26c5defc5052423483e79e2
--- /dev/null
+++ b/source/Lib/CommonLib/HRD.cpp
@@ -0,0 +1,34 @@
+/* The copyright in this software is being made available under the BSD
+* License, included below. This software may be subject to other third party
+* and contributor rights, including patent rights, and no such rights are
+* granted under this license.
+*
+* Copyright (c) 2010-2020, ITU/ISO/IEC
+* All rights reserved.
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are met:
+*
+*  * Redistributions of source code must retain the above copyright notice,
+*    this list of conditions and the following disclaimer.
+*  * Redistributions in binary form must reproduce the above copyright notice,
+*    this list of conditions and the following disclaimer in the documentation
+*    and/or other materials provided with the distribution.
+*  * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
+*    be used to endorse or promote products derived from this software without
+*    specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
+* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+* THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
diff --git a/source/Lib/CommonLib/HRD.h b/source/Lib/CommonLib/HRD.h
new file mode 100644
index 0000000000000000000000000000000000000000..b236a10b74d690b6f1a7f9bd897328f1c6055fee
--- /dev/null
+++ b/source/Lib/CommonLib/HRD.h
@@ -0,0 +1,189 @@
+/* The copyright in this software is being made available under the BSD
+* License, included below. This software may be subject to other third party
+* and contributor rights, including patent rights, and no such rights are
+* granted under this license.
+*
+* Copyright (c) 2010-2020, ITU/ISO/IEC
+* All rights reserved.
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are met:
+*
+*  * Redistributions of source code must retain the above copyright notice,
+*    this list of conditions and the following disclaimer.
+*  * Redistributions in binary form must reproduce the above copyright notice,
+*    this list of conditions and the following disclaimer in the documentation
+*    and/or other materials provided with the distribution.
+*  * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
+*    be used to endorse or promote products derived from this software without
+*    specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
+* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+* THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#ifndef __HRD__
+#define __HRD__
+
+#include "Common.h"
+#include "SEI.h"
+
+class TimingInfo
+{
+protected:
+  bool     m_timingInfoPresentFlag;
+  uint32_t m_numUnitsInTick;
+  uint32_t m_timeScale;
+  int      m_numTicksPocDiffOneMinus1;
+
+public:
+  TimingInfo()
+    : m_timingInfoPresentFlag      (false)
+    , m_numUnitsInTick             (1001)
+    , m_timeScale                  (60000)
+    , m_numTicksPocDiffOneMinus1   (0)
+  {}
+
+  void     setTimingInfoPresentFlag( bool flag )   { m_timingInfoPresentFlag = flag;       }
+  bool     getTimingInfoPresentFlag( ) const       { return m_timingInfoPresentFlag;       }
+
+  void     setNumUnitsInTick( uint32_t value )     { m_numUnitsInTick = value;             }
+  uint32_t getNumUnitsInTick( ) const              { return m_numUnitsInTick;              }
+  void     setTimeScale( uint32_t value )          { m_timeScale = value;                  }
+  uint32_t getTimeScale( ) const                   { return m_timeScale;                   }
+
+  void     setNumTicksPocDiffOneMinus1(int x)      { m_numTicksPocDiffOneMinus1 = x;       }
+  int      getNumTicksPocDiffOneMinus1( ) const    { return m_numTicksPocDiffOneMinus1;    }
+};
+
+struct HrdSubLayerInfo
+{
+  bool     fixedPicRateFlag;
+  bool     fixedPicRateWithinCvsFlag;
+  uint32_t picDurationInTcMinus1;
+  bool     lowDelayHrdFlag;
+  uint32_t cpbCntMinus1;
+  uint32_t bitRateValueMinus1[MAX_CPB_CNT][2];
+  uint32_t cpbSizeValue      [MAX_CPB_CNT][2];
+  uint32_t ducpbSizeValue    [MAX_CPB_CNT][2];
+  bool     cbrFlag           [MAX_CPB_CNT][2];
+  uint32_t duBitRateValue    [MAX_CPB_CNT][2];
+};
+
+class HRDParameters
+{
+private:
+  bool     m_nalHrdParametersPresentFlag;
+  bool     m_vclHrdParametersPresentFlag;
+  uint32_t m_tickDivisorMinus2;
+  bool     m_generalDecodingUnitHrdParamsPresentFlag;
+  uint32_t m_bitRateScale;
+  uint32_t m_cpbSizeScale;
+  uint32_t m_cpbSizeDuScale;
+  HrdSubLayerInfo m_HRD[MAX_TLAYER];
+
+public:
+  HRDParameters()
+    :m_nalHrdParametersPresentFlag       (false)
+    ,m_vclHrdParametersPresentFlag       (false)
+    ,m_tickDivisorMinus2                 (0)
+    ,m_generalDecodingUnitHrdParamsPresentFlag  (false)
+    ,m_bitRateScale                      (0)
+    ,m_cpbSizeScale                      (0)
+    ,m_cpbSizeDuScale                    (0)
+  {}
+
+  virtual ~HRDParameters() {}
+
+  void      setNalHrdParametersPresentFlag( bool flag )                                { m_nalHrdParametersPresentFlag = flag;                      }
+  bool      getNalHrdParametersPresentFlag( ) const                                    { return m_nalHrdParametersPresentFlag;                      }
+
+  void      setVclHrdParametersPresentFlag( bool flag )                                { m_vclHrdParametersPresentFlag = flag;                      }
+  bool      getVclHrdParametersPresentFlag( ) const                                    { return m_vclHrdParametersPresentFlag;                      }
+
+
+  void      setTickDivisorMinus2( uint32_t value )                                     { m_tickDivisorMinus2 = value;                               }
+  uint32_t  getTickDivisorMinus2( ) const                                              { return m_tickDivisorMinus2;                                }
+
+
+  void      setGeneralDecodingUnitHrdParamsPresentFlag( bool flag)                     { m_generalDecodingUnitHrdParamsPresentFlag = flag;                 }
+  bool      getGeneralDecodingUnitHrdParamsPresentFlag( ) const                        { return m_generalDecodingUnitHrdParamsPresentFlag;                 }
+
+  void      setBitRateScale( uint32_t value )                                          { m_bitRateScale = value;                                    }
+  uint32_t  getBitRateScale( ) const                                                   { return m_bitRateScale;                                     }
+
+  void      setCpbSizeScale( uint32_t value )                                          { m_cpbSizeScale = value;                                    }
+  uint32_t  getCpbSizeScale( ) const                                                   { return m_cpbSizeScale;                                     }
+  void      setCpbSizeDuScale( uint32_t value )                                        { m_cpbSizeDuScale = value;                                  }
+  uint32_t  getCpbSizeDuScale( ) const                                                 { return m_cpbSizeDuScale;                                   }
+
+
+  void      setFixedPicRateFlag( int layer, bool flag )                                { m_HRD[layer].fixedPicRateFlag = flag;                      }
+  bool      getFixedPicRateFlag( int layer ) const                                     { return m_HRD[layer].fixedPicRateFlag;                      }
+
+  void      setFixedPicRateWithinCvsFlag( int layer, bool flag )                       { m_HRD[layer].fixedPicRateWithinCvsFlag = flag;             }
+  bool      getFixedPicRateWithinCvsFlag( int layer ) const                            { return m_HRD[layer].fixedPicRateWithinCvsFlag;             }
+
+  void      setPicDurationInTcMinus1( int layer, uint32_t value )                      { m_HRD[layer].picDurationInTcMinus1 = value;                }
+  uint32_t  getPicDurationInTcMinus1( int layer ) const                                { return m_HRD[layer].picDurationInTcMinus1;                 }
+
+  void      setLowDelayHrdFlag( int layer, bool flag )                                 { m_HRD[layer].lowDelayHrdFlag = flag;                       }
+  bool      getLowDelayHrdFlag( int layer ) const                                      { return m_HRD[layer].lowDelayHrdFlag;                       }
+
+  void      setCpbCntMinus1( int layer, uint32_t value )                               { m_HRD[layer].cpbCntMinus1 = value;                         }
+  uint32_t  getCpbCntMinus1( int layer ) const                                         { return m_HRD[layer].cpbCntMinus1;                          }
+
+  void      setBitRateValueMinus1( int layer, int cpbcnt, int nalOrVcl, uint32_t value )   { m_HRD[layer].bitRateValueMinus1[cpbcnt][nalOrVcl] = value; }
+  uint32_t  getBitRateValueMinus1( int layer, int cpbcnt, int nalOrVcl ) const             { return m_HRD[layer].bitRateValueMinus1[cpbcnt][nalOrVcl];  }
+
+  void      setCpbSizeValueMinus1( int layer, int cpbcnt, int nalOrVcl, uint32_t value )   { m_HRD[layer].cpbSizeValue[cpbcnt][nalOrVcl] = value;       }
+  uint32_t  getCpbSizeValueMinus1( int layer, int cpbcnt, int nalOrVcl ) const             { return m_HRD[layer].cpbSizeValue[cpbcnt][nalOrVcl];        }
+  void      setDuCpbSizeValueMinus1( int layer, int cpbcnt, int nalOrVcl, uint32_t value ) { m_HRD[layer].ducpbSizeValue[cpbcnt][nalOrVcl] = value;     }
+  uint32_t  getDuCpbSizeValueMinus1( int layer, int cpbcnt, int nalOrVcl ) const           { return m_HRD[layer].ducpbSizeValue[cpbcnt][nalOrVcl];      }
+  void      setDuBitRateValueMinus1( int layer, int cpbcnt, int nalOrVcl, uint32_t value ) { m_HRD[layer].duBitRateValue[cpbcnt][nalOrVcl] = value;     }
+  uint32_t  getDuBitRateValueMinus1(int layer, int cpbcnt, int nalOrVcl ) const            { return m_HRD[layer].duBitRateValue[cpbcnt][nalOrVcl];      }
+  void      setCbrFlag( int layer, int cpbcnt, int nalOrVcl, bool value )                  { m_HRD[layer].cbrFlag[cpbcnt][nalOrVcl] = value;            }
+  bool      getCbrFlag( int layer, int cpbcnt, int nalOrVcl ) const                        { return m_HRD[layer].cbrFlag[cpbcnt][nalOrVcl];             }
+
+  bool      getCpbDpbDelaysPresentFlag( ) const                                            { return getNalHrdParametersPresentFlag() || getVclHrdParametersPresentFlag(); }
+};
+
+// Bundles the HRD parameters, the timing info and a copy of the last buffering period SEI that was set.
+class HRD
+{
+public:
+  HRD() : m_bufferingPeriodInitialized(false) {}
+  virtual ~HRD() {}
+
+  // The setters only copy their argument, so they take const references (const objects and temporaries are accepted).
+  void                 setHRDParameters(const HRDParameters &hrdParam) { m_hrdParams=hrdParam; }
+  HRDParameters        getHRDParameters() const                     { return m_hrdParams; }
+  const HRDParameters& getHRDParameters()                           { return m_hrdParams; }
+
+  void                 setTimingInfo(const TimingInfo &timingInfo)  { m_timingInfo=timingInfo; }
+  TimingInfo           getTimingInfo() const                        { return m_timingInfo; }
+  const TimingInfo&    getTimingInfo()                              { return m_timingInfo; }
+
+  // Copies *bp into the internal SEI object (bp is dereferenced, so it must not be null) and marks it as initialized.
+  void                       setBufferingPeriodSEI(const SEIBufferingPeriod* bp)  { bp->copyTo(m_bufferingPeriodSEI); m_bufferingPeriodInitialized = true; }
+  // Returns nullptr as long as setBufferingPeriodSEI() has not been called on this object.
+  const SEIBufferingPeriod*  getBufferingPeriodSEI() const                        { return m_bufferingPeriodInitialized ? &m_bufferingPeriodSEI : nullptr; }
+
+protected:
+  HRDParameters m_hrdParams;
+  TimingInfo    m_timingInfo;
+  bool               m_bufferingPeriodInitialized;
+  SEIBufferingPeriod m_bufferingPeriodSEI;
+};
+
+#endif //__HRD__
diff --git a/source/Lib/CommonLib/Hash.cpp b/source/Lib/CommonLib/Hash.cpp
index 2301f6845b148de9e435cb6f72cbd4dbcb0fddfc..5657f3cb3fd66b4aa877f0a03eba16ac9afd4a32 100644
--- a/source/Lib/CommonLib/Hash.cpp
+++ b/source/Lib/CommonLib/Hash.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -109,6 +109,10 @@ TComHash::TComHash()
 {
   m_lookupTable = NULL;
   tableHasContent = false;
+  for (int i = 0; i < 5; i++)
+  {
+    hashPic[i] = NULL;
+  }
 }
 
 TComHash::~TComHash()
@@ -120,12 +124,21 @@ TComHash::~TComHash()
     m_lookupTable = NULL;
   }
 }
-
-void TComHash::create()
+void TComHash::create(int picWidth, int picHeight)
 {
-  if (m_lookupTable != NULL)
+  if (m_lookupTable)
   {
     clearAll();
+  }
+  if (!hashPic[0])
+  {
+    for (int k = 0; k < 5; k++)
+    {
+      hashPic[k] = new uint16_t[picWidth*picHeight];
+    }
+  }
+  if (m_lookupTable)
+  {
     return;
   }
   int maxAddr = 1 << (m_CRCBits + m_blockSizeBits);
@@ -136,6 +149,14 @@ void TComHash::create()
 
 void TComHash::clearAll()
 {
+  if (hashPic[0])
+  {
+    for (int k = 0; k < 5; k++)
+    {
+      delete[] hashPic[k];
+      hashPic[k] = NULL;
+    }
+  }
   tableHasContent = false;
   if (m_lookupTable == NULL)
   {
@@ -251,83 +272,6 @@ void TComHash::generateBlock2x2HashValue(const PelUnitBuf &curPicBuf, int picWid
 
   delete[] p;
 }
-void TComHash::generateRectangleHashValue(int picWidth, int picHeight, int width, int height, uint32_t* srcPicBlockHash[2], uint32_t* dstPicBlockHash[2], bool* srcPicBlockSameInfo[3], bool* dstPicBlockSameInfo[3])
-{
-  //at present, only support 1:2(2:1) retangle hash value
-  CHECK(width != (height << 1) && (width << 1) != height, "Wrong")
-  bool isHorizontal = width == (height << 1) ? true : false;
-
-  int xEnd = picWidth - width + 1;
-  int yEnd = picHeight - height + 1;
-
-  int srcWidth = width >> 1;
-  int quadWidth = width >> 2;
-  int srcHeight = height >> 1;
-  int quadHeight = height >> 2;
-
-  int length = 2 * sizeof(uint32_t);
-  uint32_t* p = new uint32_t[2];
-  int pos = 0;
-  if (isHorizontal)
-  {
-    for (int yPos = 0; yPos < yEnd; yPos++)
-    {
-      for (int xPos = 0; xPos < xEnd; xPos++)
-      {
-        p[0] = srcPicBlockHash[0][pos];
-        p[1] = srcPicBlockHash[0][pos + srcWidth];
-        dstPicBlockHash[0][pos] = TComHash::getCRCValue1((unsigned char*)p, length);
-
-        p[0] = srcPicBlockHash[1][pos];
-        p[1] = srcPicBlockHash[1][pos + srcWidth];
-        dstPicBlockHash[1][pos] = TComHash::getCRCValue2((unsigned char*)p, length);
-
-        dstPicBlockSameInfo[0][pos] = srcPicBlockSameInfo[0][pos] && srcPicBlockSameInfo[0][pos + quadWidth] && srcPicBlockSameInfo[0][pos + srcWidth];
-        dstPicBlockSameInfo[1][pos] = srcPicBlockSameInfo[1][pos] && srcPicBlockSameInfo[1][pos + srcWidth];
-        pos++;
-      }
-      pos += width - 1;
-    }
-  }
-  else
-  {
-    for (int yPos = 0; yPos < yEnd; yPos++)
-    {
-      for (int xPos = 0; xPos < xEnd; xPos++)
-      {
-        p[0] = srcPicBlockHash[0][pos];
-        p[1] = srcPicBlockHash[0][pos + srcHeight * picWidth];
-        dstPicBlockHash[0][pos] = TComHash::getCRCValue1((unsigned char*)p, length);
-
-        p[0] = srcPicBlockHash[1][pos];
-        p[1] = srcPicBlockHash[1][pos + srcHeight * picWidth];
-        dstPicBlockHash[1][pos] = TComHash::getCRCValue2((unsigned char*)p, length);
-
-        dstPicBlockSameInfo[0][pos] = srcPicBlockSameInfo[0][pos] && srcPicBlockSameInfo[0][pos + srcHeight * picWidth];
-        dstPicBlockSameInfo[1][pos] = srcPicBlockSameInfo[1][pos] && srcPicBlockSameInfo[1][pos + quadHeight * picWidth] && srcPicBlockSameInfo[1][pos + srcHeight * picWidth];
-
-        pos++;
-      }
-      pos += width - 1;
-    }
-  }
-
-  int widthMinus1 = width - 1;
-  int heightMinus1 = height - 1;
-  pos = 0;
-
-  for (int yPos = 0; yPos < yEnd; yPos++)
-  {
-    for (int xPos = 0; xPos < xEnd; xPos++)
-    {
-      dstPicBlockSameInfo[2][pos] = (!dstPicBlockSameInfo[0][pos] && !dstPicBlockSameInfo[1][pos]) || (((xPos & widthMinus1) == 0) && ((yPos & heightMinus1) == 0));
-      pos++;
-    }
-    pos += width - 1;
-  }
-
-  delete[] p;
-}
 
 void TComHash::generateBlockHashValue(int picWidth, int picHeight, int width, int height, uint32_t* srcPicBlockHash[2], uint32_t* dstPicBlockHash[2], bool* srcPicBlockSameInfo[3], bool* dstPicBlockSameInfo[3])
 {
@@ -341,7 +285,7 @@ void TComHash::generateBlockHashValue(int picWidth, int picHeight, int width, in
 
   int length = 4 * sizeof(uint32_t);
 
-  uint32_t* p = new uint32_t[4];
+  uint32_t p[4];
   int pos = 0;
   for (int yPos = 0; yPos < yEnd; yPos++)
   {
@@ -372,23 +316,18 @@ void TComHash::generateBlockHashValue(int picWidth, int picHeight, int width, in
 
   if (width >= 4)
   {
-    int widthMinus1 = width - 1;
-    int heightMinus1 = height - 1;
     pos = 0;
 
     for (int yPos = 0; yPos < yEnd; yPos++)
     {
       for (int xPos = 0; xPos < xEnd; xPos++)
       {
-        dstPicBlockSameInfo[2][pos] = (!dstPicBlockSameInfo[0][pos] && !dstPicBlockSameInfo[1][pos]) || (((xPos & widthMinus1) == 0) && ((yPos & heightMinus1) == 0));
+        dstPicBlockSameInfo[2][pos] = (!dstPicBlockSameInfo[0][pos] && !dstPicBlockSameInfo[1][pos]);
         pos++;
       }
       pos += width - 1;
     }
   }
-
-  delete[] p;
-
 }
 
 void TComHash::addToHashMapByRowWithPrecalData(uint32_t* picHash[2], bool* picIsSame, int picWidth, int picHeight, int width, int height)
@@ -404,12 +343,14 @@ void TComHash::addToHashMapByRowWithPrecalData(uint32_t* picHash[2], bool* picIs
   addValue <<= m_CRCBits;
   int crcMask = 1 << m_CRCBits;
   crcMask -= 1;
+  int blockIdx = floorLog2(width) - 2;
 
   for (int xPos = 0; xPos < xEnd; xPos++)
   {
     for (int yPos = 0; yPos < yEnd; yPos++)
     {
       int pos = yPos * picWidth + xPos;
+      hashPic[blockIdx][pos] = (uint16_t)(srcHash[1][pos] & crcMask);
       //valid data
       if (srcIsAdded[pos])
       {
@@ -557,7 +498,36 @@ bool TComHash::isBlock2x2ColSameValue(unsigned char* p, bool includeAllComponent
 
   return true;
 }
+// Checks a width x height block whose rows are 'stride' samples apart: returns true only if,
+// within every row, all samples equal that row's first sample. Rows may differ from each other.
+bool TComHash::isHorizontalPerfectLuma(const Pel* srcPel, int stride, int width, int height)
+{
+  const Pel* line = srcPel;
+  for (int y = 0; y < height; y++, line += stride)
+  {
+    for (int x = 1; x < width; x++)
+    {
+      // compare against the first sample of the current row
+      if (line[x] != line[0]) { return false; }
+    }
+  }
+  return true;
+}
 
+// Returns true only if every column of the width x height block is constant, i.e. each row repeats row 0.
+bool TComHash::isVerticalPerfectLuma(const Pel* srcPel, int stride, int width, int height)
+{
+  for (int x = 0; x < width; x++)
+  {
+    const Pel* below = srcPel + x;  // walks down column x, starting at row 0
+    for (int y = 1; y < height; y++)
+    {
+      below += stride;
+      if (*below != srcPel[x]) { return false; }
+    }
+  }
+  return true;
+}
 bool TComHash::getBlockHashValue(const PelUnitBuf &curPicBuf, int width, int height, int xStart, int yStart, const BitDepths bitDepths, uint32_t& hashValue1, uint32_t& hashValue2)
 {
   int addValue = m_blockSizeToIndex[width][height];
@@ -575,7 +545,7 @@ bool TComHash::getBlockHashValue(const PelUnitBuf &curPicBuf, int width, int hei
   }
 
   unsigned char* p = new unsigned char[length];
-  uint32_t* toHash = new uint32_t[4];
+  uint32_t toHash[4];
 
   int block2x2Num = (width*height) >> 2;
 
@@ -682,8 +652,6 @@ bool TComHash::getBlockHashValue(const PelUnitBuf &curPicBuf, int width, int hei
   hashValue1 = (hashValueBuffer[0][dstIdx][0] & crcMask) + addValue;
   hashValue2 = hashValueBuffer[1][dstIdx][0];
 
-  delete[] toHash;
-
   for (int i = 0; i < 2; i++)
   {
     for (int j = 0; j < 2; j++)
@@ -712,8 +680,6 @@ void TComHash::initBlockSizeToIndex()
   m_blockSizeToIndex[32][32] = 2;
   m_blockSizeToIndex[64][64] = 3;
   m_blockSizeToIndex[4][4] = 4;
-  m_blockSizeToIndex[4][8] = 5;
-  m_blockSizeToIndex[8][4] = 6;
 }
 
 uint32_t TComHash::getCRCValue1(unsigned char* p, int length)
diff --git a/source/Lib/CommonLib/Hash.h b/source/Lib/CommonLib/Hash.h
index d69787cfc70eb38306153b2fc0e913793098fa66..2a47b0ffb60fd3c46044d73439588bbe73e35c3f 100644
--- a/source/Lib/CommonLib/Hash.h
+++ b/source/Lib/CommonLib/Hash.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -91,7 +91,7 @@ struct TComHash
 public:
   TComHash();
   ~TComHash();
-  void create();
+  void create(int picWidth, int picHeight);
   void clearAll();
   void addToTable(uint32_t hashValue, const BlockHash& blockHash);
   int count(uint32_t hashValue);
@@ -102,11 +102,10 @@ public:
 
   void generateBlock2x2HashValue(const PelUnitBuf &curPicBuf, int picWidth, int picHeight, const BitDepths bitDepths, uint32_t* picBlockHash[2], bool* picBlockSameInfo[3]);
   void generateBlockHashValue(int picWidth, int picHeight, int width, int height, uint32_t* srcPicBlockHash[2], uint32_t* dstPicBlockHash[2], bool* srcPicBlockSameInfo[3], bool* dstPicBlockSameInfo[3]);
-  void generateRectangleHashValue(int picWidth, int picHeight, int width, int height, uint32_t* srcPicBlockHash[2], uint32_t* dstPicBlockHash[2], bool* srcPicBlockSameInfo[3], bool* dstPicBlockSameInfo[3]);
   void addToHashMapByRowWithPrecalData(uint32_t* srcHash[2], bool* srcIsSame, int picWidth, int picHeight, int width, int height);
   bool isInitial() { return tableHasContent; }
   void setInitial() { tableHasContent = true; }
-
+  uint16_t* getHashPic(int baseSize) const { return hashPic[floorLog2(baseSize) - 2]; } // index = floorLog2(baseSize) - 2, unchecked; presumably baseSize 4..64 -> 0..4, verify callers
 
 
 public:
@@ -117,10 +116,13 @@ public:
   static bool isBlock2x2ColSameValue(unsigned char* p, bool includeAllComponent = true);
   static bool getBlockHashValue(const PelUnitBuf &curPicBuf, int width, int height, int xStart, int yStart, const BitDepths bitDepths, uint32_t& hashValue1, uint32_t& hashValue2);
   static void initBlockSizeToIndex();
+  static bool isHorizontalPerfectLuma(const Pel* srcPel, int stride, int width, int height);
+  static bool isVerticalPerfectLuma(const Pel* srcPel, int stride, int width, int height);
 
 private:
   std::vector<BlockHash>** m_lookupTable;
   bool tableHasContent;
+  uint16_t* hashPic[5];//4x4 ~ 64x64
 
 private:
   static const int m_CRCBits = 16;
diff --git a/source/Lib/CommonLib/IbcHashMap.cpp b/source/Lib/CommonLib/IbcHashMap.cpp
index 9d876292deeedcab45d80f3057b14018b8ba17a4..3b0b2d2f12038f79e7bdcf1166d9d0c5a5d03bce 100644
--- a/source/Lib/CommonLib/IbcHashMap.cpp
+++ b/source/Lib/CommonLib/IbcHashMap.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -274,6 +274,7 @@ bool IbcHashMap::ibcHashMatch(const Area& lumaArea, std::vector<Position>& cand,
   // find the block with least candidates
   size_t minSize = MAX_UINT;
   unsigned int targetHashOneBlock = 0;
+  Position targetBlockOffsetInCu(0, 0);
   for (SizeType y = 0; y < lumaArea.height && minSize > 1; y += MIN_PU_SIZE)
   {
     for (SizeType x = 0; x < lumaArea.width && minSize > 1; x += MIN_PU_SIZE)
@@ -283,6 +284,7 @@ bool IbcHashMap::ibcHashMatch(const Area& lumaArea, std::vector<Position>& cand,
       {
         minSize = m_hash2Pos[hash].size();
         targetHashOneBlock = hash;
+        targetBlockOffsetInCu.repositionTo(Position(x, y));
       }
     }
   }
@@ -294,11 +296,12 @@ bool IbcHashMap::ibcHashMatch(const Area& lumaArea, std::vector<Position>& cand,
     // check whether whole block match
     for (std::vector<Position>::iterator refBlockPos = candOneBlock.begin(); refBlockPos != candOneBlock.end(); refBlockPos++)
     {
-      Position bottomRight = refBlockPos->offset(lumaArea.width - 1, lumaArea.height - 1);
+      Position topLeft = refBlockPos->offset(-targetBlockOffsetInCu.x, -targetBlockOffsetInCu.y);
+      Position bottomRight = topLeft.offset(lumaArea.width - 1, lumaArea.height - 1);
       bool wholeBlockMatch = true;
       if (lumaArea.width > MIN_PU_SIZE || lumaArea.height > MIN_PU_SIZE)
       {
-        if (!cs.isDecomp(bottomRight, cs.chType) || bottomRight.x >= m_picWidth || bottomRight.y >= m_picHeight)
+        if (!cs.isDecomp(bottomRight, CHANNEL_TYPE_LUMA) || bottomRight.x >= m_picWidth || bottomRight.y >= m_picHeight || topLeft.x < 0 || topLeft.y < 0)
         {
           continue;
         }
@@ -307,20 +310,21 @@ bool IbcHashMap::ibcHashMatch(const Area& lumaArea, std::vector<Position>& cand,
           for (SizeType x = 0; x < lumaArea.width && wholeBlockMatch; x += MIN_PU_SIZE)
           {
             // whether the reference block and current block has the same hash
-            wholeBlockMatch &= (m_pos2Hash[lumaArea.pos().y + y][lumaArea.pos().x + x] == m_pos2Hash[refBlockPos->y + y][refBlockPos->x + x]);
+            wholeBlockMatch &= (m_pos2Hash[lumaArea.pos().y + y][lumaArea.pos().x + x] == m_pos2Hash[topLeft.y + y][topLeft.x + x]);
           }
         }
       }
       else
       {
-        if (abs(refBlockPos->x - lumaArea.x) > searchRange4SmallBlk || abs(refBlockPos->y - lumaArea.y) > searchRange4SmallBlk || !cs.isDecomp(bottomRight, cs.chType))
+        CHECK(topLeft != *refBlockPos, "4x4 target block should not have offset!");
+        if (abs(topLeft.x - lumaArea.x) > searchRange4SmallBlk || abs(topLeft.y - lumaArea.y) > searchRange4SmallBlk || !cs.isDecomp(bottomRight, CHANNEL_TYPE_LUMA))
         {
           continue;
         }
       }
       if (wholeBlockMatch)
       {
-        cand.push_back(*refBlockPos);
+        cand.push_back(topLeft);
         if (cand.size() > maxCand)
         {
           break;
@@ -349,5 +353,82 @@ int IbcHashMap::getHashHitRatio(const Area& lumaArea)
   return 100 * hit / total;
 }
 
+/** Percentage (0..100) of MIN_PU_SIZE-spaced positions in lumaArea whose hash has more than one stored position.
+ *
+ *  The area is clipped to the picture size. Positions whose hash is one of the
+ *  numExcludedHashValue most frequently used hash values are skipped, i.e. they count
+ *  neither as hit nor towards the total. Returns 0 when no position remains.
+ */
+int IbcHashMap::calHashBlkMatchPerc(const Area& lumaArea)
+{
+  static const int numExcludedHashValue = 36;
+
+  const int maxX = std::min((int)(lumaArea.x + lumaArea.width), m_picWidth);
+  const int maxY = std::min((int)(lumaArea.y + lumaArea.height), m_picHeight);
+
+  // Hash values ranked by descending usage count; unused slots stay at usage 0 / hash 0.
+  // NOTE(review): with fewer than numExcludedHashValue distinct hashes, the leftover slots
+  // make hash value 0 count as excluded as well - confirm that this is intended.
+  int          maxUsage[numExcludedHashValue];
+  unsigned int mostSelHash[numExcludedHashValue];
+  for (int i = 0; i < numExcludedHashValue; i++)
+  {
+    maxUsage[i] = 0;
+    mostSelHash[i] = 0;
+  }
+
+  for (std::unordered_map<unsigned int, std::vector<Position>>::iterator it = m_hash2Pos.begin(); it != m_hash2Pos.end(); ++it)
+  {
+    const int usage = (int)it->second.size();
+
+    // first rank whose usage is strictly smaller than the current one
+    int insertPos = 0;
+    while (insertPos < numExcludedHashValue && usage <= maxUsage[insertPos])
+    {
+      insertPos++;
+    }
+
+    if (insertPos < numExcludedHashValue)
+    {
+      // shift the lower ranks down by one (dropping the last one) and insert the current hash
+      for (int i = numExcludedHashValue - 1; i > insertPos; i--)
+      {
+        maxUsage[i] = maxUsage[i - 1];
+        mostSelHash[i] = mostSelHash[i - 1];
+      }
+      maxUsage[insertPos] = usage;
+      mostSelHash[insertPos] = it->first;
+    }
+  }
+
+  int hit = 0, total = 0;
+  for (int y = lumaArea.y; y < maxY; y += MIN_PU_SIZE)
+  {
+    for (int x = lumaArea.x; x < maxX; x += MIN_PU_SIZE)
+    {
+      const unsigned int hash = m_pos2Hash[y][x];
+
+      bool excludedHash = false;
+      for (int i = 0; i < numExcludedHashValue && !excludedHash; i++)
+      {
+        excludedHash = (hash == mostSelHash[i]);
+      }
+      if (excludedHash)
+      {
+        continue;
+      }
+
+      // find() instead of operator[]: a pure lookup must not insert empty entries into the map
+      std::unordered_map<unsigned int, std::vector<Position>>::iterator entry = m_hash2Pos.find(hash);
+      hit += (entry != m_hash2Pos.end() && entry->second.size() > 1);
+      total++;
+    }
+  }
 
+  if (total == 0)
+  {
+    return 0;
+  }
+  return 100 * hit / total;
+}
 //! \}
diff --git a/source/Lib/CommonLib/IbcHashMap.h b/source/Lib/CommonLib/IbcHashMap.h
index e343aab6e042abdd15ff0227a3ba38d921f24fdd..bd90e10fdb3ffa8f68a6363e75f89aa379b4c024 100644
--- a/source/Lib/CommonLib/IbcHashMap.h
+++ b/source/Lib/CommonLib/IbcHashMap.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -82,6 +82,8 @@ public:
   bool    ibcHashMatch(const Area& lumaArea, std::vector<Position>& cand, const CodingStructure& cs, const int maxCand, const int searchRange4SmallBlk);
   int     getHashHitRatio(const Area& lumaArea);
 
+  int     calHashBlkMatchPerc(const Area& lumaArea);
+
 #ifdef TARGET_SIMD_X86
   void    initIbcHashMapX86();
   template <X86_VEXT vext>
diff --git a/source/Lib/CommonLib/InterPrediction.cpp b/source/Lib/CommonLib/InterPrediction.cpp
index 1967564ce5c6593d9ab9dd4fb38c30270c1571a4..cad694b0a6fdc0afab936f7c074e8dc5207766a5 100644
--- a/source/Lib/CommonLib/InterPrediction.cpp
+++ b/source/Lib/CommonLib/InterPrediction.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -57,6 +57,9 @@ InterPrediction::InterPrediction()
 , m_maxCompIDToPred ( MAX_NUM_COMPONENT )
 , m_pcRdCost        ( nullptr )
 , m_storedMv        ( nullptr )
+, m_skipPROF (false)
+, m_encOnly  (false)
+, m_isBi     (false)
 , m_gradX0(nullptr)
 , m_gradY0(nullptr)
 , m_gradX1(nullptr)
@@ -124,6 +127,9 @@ void InterPrediction::destroy()
   }
 
   m_triangleBuf.destroy();
+  m_colorTransResiBuf[0].destroy();
+  m_colorTransResiBuf[1].destroy();
+  m_colorTransResiBuf[2].destroy();
 
   if (m_storedMv != nullptr)
   {
@@ -146,9 +152,10 @@ void InterPrediction::destroy()
     xFree(m_cRefSamplesDMVRL1[ch]);
     m_cRefSamplesDMVRL1[ch] = nullptr;
   }
+  m_IBCBuffer.destroy();
 }
 
-void InterPrediction::init( RdCost* pcRdCost, ChromaFormat chromaFormatIDC )
+void InterPrediction::init( RdCost* pcRdCost, ChromaFormat chromaFormatIDC, const int ctuSize )
 {
   m_pcRdCost = pcRdCost;
 
@@ -186,6 +193,9 @@ void InterPrediction::init( RdCost* pcRdCost, ChromaFormat chromaFormatIDC )
     }
 
     m_triangleBuf.create(UnitArea(chromaFormatIDC, Area(0, 0, MAX_CU_SIZE, MAX_CU_SIZE)));
+    m_colorTransResiBuf[0].create(UnitArea(chromaFormatIDC, Area(0, 0, MAX_CU_SIZE, MAX_CU_SIZE)));
+    m_colorTransResiBuf[1].create(UnitArea(chromaFormatIDC, Area(0, 0, MAX_CU_SIZE, MAX_CU_SIZE)));
+    m_colorTransResiBuf[2].create(UnitArea(chromaFormatIDC, Area(0, 0, MAX_CU_SIZE, MAX_CU_SIZE)));
 
     m_iRefListIdx = -1;
 
@@ -214,42 +224,11 @@ void InterPrediction::init( RdCost* pcRdCost, ChromaFormat chromaFormatIDC )
     const int MVBUFFER_SIZE = MAX_CU_SIZE / MIN_PU_SIZE;
     m_storedMv = new Mv[MVBUFFER_SIZE*MVBUFFER_SIZE];
   }
-}
-
-bool checkIdenticalMotion( const PredictionUnit &pu, bool checkAffine )
-{
-  const Slice &slice = *pu.cs->slice;
-
-  if( slice.isInterB() && !pu.cs->pps->getWPBiPred() )
+  if (m_IBCBuffer.bufs.empty())
   {
-    if( pu.refIdx[0] >= 0 && pu.refIdx[1] >= 0 )
-    {
-      int RefPOCL0 = slice.getRefPic( REF_PIC_LIST_0, pu.refIdx[0] )->getPOC();
-      int RefPOCL1 = slice.getRefPic( REF_PIC_LIST_1, pu.refIdx[1] )->getPOC();
-
-      if( RefPOCL0 == RefPOCL1 )
-      {
-        if( !pu.cu->affine )
-        {
-          if( pu.mv[0] == pu.mv[1] )
-          {
-            return true;
-          }
-        }
-        else
-        {
-          CHECK( !checkAffine, "In this case, checkAffine should be on." );
-          if ( (pu.cu->affineType == AFFINEMODEL_4PARAM && (pu.mvAffi[0][0] == pu.mvAffi[1][0]) && (pu.mvAffi[0][1] == pu.mvAffi[1][1]))
-            || (pu.cu->affineType == AFFINEMODEL_6PARAM && (pu.mvAffi[0][0] == pu.mvAffi[1][0]) && (pu.mvAffi[0][1] == pu.mvAffi[1][1]) && (pu.mvAffi[0][2] == pu.mvAffi[1][2])) )
-          {
-            return true;
-          }
-        }
-      }
-    }
+    m_IBCBufferWidth = g_IBCBufferSize / ctuSize;
+    m_IBCBuffer.create(UnitArea(chromaFormatIDC, Area(0, 0, m_IBCBufferWidth, ctuSize)));
   }
-
-  return false;
 }
 
 // ====================================================================================================================
@@ -291,7 +270,7 @@ bool InterPrediction::xCheckIdenticalMotion( const PredictionUnit &pu )
   return false;
 }
 
-void InterPrediction::xSubPuMC( PredictionUnit& pu, PelUnitBuf& predBuf, const RefPicList &eRefPicList /*= REF_PIC_LIST_X*/ )
+void InterPrediction::xSubPuMC( PredictionUnit& pu, PelUnitBuf& predBuf, const RefPicList &eRefPicList /*= REF_PIC_LIST_X*/, const bool luma /*= true*/, const bool chroma /*= true*/)
 {
 
   // compute the location of the current PU
@@ -324,6 +303,12 @@ void InterPrediction::xSubPuMC( PredictionUnit& pu, PelUnitBuf& predBuf, const R
   int  fstStep = (!verMC ? puHeight : puWidth);
   int  secStep = (!verMC ? puWidth : puHeight);
 
+#if JVET_Q0487_SCALING_WINDOW_ISSUES
+  bool scaled = pu.cu->slice->getRefPic( REF_PIC_LIST_0, 0 )->isRefScaled( pu.cs->pps ) || ( pu.cs->slice->getSliceType() == B_SLICE ? pu.cu->slice->getRefPic( REF_PIC_LIST_1, 0 )->isRefScaled( pu.cs->pps ) : false );
+#else
+  bool scaled = pu.cu->slice->getScalingRatio( REF_PIC_LIST_0, 0 ) != SCALE_1X || ( pu.cs->slice->getSliceType() == B_SLICE ? pu.cu->slice->getScalingRatio( REF_PIC_LIST_1, 0 ) != SCALE_1X : false );
+#endif
+
   m_subPuMC = true;
 
   for (int fstDim = fstStart; fstDim < fstEnd; fstDim += fstStep)
@@ -340,7 +325,7 @@ void InterPrediction::xSubPuMC( PredictionUnit& pu, PelUnitBuf& predBuf, const R
       while (later < secEnd)
       {
         const MotionInfo &laterMi = !verMC ? pu.getMotionInfo(Position{ later, fstDim }) : pu.getMotionInfo(Position{ fstDim, later });
-        if (laterMi == curMi)
+        if (!scaled && laterMi == curMi)
         {
           length += secStep;
         }
@@ -358,7 +343,7 @@ void InterPrediction::xSubPuMC( PredictionUnit& pu, PelUnitBuf& predBuf, const R
       PelUnitBuf subPredBuf = predBuf.subBuf(UnitAreaRelative(pu, subPu));
       subPu.mmvdEncOptMode = 0;
       subPu.mvRefine = false;
-      motionCompensation(subPu, subPredBuf, eRefPicList);
+      motionCompensation(subPu, subPredBuf, eRefPicList, luma, chroma);
       secDim = later - secStep;
     }
   }
@@ -366,36 +351,99 @@ void InterPrediction::xSubPuMC( PredictionUnit& pu, PelUnitBuf& predBuf, const R
 
   pu.cu->affine = isAffine;
 }
-
-void InterPrediction::xChromaMC(PredictionUnit &pu, PelUnitBuf& pcYuvPred)
+void InterPrediction::xSubPuBio(PredictionUnit& pu, PelUnitBuf& predBuf, const RefPicList &eRefPicList /*= REF_PIC_LIST_X*/, PelUnitBuf* yuvDstTmp /*= NULL*/)
 {
-  // separated tree, chroma
-  const CompArea lumaArea = CompArea(COMPONENT_Y, pu.chromaFormat, pu.Cb().lumaPos(), recalcSize(pu.chromaFormat, CHANNEL_TYPE_CHROMA, CHANNEL_TYPE_LUMA, pu.Cb().size()));
+  // compute the location of the current PU
+  Position puPos = pu.lumaPos();
+  Size puSize = pu.lumaSize();
+
+#if JVET_J0090_MEMORY_BANDWITH_MEASURE
+  JVET_J0090_SET_CACHE_ENABLE(true);
+  int mvShift = (MV_FRACTIONAL_BITS_INTERNAL);
+  for (int k = 0; k < NUM_REF_PIC_LIST_01; k++)
+  {
+    RefPicList refId = (RefPicList)k;
+    const Picture* refPic = pu.cu->slice->getRefPic(refId, pu.refIdx[refId]);
+    for (int compID = 0; compID < MAX_NUM_COMPONENT; compID++)
+    {
+      Mv cMv = pu.mv[refId];
+      int mvshiftTemp = mvShift + getComponentScaleX((ComponentID)compID, pu.chromaFormat);
+      int filtersize = (compID == (COMPONENT_Y)) ? NTAPS_LUMA : NTAPS_CHROMA;
+      cMv += Mv(-(((filtersize >> 1) - 1) << mvshiftTemp), -(((filtersize >> 1) - 1) << mvshiftTemp));
+      bool wrapRef = false;
+      if (pu.cs->sps->getWrapAroundEnabledFlag())
+      {
+        wrapRef = wrapClipMv(cMv, pu.blocks[0].pos(), pu.blocks[0].size(), pu.cs->sps, pu.cs->pps);
+      }
+      else
+      {
+        clipMv(cMv, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps, *pu.cs->pps);
+      }
+
+      int width = predBuf.bufs[compID].width + (filtersize - 1);
+      int height = predBuf.bufs[compID].height + (filtersize - 1);
+
+      CPelBuf refBuf;
+      Position recOffset = pu.blocks[compID].pos().offset(cMv.getHor() >> mvshiftTemp, cMv.getVer() >> mvshiftTemp);
+      refBuf = refPic->getRecoBuf(CompArea((ComponentID)compID, pu.chromaFormat, recOffset, pu.blocks[compID].size()), wrapRef);
+
+      JVET_J0090_SET_REF_PICTURE(refPic, (ComponentID)compID);
+      for (int row = 0; row < height; row++)
+      {
+        for (int col = 0; col < width; col++)
+        {
+          JVET_J0090_CACHE_ACCESS(((Pel *)refBuf.buf) + row * refBuf.stride + col, __FILE__, __LINE__);
+        }
+      }
+    }
+  }
+  JVET_J0090_SET_CACHE_ENABLE(false);
+#endif
   PredictionUnit subPu;
+
   subPu.cs = pu.cs;
   subPu.cu = pu.cu;
-
-  Picture * refPic = pu.cu->slice->getPic();
-  for (int y = lumaArea.y; y < lumaArea.y + lumaArea.height; y += MIN_PU_SIZE)
+  subPu.mergeType = pu.mergeType;
+  subPu.mmvdMergeFlag = pu.mmvdMergeFlag;
+  subPu.mmvdEncOptMode = pu.mmvdEncOptMode;
+  subPu.mergeFlag = pu.mergeFlag;
+  subPu.ciipFlag = pu.ciipFlag;
+  subPu.mvRefine = pu.mvRefine;
+  subPu.refIdx[0] = pu.refIdx[0];
+  subPu.refIdx[1] = pu.refIdx[1];
+  int  fstStart = puPos.y;
+  int  secStart = puPos.x;
+  int  fstEnd = puPos.y + puSize.height;
+  int  secEnd = puPos.x + puSize.width;
+  int  fstStep = std::min((int)MAX_BDOF_APPLICATION_REGION, (int)puSize.height);
+  int  secStep = std::min((int)MAX_BDOF_APPLICATION_REGION, (int)puSize.width);
+  for (int fstDim = fstStart; fstDim < fstEnd; fstDim += fstStep)
   {
-    for (int x = lumaArea.x; x < lumaArea.x + lumaArea.width; x += MIN_PU_SIZE)
+    for (int secDim = secStart; secDim < secEnd; secDim += secStep)
     {
-      const MotionInfo &curMi = pu.cs->picture->cs->getMotionInfo(Position{ x, y });
+      int x = secDim;
+      int y = fstDim;
+      int dx = secStep;
+      int dy = fstStep;
 
-      subPu.UnitArea::operator=(UnitArea(pu.chromaFormat, Area(x, y, MIN_PU_SIZE, MIN_PU_SIZE)));
-      PelUnitBuf subPredBuf = pcYuvPred.subBuf(UnitAreaRelative(pu, subPu));
+      const MotionInfo &curMi = pu.getMotionInfo(Position{ x, y });
+
+      subPu.UnitArea::operator=(UnitArea(pu.chromaFormat, Area(x, y, dx, dy)));
+      subPu = curMi;
+      PelUnitBuf subPredBuf = predBuf.subBuf(UnitAreaRelative(pu, subPu));
 
-      xPredInterBlk(COMPONENT_Cb, subPu, refPic, curMi.mv[0], subPredBuf, false, pu.cu->slice->clpRng(COMPONENT_Cb)
-                    , false
-                    , true);
-      xPredInterBlk(COMPONENT_Cr, subPu, refPic, curMi.mv[0], subPredBuf, false, pu.cu->slice->clpRng(COMPONENT_Cr)
-                    , false
-                    , true);
+      if (yuvDstTmp)
+      {
+        PelUnitBuf subPredBufTmp = yuvDstTmp->subBuf(UnitAreaRelative(pu, subPu));
+        motionCompensation(subPu, subPredBuf, eRefPicList, true, true, &subPredBufTmp);
+      }
+      else
+      motionCompensation(subPu, subPredBuf, eRefPicList);
     }
   }
+  JVET_J0090_SET_CACHE_ENABLE(true);
 }
 
-
 void InterPrediction::xPredInterUni(const PredictionUnit& pu, const RefPicList& eRefPicList, PelUnitBuf& pcYuvPred, const bool& bi
                                    , const bool& bioApplied
                                    , const bool luma, const bool chroma
@@ -406,6 +454,7 @@ void InterPrediction::xPredInterUni(const PredictionUnit& pu, const RefPicList&
   int iRefIdx = pu.refIdx[eRefPicList];
   Mv mv[3];
   bool isIBC = false;
+  CHECK( !CU::isIBC( *pu.cu ) && pu.lwidth() == 4 && pu.lheight() == 4, "invalid 4x4 inter blocks" );
   if (CU::isIBC(*pu.cu))
   {
     isIBC = true;
@@ -422,10 +471,21 @@ void InterPrediction::xPredInterUni(const PredictionUnit& pu, const RefPicList&
   {
     mv[0] = pu.mv[eRefPicList];
   }
-  if ( !pu.cu->affine )
-  clipMv(mv[0], pu.cu->lumaPos(),
-         pu.cu->lumaSize(),
-         sps);
+
+  if( !pu.cu->affine )
+  {
+#if JVET_Q0487_SCALING_WINDOW_ISSUES
+    if( pu.cu->slice->getRefPic( eRefPicList, iRefIdx )->isRefScaled( pu.cs->pps ) == false )
+#else
+    if( pu.cu->slice->getScalingRatio( eRefPicList, iRefIdx ) == SCALE_1X )
+#endif
+    {
+      if( !sps.getWrapAroundEnabledFlag() )
+      {
+        clipMv( mv[0], pu.cu->lumaPos(), pu.cu->lumaSize(), sps, *pu.cs->pps );
+      }
+    }
+  }
 
   for( uint32_t comp = COMPONENT_Y; comp < pcYuvPred.bufs.size() && comp <= m_maxCompIDToPred; comp++ )
   {
@@ -437,7 +497,9 @@ void InterPrediction::xPredInterUni(const PredictionUnit& pu, const RefPicList&
     if ( pu.cu->affine )
     {
       CHECK( bioApplied, "BIO is not allowed with affine" );
-      xPredAffineBlk( compID, pu, pu.cu->slice->getRefPic( eRefPicList, iRefIdx ), mv, pcYuvPred, bi, pu.cu->slice->clpRng( compID ) );
+      m_iRefListIdx = eRefPicList;
+      bool genChromaMv = (!luma && chroma && compID == COMPONENT_Cb);
+      xPredAffineBlk( compID, pu, pu.cu->slice->getRefPic( eRefPicList, iRefIdx )->unscaledPic, mv, pcYuvPred, bi, pu.cu->slice->clpRng( compID ), genChromaMv, pu.cu->slice->getScalingRatio( eRefPicList, iRefIdx ));
     }
     else
     {
@@ -450,22 +512,26 @@ void InterPrediction::xPredInterUni(const PredictionUnit& pu, const RefPicList&
       }
       else
       {
-        xPredInterBlk(compID, pu, pu.cu->slice->getRefPic(eRefPicList, iRefIdx), mv[0], pcYuvPred, bi, pu.cu->slice->clpRng(compID)
-          , bioApplied
-          , isIBC
-        );
+        xPredInterBlk( compID, pu, pu.cu->slice->getRefPic( eRefPicList, iRefIdx )->unscaledPic, mv[0], pcYuvPred, bi, pu.cu->slice->clpRng( compID ), bioApplied, isIBC, pu.cu->slice->getScalingRatio( eRefPicList, iRefIdx ) );
       }
     }
   }
 }
 
-void InterPrediction::xPredInterBi(PredictionUnit& pu, PelUnitBuf &pcYuvPred)
+void InterPrediction::xPredInterBi(PredictionUnit &pu, PelUnitBuf &pcYuvPred, const bool luma, const bool chroma, PelUnitBuf *yuvPredTmp /*= NULL*/)
 {
   const PPS   &pps   = *pu.cs->pps;
   const Slice &slice = *pu.cs->slice;
+  CHECK( !pu.cu->affine && pu.refIdx[0] >= 0 && pu.refIdx[1] >= 0 && ( pu.lwidth() + pu.lheight() == 12 ), "invalid 4x8/8x4 bi-predicted blocks" );
+  WPScalingParam *wp0;
+  WPScalingParam *wp1;
+  int refIdx0 = pu.refIdx[REF_PIC_LIST_0];
+  int refIdx1 = pu.refIdx[REF_PIC_LIST_1];
+  pu.cs->slice->getWpScaling(REF_PIC_LIST_0, refIdx0, wp0);
+  pu.cs->slice->getWpScaling(REF_PIC_LIST_1, refIdx1, wp1);
 
   bool bioApplied = false;
-  if (pu.cs->sps->getBDOFEnabledFlag())
+  if (pu.cs->sps->getBDOFEnabledFlag() && (!pu.cs->picHeader->getDisBdofFlag()))
   {
     if (pu.cu->affine || m_subPuMC)
     {
@@ -473,24 +539,29 @@ void InterPrediction::xPredInterBi(PredictionUnit& pu, PelUnitBuf &pcYuvPred)
     }
     else
     {
-      const bool biocheck0 = !(pps.getWPBiPred() && slice.getSliceType() == B_SLICE);
+      const bool biocheck0 = !((wp0[COMPONENT_Y].bPresentFlag || wp1[COMPONENT_Y].bPresentFlag) && slice.getSliceType() == B_SLICE);
       const bool biocheck1 = !(pps.getUseWP() && slice.getSliceType() == P_SLICE);
       if (biocheck0
         && biocheck1
-        && PU::isBiPredFromDifferentDir(pu)
-        && !(pu.Y().height == 4 || (pu.Y().width == 4 && pu.Y().height == 8))
+        && PU::isBiPredFromDifferentDirEqDistPoc(pu)
+        && (pu.Y().height >= 8)
+        && (pu.Y().width >= 8)
+        && ((pu.Y().height * pu.Y().width) >= 128)
        )
       {
         bioApplied = true;
       }
     }
 
+    if (bioApplied && pu.ciipFlag)
+      bioApplied = false;
+
     if (bioApplied && pu.cu->smvdMode)
     {
       bioApplied = false;
     }
 
-    if (pu.cu->cs->sps->getUseGBi() && bioApplied && pu.cu->GBiIdx != GBI_DEFAULT)
+    if (pu.cu->cs->sps->getUseBcw() && bioApplied && pu.cu->BcwIdx != BCW_DEFAULT)
     {
       bioApplied = false;
     }
@@ -500,6 +571,18 @@ void InterPrediction::xPredInterBi(PredictionUnit& pu, PelUnitBuf &pcYuvPred)
   }
   bool dmvrApplied = false;
   dmvrApplied = (pu.mvRefine) && PU::checkDMVRCondition(pu);
+
+#if JVET_Q0487_SCALING_WINDOW_ISSUES
+  bool refIsScaled = ( refIdx0 < 0 ? false : pu.cu->slice->getRefPic( REF_PIC_LIST_0, refIdx0 )->isRefScaled( pu.cs->pps ) ) || 
+                     ( refIdx1 < 0 ? false : pu.cu->slice->getRefPic( REF_PIC_LIST_1, refIdx1 )->isRefScaled( pu.cs->pps ) );
+  dmvrApplied = dmvrApplied && !refIsScaled;
+  bioApplied = bioApplied && !refIsScaled;
+#else
+  bool samePicSize = ( refIdx0 < 0 ? true : pu.cu->slice->getScalingRatio( REF_PIC_LIST_0, refIdx0 ) == SCALE_1X ) && ( refIdx1 < 0 ? true : pu.cu->slice->getScalingRatio( REF_PIC_LIST_1, refIdx1 ) == SCALE_1X );
+  dmvrApplied = dmvrApplied && samePicSize;
+  bioApplied = bioApplied && samePicSize;
+#endif
+
   for (uint32_t refList = 0; refList < NUM_REF_PIC_LIST_01; refList++)
   {
     if( pu.refIdx[refList] < 0)
@@ -521,10 +604,14 @@ void InterPrediction::xPredInterBi(PredictionUnit& pu, PelUnitBuf &pcYuvPred)
     if (pu.refIdx[0] >= 0 && pu.refIdx[1] >= 0)
     {
       if (dmvrApplied)
-        continue; // mc will happen in processDMVR
+      {
+        if (yuvPredTmp)
+          xPredInterUni(pu, eRefPicList, pcMbBuf, true, false, luma, chroma);
+        continue;
+      }
       xPredInterUni ( pu, eRefPicList, pcMbBuf, true
         , bioApplied
-        , true, true
+        , luma, chroma
       );
     }
     else
@@ -533,43 +620,51 @@ void InterPrediction::xPredInterBi(PredictionUnit& pu, PelUnitBuf &pcYuvPred)
       {
         xPredInterUni ( pu, eRefPicList, pcMbBuf, true
           , bioApplied
-          , true, true
+          , luma, chroma
         );
       }
       else
       {
         xPredInterUni( pu, eRefPicList, pcMbBuf, pu.cu->triangle
           , bioApplied
-          , true, true
+          , luma, chroma
         );
       }
     }
   }
-  if (dmvrApplied)
-  {
-    xProcessDMVR(pu, pcYuvPred, slice.clpRngs(), bioApplied);
-  }
-
-
   CPelUnitBuf srcPred0 = ( pu.chromaFormat == CHROMA_400 ?
                            CPelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[0][0], pcYuvPred.Y())) :
                            CPelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[0][0], pcYuvPred.Y()), PelBuf(m_acYuvPred[0][1], pcYuvPred.Cb()), PelBuf(m_acYuvPred[0][2], pcYuvPred.Cr())) );
   CPelUnitBuf srcPred1 = ( pu.chromaFormat == CHROMA_400 ?
                            CPelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[1][0], pcYuvPred.Y())) :
                            CPelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[1][0], pcYuvPred.Y()), PelBuf(m_acYuvPred[1][1], pcYuvPred.Cb()), PelBuf(m_acYuvPred[1][2], pcYuvPred.Cr())) );
-  if( pps.getWPBiPred() && slice.getSliceType() == B_SLICE )
+  const bool lumaOnly   = luma && !chroma;
+  const bool chromaOnly = !luma && chroma;
+  if( !pu.cu->triangle && (!dmvrApplied) && (!bioApplied) && pps.getWPBiPred() && slice.getSliceType() == B_SLICE && pu.cu->BcwIdx==BCW_DEFAULT)
   {
-    xWeightedPredictionBi( pu, srcPred0, srcPred1, pcYuvPred, m_maxCompIDToPred );
+    xWeightedPredictionBi( pu, srcPred0, srcPred1, pcYuvPred, m_maxCompIDToPred, lumaOnly, chromaOnly );
+    if (yuvPredTmp)
+      yuvPredTmp->copyFrom(pcYuvPred);
   }
-  else if( pps.getUseWP() && slice.getSliceType() == P_SLICE )
+  else if( !pu.cu->triangle && pps.getUseWP() && slice.getSliceType() == P_SLICE )
   {
-    xWeightedPredictionUni( pu, srcPred0, REF_PIC_LIST_0, pcYuvPred, -1, m_maxCompIDToPred );
+    xWeightedPredictionUni( pu, srcPred0, REF_PIC_LIST_0, pcYuvPred, -1, m_maxCompIDToPred, lumaOnly, chromaOnly );
+    if (yuvPredTmp)
+      yuvPredTmp->copyFrom(pcYuvPred);
   }
   else
   {
-    if (dmvrApplied == false)
+    if (dmvrApplied)
+    {
+      if (yuvPredTmp)
+      {
+        yuvPredTmp->addAvg(srcPred0, srcPred1, slice.clpRngs(), false);
+      }
+      xProcessDMVR(pu, pcYuvPred, slice.clpRngs(), bioApplied);
+    }
+    else
     {
-    xWeightedAverage( pu, srcPred0, srcPred1, pcYuvPred, slice.getSPS()->getBitDepths(), slice.clpRngs(), bioApplied );
+      xWeightedAverage( pu, srcPred0, srcPred1, pcYuvPred, slice.getSPS()->getBitDepths(), slice.clpRngs(), bioApplied, lumaOnly, chromaOnly, yuvPredTmp );
     }
   }
 }
@@ -577,6 +672,7 @@ void InterPrediction::xPredInterBi(PredictionUnit& pu, PelUnitBuf &pcYuvPred)
 void InterPrediction::xPredInterBlk ( const ComponentID& compID, const PredictionUnit& pu, const Picture* refPic, const Mv& _mv, PelUnitBuf& dstPic, const bool& bi, const ClpRng& clpRng
                                      , const bool& bioApplied
                                      , bool isIBC
+                                     , const std::pair<int, int> scalingRatio
                                      , SizeType dmvrWidth
                                      , SizeType dmvrHeight
                                      , bool bilinearMC
@@ -591,8 +687,24 @@ void InterPrediction::xPredInterBlk ( const ComponentID& compID, const Predictio
   int shiftHor = MV_FRACTIONAL_BITS_INTERNAL + ::getComponentScaleX(compID, chFmt);
   int shiftVer = MV_FRACTIONAL_BITS_INTERNAL + ::getComponentScaleY(compID, chFmt);
 
-  int xFrac = _mv.hor & ((1 << shiftHor) - 1);
-  int yFrac = _mv.ver & ((1 << shiftVer) - 1);
+  bool  wrapRef = false;
+  Mv    mv(_mv);
+  if( !isIBC && pu.cs->sps->getWrapAroundEnabledFlag() )
+  {
+    wrapRef = wrapClipMv( mv, pu.blocks[0].pos(), pu.blocks[0].size(), pu.cs->sps, pu.cs->pps );
+  }
+
+  bool useAltHpelIf = pu.cu->imv == IMV_HPEL;
+
+  if( !isIBC && xPredInterBlkRPR( scalingRatio, *pu.cs->pps, CompArea( compID, chFmt, pu.blocks[compID], Size( dstPic.bufs[compID].width, dstPic.bufs[compID].height ) ), refPic, mv, dstPic.bufs[compID].buf, dstPic.bufs[compID].stride, bi, wrapRef, clpRng, 0, useAltHpelIf ) )
+  {
+    CHECK( bilinearMC, "DMVR should be disabled with RPR" );
+    CHECK( bioApplied, "BDOF should be disabled with RPR" );
+  }
+  else
+  {
+  int xFrac = mv.hor & ((1 << shiftHor) - 1);
+  int yFrac = mv.ver & ((1 << shiftVer) - 1);
   if (isIBC)
   {
     xFrac = yFrac = 0;
@@ -605,13 +717,13 @@ void InterPrediction::xPredInterBlk ( const ComponentID& compID, const Predictio
 
   CPelBuf refBuf;
   {
-    Position offset = pu.blocks[compID].pos().offset( _mv.getHor() >> shiftHor, _mv.getVer() >> shiftVer );
+    Position offset = pu.blocks[compID].pos().offset( mv.getHor() >> shiftHor, mv.getVer() >> shiftVer );
     if (dmvrWidth)
     {
-      refBuf = refPic->getRecoBuf(CompArea(compID, chFmt, offset, Size(dmvrWidth, dmvrHeight)));
+      refBuf = refPic->getRecoBuf(CompArea(compID, chFmt, offset, Size(dmvrWidth, dmvrHeight)), wrapRef);
     }
     else
-    refBuf = refPic->getRecoBuf( CompArea( compID, chFmt, offset, pu.blocks[compID].size() ) );
+    refBuf = refPic->getRecoBuf( CompArea( compID, chFmt, offset, pu.blocks[compID].size() ), wrapRef);
   }
 
   if (NULL != srcPadBuf)
@@ -640,13 +752,14 @@ void InterPrediction::xPredInterBlk ( const ComponentID& compID, const Predictio
     dstBuf.buf = m_filteredBlockTmp[2 + m_iRefListIdx][compID] + 2 * dstBuf.stride + 2;
   }
 
+
   if( yFrac == 0 )
   {
-    m_if.filterHor(compID, (Pel*)refBuf.buf, refBuf.stride, dstBuf.buf, dstBuf.stride, backupWidth, backupHeight, xFrac, rndRes, chFmt, clpRng, bilinearMC, bilinearMC);
+    m_if.filterHor(compID, (Pel*)refBuf.buf, refBuf.stride, dstBuf.buf, dstBuf.stride, backupWidth, backupHeight, xFrac, rndRes, chFmt, clpRng, bilinearMC, bilinearMC, useAltHpelIf);
   }
   else if( xFrac == 0 )
   {
-    m_if.filterVer(compID, (Pel*)refBuf.buf, refBuf.stride, dstBuf.buf, dstBuf.stride, backupWidth, backupHeight, yFrac, true, rndRes, chFmt, clpRng, bilinearMC, bilinearMC);
+    m_if.filterVer(compID, (Pel*)refBuf.buf, refBuf.stride, dstBuf.buf, dstBuf.stride, backupWidth, backupHeight, yFrac, true, rndRes, chFmt, clpRng, bilinearMC, bilinearMC, useAltHpelIf);
   }
   else
   {
@@ -659,15 +772,17 @@ void InterPrediction::xPredInterBlk ( const ComponentID& compID, const Predictio
     {
       vFilterSize = NTAPS_BILINEAR;
     }
-    m_if.filterHor(compID, (Pel*)refBuf.buf - ((vFilterSize >> 1) - 1) * refBuf.stride, refBuf.stride, tmpBuf.buf, tmpBuf.stride, backupWidth, backupHeight + vFilterSize - 1, xFrac, false, chFmt, clpRng, bilinearMC, bilinearMC);
+    m_if.filterHor(compID, (Pel*)refBuf.buf - ((vFilterSize >> 1) - 1) * refBuf.stride, refBuf.stride, tmpBuf.buf, tmpBuf.stride, backupWidth, backupHeight + vFilterSize - 1, xFrac, false, chFmt, clpRng, bilinearMC, bilinearMC, useAltHpelIf);
     JVET_J0090_SET_CACHE_ENABLE( false );
-    m_if.filterVer(compID, (Pel*)tmpBuf.buf + ((vFilterSize >> 1) - 1) * tmpBuf.stride, tmpBuf.stride, dstBuf.buf, dstBuf.stride, backupWidth, backupHeight, yFrac, false, rndRes, chFmt, clpRng, bilinearMC, bilinearMC);
+    m_if.filterVer(compID, (Pel*)tmpBuf.buf + ((vFilterSize >> 1) - 1) * tmpBuf.stride, tmpBuf.stride, dstBuf.buf, dstBuf.stride, backupWidth, backupHeight, yFrac, false, rndRes, chFmt, clpRng, bilinearMC, bilinearMC, useAltHpelIf);
   }
-  JVET_J0090_SET_CACHE_ENABLE( true );
+  JVET_J0090_SET_CACHE_ENABLE((srcPadStride == 0) && (bioApplied == false)); // Enabled only in non-DMVR-non-BDOF process, In DMVR process, srcPadStride is always non-zero
   if (bioApplied && compID == COMPONENT_Y)
   {
     const int shift = std::max<int>(2, (IF_INTERNAL_PREC - clpRng.bd));
-    const Pel* refPel = refBuf.buf - refBuf.stride - 1;
+    int xOffset = (xFrac < 8) ? 1 : 0;
+    int yOffset = (yFrac < 8) ? 1 : 0;
+    const Pel* refPel = refBuf.buf - yOffset * refBuf.stride - xOffset;
     Pel* dstPel = m_filteredBlockTmp[2 + m_iRefListIdx][compID] + dstBuf.stride + 1;
     for (int w = 0; w < (width - 2 * BIO_EXTEND_SIZE); w++)
     {
@@ -675,7 +790,7 @@ void InterPrediction::xPredInterBlk ( const ComponentID& compID, const Predictio
       dstPel[w] = val - (Pel)IF_INTERNAL_OFFS;
     }
 
-    refPel = refBuf.buf - 1;
+    refPel = refBuf.buf + (1 - yOffset)*refBuf.stride - xOffset;
     dstPel = m_filteredBlockTmp[2 + m_iRefListIdx][compID] + 2 * dstBuf.stride + 1;
     for (int h = 0; h < (height - 2 * BIO_EXTEND_SIZE - 2); h++)
     {
@@ -689,7 +804,7 @@ void InterPrediction::xPredInterBlk ( const ComponentID& compID, const Predictio
       dstPel += dstBuf.stride;
     }
 
-    refPel = refBuf.buf + (height - 2 * BIO_EXTEND_SIZE - 2)*refBuf.stride - 1;
+    refPel = refBuf.buf + (height - 2 * BIO_EXTEND_SIZE - 2 + 1 - yOffset)*refBuf.stride - xOffset;
     dstPel = m_filteredBlockTmp[2 + m_iRefListIdx][compID] + (height - 2 * BIO_EXTEND_SIZE)*dstBuf.stride + 1;
     for (int w = 0; w < (width - 2 * BIO_EXTEND_SIZE); w++)
     {
@@ -703,24 +818,51 @@ void InterPrediction::xPredInterBlk ( const ComponentID& compID, const Predictio
     dstBuf.buf = backupDstBufPtr;
     dstBuf.stride = backupDstBufStride;
   }
+  }
 }
 
-void InterPrediction::xPredAffineBlk( const ComponentID& compID, const PredictionUnit& pu, const Picture* refPic, const Mv* _mv, PelUnitBuf& dstPic, const bool& bi, const ClpRng& clpRng )
+bool InterPrediction::isSubblockVectorSpreadOverLimit( int a, int b, int c, int d, int predType ) // returns true when the reference-block size estimated from the MV deltas a, b, c, d exceeds the limit selected by predType
 {
-  if ( (pu.cu->affineType == AFFINEMODEL_6PARAM && _mv[0] == _mv[1] && _mv[0] == _mv[2])
-    || (pu.cu->affineType == AFFINEMODEL_4PARAM && _mv[0] == _mv[1])
-    )
+  int s4 = ( 4 << 11 ); // the value 4 at the 11-bit scale that the ">> 11" shifts below remove; presumably the 4-sample sub-block size - TODO confirm
+  int filterTap = 6; // added to both dimensions and used in the area limits below; presumably the interpolation filter length - TODO confirm
+
+  if ( predType == 3 ) // xPredAffineBlk passes pu.interDir here; presumably 3 means bi-prediction - TODO confirm
+  {
+    int refBlkWidth  = std::max( std::max( 0, 4 * a + s4 ), std::max( 4 * c, 4 * a + 4 * c + s4 ) ) - std::min( std::min( 0, 4 * a + s4 ), std::min( 4 * c, 4 * a + 4 * c + s4 ) ); // horizontal spread: max minus min of { 0, 4a+s4, 4c, 4a+4c+s4 }
+    int refBlkHeight = std::max( std::max( 0, 4 * b ), std::max( 4 * d + s4, 4 * b + 4 * d + s4 ) ) - std::min( std::min( 0, 4 * b ), std::min( 4 * d + s4, 4 * b + 4 * d + s4 ) ); // vertical spread: max minus min of { 0, 4b, 4d+s4, 4b+4d+s4 }
+    refBlkWidth  = ( refBlkWidth >> 11 ) + filterTap + 3; // drop the 11 scale bits, then add filterTap + 3
+    refBlkHeight = ( refBlkHeight >> 11 ) + filterTap + 3; // same conversion for the vertical spread
+
+    if ( refBlkWidth * refBlkHeight > ( filterTap + 9 ) * ( filterTap + 9 ) ) // area limit: 15 * 15 = 225
+    {
+      return true;
+    }
+  }
+  else // any other predType: two separate checks, first on (a, b), then on (c, d)
   {
-    Mv mvTemp = _mv[0];
-    clipMv( mvTemp, pu.cu->lumaPos(),
-            pu.cu->lumaSize(),
-            *pu.cs->sps );
-    xPredInterBlk( compID, pu, refPic, mvTemp, dstPic, bi, clpRng
-                  , false
-                  , false
-                  );
-    return;
+    int refBlkWidth  = std::max( 0, 4 * a + s4 ) - std::min( 0, 4 * a + s4 ); // spread between 0 and 4a+s4
+    int refBlkHeight = std::max( 0, 4 * b ) - std::min( 0, 4 * b ); // spread between 0 and 4b, i.e. |4b|
+    refBlkWidth  = ( refBlkWidth >> 11 ) + filterTap + 3; // drop the 11 scale bits, then add filterTap + 3
+    refBlkHeight = ( refBlkHeight >> 11 ) + filterTap + 3; // same conversion for the vertical spread
+    if ( refBlkWidth * refBlkHeight > ( filterTap + 9 ) * ( filterTap + 5 ) ) // area limit: 15 * 11 = 165
+    {
+      return true;
+    }
+
+    refBlkWidth  = std::max( 0, 4 * c ) - std::min( 0, 4 * c ); // second check: spread between 0 and 4c, i.e. |4c|
+    refBlkHeight = std::max( 0, 4 * d + s4 ) - std::min( 0, 4 * d + s4 ); // spread between 0 and 4d+s4
+    refBlkWidth  = ( refBlkWidth >> 11 ) + filterTap + 3; // drop the 11 scale bits, then add filterTap + 3
+    refBlkHeight = ( refBlkHeight >> 11 ) + filterTap + 3; // same conversion for the vertical spread
+    if ( refBlkWidth * refBlkHeight > ( filterTap + 5 ) * ( filterTap + 9 ) ) // area limit: 11 * 15 = 165
+    {
+      return true;
+    }
   }
+  return false; // no check exceeded its limit
+}
+
+void InterPrediction::xPredAffineBlk(const ComponentID &compID, const PredictionUnit &pu, const Picture *refPic, const Mv *_mv, PelUnitBuf &dstPic, const bool &bi, const ClpRng &clpRng, bool genChromaMv, const std::pair<int, int> scalingRatio)
+{
 
   JVET_J0090_SET_REF_PICTURE( refPic, compID );
   const ChromaFormat chFmt = pu.chromaFormat;
@@ -739,7 +881,7 @@ void InterPrediction::xPredAffineBlk( const ComponentID& compID, const Predictio
   int blockHeight = AFFINE_MIN_BLOCK_SIZE;
 
   CHECK(blockWidth  > (width >> iScaleX ), "Sub Block width  > Block width");
-  CHECK(blockHeight > (height >> iScaleX), "Sub Block height > Block height");
+  CHECK(blockHeight > (height >> iScaleY), "Sub Block height > Block height");
   const int MVBUFFER_SIZE = MAX_CU_SIZE / MIN_PU_SIZE;
 
   const int cxWidth  = width  >> iScaleX;
@@ -749,12 +891,12 @@ void InterPrediction::xPredAffineBlk( const ComponentID& compID, const Predictio
 
   const int iBit = MAX_CU_DEPTH;
   int iDMvHorX, iDMvHorY, iDMvVerX, iDMvVerY;
-  iDMvHorX = (mvRT - mvLT).getHor() << (iBit - g_aucLog2[cxWidth]);
-  iDMvHorY = (mvRT - mvLT).getVer() << (iBit - g_aucLog2[cxWidth]);
+  iDMvHorX = (mvRT - mvLT).getHor() << (iBit - floorLog2(cxWidth));
+  iDMvHorY = (mvRT - mvLT).getVer() << (iBit - floorLog2(cxWidth));
   if ( pu.cu->affineType == AFFINEMODEL_6PARAM )
   {
-    iDMvVerX = (mvLB - mvLT).getHor() << (iBit - g_aucLog2[cxHeight]);
-    iDMvVerY = (mvLB - mvLT).getVer() << (iBit - g_aucLog2[cxHeight]);
+    iDMvVerX = (mvLB - mvLT).getHor() << (iBit - floorLog2(cxHeight));
+    iDMvVerY = (mvLB - mvLT).getVer() << (iBit - floorLog2(cxHeight));
   }
   else
   {
@@ -767,16 +909,161 @@ void InterPrediction::xPredAffineBlk( const ComponentID& compID, const Predictio
   const SPS &sps    = *pu.cs->sps;
   const int iMvShift = 4;
   const int iOffset  = 8;
-  const int iHorMax = ( sps.getPicWidthInLumaSamples()     + iOffset -      pu.Y().x - 1 ) << iMvShift;
+  const int iHorMax = ( pu.cs->pps->getPicWidthInLumaSamples() + iOffset - pu.Y().x - 1 ) << iMvShift;
   const int iHorMin = (      -(int)pu.cs->pcv->maxCUWidth  - iOffset - (int)pu.Y().x + 1 ) << iMvShift;
-  const int iVerMax = ( sps.getPicHeightInLumaSamples()    + iOffset -      pu.Y().y - 1 ) << iMvShift;
+  const int iVerMax = ( pu.cs->pps->getPicHeightInLumaSamples() + iOffset - pu.Y().y - 1 ) << iMvShift;
   const int iVerMin = (      -(int)pu.cs->pcv->maxCUHeight - iOffset - (int)pu.Y().y + 1 ) << iMvShift;
 
-  PelBuf tmpBuf = PelBuf(m_filteredBlockTmp[0][compID], pu.blocks[compID]);
   const int vFilterSize = isLuma(compID) ? NTAPS_LUMA : NTAPS_CHROMA;
 
   const int shift = iBit - 4 + MV_FRACTIONAL_BITS_INTERNAL;
+  bool      wrapRef = false;
+  const bool subblkMVSpreadOverLimit = isSubblockVectorSpreadOverLimit( iDMvHorX, iDMvHorY, iDMvVerX, iDMvVerY, pu.interDir );
+
+  bool enablePROF = (sps.getUsePROF()) && (!m_skipPROF) && (compID == COMPONENT_Y);
+  enablePROF &= (!pu.cs->picHeader->getDisProfFlag());
+  enablePROF &= !((pu.cu->affineType == AFFINEMODEL_6PARAM && _mv[0] == _mv[1] && _mv[0] == _mv[2]) || (pu.cu->affineType == AFFINEMODEL_4PARAM && _mv[0] == _mv[1]));
+  enablePROF &= !subblkMVSpreadOverLimit;
+  const int profThres = 1 << (iBit + (m_isBi ? 1 : 0));
+  enablePROF &= !m_encOnly || pu.cu->slice->getCheckLDC() || iDMvHorX > profThres || iDMvHorY > profThres || iDMvVerX > profThres || iDMvVerY > profThres || iDMvHorX < -profThres || iDMvHorY < -profThres || iDMvVerX < -profThres || iDMvVerY < -profThres;
+#if JVET_Q0487_SCALING_WINDOW_ISSUES
+  enablePROF &= (refPic->isRefScaled( pu.cs->pps ) == false);
+#else
+  enablePROF &= pu.cs->pps->getPicWidthInLumaSamples() == refPic->getPicWidthInLumaSamples() && pu.cs->pps->getPicHeightInLumaSamples() == refPic->getPicHeightInLumaSamples();
+  enablePROF &= scalingRatio == SCALE_1X;
+#endif
+
+
+  bool isLast = enablePROF ? false : !bi;
+
+  const int cuExtW = AFFINE_MIN_BLOCK_SIZE + PROF_BORDER_EXT_W * 2;
+  const int cuExtH = AFFINE_MIN_BLOCK_SIZE + PROF_BORDER_EXT_H * 2;
+
+  PelBuf gradXExt(m_gradBuf[0], cuExtW, cuExtH);
+  PelBuf gradYExt(m_gradBuf[1], cuExtW, cuExtH);
+  const int MAX_FILTER_SIZE = std::max<int>(NTAPS_LUMA, NTAPS_CHROMA);
+  const int dstExtW = ((blockWidth + PROF_BORDER_EXT_W * 2 + 7) >> 3) << 3;
+  const int dstExtH = blockHeight + PROF_BORDER_EXT_H * 2;
+  PelBuf dstExtBuf(m_filteredBlockTmp[1][compID], dstExtW, dstExtH);
+
+  const int refExtH = dstExtH + MAX_FILTER_SIZE - 1;
+  PelBuf tmpBuf = PelBuf(m_filteredBlockTmp[0][compID], dstExtW, refExtH);
+
+  PelBuf &dstBuf = dstPic.bufs[compID];
+
+  int *dMvScaleHor = m_dMvBuf[m_iRefListIdx];
+  int *dMvScaleVer = m_dMvBuf[m_iRefListIdx] + 16;
+
+  if (enablePROF)
+  {
+    int* dMvH = dMvScaleHor;
+    int* dMvV = dMvScaleVer;
+    int quadHorX = iDMvHorX << 2;
+    int quadHorY = iDMvHorY << 2;
+    int quadVerX = iDMvVerX << 2;
+    int quadVerY = iDMvVerY << 2;
+
+    dMvH[0] = ((iDMvHorX + iDMvVerX) << 1) - ((quadHorX + quadVerX) << 1);
+    dMvV[0] = ((iDMvHorY + iDMvVerY) << 1) - ((quadHorY + quadVerY) << 1);
+
+    for (int w = 1; w < blockWidth; w++)
+    {
+      dMvH[w] = dMvH[w - 1] + quadHorX;
+      dMvV[w] = dMvV[w - 1] + quadHorY;
+    }
+
+    dMvH += blockWidth;
+    dMvV += blockWidth;
+    for (int h = 1; h < blockHeight; h++)
+    {
+      for (int w = 0; w < blockWidth; w++)
+      {
+        dMvH[w] = dMvH[w - blockWidth] + quadVerX;
+        dMvV[w] = dMvV[w - blockWidth] + quadVerY;
+      }
+      dMvH += blockWidth;
+      dMvV += blockWidth;
+    }
+
+    const int mvShift  = 8;
+    const int dmvLimit = ( 1 << 5 ) - 1;
+
+    if (!g_pelBufOP.roundIntVector)
+    {
+      for (int idx = 0; idx < blockWidth * blockHeight; idx++)
+      {
+        roundAffineMv(dMvScaleHor[idx], dMvScaleVer[idx], mvShift);
+        dMvScaleHor[idx] = Clip3( -dmvLimit, dmvLimit, dMvScaleHor[idx] );
+        dMvScaleVer[idx] = Clip3( -dmvLimit, dmvLimit, dMvScaleVer[idx] );
+      }
+    }
+    else
+    {
+      int sz = blockWidth * blockHeight;
+      g_pelBufOP.roundIntVector(dMvScaleHor, sz, mvShift, dmvLimit);
+      g_pelBufOP.roundIntVector(dMvScaleVer, sz, mvShift, dmvLimit);
+    }
+  }
+  int scaleXLuma = ::getComponentScaleX(COMPONENT_Y, chFmt);
+  int scaleYLuma = ::getComponentScaleY(COMPONENT_Y, chFmt);
+
+  if (genChromaMv && pu.chromaFormat != CHROMA_444)
+  {
+    CHECK(compID == COMPONENT_Y, "Chroma only subblock MV calculation should not apply to Luma");
+    int lumaBlockWidth  = AFFINE_MIN_BLOCK_SIZE;
+    int lumaBlockHeight = AFFINE_MIN_BLOCK_SIZE;
+
+    CHECK(lumaBlockWidth > (width >> scaleXLuma), "Sub Block width  > Block width");
+    CHECK(lumaBlockHeight > (height >> scaleYLuma), "Sub Block height > Block height");
+
+    const int cxWidthLuma  = width >> scaleXLuma;
+    const int cxHeightLuma = height >> scaleYLuma;
+    const int halfBWLuma  = lumaBlockWidth >> 1;
+    const int halfBHLuma  = lumaBlockHeight >> 1;
+
+    int dMvHorXLuma, dMvHorYLuma, dMvVerXLuma, dMvVerYLuma;
+    dMvHorXLuma = (mvRT - mvLT).getHor() << (iBit - floorLog2(cxWidthLuma));
+    dMvHorYLuma = (mvRT - mvLT).getVer() << (iBit - floorLog2(cxWidthLuma));
+    if (pu.cu->affineType == AFFINEMODEL_6PARAM)
+    {
+      dMvVerXLuma = (mvLB - mvLT).getHor() << (iBit - floorLog2(cxHeightLuma));
+      dMvVerYLuma = (mvLB - mvLT).getVer() << (iBit - floorLog2(cxHeightLuma));
+    }
+    else
+    {
+      dMvVerXLuma = -dMvHorYLuma;
+      dMvVerYLuma = dMvHorXLuma;
+    }
+
+    const bool subblkMVSpreadOverLimitLuma = isSubblockVectorSpreadOverLimit(dMvHorXLuma, dMvHorYLuma, dMvVerXLuma, dMvVerYLuma, pu.interDir);
+
+    // get luma MV block by block
+    for (int h = 0; h < cxHeightLuma; h += lumaBlockHeight)
+    {
+      for (int w = 0; w < cxWidthLuma; w += lumaBlockWidth)
+      {
+        int mvScaleTmpHor, mvScaleTmpVer;
+        if (!subblkMVSpreadOverLimitLuma)
+        {
+          mvScaleTmpHor = iMvScaleHor + dMvHorXLuma * (halfBWLuma + w) + dMvVerXLuma * (halfBHLuma + h);
+          mvScaleTmpVer = iMvScaleVer + dMvHorYLuma * (halfBWLuma + w) + dMvVerYLuma * (halfBHLuma + h);
+        }
+        else
+        {
+          mvScaleTmpHor = iMvScaleHor + dMvHorXLuma * (cxWidthLuma >> 1) + dMvVerXLuma * (cxHeightLuma >> 1);
+          mvScaleTmpVer = iMvScaleVer + dMvHorYLuma * (cxWidthLuma >> 1) + dMvVerYLuma * (cxHeightLuma >> 1);
+        }
 
+        roundAffineMv(mvScaleTmpHor, mvScaleTmpVer, shift);
+        Mv tmpMv(mvScaleTmpHor, mvScaleTmpVer);
+        tmpMv.clipToStorageBitDepth();
+        mvScaleTmpHor = tmpMv.getHor();
+        mvScaleTmpVer = tmpMv.getVer();
+
+        m_storedMv[h / AFFINE_MIN_BLOCK_SIZE * MVBUFFER_SIZE + w / AFFINE_MIN_BLOCK_SIZE].set(mvScaleTmpHor, mvScaleTmpVer);
+      }
+    }
+  }
   // get prediction block by block
   for ( int h = 0; h < cxHeight; h += blockHeight )
   {
@@ -784,10 +1071,18 @@ void InterPrediction::xPredAffineBlk( const ComponentID& compID, const Predictio
     {
 
       int iMvScaleTmpHor, iMvScaleTmpVer;
-      if(compID == COMPONENT_Y)
+      if (compID == COMPONENT_Y || pu.chromaFormat == CHROMA_444)
       {
-        iMvScaleTmpHor = iMvScaleHor + iDMvHorX * (iHalfBW + w) + iDMvVerX * (iHalfBH + h);
-        iMvScaleTmpVer = iMvScaleVer + iDMvHorY * (iHalfBW + w) + iDMvVerY * (iHalfBH + h);
+        if ( !subblkMVSpreadOverLimit )
+        {
+          iMvScaleTmpHor = iMvScaleHor + iDMvHorX * (iHalfBW + w) + iDMvVerX * (iHalfBH + h);
+          iMvScaleTmpVer = iMvScaleVer + iDMvHorY * (iHalfBW + w) + iDMvVerY * (iHalfBH + h);
+        }
+        else
+        {
+          iMvScaleTmpHor = iMvScaleHor + iDMvHorX * ( cxWidth >> 1 ) + iDMvVerX * ( cxHeight >> 1 );
+          iMvScaleTmpVer = iMvScaleVer + iDMvHorY * ( cxWidth >> 1 ) + iDMvVerY * ( cxHeight >> 1 );
+        }
         roundAffineMv(iMvScaleTmpHor, iMvScaleTmpVer, shift);
         Mv tmpMv(iMvScaleTmpHor, iMvScaleTmpVer);
         tmpMv.clipToStorageBitDepth();
@@ -799,34 +1094,57 @@ void InterPrediction::xPredAffineBlk( const ComponentID& compID, const Predictio
         {
           m_storedMv[h / AFFINE_MIN_BLOCK_SIZE * MVBUFFER_SIZE + w / AFFINE_MIN_BLOCK_SIZE].set(iMvScaleTmpHor, iMvScaleTmpVer);
           Mv tmpMv(iMvScaleTmpHor, iMvScaleTmpVer);
-          clipMv(tmpMv, Position(pu.Y().x + w, pu.Y().y + h), Size(blockWidth, blockHeight), sps);
+          wrapRef = wrapClipMv( tmpMv, Position( pu.Y().x + w, pu.Y().y + h ), Size( blockWidth, blockHeight ), &sps, pu.cs->pps );
           iMvScaleTmpHor = tmpMv.getHor();
           iMvScaleTmpVer = tmpMv.getVer();
         }
         else
         {
+          wrapRef = false;
           m_storedMv[h / AFFINE_MIN_BLOCK_SIZE * MVBUFFER_SIZE + w / AFFINE_MIN_BLOCK_SIZE].set(iMvScaleTmpHor, iMvScaleTmpVer);
-          iMvScaleTmpHor = std::min<int>(iHorMax, std::max<int>(iHorMin, iMvScaleTmpHor));
-          iMvScaleTmpVer = std::min<int>(iVerMax, std::max<int>(iVerMin, iMvScaleTmpVer));
+#if JVET_Q0487_SCALING_WINDOW_ISSUES
+          if( refPic->isRefScaled( pu.cs->pps ) == false )
+#else
+          if( scalingRatio == SCALE_1X ) 
+#endif
+          {
+            iMvScaleTmpHor = std::min<int>(iHorMax, std::max<int>(iHorMin, iMvScaleTmpHor));
+            iMvScaleTmpVer = std::min<int>(iVerMax, std::max<int>(iVerMin, iMvScaleTmpVer));
+          }
         }
       }
       else
       {
         Mv curMv = m_storedMv[((h << iScaleY) / AFFINE_MIN_BLOCK_SIZE) * MVBUFFER_SIZE + ((w << iScaleX) / AFFINE_MIN_BLOCK_SIZE)] +
-          m_storedMv[((h << iScaleY) / AFFINE_MIN_BLOCK_SIZE + 1)* MVBUFFER_SIZE + ((w << iScaleX) / AFFINE_MIN_BLOCK_SIZE + 1)];
+          m_storedMv[((h << iScaleY) / AFFINE_MIN_BLOCK_SIZE + iScaleY)* MVBUFFER_SIZE + ((w << iScaleX) / AFFINE_MIN_BLOCK_SIZE + iScaleX)];
         roundAffineMv(curMv.hor, curMv.ver, 1);
         if (sps.getWrapAroundEnabledFlag())
         {
-          clipMv(curMv, Position(pu.Y().x + (w << iScaleX), pu.Y().y + (h << iScaleY)), Size(blockWidth << iScaleX, blockHeight << iScaleY), sps);
+          wrapRef = wrapClipMv( curMv, Position( pu.Y().x + ( w << iScaleX ), pu.Y().y + ( h << iScaleY ) ), Size( blockWidth << iScaleX, blockHeight << iScaleY ), &sps, pu.cs->pps );
         }
         else
         {
-          curMv.hor = std::min<int>(iHorMax, std::max<int>(iHorMin, curMv.hor));
-          curMv.ver = std::min<int>(iVerMax, std::max<int>(iVerMin, curMv.ver));
+          wrapRef = false;
+#if JVET_Q0487_SCALING_WINDOW_ISSUES
+          if( refPic->isRefScaled( pu.cs->pps ) == false )
+#else
+          if( scalingRatio == SCALE_1X ) 
+#endif
+          {
+            curMv.hor = std::min<int>(iHorMax, std::max<int>(iHorMin, curMv.hor));
+            curMv.ver = std::min<int>(iVerMax, std::max<int>(iVerMin, curMv.ver));
+          }
         }
         iMvScaleTmpHor = curMv.hor;
         iMvScaleTmpVer = curMv.ver;
       }
+
+      if( xPredInterBlkRPR( scalingRatio, *pu.cs->pps, CompArea( compID, chFmt, pu.blocks[compID].offset( w, h ), Size( blockWidth, blockHeight ) ), refPic, Mv( iMvScaleTmpHor, iMvScaleTmpVer ), dstBuf.buf + w + h * dstBuf.stride, dstBuf.stride, bi, wrapRef, clpRng, 2 ) )
+      {
+        CHECK( enablePROF, "PROF should be disabled with RPR" );
+      }
+      else
+      {
       // get the MV in high precision
       int xFrac, yFrac, xInt, yInt;
 
@@ -851,43 +1169,80 @@ void InterPrediction::xPredAffineBlk( const ComponentID& compID, const Predictio
         yFrac = iMvScaleTmpVer & 31;
       }
 
-      const CPelBuf refBuf = refPic->getRecoBuf( CompArea( compID, chFmt, pu.blocks[compID].offset(xInt + w, yInt + h), pu.blocks[compID] ) );
-      PelBuf &dstBuf = dstPic.bufs[compID];
+      const CPelBuf refBuf = refPic->getRecoBuf( CompArea( compID, chFmt, pu.blocks[compID].offset(xInt + w, yInt + h), pu.blocks[compID] ), wrapRef );
+
+      Pel* ref = (Pel*) refBuf.buf;
+      Pel* dst = dstBuf.buf + w + h * dstBuf.stride;
+
+      int refStride = refBuf.stride;
+      int dstStride = dstBuf.stride;
+
+      int bw = blockWidth;
+      int bh = blockHeight;
+
+      if (enablePROF)
+      {
+        dst = dstExtBuf.bufAt(PROF_BORDER_EXT_W, PROF_BORDER_EXT_H);
+        dstStride = dstExtBuf.stride;
+      }
 
       if ( yFrac == 0 )
       {
-        m_if.filterHor( compID, (Pel*) refBuf.buf, refBuf.stride, dstBuf.buf + w + h * dstBuf.stride, dstBuf.stride, blockWidth, blockHeight, xFrac, !bi, chFmt, clpRng );
+        m_if.filterHor( compID, (Pel*) ref, refStride, dst, dstStride, bw, bh, xFrac, isLast, chFmt, clpRng);
       }
       else if ( xFrac == 0 )
       {
-        m_if.filterVer( compID, (Pel*) refBuf.buf, refBuf.stride, dstBuf.buf + w + h * dstBuf.stride, dstBuf.stride, blockWidth, blockHeight, yFrac, true, !bi, chFmt, clpRng );
+        m_if.filterVer( compID, (Pel*) ref, refStride, dst, dstStride, bw, bh, yFrac, true, isLast, chFmt, clpRng);
       }
       else
       {
-        m_if.filterHor( compID, (Pel*) refBuf.buf - ((vFilterSize>>1) -1)*refBuf.stride, refBuf.stride, tmpBuf.buf, tmpBuf.stride, blockWidth, blockHeight+vFilterSize-1, xFrac, false,      chFmt, clpRng);
+        m_if.filterHor( compID, (Pel*)ref - ((vFilterSize>>1) -1)*refStride, refStride, tmpBuf.buf, tmpBuf.stride, bw, bh+vFilterSize-1, xFrac, false,      chFmt, clpRng);
         JVET_J0090_SET_CACHE_ENABLE( false );
-        m_if.filterVer( compID, tmpBuf.buf + ((vFilterSize>>1) -1)*tmpBuf.stride, tmpBuf.stride, dstBuf.buf + w + h * dstBuf.stride, dstBuf.stride, blockWidth, blockHeight, yFrac, false, !bi, chFmt, clpRng);
+        m_if.filterVer( compID, tmpBuf.buf + ((vFilterSize>>1) -1)*tmpBuf.stride, tmpBuf.stride, dst, dstStride, bw, bh, yFrac, false, isLast, chFmt, clpRng);
         JVET_J0090_SET_CACHE_ENABLE( true );
       }
-    }
-  }
-}
+      if (enablePROF)
+      {
+        const int shift = std::max<int>(2, (IF_INTERNAL_PREC - clpRng.bd));
+        const int xOffset = xFrac >> 3;
+        const int yOffset = yFrac >> 3;
 
-int getMSB( unsigned x )
-{
-  int msb = 0, bits = ( sizeof(int) << 3 ), y = 1;
-  while( x > 1u )
-  {
-    bits >>= 1;
-    y      = x >> bits;
-    if( y )
-    {
-      x    = y;
-      msb += bits;
+        const int refOffset = (blockHeight + 1) * refStride;
+        const int dstOffset = (blockHeight + 1)* dstStride;
+
+        const Pel* refPel = ref - (1 - yOffset) * refStride + xOffset - 1;
+        Pel* dstPel = dst - dstStride - 1;
+        for (int pw = 0; pw < blockWidth + 2; pw++)
+        {
+          dstPel[pw] = leftShift_round(refPel[pw], shift) - (Pel)IF_INTERNAL_OFFS;
+          dstPel[pw+dstOffset] = leftShift_round(refPel[pw+refOffset], shift) - (Pel)IF_INTERNAL_OFFS;
+        }
+
+        refPel = ref + yOffset * refBuf.stride + xOffset;
+        dstPel = dst;
+        for (int ph = 0; ph < blockHeight; ph++, refPel += refStride, dstPel += dstStride)
+        {
+          dstPel[-1] = leftShift_round(refPel[-1], shift) - (Pel)IF_INTERNAL_OFFS;
+          dstPel[blockWidth] = leftShift_round(refPel[blockWidth], shift) - (Pel)IF_INTERNAL_OFFS;
+        }
+
+        PelBuf gradXBuf = gradXExt.subBuf(0, 0, blockWidth + 2, blockHeight + 2);
+        PelBuf gradYBuf = gradYExt.subBuf(0, 0, blockWidth + 2, blockHeight + 2);
+        g_pelBufOP.profGradFilter(dstExtBuf.buf, dstExtBuf.stride, blockWidth + 2, blockHeight + 2, gradXBuf.stride, gradXBuf.buf, gradYBuf.buf, clpRng.bd);
+
+        const int shiftNum = std::max<int>(2, (IF_INTERNAL_PREC - clpRng.bd));
+        const Pel offset = (1 << (shiftNum - 1)) + IF_INTERNAL_OFFS;
+        Pel* src = dstExtBuf.bufAt(PROF_BORDER_EXT_W, PROF_BORDER_EXT_H);
+        Pel* gX = gradXBuf.bufAt(PROF_BORDER_EXT_W, PROF_BORDER_EXT_H);
+        Pel* gY = gradYBuf.bufAt(PROF_BORDER_EXT_W, PROF_BORDER_EXT_H);
+
+        Pel * dstY = dstBuf.bufAt(w, h);
+
+        g_pelBufOP.applyPROF(dstY, dstBuf.stride, src, dstExtBuf.stride, blockWidth, blockHeight, gX, gY, gradXBuf.stride, dMvScaleHor, dMvScaleVer, blockWidth, bi, shiftNum, offset, clpRng);
+      }
+      }
     }
   }
-  msb += y;
-  return msb;
 }
 
 void InterPrediction::applyBiOptFlow(const PredictionUnit &pu, const CPelUnitBuf &yuvSrc0, const CPelUnitBuf &yuvSrc1, const int &refIdx0, const int &refIdx1, PelUnitBuf &yuvDst, const BitDepths &clipBitDepths)
@@ -938,15 +1293,7 @@ void InterPrediction::applyBiOptFlow(const PredictionUnit &pu, const CPelUnitBuf
   const int   bitDepth = clipBitDepths.recon[toChannelType(COMPONENT_Y)];
   const int   shiftNum = IF_INTERNAL_PREC + 1 - bitDepth;
   const int   offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;
-  const int   limit = (bitDepth>12)? 2 : ((int)1 << (4 + IF_INTERNAL_PREC - bitDepth - 5));
-
-  int*     dotProductTemp1 = m_dotProduct1;
-  int*     dotProductTemp2 = m_dotProduct2;
-  int*     dotProductTemp3 = m_dotProduct3;
-  int*     dotProductTemp5 = m_dotProduct5;
-  int*     dotProductTemp6 = m_dotProduct6;
-
-  xCalcBIOPar(srcY0Temp, srcY1Temp, gradX0, gradX1, gradY0, gradY1, dotProductTemp1, dotProductTemp2, dotProductTemp3, dotProductTemp5, dotProductTemp6, src0Stride, src1Stride, widthG, widthG, heightG, bitDepth);
+  const int   limit = ( 1 << 4 ) - 1;
 
   int xUnit = (width >> 2);
   int yUnit = (height >> 2);
@@ -959,42 +1306,27 @@ void InterPrediction::applyBiOptFlow(const PredictionUnit &pu, const CPelUnitBuf
   {
     for (int xu = 0; xu < xUnit; xu++)
     {
-      if (m_bioPredSubBlkDist[yu*xUnit + xu] < m_bioSubBlkDistThres)
-      {
-        srcY0Temp = srcY0 + (stridePredMC + 1) + ((yu*src0Stride + xu) << 2);
-        srcY1Temp = srcY1 + (stridePredMC + 1) + ((yu*src1Stride + xu) << 2);
-        dstY0 = dstY + ((yu*dstStride + xu) << 2);
-        PelBuf dstPelBuf(dstY0, dstStride, Size(4, 4));
-        dstPelBuf.addAvg(CPelBuf(srcY0Temp, src0Stride, Size(4, 4)), CPelBuf(srcY1Temp, src1Stride, Size(4, 4)), clpRng);
-        continue;
-      }
-
-      int     sGxdI = 0, sGydI = 0, sGxGy = 0, sGx2 = 0, sGy2 = 0;
-      int     tmpx = 0, tmpy = 0;
-
-      dotProductTemp1 = m_dotProduct1 + offsetPos + ((yu*widthG + xu) << 2);
-      dotProductTemp2 = m_dotProduct2 + offsetPos + ((yu*widthG + xu) << 2);
-      dotProductTemp3 = m_dotProduct3 + offsetPos + ((yu*widthG + xu) << 2);
-      dotProductTemp5 = m_dotProduct5 + offsetPos + ((yu*widthG + xu) << 2);
-      dotProductTemp6 = m_dotProduct6 + offsetPos + ((yu*widthG + xu) << 2);
-
-      xCalcBlkGradient(xu << 2, yu << 2, dotProductTemp1, dotProductTemp2, dotProductTemp3, dotProductTemp5, dotProductTemp6, sGx2, sGy2, sGxGy, sGxdI, sGydI, widthG, heightG, (1 << 2));
-
-      if (sGx2 > 0)
-      {
-        tmpx = rightShiftMSB(sGxdI << 3, sGx2);
-        tmpx = Clip3(-limit, limit, tmpx);
-      }
-      if (sGy2 > 0)
-      {
-        int     mainsGxGy = sGxGy >> 12;
-        int     secsGxGy = sGxGy & ((1 << 12) - 1);
-        int     tmpData = tmpx * mainsGxGy;
-        tmpData = ((tmpData << 12) + tmpx*secsGxGy) >> 1;
-        tmpy = rightShiftMSB(((sGydI << 3) - tmpData), sGy2);
-        tmpy = Clip3(-limit, limit, tmpy);
-      }
-
+      int tmpx = 0, tmpy = 0;
+      int sumAbsGX = 0, sumAbsGY = 0, sumDIX = 0, sumDIY = 0;
+      int sumSignGY_GX = 0;
+
+      Pel* pGradX0Tmp = m_gradX0 + (xu << 2) + (yu << 2) * widthG;
+      Pel* pGradX1Tmp = m_gradX1 + (xu << 2) + (yu << 2) * widthG;
+      Pel* pGradY0Tmp = m_gradY0 + (xu << 2) + (yu << 2) * widthG;
+      Pel* pGradY1Tmp = m_gradY1 + (xu << 2) + (yu << 2) * widthG;
+      const Pel* SrcY1Tmp = srcY1 + (xu << 2) + (yu << 2) * src1Stride;
+      const Pel* SrcY0Tmp = srcY0 + (xu << 2) + (yu << 2) * src0Stride;
+
+      g_pelBufOP.calcBIOSums(SrcY0Tmp, SrcY1Tmp, pGradX0Tmp, pGradX1Tmp, pGradY0Tmp, pGradY1Tmp, xu, yu, src0Stride, src1Stride, widthG, bitDepth, &sumAbsGX, &sumAbsGY, &sumDIX, &sumDIY, &sumSignGY_GX);
+      tmpx = (sumAbsGX == 0 ? 0 : rightShiftMSB(sumDIX << 2, sumAbsGX));
+      tmpx = Clip3(-limit, limit, tmpx);
+
+      int     mainsGxGy = sumSignGY_GX >> 12;
+      int     secsGxGy = sumSignGY_GX & ((1 << 12) - 1);
+      int     tmpData = tmpx * mainsGxGy;
+      tmpData = ((tmpData << 12) + tmpx*secsGxGy) >> 1;
+      tmpy = (sumAbsGY == 0 ? 0 : rightShiftMSB(((sumDIY << 2) - tmpData), sumAbsGY));
+      tmpy = Clip3(-limit, limit, tmpy);
       srcY0Temp = srcY0 + (stridePredMC + 1) + ((yu*src0Stride + xu) << 2);
       srcY1Temp = srcY1 + (stridePredMC + 1) + ((yu*src0Stride + xu) << 2);
       gradX0 = m_gradX0 + offsetPos + ((yu*widthG + xu) << 2);
@@ -1009,39 +1341,6 @@ void InterPrediction::applyBiOptFlow(const PredictionUnit &pu, const CPelUnitBuf
 }
 
 
-bool InterPrediction::xCalcBiPredSubBlkDist(const PredictionUnit &pu, const Pel* pYuvSrc0, const int src0Stride, const Pel* pYuvSrc1, const int src1Stride, const BitDepths &clipBitDepths)
-{
-  const int     width = pu.lwidth();
-  const int     height = pu.lheight();
-  const int     clipbd = clipBitDepths.recon[toChannelType(COMPONENT_Y)];
-  const uint32_t distortionShift = DISTORTION_PRECISION_ADJUSTMENT(clipbd);
-  const int     shift = std::max<int>(2, (IF_INTERNAL_PREC - clipbd));
-  const int     xUnit = (width >> 2);
-  const int     yUnit = (height >> 2);
-
-  m_bioDistThres = (shift <= 5) ? (((32 << (clipbd - 8))*width*height) >> (5 - shift)) : (((32 << (clipbd - 8))*width*height) << (shift - 5));
-  m_bioSubBlkDistThres = (shift <= 5) ? (((64 << (clipbd - 8)) << 4) >> (5 - shift)) : (((64 << (clipbd - 8)) << 4) << (shift - 5));
-
-  m_bioDistThres >>= distortionShift;
-  m_bioSubBlkDistThres >>= distortionShift;
-
-  DistParam cDistParam;
-  Distortion dist = 0;
-  for (int yu = 0, blkIdx = 0; yu < yUnit; yu++)
-  {
-    for (int xu = 0; xu < xUnit; xu++, blkIdx++)
-    {
-      const Pel* pPred0 = pYuvSrc0 + ((yu*src0Stride + xu) << 2);
-      const Pel* pPred1 = pYuvSrc1 + ((yu*src1Stride + xu) << 2);
-
-      m_pcRdCost->setDistParam(cDistParam, pPred0, pPred1, src0Stride, src1Stride, clipbd, COMPONENT_Y, (1 << 2), (1 << 2), 0, 1, false, true);
-      m_bioPredSubBlkDist[blkIdx] = cDistParam.distFunc(cDistParam);
-      dist += m_bioPredSubBlkDist[blkIdx];
-    }
-  }
-
-  return (dist >= m_bioDistThres);
-}
 
 void InterPrediction::xAddBIOAvg4(const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, const Pel *gradX0, const Pel *gradX1, const Pel *gradY0, const Pel*gradY1, int gradStride, int width, int height, int tmpx, int tmpy, int shift, int offset, const ClpRng& clpRng)
 {
@@ -1063,17 +1362,21 @@ void InterPrediction::xCalcBlkGradient(int sx, int sy, int    *arraysGx2, int
   g_pelBufOP.calcBlkGradient(sx, sy, arraysGx2, arraysGxGy, arraysGxdI, arraysGy2, arraysGydI, sGx2, sGy2, sGxGy, sGxdI, sGydI, width, height, unitSize);
 }
 
-void InterPrediction::xWeightedAverage(const PredictionUnit& pu, const CPelUnitBuf& pcYuvSrc0, const CPelUnitBuf& pcYuvSrc1, PelUnitBuf& pcYuvDst, const BitDepths& clipBitDepths, const ClpRngs& clpRngs, const bool& bioApplied )
+void InterPrediction::xWeightedAverage(const PredictionUnit& pu, const CPelUnitBuf& pcYuvSrc0, const CPelUnitBuf& pcYuvSrc1, PelUnitBuf& pcYuvDst, const BitDepths& clipBitDepths, const ClpRngs& clpRngs, const bool& bioApplied, bool lumaOnly, bool chromaOnly, PelUnitBuf* yuvDstTmp /*= NULL*/)
 {
+  CHECK( (chromaOnly && lumaOnly), "should not happen" );
+
   const int iRefIdx0 = pu.refIdx[0];
   const int iRefIdx1 = pu.refIdx[1];
 
   if( iRefIdx0 >= 0 && iRefIdx1 >= 0 )
   {
-    if( pu.cu->GBiIdx != GBI_DEFAULT )
+    if( pu.cu->BcwIdx != BCW_DEFAULT && (yuvDstTmp || !pu.ciipFlag) )
     {
-      CHECK(bioApplied, "GBi is disallowed with BIO");
-      pcYuvDst.addWeightedAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, pu.cu->GBiIdx);
+      CHECK(bioApplied, "Bcw is disallowed with BIO");
+      pcYuvDst.addWeightedAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, pu.cu->BcwIdx, chromaOnly, lumaOnly);
+      if (yuvDstTmp)
+        yuvDstTmp->addAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, chromaOnly, lumaOnly);
       return;
     }
     if (bioApplied)
@@ -1083,26 +1386,68 @@ void InterPrediction::xWeightedAverage(const PredictionUnit& pu, const CPelUnitB
       const Pel* pSrcY0 = m_filteredBlockTmp[2][COMPONENT_Y] + 2 * src0Stride + 2;
       const Pel* pSrcY1 = m_filteredBlockTmp[3][COMPONENT_Y] + 2 * src1Stride + 2;
 
-      bool bioEnabled = xCalcBiPredSubBlkDist(pu, pSrcY0, src0Stride, pSrcY1, src1Stride, clipBitDepths);
+      bool bioEnabled = true;
       if (bioEnabled)
       {
         applyBiOptFlow(pu, pcYuvSrc0, pcYuvSrc1, iRefIdx0, iRefIdx1, pcYuvDst, clipBitDepths);
+        if (yuvDstTmp)
+          yuvDstTmp->bufs[0].addAvg(CPelBuf(pSrcY0, src0Stride, pu.lumaSize()), CPelBuf(pSrcY1, src1Stride, pu.lumaSize()), clpRngs.comp[0]);
       }
       else
       {
         pcYuvDst.bufs[0].addAvg(CPelBuf(pSrcY0, src0Stride, pu.lumaSize()), CPelBuf(pSrcY1, src1Stride, pu.lumaSize()), clpRngs.comp[0]);
+        if (yuvDstTmp)
+          yuvDstTmp->bufs[0].copyFrom(pcYuvDst.bufs[0]);
       }
     }
-    pcYuvDst.addAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, bioApplied);
-  }
-  else if( iRefIdx0 >= 0 && iRefIdx1 < 0 )
-  {
+    if (pu.cs->pps->getWPBiPred())
+    {
+      const int iRefIdx0 = pu.refIdx[0];
+      const int iRefIdx1 = pu.refIdx[1];
+      WPScalingParam  *pwp0;
+      WPScalingParam  *pwp1;
+      getWpScaling(pu.cu->slice, iRefIdx0, iRefIdx1, pwp0, pwp1);
+      if (!bioApplied)
+      {
+        if (!chromaOnly)
+        addWeightBiComponent(pcYuvSrc0, pcYuvSrc1, pu.cu->slice->clpRngs(), pwp0, pwp1, pcYuvDst, true, COMPONENT_Y);
+      }
+      if (!lumaOnly)
+      {
+        addWeightBiComponent(pcYuvSrc0, pcYuvSrc1, pu.cu->slice->clpRngs(), pwp0, pwp1, pcYuvDst, true, COMPONENT_Cb);
+        addWeightBiComponent(pcYuvSrc0, pcYuvSrc1, pu.cu->slice->clpRngs(), pwp0, pwp1, pcYuvDst, true, COMPONENT_Cr);
+      }
+    }
+    else
+    {
+      if (!bioApplied && (lumaOnly || chromaOnly))
+      {
+        pcYuvDst.addAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, chromaOnly, lumaOnly);
+      }
+      else
+      pcYuvDst.addAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, bioApplied);
+    }
+    if (yuvDstTmp)
+    {
+      if (bioApplied)
+      {
+        yuvDstTmp->bufs[1].copyFrom(pcYuvDst.bufs[1]);
+        yuvDstTmp->bufs[2].copyFrom(pcYuvDst.bufs[2]);
+      }
+      else
+        yuvDstTmp->copyFrom(pcYuvDst, lumaOnly, chromaOnly);
+    }
+  }
+  else if( iRefIdx0 >= 0 && iRefIdx1 < 0 )
+  {
     if( pu.cu->triangle )
     {
       pcYuvDst.copyFrom( pcYuvSrc0 );
     }
     else
-    pcYuvDst.copyClip( pcYuvSrc0, clpRngs );
+      pcYuvDst.copyClip( pcYuvSrc0, clpRngs, lumaOnly, chromaOnly );
+    if (yuvDstTmp)
+      yuvDstTmp->copyFrom( pcYuvDst, lumaOnly, chromaOnly );
   }
   else if( iRefIdx0 < 0 && iRefIdx1 >= 0 )
   {
@@ -1111,29 +1456,41 @@ void InterPrediction::xWeightedAverage(const PredictionUnit& pu, const CPelUnitB
       pcYuvDst.copyFrom( pcYuvSrc1 );
     }
     else
-    pcYuvDst.copyClip( pcYuvSrc1, clpRngs );
+      pcYuvDst.copyClip( pcYuvSrc1, clpRngs, lumaOnly, chromaOnly );
+    if (yuvDstTmp)
+      yuvDstTmp->copyFrom(pcYuvDst, lumaOnly, chromaOnly);
   }
 }
 
+
 void InterPrediction::motionCompensation( PredictionUnit &pu, PelUnitBuf &predBuf, const RefPicList &eRefPicList
   , const bool luma, const bool chroma
+  , PelUnitBuf* predBufWOBIO /*= NULL*/
 )
 {
-  // dual tree handling for IBC as the only ref
-  if ((!luma || !chroma) && eRefPicList == REF_PIC_LIST_0)
+  CHECK(predBufWOBIO && pu.ciipFlag, "the case should not happen!");
+
+  if (!pu.cs->pcv->isEncoder)
   {
-    if (!luma && chroma)
+    if (CU::isIBC(*pu.cu))
     {
-      xChromaMC(pu, predBuf);
+      CHECK(!luma, "IBC only for Chroma is not allowed.");
+      xIntraBlockCopy(pu, predBuf, COMPONENT_Y);
+      if (chroma)
+      {
+        xIntraBlockCopy(pu, predBuf, COMPONENT_Cb);
+        xIntraBlockCopy(pu, predBuf, COMPONENT_Cr);
+      }
       return;
     }
-    else // (luma && !chroma)
-    {
+  }
+  // dual tree handling for IBC as the only ref
+  if ((!luma || !chroma) && eRefPicList == REF_PIC_LIST_0)
+  {
       xPredInterUni(pu, eRefPicList, predBuf, false
         , false
         , luma, chroma);
       return;
-    }
   }
   // else, go with regular MC below
         CodingStructure &cs = *pu.cs;
@@ -1142,38 +1499,110 @@ void InterPrediction::motionCompensation( PredictionUnit &pu, PelUnitBuf &predBu
 
   if( eRefPicList != REF_PIC_LIST_X )
   {
-    if( ( ( sliceType == P_SLICE && pps.getUseWP() ) || ( sliceType == B_SLICE && pps.getWPBiPred() ) ) )
+    CHECK(predBufWOBIO != NULL, "the case should not happen!");
+    if ((CU::isIBC(*pu.cu) == false) && ((sliceType == P_SLICE && pps.getUseWP()) || (sliceType == B_SLICE && pps.getWPBiPred())))
     {
       xPredInterUni         ( pu,          eRefPicList, predBuf, true
         , false
-        , true, true
+        , luma, chroma
+      );
+      xWeightedPredictionUni( pu, predBuf, eRefPicList, predBuf, -1, m_maxCompIDToPred 
+        , (luma && !chroma), (!luma && chroma)
       );
-      xWeightedPredictionUni( pu, predBuf, eRefPicList, predBuf, -1, m_maxCompIDToPred );
     }
     else
     {
       xPredInterUni( pu, eRefPicList, predBuf, false
         , false
-        , true, true
+        , luma, chroma
       );
     }
   }
   else
   {
+
+    CHECK( !pu.cu->affine && pu.refIdx[0] >= 0 && pu.refIdx[1] >= 0 && ( pu.lwidth() + pu.lheight() == 12 ), "invalid 4x8/8x4 bi-predicted blocks" );
+    WPScalingParam *wp0;
+    WPScalingParam *wp1;
+    int refIdx0 = pu.refIdx[REF_PIC_LIST_0];
+    int refIdx1 = pu.refIdx[REF_PIC_LIST_1];
+    pu.cs->slice->getWpScaling(REF_PIC_LIST_0, refIdx0, wp0);
+    pu.cs->slice->getWpScaling(REF_PIC_LIST_1, refIdx1, wp1);
+    bool bioApplied = false;
+    const Slice &slice = *pu.cs->slice;
+    if (pu.cs->sps->getBDOFEnabledFlag() && (!pu.cs->picHeader->getDisBdofFlag()))
+    {
+
+      if (pu.cu->affine || m_subPuMC)
+      {
+        bioApplied = false;
+      }
+      else
+      {
+        const bool biocheck0 = !((wp0[COMPONENT_Y].bPresentFlag || wp1[COMPONENT_Y].bPresentFlag) && slice.getSliceType() == B_SLICE);
+        const bool biocheck1 = !(pps.getUseWP() && slice.getSliceType() == P_SLICE);
+        if (biocheck0
+          && biocheck1
+          && PU::isBiPredFromDifferentDirEqDistPoc(pu)
+          && (pu.Y().height >= 8)
+          && (pu.Y().width >= 8)
+          && ((pu.Y().height * pu.Y().width) >= 128)
+          )
+        {
+          bioApplied = true;
+        }
+      }
+
+      if (bioApplied && pu.ciipFlag)
+      {
+        bioApplied = false;
+      }
+
+      if (bioApplied && pu.cu->smvdMode)
+      {
+        bioApplied = false;
+      }
+      if (pu.cu->cs->sps->getUseBcw() && bioApplied && pu.cu->BcwIdx != BCW_DEFAULT)
+      {
+        bioApplied = false;
+      }
+      if (pu.mmvdEncOptMode == 2 && pu.mmvdMergeFlag)
+      {
+        bioApplied = false;
+      }
+    }
+
+#if JVET_Q0487_SCALING_WINDOW_ISSUES
+    bool refIsScaled = ( refIdx0 < 0 ? false : pu.cu->slice->getRefPic( REF_PIC_LIST_0, refIdx0 )->isRefScaled( pu.cs->pps ) ) || 
+                       ( refIdx1 < 0 ? false : pu.cu->slice->getRefPic( REF_PIC_LIST_1, refIdx1 )->isRefScaled( pu.cs->pps ) );
+    bioApplied = refIsScaled ? false : bioApplied;
+#else
+    bioApplied = ( ( refIdx0 < 0 ? true : pu.cu->slice->getScalingRatio( REF_PIC_LIST_0, refIdx0 ) == SCALE_1X ) && ( refIdx1 < 0 ? true : pu.cu->slice->getScalingRatio( REF_PIC_LIST_1, refIdx1 ) == SCALE_1X ) ) ? bioApplied : false;
+#endif
+    bool dmvrApplied = false;
+    dmvrApplied = (pu.mvRefine) && PU::checkDMVRCondition(pu);
+    if ((pu.lumaSize().width > MAX_BDOF_APPLICATION_REGION || pu.lumaSize().height > MAX_BDOF_APPLICATION_REGION) && pu.mergeType != MRG_TYPE_SUBPU_ATMVP && (bioApplied && !dmvrApplied))
+    {
+      xSubPuBio(pu, predBuf, eRefPicList, predBufWOBIO);
+    }
+    else
     if (pu.mergeType != MRG_TYPE_DEFAULT_N && pu.mergeType != MRG_TYPE_IBC)
     {
-      xSubPuMC( pu, predBuf, eRefPicList );
+      CHECK(predBufWOBIO != NULL, "the case should not happen!");
+      xSubPuMC( pu, predBuf, eRefPicList, luma, chroma );
     }
     else if( xCheckIdenticalMotion( pu ) )
     {
       xPredInterUni( pu, REF_PIC_LIST_0, predBuf, false
         , false
-        , true, true
+        , luma, chroma
       );
+      if (predBufWOBIO)
+        predBufWOBIO->copyFrom(predBuf, (luma && !chroma), (chroma && !luma));
     }
     else
     {
-      xPredInterBi( pu, predBuf );
+      xPredInterBi(pu, predBuf, luma, chroma, predBufWOBIO);
     }
   }
   return;
@@ -1206,20 +1635,7 @@ void InterPrediction::motionCompensation( PredictionUnit &pu, const RefPicList &
 
 int InterPrediction::rightShiftMSB(int numer, int denom)
 {
-  int     d;
-  int msbIdx = 0;
-  for (msbIdx = 0; msbIdx<32; msbIdx++)
-  {
-    if (denom < ((int)1 << msbIdx))
-    {
-      break;
-    }
-  }
-
-  int shiftIdx = msbIdx - 1;
-  d = (numer >> shiftIdx);
-
-  return d;
+  return numer >> floorLog2(denom);
 }
 
 void InterPrediction::motionCompensation4Triangle( CodingUnit &cu, MergeCtx &triangleMrgCtx, const bool splitDir, const uint8_t candIdx0, const uint8_t candIdx1 )
@@ -1259,125 +1675,83 @@ void InterPrediction::weightedTriangleBlk( PredictionUnit &pu, const bool splitD
 {
   if( channel == CHANNEL_TYPE_LUMA )
   {
-    xWeightedTriangleBlk( pu, pu.lumaSize().width, pu.lumaSize().height, COMPONENT_Y, splitDir, predDst, predSrc0, predSrc1 );
+    m_if.weightedTriangleBlk( pu, pu.lumaSize().width, pu.lumaSize().height, COMPONENT_Y, splitDir, predDst, predSrc0, predSrc1 );
   }
   else if( channel == CHANNEL_TYPE_CHROMA )
   {
-    xWeightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cb, splitDir, predDst, predSrc0, predSrc1 );
-    xWeightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cr, splitDir, predDst, predSrc0, predSrc1 );
+    m_if.weightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cb, splitDir, predDst, predSrc0, predSrc1 );
+    m_if.weightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cr, splitDir, predDst, predSrc0, predSrc1 );
   }
   else
   {
-    xWeightedTriangleBlk( pu, pu.lumaSize().width,   pu.lumaSize().height,   COMPONENT_Y,  splitDir, predDst, predSrc0, predSrc1 );
-    xWeightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cb, splitDir, predDst, predSrc0, predSrc1 );
-    xWeightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cr, splitDir, predDst, predSrc0, predSrc1 );
+    m_if.weightedTriangleBlk( pu, pu.lumaSize().width,   pu.lumaSize().height,   COMPONENT_Y,  splitDir, predDst, predSrc0, predSrc1 );
+    m_if.weightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cb, splitDir, predDst, predSrc0, predSrc1 );
+    m_if.weightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cr, splitDir, predDst, predSrc0, predSrc1 );
   }
 }
 
-void InterPrediction::xWeightedTriangleBlk( const PredictionUnit &pu, const uint32_t width, const uint32_t height, const ComponentID compIdx, const bool splitDir, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1 )
-{
-  Pel*    dst        = predDst .get(compIdx).buf;
-  Pel*    src0       = predSrc0.get(compIdx).buf;
-  Pel*    src1       = predSrc1.get(compIdx).buf;
-  int32_t strideDst  = predDst .get(compIdx).stride  - width;
-  int32_t strideSrc0 = predSrc0.get(compIdx).stride  - width;
-  int32_t strideSrc1 = predSrc1.get(compIdx).stride  - width;
-
-  const char    log2WeightBase    = 3;
-  const ClpRng  clipRng           = pu.cu->slice->clpRngs().comp[compIdx];
-  const int32_t clipbd            = clipRng.bd;
-  const int32_t shiftDefault      = std::max<int>(2, (IF_INTERNAL_PREC - clipbd));
-  const int32_t offsetDefault     = (1<<(shiftDefault-1)) + IF_INTERNAL_OFFS;
-  const int32_t shiftWeighted     = std::max<int>(2, (IF_INTERNAL_PREC - clipbd)) + log2WeightBase;
-  const int32_t offsetWeighted    = (1 << (shiftWeighted - 1)) + (IF_INTERNAL_OFFS << log2WeightBase);
-
-  const int32_t ratioWH           = (width > height) ? (width / height) : 1;
-  const int32_t ratioHW           = (width > height) ? 1 : (height / width);
-  const bool    longWeight        = (compIdx == COMPONENT_Y) || ( predDst.chromaFormat == CHROMA_444 );
-  const int32_t weightedLength    = longWeight ? 7 : 3;
-        int32_t weightedStartPos  = ( splitDir == 0 ) ? ( 0 - (weightedLength >> 1) * ratioWH ) : ( width - ((weightedLength + 1) >> 1) * ratioWH );
-        int32_t weightedEndPos    = weightedStartPos + weightedLength * ratioWH - 1;
-        int32_t weightedPosoffset =( splitDir == 0 ) ? ratioWH : -ratioWH;
-
-        Pel     tmpPelWeighted;
-        int32_t weightIdx;
-        int32_t x, y, tmpX, tmpY, tmpWeightedStart, tmpWeightedEnd;
-
-  for( y = 0; y < height; y+= ratioHW )
-  {
-    for( tmpY = ratioHW; tmpY > 0; tmpY-- )
-    {
-      for( x = 0; x < weightedStartPos; x++ )
-      {
-        *dst++ = ClipPel( rightShift( (splitDir == 0 ? *src1 : *src0) + offsetDefault, shiftDefault), clipRng );
-        src0++;
-        src1++;
-      }
-
-      tmpWeightedStart = std::max((int32_t)0, weightedStartPos);
-      tmpWeightedEnd   = std::min(weightedEndPos, (int32_t)(width - 1));
-      weightIdx        = 1;
-      if( weightedStartPos < 0 )
-      {
-        weightIdx     += abs(weightedStartPos) / ratioWH;
-      }
-      for( x = tmpWeightedStart; x <= tmpWeightedEnd; x+= ratioWH )
-      {
-        for( tmpX = ratioWH; tmpX > 0; tmpX-- )
-        {
-          tmpPelWeighted = Clip3( 1, 7, longWeight ? weightIdx : (weightIdx * 2));
-          tmpPelWeighted = splitDir ? ( 8 - tmpPelWeighted ) : tmpPelWeighted;
-          *dst++         = ClipPel( rightShift( (tmpPelWeighted*(*src0++) + ((8 - tmpPelWeighted) * (*src1++)) + offsetWeighted), shiftWeighted ), clipRng );
-        }
-        weightIdx ++;
-      }
-
-      for( x = weightedEndPos + 1; x < width; x++ )
-      {
-        *dst++ = ClipPel( rightShift( (splitDir == 0 ? *src0 : *src1) + offsetDefault, shiftDefault ), clipRng );
-        src0++;
-        src1++;
-      }
-
-      dst  += strideDst;
-      src0 += strideSrc0;
-      src1 += strideSrc1;
-    }
-    weightedStartPos += weightedPosoffset;
-    weightedEndPos   += weightedPosoffset;
-  }
-}
 
-void InterPrediction::xPrefetchPad(PredictionUnit& pu, PelUnitBuf &pcPad, RefPicList refId)
+void InterPrediction::xPrefetch(PredictionUnit& pu, PelUnitBuf &pcPad, RefPicList refId, bool forLuma)
 {
   int offset, width, height;
-  int padsize;
   Mv cMv;
-  const Picture* refPic = pu.cu->slice->getRefPic(refId, pu.refIdx[refId]);
+  const Picture* refPic = pu.cu->slice->getRefPic( refId, pu.refIdx[refId] )->unscaledPic;
   int mvShift = (MV_FRACTIONAL_BITS_INTERNAL);
-  for (int compID = 0; compID < MAX_NUM_COMPONENT; compID++)
+
+  int start = 0;
+  int end = MAX_NUM_COMPONENT;
+
+  start = forLuma ? 0 : 1;
+  end = forLuma ? 1 : MAX_NUM_COMPONENT;
+
+  for (int compID = start; compID < end; compID++)
   {
     cMv = Mv(pu.mv[refId].getHor(), pu.mv[refId].getVer());
-    pcPad.bufs[compID].stride = (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION) + NTAPS_LUMA);
+    pcPad.bufs[compID].stride = (pcPad.bufs[compID].width + (2 * DMVR_NUM_ITERATION) + NTAPS_LUMA);
     int filtersize = (compID == (COMPONENT_Y)) ? NTAPS_LUMA : NTAPS_CHROMA;
     width = pcPad.bufs[compID].width;
     height = pcPad.bufs[compID].height;
     offset = (DMVR_NUM_ITERATION) * (pcPad.bufs[compID].stride + 1);
-    padsize = (DMVR_NUM_ITERATION) >> getComponentScaleX((ComponentID)compID, pu.chromaFormat);
-    int mvshiftTemp = mvShift + getComponentScaleX((ComponentID)compID, pu.chromaFormat);
+    int mvshiftTempHor = mvShift + getComponentScaleX((ComponentID)compID, pu.chromaFormat);
+    int mvshiftTempVer = mvShift + getComponentScaleY((ComponentID)compID, pu.chromaFormat);
     width += (filtersize - 1);
     height += (filtersize - 1);
-    cMv += Mv(-(((filtersize >> 1) - 1) << mvshiftTemp),
-      -(((filtersize >> 1) - 1) << mvshiftTemp));
-    clipMv(cMv, pu.lumaPos(), pu.lumaSize(),*pu.cs->sps);
+    cMv += Mv(-(((filtersize >> 1) - 1) << mvshiftTempHor),
+      -(((filtersize >> 1) - 1) << mvshiftTempVer));
+    bool wrapRef = false;
+    if( pu.cs->sps->getWrapAroundEnabledFlag() )
+    {
+      wrapRef = wrapClipMv( cMv, pu.blocks[0].pos(), pu.blocks[0].size(), pu.cs->sps, pu.cs->pps );
+    }
+    else
+    {
+      clipMv( cMv, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps, *pu.cs->pps );
+    }
     /* Pre-fetch similar to HEVC*/
     {
       CPelBuf refBuf;
-      Position Rec_offset = pu.blocks[compID].pos().offset(cMv.getHor() >> mvshiftTemp, cMv.getVer() >> mvshiftTemp);
-      refBuf = refPic->getRecoBuf(CompArea((ComponentID)compID, pu.chromaFormat, Rec_offset, pu.blocks[compID].size()));
+      Position Rec_offset = pu.blocks[compID].pos().offset(cMv.getHor() >> mvshiftTempHor, cMv.getVer() >> mvshiftTempVer);
+      refBuf = refPic->getRecoBuf(CompArea((ComponentID)compID, pu.chromaFormat, Rec_offset, pu.blocks[compID].size()), wrapRef);
       PelBuf &dstBuf = pcPad.bufs[compID];
       g_pelBufOP.copyBuffer((Pel *)refBuf.buf, refBuf.stride, ((Pel *)dstBuf.buf) + offset, dstBuf.stride, width, height);
     }
+  }
+}
+void InterPrediction::xPad(PredictionUnit& pu, PelUnitBuf &pcPad, RefPicList refId)
+{
+  int offset = 0, width, height;
+  int padsize;
+  Mv cMv;
+  for (int compID = 0; compID < MAX_NUM_COMPONENT; compID++)
+  {
+    int filtersize = (compID == (COMPONENT_Y)) ? NTAPS_LUMA : NTAPS_CHROMA;
+    width = pcPad.bufs[compID].width;
+    height = pcPad.bufs[compID].height;
+    offset = (DMVR_NUM_ITERATION) * (pcPad.bufs[compID].stride + 1);
+    /*using the larger padsize for 422*/
+    padsize = (DMVR_NUM_ITERATION) >> getComponentScaleY((ComponentID)compID, pu.chromaFormat);
+    width += (filtersize - 1);
+    height += (filtersize - 1);
     /*padding on all side of size DMVR_PAD_LENGTH*/
     {
       g_pelBufOP.padding(pcPad.bufs[compID].buf + offset, pcPad.bufs[compID].stride, width, height, padsize);
@@ -1500,6 +1874,7 @@ void InterPrediction::xBIPMVRefine(int bd, Pel *pRefL0, Pel *pRefL1, uint64_t& m
 
 void InterPrediction::xFinalPaddedMCForDMVR(PredictionUnit& pu, PelUnitBuf &pcYuvSrc0, PelUnitBuf &pcYuvSrc1, PelUnitBuf &pcPad0, PelUnitBuf &pcPad1, const bool bioApplied
   , const Mv mergeMV[NUM_REF_PIC_LIST_01]
+  , bool blockMoved
 )
 {
   int offset, deltaIntMvX, deltaIntMvY;
@@ -1514,9 +1889,12 @@ void InterPrediction::xFinalPaddedMCForDMVR(PredictionUnit& pu, PelUnitBuf &pcYu
     RefPicList refId = (RefPicList)k;
     Mv cMv = pu.mv[refId];
     m_iRefListIdx = refId;
-    const Picture* refPic = pu.cu->slice->getRefPic(refId, pu.refIdx[refId]);
+    const Picture* refPic = pu.cu->slice->getRefPic( refId, pu.refIdx[refId] )->unscaledPic;
     Mv cMvClipped = cMv;
-    clipMv(cMvClipped, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps);
+    if( !pu.cs->sps->getWrapAroundEnabledFlag() ) 
+    {
+      clipMv( cMvClipped, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps, *pu.cs->pps );
+    }
 
     Mv startMv = mergeMV[refId];
 
@@ -1529,30 +1907,39 @@ void InterPrediction::xFinalPaddedMCForDMVR(PredictionUnit& pu, PelUnitBuf &pcYu
     }
     for (int compID = 0; compID < MAX_NUM_COMPONENT; compID++)
     {
-      int mvshiftTemp = mvShift + getComponentScaleX((ComponentID)compID, pu.chromaFormat);
-      int leftPixelExtra;
-      if (compID == COMPONENT_Y)
+      Pel *srcBufPelPtr = NULL;
+      int pcPadstride = 0;
+      if (blockMoved || (compID == 0))
       {
-        leftPixelExtra = (NTAPS_LUMA >> 1) - 1;
-      }
-      else
-      {
-        leftPixelExtra = (NTAPS_CHROMA >> 1) - 1;
+        pcPadstride = pcPadTemp.bufs[compID].stride;
+        int mvshiftTempHor = mvShift + getComponentScaleX((ComponentID)compID, pu.chromaFormat);
+        int mvshiftTempVer = mvShift + getComponentScaleY((ComponentID)compID, pu.chromaFormat);
+        int leftPixelExtra;
+        if (compID == COMPONENT_Y)
+        {
+          leftPixelExtra = (NTAPS_LUMA >> 1) - 1;
+        }
+        else
+        {
+          leftPixelExtra = (NTAPS_CHROMA >> 1) - 1;
+        }
+        PelBuf &srcBuf = pcPadTemp.bufs[compID];
+        deltaIntMvX = (cMv.getHor() >> mvshiftTempHor) -
+          (startMv.getHor() >> mvshiftTempHor);
+        deltaIntMvY = (cMv.getVer() >> mvshiftTempVer) -
+          (startMv.getVer() >> mvshiftTempVer);
+
+        CHECK((abs(deltaIntMvX) > DMVR_NUM_ITERATION) || (abs(deltaIntMvY) > DMVR_NUM_ITERATION), "not expected DMVR movement");
+
+        offset = (DMVR_NUM_ITERATION + leftPixelExtra) * (pcPadTemp.bufs[compID].stride + 1);
+        offset += (deltaIntMvY)* pcPadTemp.bufs[compID].stride;
+        offset += (deltaIntMvX);
+        srcBufPelPtr = (srcBuf.buf + offset);
       }
-
-      deltaIntMvX = (cMv.getHor() >> mvshiftTemp) -
-        (startMv.getHor() >> mvshiftTemp);
-      deltaIntMvY = (cMv.getVer() >> mvshiftTemp) -
-        (startMv.getVer() >> mvshiftTemp);
-
-      CHECK((abs(deltaIntMvX) > DMVR_NUM_ITERATION) || (abs(deltaIntMvY) > DMVR_NUM_ITERATION), "not expected DMVR movement");
-
-      offset = (DMVR_NUM_ITERATION + leftPixelExtra) * (pcPadTemp.bufs[compID].stride + 1);
-      offset += (deltaIntMvY)* pcPadTemp.bufs[compID].stride;
-      offset += (deltaIntMvX);
-      PelBuf &srcBuf = pcPadTemp.bufs[compID];
-      xPredInterBlk((ComponentID)compID, pu, refPic, cMvClipped, pcYUVTemp, true, pu.cs->slice->getClpRngs().comp[compID],
-        bioApplied, false, 0, 0, 0, (srcBuf.buf + offset), pcPadTemp.bufs[compID].stride);
+      JVET_J0090_SET_CACHE_ENABLE(false);
+      xPredInterBlk( (ComponentID)compID, pu, refPic, cMvClipped, pcYUVTemp, true, pu.cs->slice->getClpRngs().comp[compID],
+        bioApplied, false, pu.cu->slice->getScalingRatio( refId, pu.refIdx[refId] ), 0, 0, 0, srcBufPelPtr, pcPadstride );
+      JVET_J0090_SET_CACHE_ENABLE(false);
     }
     pcYUVTemp = pcYuvSrc1;
     pcPadTemp = pcPad1;
@@ -1566,7 +1953,7 @@ uint64_t InterPrediction::xDMVRCost(int bitDepth, Pel* pOrg, uint32_t refStride,
   cDistParam.useMR = false;
   m_pcRdCost->setDistParam(cDistParam, pOrg, pRef, orgStride, refStride, bitDepth, COMPONENT_Y, width, height, 1);
   uint64_t uiCost = cDistParam.distFunc(cDistParam);
-  return uiCost;
+  return uiCost>>1;
 }
 
 void xDMVRSubPixelErrorSurface(bool notZeroCost, int16_t *totalDeltaMV, int16_t *deltaMV, uint64_t *pSADsArray)
@@ -1598,8 +1985,11 @@ void InterPrediction::xinitMC(PredictionUnit& pu, const ClpRngs &clpRngs)
   Mv mergeMVL1(pu.mv[REF_PIC_LIST_1]);
 
   /*Clip the starting MVs*/
-  clipMv(mergeMVL0, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps);
-  clipMv(mergeMVL1, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps);
+  if( !pu.cs->sps->getWrapAroundEnabledFlag() )
+  {
+    clipMv( mergeMVL0, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps, *pu.cs->pps );
+    clipMv( mergeMVL1, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps, *pu.cs->pps );
+  }
 
   /*L0 MC for refinement*/
   {
@@ -1610,11 +2000,11 @@ void InterPrediction::xinitMC(PredictionUnit& pu, const ClpRngs &clpRngs)
     offset += (-(int)DMVR_NUM_ITERATION);
     PelBuf srcBuf = m_cYuvRefBuffDMVRL0.bufs[COMPONENT_Y];
     PelUnitBuf yuvPredTempL0 = PelUnitBuf(pu.chromaFormat, PelBuf(m_cYuvPredTempDMVRL0,
-      (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION)), pu.lwidth() + (2 * DMVR_NUM_ITERATION), pu.lheight() + (2 * DMVR_NUM_ITERATION)));
+      m_biLinearBufStride
+      , pu.lwidth() + (2 * DMVR_NUM_ITERATION), pu.lheight() + (2 * DMVR_NUM_ITERATION)));
 
-    xPredInterBlk(COMPONENT_Y, pu, pu.cu->slice->getRefPic(REF_PIC_LIST_0, refIdx0), mergeMVL0, yuvPredTempL0, true, clpRngs.comp[COMPONENT_Y],
-      false, false, pu.lwidth() + (2 * DMVR_NUM_ITERATION), pu.lheight() + (2 * DMVR_NUM_ITERATION), true, ((Pel *)srcBuf.buf) + offset, srcBuf.stride
-    );
+    xPredInterBlk( COMPONENT_Y, pu, pu.cu->slice->getRefPic( REF_PIC_LIST_0, refIdx0 )->unscaledPic, mergeMVL0, yuvPredTempL0, true, clpRngs.comp[COMPONENT_Y],
+      false, false, pu.cu->slice->getScalingRatio( REF_PIC_LIST_0, refIdx0 ), pu.lwidth() + ( 2 * DMVR_NUM_ITERATION ), pu.lheight() + ( 2 * DMVR_NUM_ITERATION ), true, ( (Pel *)srcBuf.buf ) + offset, srcBuf.stride );
   }
 
   /*L1 MC for refinement*/
@@ -1626,11 +2016,11 @@ void InterPrediction::xinitMC(PredictionUnit& pu, const ClpRngs &clpRngs)
     offset += (-(int)DMVR_NUM_ITERATION);
     PelBuf srcBuf = m_cYuvRefBuffDMVRL1.bufs[COMPONENT_Y];
     PelUnitBuf yuvPredTempL1 = PelUnitBuf(pu.chromaFormat, PelBuf(m_cYuvPredTempDMVRL1,
-      (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION)), pu.lwidth() + (2 * DMVR_NUM_ITERATION), pu.lheight() + (2 * DMVR_NUM_ITERATION)));
+      m_biLinearBufStride
+      , pu.lwidth() + (2 * DMVR_NUM_ITERATION), pu.lheight() + (2 * DMVR_NUM_ITERATION)));
 
-    xPredInterBlk(COMPONENT_Y, pu, pu.cu->slice->getRefPic(REF_PIC_LIST_1, refIdx1), mergeMVL1, yuvPredTempL1, true, clpRngs.comp[COMPONENT_Y],
-      false, false, pu.lwidth() + (2 * DMVR_NUM_ITERATION), pu.lheight() + (2 * DMVR_NUM_ITERATION), true, ((Pel *)srcBuf.buf) + offset, srcBuf.stride
-    );
+    xPredInterBlk( COMPONENT_Y, pu, pu.cu->slice->getRefPic( REF_PIC_LIST_1, refIdx1 )->unscaledPic, mergeMVL1, yuvPredTempL1, true, clpRngs.comp[COMPONENT_Y],
+      false, false, pu.cu->slice->getScalingRatio( REF_PIC_LIST_1, refIdx1 ), pu.lwidth() + ( 2 * DMVR_NUM_ITERATION ), pu.lheight() + ( 2 * DMVR_NUM_ITERATION ), true, ( (Pel *)srcBuf.buf ) + offset, srcBuf.stride );
   }
 }
 
@@ -1647,40 +2037,101 @@ void InterPrediction::xProcessDMVR(PredictionUnit& pu, PelUnitBuf &pcYuvDst, con
 
   int dy = std::min<int>(pu.lumaSize().height, DMVR_SUBCU_HEIGHT);
   int dx = std::min<int>(pu.lumaSize().width,  DMVR_SUBCU_WIDTH);
-  /*L0 Padding*/
-  m_cYuvRefBuffDMVRL0 = (pu.chromaFormat == CHROMA_400 ?
-    PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL0[0], pcYuvDst.Y())) :
-    PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL0[0], pcYuvDst.Y()),
-      PelBuf(m_cRefSamplesDMVRL0[1], pcYuvDst.Cb()), PelBuf(m_cRefSamplesDMVRL0[2], pcYuvDst.Cr())));
-
-  xPrefetchPad(pu, m_cYuvRefBuffDMVRL0, REF_PIC_LIST_0);
+  Position puPos = pu.lumaPos();
 
-  /*L1 Padding*/
-  m_cYuvRefBuffDMVRL1 = (pu.chromaFormat == CHROMA_400 ?
-    PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL1[0], pcYuvDst.Y())) :
-    PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL1[0], pcYuvDst.Y()), PelBuf(m_cRefSamplesDMVRL1[1], pcYuvDst.Cb()),
-      PelBuf(m_cRefSamplesDMVRL1[2], pcYuvDst.Cr())));
+  int bd = pu.cs->slice->getClpRngs().comp[COMPONENT_Y].bd;
 
-  xPrefetchPad(pu, m_cYuvRefBuffDMVRL1, REF_PIC_LIST_1);
+  int            bioEnabledThres = 2 * dy * dx;
+  bool           bioAppliedType[MAX_NUM_SUBCU_DMVR];
 
-  xinitMC(pu, clpRngs);
+#if JVET_J0090_MEMORY_BANDWITH_MEASURE
+  JVET_J0090_SET_CACHE_ENABLE(true);
+  for (int k = 0; k < NUM_REF_PIC_LIST_01; k++)
+  {
+    RefPicList refId = (RefPicList)k;
+    const Picture* refPic = pu.cu->slice->getRefPic(refId, pu.refIdx[refId]);
+    for (int compID = 0; compID < MAX_NUM_COMPONENT; compID++)
+    {
+      Mv cMv = pu.mv[refId];
+      int mvshiftTemp = mvShift + getComponentScaleX((ComponentID)compID, pu.chromaFormat);
+      int filtersize = (compID == (COMPONENT_Y)) ? NTAPS_LUMA : NTAPS_CHROMA;
+      cMv += Mv(-(((filtersize >> 1) - 1) << mvshiftTemp), -(((filtersize >> 1) - 1) << mvshiftTemp));
+      bool wrapRef = false;
+      if (pu.cs->sps->getWrapAroundEnabledFlag())
+      {
+        wrapRef = wrapClipMv(cMv, pu.blocks[0].pos(), pu.blocks[0].size(), pu.cs->sps, pu.cs->pps);
+      }
+      else
+      {
+        clipMv(cMv, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps, *pu.cs->pps);
+      }
 
-  // point mc buffer to cetre point to avoid multiplication to reach each iteration to the begining
-  Pel *biLinearPredL0 = m_cYuvPredTempDMVRL0 + (DMVR_NUM_ITERATION * m_biLinearBufStride) + DMVR_NUM_ITERATION;
-  Pel *biLinearPredL1 = m_cYuvPredTempDMVRL1 + (DMVR_NUM_ITERATION * m_biLinearBufStride) + DMVR_NUM_ITERATION;
+      int width = pcYuvDst.bufs[compID].width + (filtersize - 1);
+      int height = pcYuvDst.bufs[compID].height + (filtersize - 1);
 
-  Position puPos = pu.lumaPos();
+      CPelBuf refBuf;
+      Position recOffset = pu.blocks[compID].pos().offset(cMv.getHor() >> mvshiftTemp, cMv.getVer() >> mvshiftTemp);
+      refBuf = refPic->getRecoBuf(CompArea((ComponentID)compID, pu.chromaFormat, recOffset, pu.blocks[compID].size()), wrapRef);
 
-  int bd = pu.cs->slice->getClpRngs().comp[COMPONENT_Y].bd;
+      JVET_J0090_SET_REF_PICTURE(refPic, (ComponentID)compID);
+      for (int row = 0; row < height; row++)
+      {
+        for (int col = 0; col < width; col++)
+        {
+          JVET_J0090_CACHE_ACCESS(((Pel *)refBuf.buf) + row * refBuf.stride + col, __FILE__, __LINE__);
+        }
+      }
+    }
+  }
+  JVET_J0090_SET_CACHE_ENABLE(false);
+#endif
 
   {
     int num = 0;
 
+    int scaleX = getComponentScaleX(COMPONENT_Cb, pu.chromaFormat);
+    int scaleY = getComponentScaleY(COMPONENT_Cb, pu.chromaFormat);
+    m_biLinearBufStride = (dx + (2 * DMVR_NUM_ITERATION));
+    // point mc buffer to centre point to avoid multiplication to reach each iteration to the beginning
+    Pel *biLinearPredL0 = m_cYuvPredTempDMVRL0 + (DMVR_NUM_ITERATION * m_biLinearBufStride) + DMVR_NUM_ITERATION;
+    Pel *biLinearPredL1 = m_cYuvPredTempDMVRL1 + (DMVR_NUM_ITERATION * m_biLinearBufStride) + DMVR_NUM_ITERATION;
+
+    PredictionUnit subPu = pu;
+    subPu.UnitArea::operator=(UnitArea(pu.chromaFormat, Area(puPos.x, puPos.y, dx, dy)));
+    m_cYuvRefBuffDMVRL0 = (pu.chromaFormat == CHROMA_400 ?
+      PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL0[0], pcYuvDst.Y())) :
+      PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL0[0], pcYuvDst.Y()),
+        PelBuf(m_cRefSamplesDMVRL0[1], pcYuvDst.Cb()), PelBuf(m_cRefSamplesDMVRL0[2], pcYuvDst.Cr())));
+    m_cYuvRefBuffDMVRL0 = m_cYuvRefBuffDMVRL0.subBuf(UnitAreaRelative(pu, subPu));
+
+    m_cYuvRefBuffDMVRL1 = (pu.chromaFormat == CHROMA_400 ?
+      PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL1[0], pcYuvDst.Y())) :
+      PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL1[0], pcYuvDst.Y()), PelBuf(m_cRefSamplesDMVRL1[1], pcYuvDst.Cb()),
+        PelBuf(m_cRefSamplesDMVRL1[2], pcYuvDst.Cr())));
+    m_cYuvRefBuffDMVRL1 = m_cYuvRefBuffDMVRL1.subBuf(UnitAreaRelative(pu, subPu));
+
+    PelUnitBuf srcPred0 = (pu.chromaFormat == CHROMA_400 ?
+      PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[0][0], pcYuvDst.Y())) :
+      PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[0][0], pcYuvDst.Y()), PelBuf(m_acYuvPred[0][1], pcYuvDst.Cb()), PelBuf(m_acYuvPred[0][2], pcYuvDst.Cr())));
+    PelUnitBuf srcPred1 = (pu.chromaFormat == CHROMA_400 ?
+      PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[1][0], pcYuvDst.Y())) :
+      PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[1][0], pcYuvDst.Y()), PelBuf(m_acYuvPred[1][1], pcYuvDst.Cb()), PelBuf(m_acYuvPred[1][2], pcYuvDst.Cr())));
+
+    srcPred0 = srcPred0.subBuf(UnitAreaRelative(pu, subPu));
+    srcPred1 = srcPred1.subBuf(UnitAreaRelative(pu, subPu));
+
     int yStart = 0;
     for (int y = puPos.y; y < (puPos.y + pu.lumaSize().height); y = y + dy, yStart = yStart + dy)
     {
       for (int x = puPos.x, xStart = 0; x < (puPos.x + pu.lumaSize().width); x = x + dx, xStart = xStart + dx)
       {
+        PredictionUnit subPu = pu;
+        subPu.UnitArea::operator=(UnitArea(pu.chromaFormat, Area(x, y, dx, dy)));
+        xPrefetch(subPu, m_cYuvRefBuffDMVRL0, REF_PIC_LIST_0, 1);
+        xPrefetch(subPu, m_cYuvRefBuffDMVRL1, REF_PIC_LIST_1, 1);
+
+        xinitMC(subPu, clpRngs);
+
         uint64_t minCost = MAX_UINT64;
         bool notZeroCost = true;
         int16_t totalDeltaMV[2] = { 0,0 };
@@ -1691,19 +2142,17 @@ void InterPrediction::xProcessDMVR(PredictionUnit& pu, PelUnitBuf &pcYuvDst, con
           m_SADsArray[i] = MAX_UINT64;
         }
         pSADsArray = &m_SADsArray[(((2 * DMVR_NUM_ITERATION) + 1) * ((2 * DMVR_NUM_ITERATION) + 1)) >> 1];
-
-        Pel *addrL0Centre = biLinearPredL0 + yStart * m_biLinearBufStride + xStart;
-        Pel *addrL1Centre = biLinearPredL1 + yStart * m_biLinearBufStride + xStart;
         for (int i = 0; i < iterationCount; i++)
         {
           deltaMV[0] = 0;
           deltaMV[1] = 0;
-          Pel *addrL0 = addrL0Centre + totalDeltaMV[0] + (totalDeltaMV[1] * m_biLinearBufStride);
-          Pel *addrL1 = addrL1Centre - totalDeltaMV[0] - (totalDeltaMV[1] * m_biLinearBufStride);
+          Pel *addrL0 = biLinearPredL0 + totalDeltaMV[0] + (totalDeltaMV[1] * m_biLinearBufStride);
+          Pel *addrL1 = biLinearPredL1 - totalDeltaMV[0] - (totalDeltaMV[1] * m_biLinearBufStride);
           if (i == 0)
           {
             minCost = xDMVRCost(clpRngs.comp[COMPONENT_Y].bd, addrL0, m_biLinearBufStride, addrL1, m_biLinearBufStride, dx, dy);
-            if (minCost < ((4 * dx * (dy >> 1/*for alternate line*/))))
+            minCost -= (minCost >>2);
+            if (minCost < (dx * dy))
             {
               notZeroCost = false;
               break;
@@ -1727,58 +2176,47 @@ void InterPrediction::xProcessDMVR(PredictionUnit& pu, PelUnitBuf &pcYuvDst, con
           pSADsArray += ((deltaMV[1] * (((2 * DMVR_NUM_ITERATION) + 1))) + deltaMV[0]);
         }
 
+        bioAppliedType[num] = (minCost < bioEnabledThres) ? false : bioApplied;
         totalDeltaMV[0] = (totalDeltaMV[0] << mvShift);
         totalDeltaMV[1] = (totalDeltaMV[1] << mvShift);
         xDMVRSubPixelErrorSurface(notZeroCost, totalDeltaMV, deltaMV, pSADsArray);
 
         pu.mvdL0SubPu[num] = Mv(totalDeltaMV[0], totalDeltaMV[1]);
+        PelUnitBuf subPredBuf = pcYuvDst.subBuf(UnitAreaRelative(pu, subPu));
 
-        num++;
-      }
-    }
-  }
+        bool blockMoved = false;
+        if (pu.mvdL0SubPu[num] != Mv(0, 0))
+        {
+          blockMoved = true;
+          xPrefetch(subPu, m_cYuvRefBuffDMVRL0, REF_PIC_LIST_0, 0);
+          xPrefetch(subPu, m_cYuvRefBuffDMVRL1, REF_PIC_LIST_1, 0);
+          xPad(subPu, m_cYuvRefBuffDMVRL0, REF_PIC_LIST_0);
+          xPad(subPu, m_cYuvRefBuffDMVRL1, REF_PIC_LIST_1);
+        }
 
-  {
-    PredictionUnit subPu = pu;
-    subPu.UnitArea::operator=(UnitArea(pu.chromaFormat, Area(puPos.x, puPos.y, dx, dy)));
-    PelUnitBuf           m_cYuvRefBuffSubCuDMVRL0;
-    PelUnitBuf           m_cYuvRefBuffSubCuDMVRL1;
-    PelUnitBuf srcPred0 = (pu.chromaFormat == CHROMA_400 ?
-      PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[0][0], pcYuvDst.Y())) :
-      PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[0][0], pcYuvDst.Y()), PelBuf(m_acYuvPred[0][1], pcYuvDst.Cb()), PelBuf(m_acYuvPred[0][2], pcYuvDst.Cr())));
-    PelUnitBuf srcPred1 = (pu.chromaFormat == CHROMA_400 ?
-      PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[1][0], pcYuvDst.Y())) :
-      PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[1][0], pcYuvDst.Y()), PelBuf(m_acYuvPred[1][1], pcYuvDst.Cb()), PelBuf(m_acYuvPred[1][2], pcYuvDst.Cr())));
+        int dstStride[MAX_NUM_COMPONENT] = { pcYuvDst.bufs[COMPONENT_Y].stride, pcYuvDst.bufs[COMPONENT_Cb].stride, pcYuvDst.bufs[COMPONENT_Cr].stride };
+        subPu.mv[0] = mergeMv[REF_PIC_LIST_0] + pu.mvdL0SubPu[num];
+        subPu.mv[1] = mergeMv[REF_PIC_LIST_1] - pu.mvdL0SubPu[num];
 
-    srcPred0 = srcPred0.subBuf(UnitAreaRelative(pu, subPu));
-    srcPred1 = srcPred1.subBuf(UnitAreaRelative(pu, subPu));
-    PelUnitBuf subPredBuf = pcYuvDst.subBuf(UnitAreaRelative(pu, subPu));
+        subPu.mv[0].clipToStorageBitDepth();
+        subPu.mv[1].clipToStorageBitDepth();
 
-    int x = 0, y = 0;
-    int xStart = 0, yStart = 0;
-    int num = 0;
+        xFinalPaddedMCForDMVR(subPu, srcPred0, srcPred1, m_cYuvRefBuffDMVRL0, m_cYuvRefBuffDMVRL1, bioAppliedType[num], mergeMv
+          , blockMoved
+        );
 
-    int dstStride[MAX_NUM_COMPONENT] = { pcYuvDst.bufs[COMPONENT_Y].stride, pcYuvDst.bufs[COMPONENT_Cb].stride, pcYuvDst.bufs[COMPONENT_Cr].stride };
-    for (y = puPos.y; y < (puPos.y + pu.lumaSize().height); y = y + dy, yStart = yStart + dy)
-    {
-      for (x = puPos.x, xStart = 0; x < (puPos.x + pu.lumaSize().width); x = x + dx, xStart = xStart + dx)
-      {
-        subPu.UnitArea::operator=(UnitArea(pu.chromaFormat, Area(x, y, dx, dy)));
+        subPredBuf.bufs[COMPONENT_Y].buf = pcYuvDst.bufs[COMPONENT_Y].buf + xStart + yStart * dstStride[COMPONENT_Y];
 
-        subPu.mv[0] = mergeMv[REF_PIC_LIST_0] + pu.mvdL0SubPu[num];
-        subPu.mv[1] = mergeMv[REF_PIC_LIST_1] - pu.mvdL0SubPu[num];
-        m_cYuvRefBuffSubCuDMVRL0 = m_cYuvRefBuffDMVRL0.subBuf(UnitAreaRelative(pu, subPu));
-        m_cYuvRefBuffSubCuDMVRL1 = m_cYuvRefBuffDMVRL1.subBuf(UnitAreaRelative(pu, subPu));
-        xFinalPaddedMCForDMVR(subPu, srcPred0, srcPred1, m_cYuvRefBuffSubCuDMVRL0, m_cYuvRefBuffSubCuDMVRL1, bioApplied, mergeMv);
-
-        subPredBuf.bufs[COMPONENT_Y].buf  = pcYuvDst.bufs[COMPONENT_Y].buf + xStart + yStart * dstStride[COMPONENT_Y];
-        subPredBuf.bufs[COMPONENT_Cb].buf = pcYuvDst.bufs[COMPONENT_Cb].buf + (xStart >> 1) + ((yStart >> 1) * dstStride[COMPONENT_Cb]);
-        subPredBuf.bufs[COMPONENT_Cr].buf = pcYuvDst.bufs[COMPONENT_Cr].buf + (xStart >> 1) + ((yStart >> 1) * dstStride[COMPONENT_Cr]);
-        xWeightedAverage(subPu, srcPred0, srcPred1, subPredBuf, subPu.cu->slice->getSPS()->getBitDepths(), subPu.cu->slice->clpRngs(), bioApplied);
+        subPredBuf.bufs[COMPONENT_Cb].buf = pcYuvDst.bufs[COMPONENT_Cb].buf + (xStart >> scaleX) + ((yStart >> scaleY) * dstStride[COMPONENT_Cb]);
+
+        subPredBuf.bufs[COMPONENT_Cr].buf = pcYuvDst.bufs[COMPONENT_Cr].buf + (xStart >> scaleX) + ((yStart >> scaleY) * dstStride[COMPONENT_Cr]);
+
+        xWeightedAverage(subPu, srcPred0, srcPred1, subPredBuf, subPu.cu->slice->getSPS()->getBitDepths(), subPu.cu->slice->clpRngs(), bioAppliedType[num]);
         num++;
       }
     }
   }
+  JVET_J0090_SET_CACHE_ENABLE(true);
 }
 #if JVET_J0090_MEMORY_BANDWITH_MEASURE
 void InterPrediction::cacheAssign( CacheModel *cache )
@@ -1789,4 +2227,241 @@ void InterPrediction::cacheAssign( CacheModel *cache )
 }
 #endif
 
-//! \}
+void InterPrediction::xFillIBCBuffer(CodingUnit &cu)
+{
+  for (auto &currPU : CU::traverseTUs(cu))
+  {
+    for (const CompArea &area : currPU.blocks)
+    {
+      if (!area.valid())
+        continue;
+
+      const unsigned int lcuWidth = cu.cs->slice->getSPS()->getMaxCUWidth();
+      const int shiftSampleHor = ::getComponentScaleX(area.compID, cu.chromaFormat);
+      const int shiftSampleVer = ::getComponentScaleY(area.compID, cu.chromaFormat);
+      const int ctuSizeLog2Ver = floorLog2(lcuWidth) - shiftSampleVer;
+      const int pux = area.x & ((m_IBCBufferWidth >> shiftSampleHor) - 1);
+      const int puy = area.y & (( 1 << ctuSizeLog2Ver ) - 1);
+      const CompArea dstArea = CompArea(area.compID, cu.chromaFormat, Position(pux, puy), Size(area.width, area.height));
+      CPelBuf srcBuf = cu.cs->getRecoBuf(area);
+      PelBuf dstBuf = m_IBCBuffer.getBuf(dstArea);
+
+      dstBuf.copyFrom(srcBuf);
+    }
+  }
+}
+
+void InterPrediction::xIntraBlockCopy(PredictionUnit &pu, PelUnitBuf &predBuf, const ComponentID compID)
+{
+  const unsigned int lcuWidth = pu.cs->slice->getSPS()->getMaxCUWidth();
+  const int shiftSampleHor = ::getComponentScaleX(compID, pu.chromaFormat);
+  const int shiftSampleVer = ::getComponentScaleY(compID, pu.chromaFormat);
+  const int ctuSizeLog2Ver = floorLog2(lcuWidth) - shiftSampleVer;
+  pu.bv = pu.mv[REF_PIC_LIST_0];
+  pu.bv.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_INT);
+  int refx, refy;
+  if (compID == COMPONENT_Y)
+  {
+    refx = pu.Y().x + pu.bv.hor;
+    refy = pu.Y().y + pu.bv.ver;
+  }
+  else
+  {//Cb or Cr
+    refx = pu.Cb().x + (pu.bv.hor >> shiftSampleHor);
+    refy = pu.Cb().y + (pu.bv.ver >> shiftSampleVer);
+  }
+  refx &= ((m_IBCBufferWidth >> shiftSampleHor) - 1);
+  refy &= ((1 << ctuSizeLog2Ver) - 1);
+
+  if (refx + predBuf.bufs[compID].width <= (m_IBCBufferWidth >> shiftSampleHor))
+  {
+    const CompArea srcArea = CompArea(compID, pu.chromaFormat, Position(refx, refy), Size(predBuf.bufs[compID].width, predBuf.bufs[compID].height));
+    const CPelBuf refBuf = m_IBCBuffer.getBuf(srcArea);
+    predBuf.bufs[compID].copyFrom(refBuf);
+  }
+  else
+  {//wrap around
+    int width = (m_IBCBufferWidth >> shiftSampleHor) - refx;
+    CompArea srcArea = CompArea(compID, pu.chromaFormat, Position(refx, refy), Size(width, predBuf.bufs[compID].height));
+    CPelBuf srcBuf = m_IBCBuffer.getBuf(srcArea);
+    PelBuf dstBuf = PelBuf(predBuf.bufs[compID].bufAt(Position(0, 0)), predBuf.bufs[compID].stride, Size(width, predBuf.bufs[compID].height));
+    dstBuf.copyFrom(srcBuf);
+
+    width = refx + predBuf.bufs[compID].width - (m_IBCBufferWidth >> shiftSampleHor);
+    srcArea = CompArea(compID, pu.chromaFormat, Position(0, refy), Size(width, predBuf.bufs[compID].height));
+    srcBuf = m_IBCBuffer.getBuf(srcArea);
+    dstBuf = PelBuf(predBuf.bufs[compID].bufAt(Position((m_IBCBufferWidth >> shiftSampleHor) - refx, 0)), predBuf.bufs[compID].stride, Size(width, predBuf.bufs[compID].height));
+    dstBuf.copyFrom(srcBuf);
+  }
+}
+
+void InterPrediction::resetIBCBuffer(const ChromaFormat chromaFormatIDC, const int ctuSize)
+{
+  const UnitArea area = UnitArea(chromaFormatIDC, Area(0, 0, m_IBCBufferWidth, ctuSize));
+  m_IBCBuffer.getBuf(area).fill(-1);
+}
+
+void InterPrediction::resetVPDUforIBC(const ChromaFormat chromaFormatIDC, const int ctuSize, const int vSize, const int xPos, const int yPos)
+{
+  const UnitArea area = UnitArea(chromaFormatIDC, Area(xPos & (m_IBCBufferWidth - 1), yPos & (ctuSize - 1), vSize, vSize));
+  m_IBCBuffer.getBuf(area).fill(-1);
+}
+
+bool InterPrediction::isLumaBvValid(const int ctuSize, const int xCb, const int yCb, const int width, const int height, const int xBv, const int yBv)
+{
+  if(((yCb + yBv) & (ctuSize - 1)) + height > ctuSize)
+  {
+    return false;
+  }
+  int refTLx = xCb + xBv;
+  int refTLy = (yCb + yBv) & (ctuSize - 1);
+  PelBuf buf = m_IBCBuffer.Y();
+  for(int x = 0; x < width; x += 4)
+  {
+    for(int y = 0; y < height; y += 4)
+    {
+      if(buf.at((x + refTLx) & (m_IBCBufferWidth - 1), y + refTLy) == -1) return false;
+      if(buf.at((x + 3 + refTLx) & (m_IBCBufferWidth - 1), y + refTLy) == -1) return false;
+      if(buf.at((x + refTLx) & (m_IBCBufferWidth - 1), y + 3 + refTLy) == -1) return false;
+      if(buf.at((x + 3 + refTLx) & (m_IBCBufferWidth - 1), y + 3 + refTLy) == -1) return false;
+    }
+  }
+  return true;
+}
+
+bool InterPrediction::xPredInterBlkRPR( const std::pair<int, int>& scalingRatio, const PPS& pps, const CompArea &blk, const Picture* refPic, const Mv& mv, Pel* dst, const int dstStride, const bool bi, const bool wrapRef, const ClpRng& clpRng, const int filterIndex, const bool useAltHpelIf )
+{
+  const ChromaFormat  chFmt = blk.chromaFormat;
+  const ComponentID compID = blk.compID;
+  const bool          rndRes = !bi;
+
+  int shiftHor = MV_FRACTIONAL_BITS_INTERNAL + ::getComponentScaleX( compID, chFmt );
+  int shiftVer = MV_FRACTIONAL_BITS_INTERNAL + ::getComponentScaleY( compID, chFmt );
+
+  int width = blk.width;
+  int height = blk.height;
+  CPelBuf refBuf;
+
+#if JVET_Q0487_SCALING_WINDOW_ISSUES
+  const bool scaled = refPic->isRefScaled( &pps );
+#else
+  const bool scaled = scalingRatio != SCALE_1X;
+#endif
+
+  if( scaled )
+  {
+    int row, col;
+    int refPicWidth = refPic->getPicWidthInLumaSamples();
+    int refPicHeight = refPic->getPicHeightInLumaSamples();
+
+    int xFilter = filterIndex;
+    int yFilter = filterIndex;
+    const int rprThreshold1 = ( 1 << SCALE_RATIO_BITS ) * 5 / 4; 
+    const int rprThreshold2 = ( 1 << SCALE_RATIO_BITS ) * 7 / 4;
+    if( filterIndex == 0 )
+    {
+      if( scalingRatio.first > rprThreshold2 )
+      {
+        xFilter = 4;
+      }
+      else if( scalingRatio.first > rprThreshold1 )
+      {
+        xFilter = 3;
+      }
+
+      if( scalingRatio.second > rprThreshold2 )
+      {
+        yFilter = 4;
+      }
+      else if( scalingRatio.second > rprThreshold1 )
+      {
+        yFilter = 3;
+      }
+    }
+
+    const int posShift = SCALE_RATIO_BITS - 4;
+    int stepX = ( scalingRatio.first + 8 ) >> 4;
+    int stepY = ( scalingRatio.second + 8 ) >> 4;
+    int64_t x0Int;
+    int64_t y0Int;
+    int offX = 1 << ( posShift - shiftHor - 1 );
+    int offY = 1 << ( posShift - shiftVer - 1 );
+
+#if JVET_Q0487_SCALING_WINDOW_ISSUES
+    const int64_t posX = ( ( blk.pos().x << ::getComponentScaleX( compID, chFmt ) ) - ( pps.getScalingWindow().getWindowLeftOffset() * SPS::getWinUnitX( chFmt ) ) ) >> ::getComponentScaleX( compID, chFmt );
+    const int64_t posY = ( ( blk.pos().y << ::getComponentScaleY( compID, chFmt ) ) - ( pps.getScalingWindow().getWindowTopOffset()  * SPS::getWinUnitY( chFmt ) ) ) >> ::getComponentScaleY( compID, chFmt );
+#else
+    const int64_t posX = ( ( blk.pos().x << ::getComponentScaleX( compID, chFmt ) ) - pps.getScalingWindow().getWindowLeftOffset() ) >> ::getComponentScaleX( compID, chFmt );
+    const int64_t posY = ( ( blk.pos().y << ::getComponentScaleY( compID, chFmt ) ) - pps.getScalingWindow().getWindowTopOffset() ) >> ::getComponentScaleY( compID, chFmt );
+#endif
+
+    int addX = isLuma( compID ) ? 0 : int( 1 - refPic->cs->sps->getHorCollocatedChromaFlag() ) * 8 * ( scalingRatio.first - SCALE_1X.first );
+    int addY = isLuma( compID ) ? 0 : int( 1 - refPic->cs->sps->getVerCollocatedChromaFlag() ) * 8 * ( scalingRatio.second - SCALE_1X.second );
+
+    x0Int = ( ( posX << ( 4 + ::getComponentScaleX( compID, chFmt ) ) ) + mv.getHor() ) * (int64_t)scalingRatio.first + addX;
+#if JVET_Q0487_SCALING_WINDOW_ISSUES
+    x0Int = SIGN( x0Int ) * ( ( llabs( x0Int ) + ( (long long)1 << ( 7 + ::getComponentScaleX( compID, chFmt ) ) ) ) >> ( 8 + ::getComponentScaleX( compID, chFmt ) ) ) + ( ( refPic->getScalingWindow().getWindowLeftOffset() * SPS::getWinUnitX( chFmt ) ) << ( ( posShift - ::getComponentScaleX( compID, chFmt ) ) ) );
+#else
+    x0Int = SIGN( x0Int ) * ( ( llabs( x0Int ) + ( (long long)1 << ( 7 + ::getComponentScaleX( compID, chFmt ) ) ) ) >> ( 8 + ::getComponentScaleX( compID, chFmt ) ) ) + ( refPic->getScalingWindow().getWindowLeftOffset() << ( ( posShift - ::getComponentScaleX( compID, chFmt ) ) ) );
+#endif
+
+    y0Int = ( ( posY << ( 4 + ::getComponentScaleY( compID, chFmt ) ) ) + mv.getVer() ) * (int64_t)scalingRatio.second + addY;
+#if JVET_Q0487_SCALING_WINDOW_ISSUES
+    y0Int = SIGN( y0Int ) * ( ( llabs( y0Int ) + ( (long long)1 << ( 7 + ::getComponentScaleY( compID, chFmt ) ) ) ) >> ( 8 + ::getComponentScaleY( compID, chFmt ) ) ) + ( ( refPic->getScalingWindow().getWindowTopOffset() * SPS::getWinUnitY( chFmt ) ) << ( ( posShift - ::getComponentScaleY( compID, chFmt ) ) ) );
+#else
+    y0Int = SIGN( y0Int ) * ( ( llabs( y0Int ) + ( (long long)1 << ( 7 + ::getComponentScaleY( compID, chFmt ) ) ) ) >> ( 8 + ::getComponentScaleY( compID, chFmt ) ) ) + ( refPic->getScalingWindow().getWindowTopOffset() << ( ( posShift - ::getComponentScaleY( compID, chFmt ) ) ) );
+#endif
+
+    const int extSize = isLuma( compID ) ? 1 : 2;
+    int vFilterSize = isLuma( compID ) ? NTAPS_LUMA : NTAPS_CHROMA;
+
+    int yInt0 = ( (int32_t)y0Int + offY ) >> posShift;
+    yInt0 = std::min( std::max( -(NTAPS_LUMA / 2), yInt0 ), ( refPicHeight >> ::getComponentScaleY( compID, chFmt ) ) + (NTAPS_LUMA / 2) );
+
+    int xInt0 = ( (int32_t)x0Int + offX ) >> posShift;
+    xInt0 = std::min( std::max( -(NTAPS_LUMA / 2), xInt0 ), ( refPicWidth >> ::getComponentScaleX( compID, chFmt ) ) + (NTAPS_LUMA / 2) );
+        
+    int refHeight = ((((int32_t)y0Int + (height-1) * stepY) + offY ) >> posShift) - ((((int32_t)y0Int + 0 * stepY) + offY ) >> posShift) + 1;
+    refHeight = std::max<int>( 1, refHeight );
+
+    CHECK( MAX_CU_SIZE * MAX_SCALING_RATIO + 16 < refHeight + vFilterSize - 1 + extSize, "Buffer is not large enough, increase MAX_SCALING_RATIO" );
+
+    Pel buffer[( MAX_CU_SIZE + 16 ) * ( MAX_CU_SIZE * MAX_SCALING_RATIO + 16 )];
+    int tmpStride = width;
+    int xInt = 0, yInt = 0;
+
+    for( col = 0; col < width; col++ )
+    {
+      int posX = (int32_t)x0Int + col * stepX;
+      xInt = ( posX + offX ) >> posShift;
+      xInt = std::min( std::max( -(NTAPS_LUMA / 2), xInt ), ( refPicWidth >> ::getComponentScaleX( compID, chFmt ) ) + (NTAPS_LUMA / 2) );
+      int xFrac = ( ( posX + offX ) >> ( posShift - shiftHor ) ) & ( ( 1 << shiftHor ) - 1 );
+
+      CHECK( xInt0 > xInt, "Wrong horizontal starting point" );
+
+      Position offset = Position( xInt, yInt0 );
+      refBuf = refPic->getRecoBuf( CompArea( compID, chFmt, offset, Size( 1, refHeight ) ), wrapRef );
+      Pel* tempBuf = buffer + col;
+
+      m_if.filterHor( compID, (Pel*)refBuf.buf - ( ( vFilterSize >> 1 ) - 1 ) * refBuf.stride, refBuf.stride, tempBuf, tmpStride, 1, refHeight + vFilterSize - 1 + extSize, xFrac, false, chFmt, clpRng, xFilter, false, useAltHpelIf );
+    }
+
+    for( row = 0; row < height; row++ )
+    {
+      int posY = (int32_t)y0Int + row * stepY;
+      yInt = ( posY + offY ) >> posShift;
+      yInt = std::min( std::max( -(NTAPS_LUMA / 2), yInt ), ( refPicHeight >> ::getComponentScaleY( compID, chFmt ) ) + (NTAPS_LUMA / 2) );
+      int yFrac = ( ( posY + offY ) >> ( posShift - shiftVer ) ) & ( ( 1 << shiftVer ) - 1 );
+
+      CHECK( yInt0 > yInt, "Wrong vertical starting point" );
+
+      Pel* tempBuf = buffer + ( yInt - yInt0 ) * tmpStride;
+
+      JVET_J0090_SET_CACHE_ENABLE( false );
+      m_if.filterVer( compID, tempBuf + ( ( vFilterSize >> 1 ) - 1 ) * tmpStride, tmpStride, dst + row * dstStride, dstStride, width, 1, yFrac, false, rndRes, chFmt, clpRng, yFilter, false, useAltHpelIf );
+      JVET_J0090_SET_CACHE_ENABLE( true );
+    }
+  }
+
+  return scaled;
+}
diff --git a/source/Lib/CommonLib/InterPrediction.h b/source/Lib/CommonLib/InterPrediction.h
index 56b45faa2e72a52c99c526c191a492c71ad7687a..1bc16ff8d7b567029f5eaff532713e6f850cb822 100644
--- a/source/Lib/CommonLib/InterPrediction.h
+++ b/source/Lib/CommonLib/InterPrediction.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -63,17 +63,8 @@ class Mv;
 class InterPrediction : public WeightPrediction
 {
 private:
-  int m_shareState;
 
-  Distortion  m_bioDistThres;
-  Distortion  m_bioSubBlkDistThres;
-  Distortion  m_bioPredSubBlkDist[MAX_NUM_PARTS_IN_CTU];
 
-  int m_dotProduct1[BIO_TEMP_BUFFER_SIZE];
-  int m_dotProduct2[BIO_TEMP_BUFFER_SIZE];
-  int m_dotProduct3[BIO_TEMP_BUFFER_SIZE];
-  int m_dotProduct5[BIO_TEMP_BUFFER_SIZE];
-  int m_dotProduct6[BIO_TEMP_BUFFER_SIZE];
 
 protected:
   InterpolationFilter  m_if;
@@ -108,23 +99,32 @@ protected:
                              Mv(-2, 2), Mv(-1, 2), Mv(0, 2), Mv(1, 2), Mv(2, 2) };
   uint64_t m_SADsArray[((2 * DMVR_NUM_ITERATION) + 1) * ((2 * DMVR_NUM_ITERATION) + 1)];
 
+  Pel                  m_gradBuf[2][(AFFINE_MIN_BLOCK_SIZE + 2) * (AFFINE_MIN_BLOCK_SIZE + 2)];
+  int                  m_dMvBuf[2][16 * 2];
+  bool                 m_skipPROF;
+  bool                 m_encOnly;
+  bool                 m_isBi;
+
   Pel*                 m_gradX0;
   Pel*                 m_gradY0;
   Pel*                 m_gradX1;
   Pel*                 m_gradY1;
   bool                 m_subPuMC;
 
+  int                  m_IBCBufferWidth;
+  PelStorage           m_IBCBuffer;
+  void xIntraBlockCopy          (PredictionUnit &pu, PelUnitBuf &predBuf, const ComponentID compID);
   int             rightShiftMSB(int numer, int    denom);
   void            applyBiOptFlow(const PredictionUnit &pu, const CPelUnitBuf &yuvSrc0, const CPelUnitBuf &yuvSrc1, const int &refIdx0, const int &refIdx1, PelUnitBuf &yuvDst, const BitDepths &clipBitDepths);
-  bool            xCalcBiPredSubBlkDist(const PredictionUnit &pu, const Pel* yuvSrc0, const int src0Stride, const Pel* yuvSrc1, const int src1Stride, const BitDepths &clipBitDepths);
   void xPredInterUni            ( const PredictionUnit& pu, const RefPicList& eRefPicList, PelUnitBuf& pcYuvPred, const bool& bi
                                   , const bool& bioApplied
                                   , const bool luma, const bool chroma
   );
-  void xPredInterBi             ( PredictionUnit& pu, PelUnitBuf &pcYuvPred );
+  void xPredInterBi             ( PredictionUnit& pu, PelUnitBuf &pcYuvPred, const bool luma = true, const bool chroma = true, PelUnitBuf* yuvPredTmp = NULL );
   void xPredInterBlk            ( const ComponentID& compID, const PredictionUnit& pu, const Picture* refPic, const Mv& _mv, PelUnitBuf& dstPic, const bool& bi, const ClpRng& clpRng
                                  , const bool& bioApplied
                                  , bool isIBC
+                                 , const std::pair<int, int> scalingRatio = SCALE_1X
                                  , SizeType dmvrWidth = 0
                                  , SizeType dmvrHeight = 0
                                  , bool bilinearMC = false
@@ -136,31 +136,33 @@ protected:
   void xBioGradFilter           (Pel* pSrc, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY, int bitDepth);
   void xCalcBIOPar              (const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel* gradY1, int* dotProductTemp1, int* dotProductTemp2, int* dotProductTemp3, int* dotProductTemp5, int* dotProductTemp6, const int src0Stride, const int src1Stride, const int gradStride, const int widthG, const int heightG, int bitDepth);
   void xCalcBlkGradient         (int sx, int sy, int    *arraysGx2, int     *arraysGxGy, int     *arraysGxdI, int     *arraysGy2, int     *arraysGydI, int     &sGx2, int     &sGy2, int     &sGxGy, int     &sGxdI, int     &sGydI, int width, int height, int unitSize);
-  void xWeightedAverage         ( const PredictionUnit& pu, const CPelUnitBuf& pcYuvSrc0, const CPelUnitBuf& pcYuvSrc1, PelUnitBuf& pcYuvDst, const BitDepths& clipBitDepths, const ClpRngs& clpRngs, const bool& bioApplied );
-  void xPredAffineBlk( const ComponentID& compID, const PredictionUnit& pu, const Picture* refPic, const Mv* _mv, PelUnitBuf& dstPic, const bool& bi, const ClpRng& clpRng );
-
+  void xWeightedAverage         ( const PredictionUnit& pu, const CPelUnitBuf& pcYuvSrc0, const CPelUnitBuf& pcYuvSrc1, PelUnitBuf& pcYuvDst, const BitDepths& clipBitDepths, const ClpRngs& clpRngs, const bool& bioApplied, const bool lumaOnly = false, const bool chromaOnly = false, PelUnitBuf* yuvDstTmp = NULL );
+  void xPredAffineBlk           ( const ComponentID& compID, const PredictionUnit& pu, const Picture* refPic, const Mv* _mv, PelUnitBuf& dstPic, const bool& bi, const ClpRng& clpRng, const bool genChromaMv = false, const std::pair<int, int> scalingRatio = SCALE_1X );
   void xWeightedTriangleBlk     ( const PredictionUnit &pu, const uint32_t width, const uint32_t height, const ComponentID compIdx, const bool splitDir, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1 );
 
   static bool xCheckIdenticalMotion( const PredictionUnit& pu );
 
-  void xSubPuMC(PredictionUnit& pu, PelUnitBuf& predBuf, const RefPicList &eRefPicList = REF_PIC_LIST_X);
+  void xSubPuMC(PredictionUnit& pu, PelUnitBuf& predBuf, const RefPicList &eRefPicList = REF_PIC_LIST_X, const bool luma = true, const bool chroma = true);
+  void xSubPuBio(PredictionUnit& pu, PelUnitBuf& predBuf, const RefPicList &eRefPicList = REF_PIC_LIST_X, PelUnitBuf* yuvDstTmp = NULL);
   void destroy();
 
 
   MotionInfo      m_SubPuMiBuf[(MAX_CU_SIZE * MAX_CU_SIZE) >> (MIN_CU_LOG2 << 1)];
-  void xChromaMC(PredictionUnit &pu, PelUnitBuf& pcYuvPred);
 #if JVET_J0090_MEMORY_BANDWITH_MEASURE
   CacheModel      *m_cacheModel;
 #endif
+  PelStorage       m_colorTransResiBuf[3];  // 0-org; 1-act; 2-tmp
+
 public:
   InterPrediction();
   virtual ~InterPrediction();
 
-  void    init                (RdCost* pcRdCost, ChromaFormat chromaFormatIDC);
+  void    init                (RdCost* pcRdCost, ChromaFormat chromaFormatIDC, const int ctuSize);
 
   // inter
   void    motionCompensation  (PredictionUnit &pu, PelUnitBuf& predBuf, const RefPicList &eRefPicList = REF_PIC_LIST_X
     , const bool luma = true, const bool chroma = true
+    , PelUnitBuf* predBufWOBIO = NULL
   );
   void    motionCompensation  (PredictionUnit &pu, const RefPicList &eRefPicList = REF_PIC_LIST_X
     , const bool luma = true, const bool chroma = true
@@ -171,9 +173,11 @@ public:
 
   void    motionCompensation4Triangle( CodingUnit &cu, MergeCtx &triangleMrgCtx, const bool splitDir, const uint8_t candIdx0, const uint8_t candIdx1 );
   void    weightedTriangleBlk        ( PredictionUnit &pu, const bool splitDir, int32_t channel, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1 );
-  void xPrefetchPad(PredictionUnit& pu, PelUnitBuf &pcPad, RefPicList refId);
+  void xPrefetch(PredictionUnit& pu, PelUnitBuf &pcPad, RefPicList refId, bool forLuma);
+  void xPad(PredictionUnit& pu, PelUnitBuf &pcPad, RefPicList refId);
   void xFinalPaddedMCForDMVR(PredictionUnit& pu, PelUnitBuf &pcYuvSrc0, PelUnitBuf &pcYuvSrc1, PelUnitBuf &pcPad0, PelUnitBuf &pcPad1, const bool bioApplied
     , const Mv startMV[NUM_REF_PIC_LIST_01]
+    , bool blockMoved
   );
   void xBIPMVRefine(int bd, Pel *pRefL0, Pel *pRefL1, uint64_t& minCost, int16_t *deltaMV, uint64_t *pSADsArray, int width, int height);
   uint64_t xDMVRCost(int bitDepth, Pel* pRef, uint32_t refStride, const Pel* pOrg, uint32_t orgStride, int width, int height);
@@ -183,10 +187,13 @@ public:
 #if JVET_J0090_MEMORY_BANDWITH_MEASURE
   void    cacheAssign( CacheModel *cache );
 #endif
-  void    setShareState(int shareStateIn) {m_shareState = shareStateIn;}
-#if ENABLE_SPLIT_PARALLELISM
-  int     getShareState() const { return m_shareState; }
-#endif
+  static bool isSubblockVectorSpreadOverLimit( int a, int b, int c, int d, int predType );
+  void xFillIBCBuffer(CodingUnit &cu);
+  void resetIBCBuffer(const ChromaFormat chromaFormatIDC, const int ctuSize);
+  void resetVPDUforIBC(const ChromaFormat chromaFormatIDC, const int ctuSize, const int vSize, const int xPos, const int yPos);
+  bool isLumaBvValid(const int ctuSize, const int xCb, const int yCb, const int width, const int height, const int xBv, const int yBv);
+
+  bool xPredInterBlkRPR( const std::pair<int, int>& scalingRatio, const PPS& pps, const CompArea &blk, const Picture* refPic, const Mv& mv, Pel* dst, const int dstStride, const bool bi, const bool wrapRef, const ClpRng& clpRng, const int filterIndex, const bool useAltHpelIf = false );
 };
 
 //! \}
diff --git a/source/Lib/CommonLib/InterpolationFilter.cpp b/source/Lib/CommonLib/InterpolationFilter.cpp
index 01d7c284636a06f205eb5f3c938e7661b6ee8197..5d02b6da2a1f5e7717645b6259936994b7497775 100644
--- a/source/Lib/CommonLib/InterpolationFilter.cpp
+++ b/source/Lib/CommonLib/InterpolationFilter.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -54,6 +54,25 @@ CacheModel* InterpolationFilter::m_cacheModel;
 // ====================================================================================================================
 // Tables
 // ====================================================================================================================
+const TFilterCoeff InterpolationFilter::m_lumaFilter4x4[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_LUMA] = // luma taps chosen by filterHor/filterVer for nFilterIdx == 2 and, as a fallback, for 4x4 blocks
+{ // one row per fractional position; the first and last tap of every row are 0 and each row sums to 64
+  {  0, 0,   0, 64,  0,   0,  0,  0 }, // frac 0: single non-zero tap
+  {  0, 1,  -3, 63,  4,  -2,  1,  0 },
+  {  0, 1,  -5, 62,  8,  -3,  1,  0 },
+  {  0, 2,  -8, 60, 13,  -4,  1,  0 },
+  {  0, 3, -10, 58, 17,  -5,  1,  0 }, //1/4
+  {  0, 3, -11, 52, 26,  -8,  2,  0 },
+  {  0, 2,  -9, 47, 31, -10,  3,  0 },
+  {  0, 3, -11, 45, 34, -10,  3,  0 },
+  {  0, 3, -11, 40, 40, -11,  3,  0 }, //1/2
+  {  0, 3, -10, 34, 45, -11,  3,  0 },
+  {  0, 3, -10, 31, 47,  -9,  2,  0 },
+  {  0, 2,  -8, 26, 52, -11,  3,  0 },
+  {  0, 1,  -5, 17, 58, -10,  3,  0 }, //3/4
+  {  0, 1,  -4, 13, 60,  -8,  2,  0 },
+  {  0, 1,  -3,  8, 62,  -5,  1,  0 },
+  {  0, 1,  -2,  4, 63,  -3,  1,  0 }
+};
 
 const TFilterCoeff InterpolationFilter::m_lumaFilter[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_LUMA] =
 {
@@ -75,6 +94,49 @@ const TFilterCoeff InterpolationFilter::m_lumaFilter[LUMA_INTERPOLATION_FILTER_S
   {  0, 1,  -2,  4, 63,  -3,  1,  0 }
 };
 
+// 1.5x: luma taps picked by filterHor/filterVer when nFilterIdx == 3 (also at frac 0); one row per fractional position, every row sums to 64
+const TFilterCoeff InterpolationFilter::m_lumaFilterRPR1[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_LUMA] =
+{
+  { -1, -5, 17, 42, 17, -5, -1,  0 },
+  {  0, -5, 15, 41, 19, -5, -1,  0 },
+  {  0, -5, 13, 40, 21, -4, -1,  0 },
+  {  0, -5, 11, 39, 24, -4, -2,  1 },
+  {  0, -5,  9, 38, 26, -3, -2,  1 },
+  {  0, -5,  7, 38, 28, -2, -3,  1 },
+  {  1, -5,  5, 36, 30, -1, -3,  1 },
+  {  1, -4,  3, 35, 32,  0, -4,  1 },
+  {  1, -4,  2, 33, 33,  2, -4,  1 },
+  {  1, -4,  0, 32, 35,  3, -4,  1 },
+  {  1, -3, -1, 30, 36,  5, -5,  1 },
+  {  1, -3, -2, 28, 38,  7, -5,  0 },
+  {  1, -2, -3, 26, 38,  9, -5,  0 },
+  {  1, -2, -4, 24, 39, 11, -5,  0 },
+  {  0, -1, -4, 21, 40, 13, -5,  0 },
+  {  0, -1, -5, 19, 41, 15, -5,  0 }
+};
+
+// 2x: luma taps picked by filterHor/filterVer when nFilterIdx == 4 (also at frac 0); one row per fractional position, every row sums to 64
+const TFilterCoeff InterpolationFilter::m_lumaFilterRPR2[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_LUMA] =
+{
+  { -4,  2, 20, 28, 20,  2, -4,  0 },
+  { -4,  0, 19, 29, 21,  5, -4, -2 },
+  { -4, -1, 18, 29, 22,  6, -4, -2 },
+  { -4, -1, 16, 29, 23,  7, -4, -2 },
+  { -4, -1, 16, 28, 24,  7, -4, -2 },
+  { -4, -1, 14, 28, 25,  8, -4, -2 },
+  { -3, -3, 14, 27, 26,  9, -3, -3 },
+  { -3, -1, 12, 28, 25, 10, -4, -3 },
+  { -3, -3, 11, 27, 27, 11, -3, -3 },
+  { -3, -4, 10, 25, 28, 12, -1, -3 },
+  { -3, -3,  9, 26, 27, 14, -3, -3 },
+  { -2, -4,  8, 25, 28, 14, -1, -4 },
+  { -2, -4,  7, 24, 28, 16, -1, -4 },
+  { -2, -4,  7, 23, 29, 16, -1, -4 },
+  { -2, -4,  6, 22, 29, 18, -1, -4 },
+  { -2, -4,  5, 21, 29, 19,  0, -4 }
+};
+
+const TFilterCoeff InterpolationFilter::m_lumaAltHpelIFilter[NTAPS_LUMA] = {  0, 3, 9, 20, 20, 9, 3, 0 }; // symmetric taps (sum 64) used by filterHor/filterVer for frac == 8 when useAltHpelIf is set and nFilterIdx is not 1..4
 const TFilterCoeff InterpolationFilter::m_chromaFilter[CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_CHROMA] =
 {
   {  0, 64,  0,  0 },
@@ -111,6 +173,80 @@ const TFilterCoeff InterpolationFilter::m_chromaFilter[CHROMA_INTERPOLATION_FILT
   {  0,  2, 63, -1 },
 };
 
+// 1.5x: chroma taps picked by filterHor/filterVer when nFilterIdx == 3; row index is frac << ( 1 - csx ) (csy for vertical), every row sums to 64
+const TFilterCoeff InterpolationFilter::m_chromaFilterRPR1[CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_CHROMA] =
+{
+  { 12, 40, 12,  0 },
+  { 11, 40, 13,  0 },
+  { 10, 40, 15, -1 },
+  {  9, 40, 16, -1 },
+  {  8, 40, 17, -1 },
+  {  8, 39, 18, -1 },
+  {  7, 39, 19, -1 },
+  {  6, 38, 21, -1 },
+  {  5, 38, 22, -1 },
+  {  4, 38, 23, -1 },
+  {  4, 37, 24, -1 },
+  {  3, 36, 25,  0 },
+  {  3, 35, 26,  0 },
+  {  2, 34, 28,  0 },
+  {  2, 33, 29,  0 },
+  {  1, 33, 30,  0 },
+  {  1, 31, 31,  1 },
+  {  0, 30, 33,  1 },
+  {  0, 29, 33,  2 },
+  {  0, 28, 34,  2 },
+  {  0, 26, 35,  3 },
+  {  0, 25, 36,  3 },
+  { -1, 24, 37,  4 },
+  { -1, 23, 38,  4 },
+  { -1, 22, 38,  5 },
+  { -1, 21, 38,  6 },
+  { -1, 19, 39,  7 },
+  { -1, 18, 39,  8 },
+  { -1, 17, 40,  8 },
+  { -1, 16, 40,  9 },
+  { -1, 15, 40, 10 },
+  {  0, 13, 40, 11 },
+};
+
+// 2x: chroma taps picked by filterHor/filterVer when nFilterIdx == 4; row index is frac << ( 1 - csx ) (csy for vertical), every row sums to 64
+const TFilterCoeff InterpolationFilter::m_chromaFilterRPR2[CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_CHROMA] =
+{
+  { 17, 30, 17,  0 },
+  { 17, 30, 18, -1 },
+  { 16, 30, 18,  0 },
+  { 16, 30, 18,  0 },
+  { 15, 30, 18,  1 },
+  { 14, 30, 18,  2 },
+  { 13, 29, 19,  3 },
+  { 13, 29, 19,  3 },
+  { 12, 29, 20,  3 },
+  { 11, 28, 21,  4 },
+  { 10, 28, 22,  4 },
+  { 10, 27, 22,  5 },
+  {  9, 27, 23,  5 },
+  {  9, 26, 24,  5 },
+  {  8, 26, 24,  6 },
+  {  7, 26, 25,  6 },
+  {  7, 25, 25,  7 },
+  {  6, 25, 26,  7 },
+  {  6, 24, 26,  8 },
+  {  5, 24, 26,  9 },
+  {  5, 23, 27,  9 },
+  {  5, 22, 27, 10 },
+  {  4, 22, 28, 10 },
+  {  4, 21, 28, 11 },
+  {  3, 20, 29, 12 },
+  {  3, 19, 29, 13 },
+  {  3, 19, 29, 13 },
+  {  2, 18, 30, 14 },
+  {  1, 18, 30, 15 },
+  {  0, 18, 30, 16 },
+  {  0, 18, 30, 16 },
+  { -1, 18, 30, 17 }
+};
+
 const TFilterCoeff InterpolationFilter::m_bilinearFilter[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_BILINEAR] =
 {
   { 64,  0, },
@@ -191,6 +327,7 @@ InterpolationFilter::InterpolationFilter()
   m_filterCopy[1][0]   = filterCopy<true, false>;
   m_filterCopy[1][1]   = filterCopy<true, true>;
 
+  m_weightedTriangleBlk = xWeightedTriangleBlk;
 }
 
 
@@ -226,11 +363,7 @@ void InterpolationFilter::filterCopy( const ClpRng& clpRng, const Pel *src, int
     {
       for (col = 0; col < width; col++)
       {
-#if HM_JEM_CLIP_PEL
         dst[col] = src[col];
-#else
-        dst[col] = ClipPel( src[col], clpRng );
-#endif
         JVET_J0090_CACHE_ACCESS( &src[col], __FILE__, __LINE__ );
       }
 
@@ -559,29 +692,61 @@ void InterpolationFilter::filterVer(const ClpRng& clpRng, Pel const *src, int sr
  * \param  fmt        Chroma format
  * \param  bitDepth   Bit depth
  */
-void InterpolationFilter::filterHor( const ComponentID compID, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng, int nFilterIdx, bool biMCForDMVR)
+void InterpolationFilter::filterHor(const ComponentID compID, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng, int nFilterIdx, bool biMCForDMVR, bool useAltHpelIf)
 {
-  if( frac == 0 )
+  if( frac == 0 && nFilterIdx < 2 ) // integer position is a plain copy only for nFilterIdx below 2; indices 2..4 still run their filter at frac 0
   {
-    m_filterCopy[true][isLast](clpRng, src, srcStride, dst, dstStride, width, height, biMCForDMVR);
+    m_filterCopy[true][isLast]( clpRng, src, srcStride, dst, dstStride, width, height, biMCForDMVR );
   }
   else if( isLuma( compID ) )
   {
     CHECK( frac < 0 || frac >= LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS, "Invalid fraction" );
     if( nFilterIdx == 1 )
     {
-      filterHor<NTAPS_BILINEAR>(clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_bilinearFilterPrec4[frac], biMCForDMVR);
+      filterHor<NTAPS_BILINEAR>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_bilinearFilterPrec4[frac], biMCForDMVR );
+    }
+    else if( nFilterIdx == 2 ) // caller explicitly requests the 4x4 luma taps
+    {
+      filterHor<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_lumaFilter4x4[frac], biMCForDMVR );
+    }
+    else if( nFilterIdx == 3 ) // RPR 1.5x luma taps
+    {
+      filterHor<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_lumaFilterRPR1[frac], biMCForDMVR );
+    }
+    else if( nFilterIdx == 4 ) // RPR 2x luma taps
+    {
+      filterHor<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_lumaFilterRPR2[frac], biMCForDMVR );
+    }
+    else if( frac == 8 && useAltHpelIf ) // half-sample position with the alternative half-pel filter enabled
+    {
+      filterHor<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_lumaAltHpelIFilter, biMCForDMVR );
+    }
+    else if( ( width == 4 && height == 4 ) || ( width == 4 && height == ( 4 + NTAPS_LUMA - 1 ) ) ) // 4x4 block; NOTE(review): the taller variant is presumably the first pass before a vertical filter -- confirm with callers
+    {
+      filterHor<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_lumaFilter4x4[frac], biMCForDMVR );
     }
     else
     {
-      filterHor<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_lumaFilter[frac], biMCForDMVR);
+      filterHor<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_lumaFilter[frac], biMCForDMVR );
+
     }
   }
   else
   {
     const uint32_t csx = getComponentScaleX( compID, fmt );
     CHECK( frac < 0 || csx >= 2 || ( frac << ( 1 - csx ) ) >= CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS, "Invalid fraction" );
-    filterHor<NTAPS_CHROMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_chromaFilter[frac << ( 1 - csx )], biMCForDMVR);
+    if( nFilterIdx == 3 ) // RPR 1.5x chroma taps
+    {
+      filterHor<NTAPS_CHROMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_chromaFilterRPR1[frac << ( 1 - csx )], biMCForDMVR );
+    }
+    else if( nFilterIdx == 4 ) // RPR 2x chroma taps
+    {
+      filterHor<NTAPS_CHROMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_chromaFilterRPR2[frac << ( 1 - csx )], biMCForDMVR );
+    }
+    else // every other filter index uses the regular chroma taps
+    {
+      filterHor<NTAPS_CHROMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_chromaFilter[frac << ( 1 - csx )], biMCForDMVR );
+    }
   }
 }
 
@@ -602,32 +767,159 @@ void InterpolationFilter::filterHor( const ComponentID compID, Pel const *src, i
  * \param  fmt        Chroma format
  * \param  bitDepth   Bit depth
  */
-void InterpolationFilter::filterVer( const ComponentID compID, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac, bool isFirst, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng, int nFilterIdx, bool biMCForDMVR)
+void InterpolationFilter::filterVer(const ComponentID compID, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac, bool isFirst, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng, int nFilterIdx, bool biMCForDMVR, bool useAltHpelIf)
 {
-  if( frac == 0 )
+  if( frac == 0 && nFilterIdx < 2 ) // integer position is a plain copy only for nFilterIdx below 2; indices 2..4 still run their filter at frac 0
   {
-    m_filterCopy[isFirst][isLast](clpRng, src, srcStride, dst, dstStride, width, height, biMCForDMVR);
+    m_filterCopy[isFirst][isLast]( clpRng, src, srcStride, dst, dstStride, width, height, biMCForDMVR );
   }
   else if( isLuma( compID ) )
   {
     CHECK( frac < 0 || frac >= LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS, "Invalid fraction" );
-    if (nFilterIdx == 1)
+    if( nFilterIdx == 1 )
+    {
+      filterVer<NTAPS_BILINEAR>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_bilinearFilterPrec4[frac], biMCForDMVR );
+    }
+    else if( nFilterIdx == 2 ) // caller explicitly requests the 4x4 luma taps
+    {
+      filterVer<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_lumaFilter4x4[frac], biMCForDMVR );
+    }
+    else if( nFilterIdx == 3 ) // RPR 1.5x luma taps
     {
-      filterVer<NTAPS_BILINEAR>(clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_bilinearFilterPrec4[frac], biMCForDMVR);
+      filterVer<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_lumaFilterRPR1[frac], biMCForDMVR );
+    }
+    else if( nFilterIdx == 4 ) // RPR 2x luma taps
+    {
+      filterVer<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_lumaFilterRPR2[frac], biMCForDMVR );
+    }
+    else if( frac == 8 && useAltHpelIf ) // half-sample position with the alternative half-pel filter enabled
+    {
+      filterVer<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_lumaAltHpelIFilter, biMCForDMVR );
+    }
+    else if( width == 4 && height == 4 ) // 4x4 output block (unlike filterHor, no taller variant is matched here)
+    {
+      filterVer<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_lumaFilter4x4[frac], biMCForDMVR );
     }
     else
     {
-      filterVer<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_lumaFilter[frac], biMCForDMVR);
+      filterVer<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_lumaFilter[frac], biMCForDMVR );
     }
   }
   else
   {
     const uint32_t csy = getComponentScaleY( compID, fmt );
     CHECK( frac < 0 || csy >= 2 || ( frac << ( 1 - csy ) ) >= CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS, "Invalid fraction" );
-    filterVer<NTAPS_CHROMA>(clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_chromaFilter[frac << (1 - csy)], biMCForDMVR);
+    if( nFilterIdx == 3 ) // RPR 1.5x chroma taps
+    {
+      filterVer<NTAPS_CHROMA>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_chromaFilterRPR1[frac << ( 1 - csy )], biMCForDMVR );
+    }
+    else if( nFilterIdx == 4 ) // RPR 2x chroma taps
+    {
+      filterVer<NTAPS_CHROMA>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_chromaFilterRPR2[frac << ( 1 - csy )], biMCForDMVR );
+    }
+    else // every other filter index uses the regular chroma taps
+    {
+      filterVer<NTAPS_CHROMA>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_chromaFilter[frac << ( 1 - csy )], biMCForDMVR );
+    }
   }
 }
 
+void InterpolationFilter::xWeightedTriangleBlk( const PredictionUnit &pu, const uint32_t width, const uint32_t height, const ComponentID compIdx, const bool splitDir, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1 ) // writes one component of predDst from predSrc0/predSrc1: single-source copies outside a 7-step blending band, weighted mix inside it; the band shifts horizontally from row to row
+{
+  Pel*    dst        = predDst .get(compIdx).buf;
+  Pel*    src0       = predSrc0.get(compIdx).buf;
+  Pel*    src1       = predSrc1.get(compIdx).buf;
+  int32_t strideDst  = predDst .get(compIdx).stride  - width; // stride minus width: added at the end of each row, after the pointer has advanced along it
+  int32_t strideSrc0 = predSrc0.get(compIdx).stride  - width;
+  int32_t strideSrc1 = predSrc1.get(compIdx).stride  - width;
+
+  const char    log2WeightBase    = 3; // weights are in eighths: the two weights used below always add up to 8
+  const ClpRng  clipRng           = pu.cu->slice->clpRngs().comp[compIdx];
+  const int32_t clipbd            = clipRng.bd;
+  const int32_t shiftDefault      = std::max<int>(2, (IF_INTERNAL_PREC - clipbd)); // shift for samples taken from a single source
+  const int32_t offsetDefault     = (1<<(shiftDefault-1)) + IF_INTERNAL_OFFS; // rounding term plus IF_INTERNAL_OFFS
+  const int32_t shiftWeighted     = std::max<int>(2, (IF_INTERNAL_PREC - clipbd)) + log2WeightBase; // same shift plus the weight precision
+  const int32_t offsetWeighted    = (1 << (shiftWeighted - 1)) + (IF_INTERNAL_OFFS << log2WeightBase);
+  int32_t stepX = 1 << getComponentScaleX(compIdx, pu.chromaFormat); // x/y below run on the scaled-up widthY x heightY grid; this component has a sample every stepX (stepY) positions
+  int32_t stepY = 1 << getComponentScaleY(compIdx, pu.chromaFormat);
+
+  int32_t widthY = width << getComponentScaleX(compIdx, pu.chromaFormat);
+  int32_t heightY = height << getComponentScaleY(compIdx, pu.chromaFormat);
+
+  int32_t ratioWH = (widthY > heightY) ? (widthY / heightY) : 1; // horizontal positions covered by one weight step (wide blocks)
+  int32_t ratioHW = (widthY > heightY) ? 1 : (heightY / widthY); // rows sharing one band position (tall blocks)
+
+  int32_t weightedLength = 7; // number of weight steps across the band
+  int32_t weightedStartPos = (splitDir == 0) ? (0 - (weightedLength >> 1) * ratioWH) : (widthY - ((weightedLength + 1) >> 1) * ratioWH); // left edge of the band in the first row; may be negative
+        int32_t weightedEndPos    = weightedStartPos + weightedLength * ratioWH - 1; // right edge of the band (inclusive)
+        int32_t weightedPosoffset = ( splitDir == 0 ) ? ratioWH : -ratioWH; // band movement per outer-loop step: rightwards for splitDir == 0, leftwards otherwise
+
+        Pel     tmpPelWeighted;
+        int32_t weightIdx;
+        int32_t x, y, tmpX, tmpY, tmpWeightedStart, tmpWeightedEnd;
+  for (y = 0; y < heightY; y += ratioHW)
+  {
+    if (y % stepY != 0) // row is not on this component's grid: only move the band, write nothing
+    {
+      weightedStartPos += weightedPosoffset;
+      weightedEndPos += weightedPosoffset;
+      continue;
+    }
+    for (tmpY = ratioHW; tmpY > 0; tmpY -= stepY) // emit the rows that share the current band position
+    {
+      for (x = 0; x < weightedStartPos; x += stepX) // left of the band: take one source only
+      {
+        *dst++ = ClipPel( rightShift( (splitDir == 0 ? *src1 : *src0) + offsetDefault, shiftDefault), clipRng );
+        src0++;
+        src1++;
+      }
+
+      tmpWeightedStart = std::max((int32_t)0, weightedStartPos); // clamp the band to the block
+      tmpWeightedEnd = std::min(weightedEndPos, (int32_t)(widthY - 1));
+      weightIdx        = 1;
+      if( weightedStartPos < 0 ) // band begins left of the block: skip the weight steps that fall outside
+      {
+        weightIdx     += abs(weightedStartPos) / ratioWH;
+      }
+      for( x = tmpWeightedStart; x <= tmpWeightedEnd; x+= ratioWH )
+      {
+        if (x % stepX != 0) // position is not on this component's grid: consume its weight step without writing
+        {
+          weightIdx++;
+          continue;
+        }
+
+        for (tmpX = ratioWH; tmpX > 0; tmpX -= stepX) // samples that share this weight step
+        {
+          tmpPelWeighted = Clip3(1, 7, weightIdx);
+          tmpPelWeighted = splitDir ? ( 8 - tmpPelWeighted ) : tmpPelWeighted; // mirror the weight for the other split direction
+          *dst++         = ClipPel( rightShift( (tmpPelWeighted*(*src0++) + ((8 - tmpPelWeighted) * (*src1++)) + offsetWeighted), shiftWeighted ), clipRng );
+        }
+        weightIdx ++;
+      }
+
+      int32_t start = ((weightedEndPos + 1) % stepX != 0) ? (weightedEndPos + 2) : (weightedEndPos + 1); // first position right of the band, bumped by one if off-grid; NOTE(review): lands on the grid only for stepX <= 2 -- confirm
+      for (x = start; x < widthY; x += stepX) // right of the band: take the other source only
+      {
+        *dst++ = ClipPel( rightShift( (splitDir == 0 ? *src0 : *src1) + offsetDefault, shiftDefault ), clipRng );
+        src0++;
+        src1++;
+      }
+
+      dst  += strideDst; // step to the start of the next row
+      src0 += strideSrc0;
+      src1 += strideSrc1;
+    }
+    weightedStartPos += weightedPosoffset;
+    weightedEndPos   += weightedPosoffset;
+  }
+}
+
+void InterpolationFilter::weightedTriangleBlk(const PredictionUnit &pu, const uint32_t width, const uint32_t height, const ComponentID compIdx, const bool splitDir, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1)
+{
+  m_weightedTriangleBlk(pu, width, height, compIdx, splitDir, predDst, predSrc0, predSrc1); // forwards unchanged through the function pointer, which the constructor sets to xWeightedTriangleBlk
+}
+
 /**
  * \brief turn on SIMD fuc
  *
diff --git a/source/Lib/CommonLib/InterpolationFilter.h b/source/Lib/CommonLib/InterpolationFilter.h
index 21f698e816e67198ffb6b671cda8bb869fdbb1e3..3c8422cc537fab474a4a4fb061b29c23320e1753 100644
--- a/source/Lib/CommonLib/InterpolationFilter.h
+++ b/source/Lib/CommonLib/InterpolationFilter.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -55,8 +55,16 @@
  */
 class InterpolationFilter
 {
+  static const TFilterCoeff m_lumaFilter4x4[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_LUMA]; ///< Luma filter taps for 4x4 blocks
+public:
   static const TFilterCoeff m_lumaFilter[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_LUMA]; ///< Luma filter taps
   static const TFilterCoeff m_chromaFilter[CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_CHROMA]; ///< Chroma filter taps
+  static const TFilterCoeff m_lumaFilterRPR1[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_LUMA]; ///< Luma filter taps 1.5x
+  static const TFilterCoeff m_lumaFilterRPR2[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_LUMA]; ///< Luma filter taps 2x
+  static const TFilterCoeff m_chromaFilterRPR1[CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_CHROMA]; ///< Chroma filter taps 1.5x
+  static const TFilterCoeff m_chromaFilterRPR2[CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_CHROMA]; ///< Chroma filter taps 2x
+private:
+  static const TFilterCoeff m_lumaAltHpelIFilter[NTAPS_LUMA]; ///< Alternative half-pel luma filter taps (single row, no per-fraction index)
   static const TFilterCoeff m_bilinearFilter[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_BILINEAR]; ///< bilinear filter taps
   static const TFilterCoeff m_bilinearFilterPrec4[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_BILINEAR]; ///< bilinear filter taps
 public:
@@ -71,6 +79,8 @@ public:
   template<int N>
   void filterVer(const ClpRng& clpRng, Pel const* src, int srcStride, Pel *dst, int dstStride, int width, int height, bool isFirst, bool isLast, TFilterCoeff const *coeff, bool biMCForDMVR);
 
+  static void xWeightedTriangleBlk(const PredictionUnit &pu, const uint32_t width, const uint32_t height, const ComponentID compIdx, const bool splitDir, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1);
+  void weightedTriangleBlk(const PredictionUnit &pu, const uint32_t width, const uint32_t height, const ComponentID compIdx, const bool splitDir, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1);
 protected:
 #if JVET_J0090_MEMORY_BANDWITH_MEASURE
   static CacheModel* m_cacheModel;
@@ -81,6 +91,7 @@ public:
   void( *m_filterHor[3][2][2] )( const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, TFilterCoeff const *coeff, bool biMCForDMVR);
   void( *m_filterVer[3][2][2] )( const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, TFilterCoeff const *coeff, bool biMCForDMVR);
   void( *m_filterCopy[2][2] )  ( const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, bool biMCForDMVR);
+  void( *m_weightedTriangleBlk )(const PredictionUnit &pu, const uint32_t width, const uint32_t height, const ComponentID compIdx, const bool splitDir, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1); ///< routine invoked by weightedTriangleBlk(); the constructor points it at xWeightedTriangleBlk
 
   void initInterpolationFilter( bool enable );
 #ifdef TARGET_SIMD_X86
@@ -88,8 +99,8 @@ public:
   template <X86_VEXT vext>
   void _initInterpolationFilterX86();
 #endif
-  void filterHor(const ComponentID compID, Pel const* src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac,               bool isLast, const ChromaFormat fmt, const ClpRng& clpRng, int nFilterIdx = 0, bool biMCForDMVR = false);
-  void filterVer(const ComponentID compID, Pel const* src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac, bool isFirst, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng, int nFilterIdx = 0, bool biMCForDMVR = false);
+  void filterHor(const ComponentID compID, Pel const* src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac,               bool isLast, const ChromaFormat fmt, const ClpRng& clpRng, int nFilterIdx = 0, bool biMCForDMVR = false, bool useAltHpelIf = false);
+  void filterVer(const ComponentID compID, Pel const* src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac, bool isFirst, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng, int nFilterIdx = 0, bool biMCForDMVR = false, bool useAltHpelIf = false);
 #if JVET_J0090_MEMORY_BANDWITH_MEASURE
   void cacheAssign( CacheModel *cache ) { m_cacheModel = cache; }
 #endif
diff --git a/source/Lib/CommonLib/IntraPrediction.cpp b/source/Lib/CommonLib/IntraPrediction.cpp
index 980b4b546e39b102ac13d102ad8f371057ce6199..07d2b0a0a5ad67b7f7a6586ee5db6334fb0c538b 100644
--- a/source/Lib/CommonLib/IntraPrediction.cpp
+++ b/source/Lib/CommonLib/IntraPrediction.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -55,64 +55,18 @@
 // Tables
 // ====================================================================================================================
 
-const uint8_t IntraPrediction::m_aucIntraFilter[MAX_NUM_CHANNEL_TYPE][MAX_INTRA_FILTER_DEPTHS] =
+const uint8_t IntraPrediction::m_aucIntraFilter[MAX_INTRA_FILTER_DEPTHS] =
 {
-  { // Luma
-    20, //   1xn
-    20, //   2xn
-    20, //   4xn
-    14, //   8xn
-    2,  //  16xn
-    0,  //  32xn
-    0,  //  64xn
-    0,  // 128xn
-  },
-  { // Chroma
-    40, //   1xn
-    40, //   2xn
-    40, //   4xn
-    28, //   8xn
-    4,  //  16xn
-    0,  //  32xn
-    0,  //  64xn
-    0,  // 128xn
-  }
+  24, //   1xn
+  24, //   2xn
+  24, //   4xn
+  14, //   8xn
+  2,  //  16xn
+  0,  //  32xn
+  0,  //  64xn
+  0   // 128xn
 };
 
-const TFilterCoeff g_intraGaussFilter[32][4] = {
-  { 16, 32, 16, 0 },
-  { 15, 29, 17, 3 },
-  { 15, 29, 17, 3 },
-  { 14, 29, 18, 3 },
-  { 13, 29, 18, 4 },
-  { 13, 28, 19, 4 },
-  { 13, 28, 19, 4 },
-  { 12, 28, 20, 4 },
-  { 11, 28, 20, 5 },
-  { 11, 27, 21, 5 },
-  { 10, 27, 22, 5 },
-  { 9, 27, 22, 6 },
-  { 9, 26, 23, 6 },
-  { 9, 26, 23, 6 },
-  { 8, 25, 24, 7 },
-  { 8, 25, 24, 7 },
-  { 8, 24, 24, 8 },
-  { 7, 24, 25, 8 },
-  { 7, 24, 25, 8 },
-  { 6, 23, 26, 9 },
-  { 6, 23, 26, 9 },
-  { 6, 22, 27, 9 },
-  { 5, 22, 27, 10 },
-  { 5, 21, 27, 11 },
-  { 5, 20, 28, 11 },
-  { 4, 20, 28, 12 },
-  { 4, 19, 28, 13 },
-  { 4, 19, 28, 13 },
-  { 4, 18, 29, 13 },
-  { 3, 18, 29, 14 },
-  { 3, 17, 29, 15 },
-  { 3, 17, 29, 15 }
-};
 
 // ====================================================================================================================
 // Constructor / destructor / initialize
@@ -122,13 +76,6 @@ IntraPrediction::IntraPrediction()
 :
   m_currChromaFormat( NUM_CHROMA_FORMAT )
 {
-  for (uint32_t ch = 0; ch < MAX_NUM_COMPONENT; ch++)
-  {
-    for (uint32_t buf = 0; buf < NUM_PRED_BUF; buf++)
-    {
-      m_piYuvExt[ch][buf] = nullptr;
-    }
-  }
   for (uint32_t ch = 0; ch < MAX_NUM_COMPONENT; ch++)
   {
     for (uint32_t buf = 0; buf < 4; buf++)
@@ -148,14 +95,6 @@ IntraPrediction::~IntraPrediction()
 
 void IntraPrediction::destroy()
 {
-  for (uint32_t ch = 0; ch < MAX_NUM_COMPONENT; ch++)
-  {
-    for (uint32_t buf = 0; buf < NUM_PRED_BUF; buf++)
-    {
-      delete[] m_piYuvExt[ch][buf];
-      m_piYuvExt[ch][buf] = nullptr;
-    }
-  }
   for (uint32_t ch = 0; ch < MAX_NUM_COMPONENT; ch++)
   {
     for (uint32_t buf = 0; buf < 4; buf++)
@@ -173,11 +112,6 @@ void IntraPrediction::destroy()
 
 void IntraPrediction::init(ChromaFormat chromaFormatIDC, const unsigned bitDepthY)
 {
-  // if it has been initialised before, but the chroma format has changed, release the memory and start again.
-  if (m_piYuvExt[COMPONENT_Y][PRED_BUF_UNFILTERED] != nullptr && m_currChromaFormat != chromaFormatIDC)
-  {
-    destroy();
-  }
 
   if (m_yuvExt2[COMPONENT_Y][0] != nullptr && m_currChromaFormat != chromaFormatIDC)
   {
@@ -186,18 +120,6 @@ void IntraPrediction::init(ChromaFormat chromaFormatIDC, const unsigned bitDepth
 
   m_currChromaFormat = chromaFormatIDC;
 
-  if (m_piYuvExt[COMPONENT_Y][PRED_BUF_UNFILTERED] == nullptr) // check if first is null (in which case, nothing initialised yet)
-  {
-    m_iYuvExtSize = (MAX_CU_SIZE * 2 + 1 + MAX_REF_LINE_IDX * 33) * (MAX_CU_SIZE * 2 + 1 + MAX_REF_LINE_IDX * 33);
-
-    for (uint32_t ch = 0; ch < MAX_NUM_COMPONENT; ch++)
-    {
-      for (uint32_t buf = 0; buf < NUM_PRED_BUF; buf++)
-      {
-        m_piYuvExt[ch][buf] = new Pel[m_iYuvExtSize];
-      }
-    }
-  }
 
   if (m_yuvExt2[COMPONENT_Y][0] == nullptr) // check if first is null (in which case, nothing initialised yet)
   {
@@ -237,21 +159,21 @@ Pel IntraPrediction::xGetPredValDc( const CPelBuf &pSrc, const Size &dstSize )
   const int width  = dstSize.width;
   const int height = dstSize.height;
   const auto denom     = (width == height) ? (width << 1) : std::max(width,height);
-  const auto divShift  = g_aucLog2[denom];
+  const auto divShift  = floorLog2(denom);
   const auto divOffset = (denom >> 1);
 
   if ( width >= height )
   {
     for( idx = 0; idx < width; idx++ )
     {
-      sum += pSrc.at( 1 + idx, 0 );
+      sum += pSrc.at(m_ipaParam.multiRefIndex + 1 + idx, 0);
     }
   }
   if ( width <= height )
   {
     for( idx = 0; idx < height; idx++ )
     {
-      sum += pSrc.at( 0, 1 + idx );
+      sum += pSrc.at(m_ipaParam.multiRefIndex + 1 + idx, 1);
     }
   }
 
@@ -259,141 +181,85 @@ Pel IntraPrediction::xGetPredValDc( const CPelBuf &pSrc, const Size &dstSize )
   return dcVal;
 }
 
-  int IntraPrediction::getWideAngle( int width, int height, int predMode )
+int IntraPrediction::getWideAngle( int width, int height, int predMode )
+{
+  if ( predMode > DC_IDX && predMode <= VDIA_IDX )
   {
-    if ( predMode > DC_IDX && predMode <= VDIA_IDX )
+    int modeShift[] = { 0, 6, 10, 12, 14, 15 };
+    int deltaSize = abs(floorLog2(width) - floorLog2(height));
+    if (width > height && predMode < 2 + modeShift[deltaSize])
     {
-      int modeShift[] = { 0, 6, 10, 12, 14, 15 };
-      int deltaSize = abs(g_aucLog2[width] - g_aucLog2[height]);
-      if (width > height && predMode < 2 + modeShift[deltaSize])
-      {
-        predMode += (VDIA_IDX - 1);
-      }
-      else if (height > width && predMode > VDIA_IDX - modeShift[deltaSize])
-      {
-        predMode -= (VDIA_IDX - 1);
-      }
+      predMode += (VDIA_IDX - 1);
+    }
+    else if (height > width && predMode > VDIA_IDX - modeShift[deltaSize])
+    {
+      predMode -= (VDIA_IDX - 1);
     }
-    return predMode;
   }
+  return predMode;
+}
 
-  void IntraPrediction::setReferenceArrayLengths( const CompArea &area )
-  {
-    // set Top and Left reference samples length
-    const int  width    = area.width;
-    const int  height   = area.height;
+void IntraPrediction::setReferenceArrayLengths( const CompArea &area )
+{
+  // set Top and Left reference samples length
+  const int  width    = area.width;
+  const int  height   = area.height;
 
-    m_leftRefLength     = (height << 1);
-    m_topRefLength      = (width << 1);
+  m_leftRefLength     = (height << 1);
+  m_topRefLength      = (width << 1);
 
-  }
+}
 
-void IntraPrediction::predIntraAng( const ComponentID compId, PelBuf &piPred, const PredictionUnit &pu, const bool useFilteredPredSamples )
+void IntraPrediction::predIntraAng( const ComponentID compId, PelBuf &piPred, const PredictionUnit &pu)
 {
   const ComponentID    compID       = MAP_CHROMA( compId );
   const ChannelType    channelType  = toChannelType( compID );
   const int            iWidth       = piPred.width;
   const int            iHeight      = piPred.height;
-  const Size           cuSize       = Size( pu.cu->blocks[compId].width, pu.cu->blocks[compId].height );
-  const uint32_t           uiDirMode    = PU::getFinalIntraMode( pu, channelType );
-
+  CHECK(iWidth == 2, "Width of 2 is not supported");
+  const uint32_t       uiDirMode    = isLuma( compId ) && pu.cu->bdpcmMode ? BDPCM_IDX : !isLuma(compId) && pu.cu->bdpcmModeChroma ? BDPCM_IDX : PU::getFinalIntraMode(pu, channelType);
 
-  CHECK( g_aucLog2[iWidth] < 2 && pu.cs->pcv->noChroma2x2, "Size not allowed" );
-  CHECK( g_aucLog2[iWidth] > 7, "Size not allowed" );
+  CHECK( floorLog2(iWidth) < 2 && pu.cs->pcv->noChroma2x2, "Size not allowed" );
+  CHECK( floorLog2(iWidth) > 7, "Size not allowed" );
 
-  const int  multiRefIdx = (compID == COMPONENT_Y) ? pu.multiRefIdx : 0;
-  const bool useISP = pu.cu->ispMode && isLuma( compID );
-  const int whRatio = useISP ? std::max( unsigned( 1 ), cuSize.width / cuSize.height ) : std::max( 1, iWidth / iHeight );
-  const int hwRatio = useISP ? std::max( unsigned( 1 ), cuSize.height / cuSize.width ) : std::max( 1, iHeight / iWidth );
-  const int  srcStride  = m_topRefLength  + 1 + (whRatio + 1) * multiRefIdx;
-  const int  srcHStride = m_leftRefLength + 1 + (hwRatio + 1) * multiRefIdx;
+  const int srcStride  = m_refBufferStride[compID];
+  const int srcHStride = 2;
 
-  Pel *ptrSrc = getPredictorPtr(compID, useFilteredPredSamples);
+  const CPelBuf & srcBuf = CPelBuf(getPredictorPtr(compID), srcStride, srcHStride);
   const ClpRng& clpRng(pu.cu->cs->slice->clpRng(compID));
 
   switch (uiDirMode)
   {
-    case(PLANAR_IDX): xPredIntraPlanar(CPelBuf(ptrSrc, srcStride, srcHStride), piPred, *pu.cs->sps); break;
-    case(DC_IDX):     xPredIntraDc(CPelBuf(ptrSrc, srcStride, srcHStride), piPred, channelType, false); break;
-    case(2):
-    case(DIA_IDX):
-    case(VDIA_IDX):
-      if (getWideAngle(useISP ? cuSize.width : iWidth, useISP ? cuSize.height : iHeight, uiDirMode) == static_cast<int>(uiDirMode)) // check if uiDirMode is not wide-angle
-      {
-        xPredIntraAng(CPelBuf(ptrSrc, srcStride, srcHStride), piPred, channelType, uiDirMode, clpRng, *pu.cs->sps, multiRefIdx, useFilteredPredSamples, useISP, cuSize );
-        break;
-      }
-    default:          xPredIntraAng(CPelBuf(getPredictorPtr(compID, false), srcStride, srcHStride), piPred, channelType, uiDirMode, clpRng, *pu.cs->sps, multiRefIdx, useFilteredPredSamples, useISP, cuSize); break;
+    case(PLANAR_IDX): xPredIntraPlanar(srcBuf, piPred); break;
+    case(DC_IDX):     xPredIntraDc(srcBuf, piPred, channelType, false); break;
+    case(BDPCM_IDX):  xPredIntraBDPCM(srcBuf, piPred, isLuma(compID) ? pu.cu->bdpcmMode : pu.cu->bdpcmModeChroma, clpRng); break;
+    default:          xPredIntraAng(srcBuf, piPred, channelType, clpRng); break;
   }
 
-  bool pdpcCondition = (uiDirMode == PLANAR_IDX || uiDirMode == DC_IDX || uiDirMode == HOR_IDX || uiDirMode == VER_IDX);
-  if( pdpcCondition && multiRefIdx == 0 && !useISP )
+  if (m_ipaParam.applyPDPC)
   {
-    const CPelBuf srcBuf = CPelBuf(ptrSrc, srcStride, srcStride);
     PelBuf dstBuf = piPred;
-    const int scale = ((g_aucLog2[iWidth] - 2 + g_aucLog2[iHeight] - 2 + 2) >> 2);
+    const int scale = ((floorLog2(iWidth) - 2 + floorLog2(iHeight) - 2 + 2) >> 2);
     CHECK(scale < 0 || scale > 31, "PDPC: scale < 0 || scale > 31");
 
-    if (uiDirMode == PLANAR_IDX)
-    {
-      for (int y = 0; y < iHeight; y++)
-      {
-        int wT = 32 >> std::min(31, ((y << 1) >> scale));
-        const Pel left = srcBuf.at(0, y + 1);
-        for (int x = 0; x < iWidth; x++)
-        {
-          const Pel top = srcBuf.at(x + 1, 0);
-          int wL = 32 >> std::min(31, ((x << 1) >> scale));
-          dstBuf.at(x, y) = ClipPel((wL * left + wT * top + (64 - wL - wT) * dstBuf.at(x, y) + 32) >> 6, clpRng);
-        }
-      }
-    }
-    else if (uiDirMode == DC_IDX)
+    if (uiDirMode == PLANAR_IDX || uiDirMode == DC_IDX)
     {
-      const Pel topLeft = srcBuf.at(0, 0);
       for (int y = 0; y < iHeight; y++)
       {
-        int wT = 32 >> std::min(31, ((y << 1) >> scale));
-        const Pel left = srcBuf.at(0, y + 1);
+        const int wT   = 32 >> std::min(31, ((y << 1) >> scale));
+        const Pel left = srcBuf.at(y + 1, 1);
         for (int x = 0; x < iWidth; x++)
         {
-          const Pel top = srcBuf.at(x + 1, 0);
-          int wL = 32 >> std::min(31, ((x << 1) >> scale));
-          int wTL = (wL >> 4) + (wT >> 4);
-          dstBuf.at(x, y) = ClipPel((wL * left + wT * top - wTL * topLeft + (64 - wL - wT + wTL) * dstBuf.at(x, y) + 32) >> 6, clpRng);
-        }
-      }
-    }
-    else if (uiDirMode == HOR_IDX)
-    {
-      const Pel topLeft = srcBuf.at(0, 0);
-      for (int y = 0; y < iHeight; y++)
-      {
-        int wT = 32 >> std::min(31, ((y << 1) >> scale));
-        for (int x = 0; x < iWidth; x++)
-        {
-          const Pel top = srcBuf.at(x + 1, 0);
-          int wTL = wT;
-          dstBuf.at(x, y) = ClipPel((wT * top - wTL * topLeft + (64 - wT + wTL) * dstBuf.at(x, y) + 32) >> 6, clpRng);
-        }
-      }
-    }
-    else if (uiDirMode == VER_IDX)
-    {
-      const Pel topLeft = srcBuf.at(0, 0);
-      for (int y = 0; y < iHeight; y++)
-      {
-        const Pel left = srcBuf.at(0, y + 1);
-        for (int x = 0; x < iWidth; x++)
-        {
-          int wL = 32 >> std::min(31, ((x << 1) >> scale));
-          int wTL = wL;
-          dstBuf.at(x, y) = ClipPel((wL * left - wTL * topLeft + (64 - wL + wTL) * dstBuf.at(x, y) + 32) >> 6, clpRng);
+          const int wL    = 32 >> std::min(31, ((x << 1) >> scale));
+          const Pel top   = srcBuf.at(x + 1, 0);
+          const Pel val   = dstBuf.at(x, y);
+          dstBuf.at(x, y) = val + ((wL * (left - val) + wT * (top - val) + 32) >> 6);
         }
       }
     }
   }
 }
+
 void IntraPrediction::predIntraChromaLM(const ComponentID compID, PelBuf &piPred, const PredictionUnit &pu, const CompArea& chromaArea, int intraDir)
 {
   int  iLumaStride = 0;
@@ -405,8 +271,8 @@ void IntraPrediction::predIntraChromaLM(const ComponentID compID, PelBuf &piPred
   }
   else
   {
-  iLumaStride = MAX_CU_SIZE + 1;
-  Temp = PelBuf(m_piTemp + iLumaStride + 1, iLumaStride, Size(chromaArea));
+    iLumaStride = MAX_CU_SIZE + 1;
+    Temp = PelBuf(m_piTemp + iLumaStride + 1, iLumaStride, Size(chromaArea));
   }
   int a, b, iShift;
   xGetLMParameters(pu, compID, chromaArea, a, b, iShift);
@@ -416,30 +282,17 @@ void IntraPrediction::predIntraChromaLM(const ComponentID compID, PelBuf &piPred
   piPred.linearTransform(a, iShift, b, true, pu.cs->slice->clpRng(compID));
 }
 
-void IntraPrediction::xFilterGroup(Pel* pMulDst[], int i, Pel const * const piSrc, int iRecStride, bool bAboveAvaillable, bool bLeftAvaillable)
-{
-  pMulDst[0][i] = (piSrc[1] + piSrc[iRecStride + 1] + 1) >> 1;
-
-  pMulDst[1][i] = (piSrc[iRecStride] + piSrc[iRecStride + 1] + 1) >> 1;
-
-  pMulDst[3][i] = (piSrc[0] + piSrc[1] + 1) >> 1;
-
-  pMulDst[2][i] = (piSrc[0] + piSrc[1] + piSrc[iRecStride] + piSrc[iRecStride + 1] + 2) >> 2;
-
-}
-
-
-
 /** Function for deriving planar intra prediction. This function derives the prediction samples for planar mode (intra coding).
  */
 
 //NOTE: Bit-Limit - 24-bit source
-void IntraPrediction::xPredIntraPlanar( const CPelBuf &pSrc, PelBuf &pDst, const SPS& sps )
+void IntraPrediction::xPredIntraPlanar( const CPelBuf &pSrc, PelBuf &pDst )
 {
   const uint32_t width  = pDst.width;
   const uint32_t height = pDst.height;
-  const uint32_t log2W  = g_aucLog2[width  < 2 ? 2 : width];
-  const uint32_t log2H  = g_aucLog2[height < 2 ? 2 : height];
+
+  const uint32_t log2W = floorLog2( width );
+  const uint32_t log2H = floorLog2( height );
 
   int leftColumn[MAX_CU_SIZE + 1], topRow[MAX_CU_SIZE + 1], bottomRow[MAX_CU_SIZE], rightColumn[MAX_CU_SIZE];
   const uint32_t offset = 1 << (log2W + log2H);
@@ -452,7 +305,7 @@ void IntraPrediction::xPredIntraPlanar( const CPelBuf &pSrc, PelBuf &pDst, const
 
   for( int k = 0; k < height + 1; k++ )
   {
-    leftColumn[k] = pSrc.at( 0, k + 1 );
+    leftColumn[k] = pSrc.at(k + 1, 1);
   }
 
   // Prepare intermediate variables used in interpolation
@@ -488,55 +341,103 @@ void IntraPrediction::xPredIntraPlanar( const CPelBuf &pSrc, PelBuf &pDst, const
     }
   }
 }
-
-
-
-
 void IntraPrediction::xPredIntraDc( const CPelBuf &pSrc, PelBuf &pDst, const ChannelType channelType, const bool enableBoundaryFilter )
 {
   const Pel dcval = xGetPredValDc( pSrc, pDst );
   pDst.fill( dcval );
-
-#if HEVC_USE_DC_PREDFILTERING
-  if( enableBoundaryFilter )
-  {
-    xDCPredFiltering( pSrc, pDst, channelType );
-  }
-#endif
 }
 
-#if HEVC_USE_DC_PREDFILTERING
-/** Function for filtering intra DC predictor. This function performs filtering left and top edges of the prediction samples for DC mode (intra coding).
- */
-void IntraPrediction::xDCPredFiltering(const CPelBuf &pSrc, PelBuf &pDst, const ChannelType &channelType)
+// Function for initialization of intra prediction parameters
+void IntraPrediction::initPredIntraParams(const PredictionUnit & pu, const CompArea area, const SPS& sps)
 {
-  uint32_t iWidth = pDst.width;
-  uint32_t iHeight = pDst.height;
-  int x, y;
+  const ComponentID compId = area.compID;
+  const ChannelType chType = toChannelType(compId);
+
+  const bool        useISP = NOT_INTRA_SUBPARTITIONS != pu.cu->ispMode && isLuma( chType );
+
+  const Size   cuSize    = Size( pu.cu->blocks[compId].width, pu.cu->blocks[compId].height );
+  const Size   puSize    = Size( area.width, area.height );
+  const Size&  blockSize = useISP ? cuSize : puSize;
+  const int      dirMode = PU::getFinalIntraMode(pu, chType);
+  const int     predMode = getWideAngle( blockSize.width, blockSize.height, dirMode );
+
+  m_ipaParam.isModeVer            = predMode >= DIA_IDX;
+  m_ipaParam.multiRefIndex        = isLuma (chType) ? pu.multiRefIdx : 0 ;
+  m_ipaParam.refFilterFlag        = false;
+  m_ipaParam.interpolationFlag    = false;
+  m_ipaParam.applyPDPC            = ((puSize.width >= MIN_TB_SIZEY && puSize.height >= MIN_TB_SIZEY) || !isLuma(compId)) && m_ipaParam.multiRefIndex == 0;
 
-  if (isLuma(channelType) && (iWidth <= MAXIMUM_INTRA_FILTERED_WIDTH) && (iHeight <= MAXIMUM_INTRA_FILTERED_HEIGHT))
+  const int    intraPredAngleMode = (m_ipaParam.isModeVer) ? predMode - VER_IDX : -(predMode - HOR_IDX);
+
+
+  int absAng = 0;
+  if (dirMode > DC_IDX && dirMode < NUM_LUMA_MODE) // intraPredAngle for directional modes
   {
-    //top-left
-    pDst.at(0, 0) = (Pel)((pSrc.at(1, 0) + pSrc.at(0, 1) + 2 * pDst.at(0, 0) + 2) >> 2);
+    static const int angTable[32]    = { 0,    1,    2,    3,    4,    6,     8,   10,   12,   14,   16,   18,   20,   23,   26,   29,   32,   35,   39,  45,  51,  57,  64,  73,  86, 102, 128, 171, 256, 341, 512, 1024 };
+    static const int invAngTable[32] = {
+      0,   16384, 8192, 5461, 4096, 2731, 2048, 1638, 1365, 1170, 1024, 910, 819, 712, 630, 565,
+      512, 468,   420,  364,  321,  287,  256,  224,  191,  161,  128,  96,  64,  48,  32,  16
+    };   // (512 * 32) / Angle
+
+    const int     absAngMode         = abs(intraPredAngleMode);
+    const int     signAng            = intraPredAngleMode < 0 ? -1 : 1;
+                  absAng             = angTable  [absAngMode];
 
-    //top row (vertical filter)
-    for ( x = 1; x < iWidth; x++ )
+    m_ipaParam.invAngle              = invAngTable[absAngMode];
+    m_ipaParam.intraPredAngle        = signAng * absAng;
+    if (intraPredAngleMode < 0)
     {
-      pDst.at(x, 0) = (Pel)((pSrc.at(x + 1, 0)  +  3 * pDst.at(x, 0) + 2) >> 2);
+      m_ipaParam.applyPDPC = false;
     }
-
-    //left column (horizontal filter)
-    for ( y = 1; y < iHeight; y++ )
+    else if (intraPredAngleMode > 0)
     {
-      pDst.at(0, y) = (Pel)((pSrc.at(0, y + 1) + 3 * pDst.at(0, y) + 2) >> 2);
+      const int sideSize = m_ipaParam.isModeVer ? puSize.height : puSize.width;
+      const int maxScale = 2;
+
+      m_ipaParam.angularScale = std::min(maxScale, floorLog2(sideSize) - (floorLog2(3 * m_ipaParam.invAngle - 2) - 8));
+      m_ipaParam.applyPDPC &= m_ipaParam.angularScale >= 0;
     }
   }
 
-  return;
+  // high level conditions and DC intra prediction
+  if(   sps.getSpsRangeExtension().getIntraSmoothingDisabledFlag()
+    || !isLuma( chType )
+    || useISP
+    || PU::isMIP( pu, chType )
+    || m_ipaParam.multiRefIndex
+    || DC_IDX == dirMode
+    )
+  {
+  }
+  else if ((isLuma(chType) && pu.cu->bdpcmMode) || (!isLuma(chType) && pu.cu->bdpcmModeChroma)) // BDPCM
+  {
+    m_ipaParam.refFilterFlag = false;
+  }
+  else if (dirMode == PLANAR_IDX) // Planar intra prediction
+  {
+    m_ipaParam.refFilterFlag = puSize.width * puSize.height > 32 ? true : false;
+  }
+  else if (!useISP)// HOR, VER and angular modes (MDIS)
+  {
+    bool filterFlag = false;
+    {
+      const int diff = std::min<int>( abs( predMode - HOR_IDX ), abs( predMode - VER_IDX ) );
+      const int log2Size = ((floorLog2(puSize.width) + floorLog2(puSize.height)) >> 1);
+      CHECK( log2Size >= MAX_INTRA_FILTER_DEPTHS, "Size not supported" );
+      filterFlag = (diff > m_aucIntraFilter[log2Size]);
+    }
+
+    // Selection of either ([1 2 1] / 4 ) reference filter OR Gaussian 4-tap interpolation filter
+    if (filterFlag)
+    {
+      const bool isRefFilter       =  isIntegerSlope(absAng);
+      CHECK( puSize.width * puSize.height <= 32, "DCT-IF interpolation filter is always used for 4x4, 4x8, and 8x4 luma CB" );
+      m_ipaParam.refFilterFlag     =  isRefFilter;
+      m_ipaParam.interpolationFlag = !isRefFilter;
+    }
+  }
 }
-#endif
 
-// Function for deriving the angular Intra predictions
 
 /** Function for deriving the simplified angular intra predictions.
 *
@@ -549,39 +450,16 @@ void IntraPrediction::xDCPredFiltering(const CPelBuf &pSrc, PelBuf &pDst, const
 * from the extended main reference.
 */
 //NOTE: Bit-Limit - 25-bit source
-#if HEVC_USE_HOR_VER_PREDFILTERING
-void IntraPrediction::xPredIntraAng( const CPelBuf &pSrc, PelBuf &pDst, const ChannelType channelType, const uint32_t dirMode, const ClpRng& clpRng, const bool bEnableEdgeFilters, const SPS& sps
-  , int multiRefIdx
-  , const bool enableBoundaryFilter )
-#else
-void IntraPrediction::xPredIntraAng( const CPelBuf &pSrc, PelBuf &pDst, const ChannelType channelType, const uint32_t dirMode, const ClpRng& clpRng, const SPS& sps,
-                                           int      multiRefIdx,
-                                     const bool     useFilteredPredSamples ,
-                                     const bool     useISP,
-                                     const Size     cuSize )
-#endif
+
+void IntraPrediction::xPredIntraAng( const CPelBuf &pSrc, PelBuf &pDst, const ChannelType channelType, const ClpRng& clpRng)
 {
   int width =int(pDst.width);
   int height=int(pDst.height);
 
-  CHECK( !( dirMode > DC_IDX && dirMode < NUM_LUMA_MODE ), "Invalid intra dir" );
-  int              predMode           = useISP ? getWideAngle( cuSize.width, cuSize.height, dirMode ) : getWideAngle( width, height, dirMode );
-  const bool       bIsModeVer         = predMode >= DIA_IDX;
-  const int        intraPredAngleMode = (bIsModeVer) ? predMode - VER_IDX : -(predMode - HOR_IDX);
-  const int        absAngMode         = abs(intraPredAngleMode);
-  const int        signAng            = intraPredAngleMode < 0 ? -1 : 1;
-#if HEVC_USE_HOR_VER_PREDFILTERING
-  const bool       edgeFilter         = bEnableEdgeFilters && isLuma(channelType) && (width <= MAXIMUM_INTRA_FILTERED_WIDTH) && (height <= MAXIMUM_INTRA_FILTERED_HEIGHT);
-#endif
-
-  // Set bitshifts and scale the angle parameter to block size
-
-  static const int angTable[32]    = { 0,    1,    2,    3,    4,    6,     8,   10,   12,   14,   16,   18,   20,   23,   26,   29,   32,   35,   39,  45,  51,  57,  64,  73,  86, 102, 128, 171, 256, 341, 512, 1024 };
-  static const int invAngTable[32] = { 0, 8192, 4096, 2731, 2048, 1365,  1024,  819,  683,  585,  512,  455,  410,  356,  315,  282,  256,  234,  210, 182, 160, 144, 128, 112,  95,  80,  64,  48,  32,  24,  16,    8 }; // (256 * 32) / Angle
-
-  int invAngle                    = invAngTable[absAngMode];
-  int absAng                      = angTable   [absAngMode];
-  int intraPredAngle              = signAng * absAng;
+  const bool bIsModeVer     = m_ipaParam.isModeVer;
+  const int  multiRefIdx    = m_ipaParam.multiRefIndex;
+  const int  intraPredAngle = m_ipaParam.intraPredAngle;
+  const int  invAngle       = m_ipaParam.invAngle;
 
   Pel* refMain;
   Pel* refSide;
@@ -589,132 +467,134 @@ void IntraPrediction::xPredIntraAng( const CPelBuf &pSrc, PelBuf &pDst, const Ch
   Pel  refAbove[2 * MAX_CU_SIZE + 3 + 33 * MAX_REF_LINE_IDX];
   Pel  refLeft [2 * MAX_CU_SIZE + 3 + 33 * MAX_REF_LINE_IDX];
 
-  const int whRatio = useISP ? std::max( unsigned( 1 ), cuSize.width / cuSize.height ) : std::max( 1, width / height );
-  const int hwRatio = useISP ? std::max( unsigned( 1 ), cuSize.height / cuSize.width ) : std::max( 1, height / width );
-
   // Initialize the Main and Left reference array.
   if (intraPredAngle < 0)
   {
-    auto width    = int(pDst.width) +1;
-    auto height   = int(pDst.height)+1;
-    auto lastIdx  = (bIsModeVer ? width : height) + multiRefIdx;
-    auto firstIdx = ( ((bIsModeVer ? height : width) -1) * intraPredAngle ) >> 5;
-    for (int x = 0; x < width + 1 + multiRefIdx; x++)
+    for (int x = 0; x <= width + 1 + multiRefIdx; x++)
     {
-      refAbove[x + height - 1] = pSrc.at( x, 0 );
+      refAbove[x + height] = pSrc.at(x, 0);
     }
-    for (int y = 0; y < height + 1 + multiRefIdx; y++)
+    for (int y = 0; y <= height + 1 + multiRefIdx; y++)
     {
-      refLeft[y + width - 1] = pSrc.at( 0, y );
+      refLeft[y + width] = pSrc.at(y, 1);
     }
-    refMain = (bIsModeVer ? refAbove + height : refLeft  + width ) - 1;
-    refSide = (bIsModeVer ? refLeft  + width  : refAbove + height) - 1;
+    refMain = bIsModeVer ? refAbove + height : refLeft + width;
+    refSide = bIsModeVer ? refLeft + width : refAbove + height;
 
     // Extend the Main reference to the left.
-    int invAngleSum    = 128;       // rounding for (shift by 8)
-    for( int k = -1; k > firstIdx; k-- )
+    int sizeSide = bIsModeVer ? height : width;
+    for (int k = -sizeSide; k <= -1; k++)
     {
-      invAngleSum += invAngle;
-      refMain[k] = refSide[invAngleSum>>8];
+      refMain[k] = refSide[std::min((-k * invAngle + 256) >> 9, sizeSide)];
     }
-    refMain[lastIdx] = refMain[lastIdx-1];
-    refMain[firstIdx] = refMain[firstIdx+1];
   }
   else
   {
-    for (int x = 0; x < m_topRefLength + 1 + (whRatio + 1) * multiRefIdx; x++)
+    for (int x = 0; x <= m_topRefLength + multiRefIdx; x++)
     {
-      refAbove[x+1] = pSrc.at(x, 0);
+      refAbove[x] = pSrc.at(x, 0);
     }
-    for (int y = 0; y < m_leftRefLength + 1 + (hwRatio + 1) * multiRefIdx; y++)
+    for (int y = 0; y <= m_leftRefLength + multiRefIdx; y++)
     {
-      refLeft[y+1]  = pSrc.at(0, y);
+      refLeft[y] = pSrc.at(y, 1);
     }
-    refMain = bIsModeVer ? refAbove : refLeft ;
-    refSide = bIsModeVer ? refLeft  : refAbove;
 
-    refMain++;
-    refSide++;
-    refMain[-1] = refMain[0];
-    auto lastIdx = 1 + ((bIsModeVer) ? m_topRefLength + (whRatio + 1) * multiRefIdx : m_leftRefLength +  (hwRatio + 1) * multiRefIdx);
-    refMain[lastIdx] = refMain[lastIdx-1];
+    refMain = bIsModeVer ? refAbove : refLeft;
+    refSide = bIsModeVer ? refLeft : refAbove;
+
+    // Extend main reference to right using replication
+    const int log2Ratio = floorLog2(width) - floorLog2(height);
+    const int s         = std::max<int>(0, bIsModeVer ? log2Ratio : -log2Ratio);
+    const int maxIndex  = (multiRefIdx << s) + 2;
+    const int refLength = bIsModeVer ? m_topRefLength : m_leftRefLength;
+    const Pel val       = refMain[refLength + multiRefIdx];
+    for (int z = 1; z <= maxIndex; z++)
+    {
+      refMain[refLength + multiRefIdx + z] = val;
+    }
   }
 
   // swap width/height if we are doing a horizontal mode:
-  Pel tempArray[MAX_CU_SIZE*MAX_CU_SIZE];
-  const int dstStride = bIsModeVer ? pDst.stride : MAX_CU_SIZE;
-  Pel *pDstBuf = bIsModeVer ? pDst.buf : tempArray;
   if (!bIsModeVer)
   {
     std::swap(width, height);
   }
+  Pel       tempArray[MAX_CU_SIZE * MAX_CU_SIZE];
+  const int dstStride = bIsModeVer ? pDst.stride : width;
+  Pel *     pDstBuf   = bIsModeVer ? pDst.buf : tempArray;
 
   // compensate for line offset in reference line buffers
   refMain += multiRefIdx;
   refSide += multiRefIdx;
 
+  Pel *pDsty = pDstBuf;
+
   if( intraPredAngle == 0 )  // pure vertical or pure horizontal
   {
     for( int y = 0; y < height; y++ )
     {
       for( int x = 0; x < width; x++ )
       {
-        pDstBuf[y*dstStride + x] = refMain[x + 1];
+        pDsty[x] = refMain[x + 1];
       }
-    }
-#if HEVC_USE_HOR_VER_PREDFILTERING
-    if (edgeFilter && multiRefIdx == 0)
-    {
-      for( int y = 0; y < height; y++ )
+
+      if (m_ipaParam.applyPDPC)
       {
-        pDstBuf[y*dstStride] = ClipPel( pDstBuf[y*dstStride] + ( ( refSide[y + 1] - refSide[0] ) >> 1 ), clpRng );
+        const int scale   = (floorLog2(width) + floorLog2(height) - 2) >> 2;
+        const Pel topLeft = refMain[0];
+        const Pel left    = refSide[1 + y];
+        for (int x = 0; x < std::min(3 << scale, width); x++)
+        {
+          const int wL  = 32 >> (2 * x >> scale);
+          const Pel val = pDsty[x];
+          pDsty[x]      = ClipPel(val + ((wL * (left - topLeft) + 32) >> 6), clpRng);
+        }
       }
+
+      pDsty += dstStride;
     }
-#endif
   }
   else
   {
-    Pel *pDsty=pDstBuf;
     for (int y = 0, deltaPos = intraPredAngle * (1 + multiRefIdx); y<height; y++, deltaPos += intraPredAngle, pDsty += dstStride)
     {
       const int deltaInt   = deltaPos >> 5;
-      const int deltaFract = deltaPos & (32 - 1);
+      const int deltaFract = deltaPos & 31;
 
-      if (absAng != 0 && absAng != 32)
+      if ( !isIntegerSlope( abs(intraPredAngle) ) )
       {
         if( isLuma(channelType) )
         {
-          Pel                        p[4];
-          const bool                 useCubicFilter = useISP ? ( width <= 8 ) : ( !useFilteredPredSamples || multiRefIdx > 0 );
-          TFilterCoeff const * const f              = (useCubicFilter) ? InterpolationFilter::getChromaFilterTable(deltaFract) : g_intraGaussFilter[deltaFract];
+          const bool useCubicFilter = !m_ipaParam.interpolationFlag;
 
-          int         refMainIndex   = deltaInt + 1;
+          const TFilterCoeff        intraSmoothingFilter[4] = {TFilterCoeff(16 - (deltaFract >> 1)), TFilterCoeff(32 - (deltaFract >> 1)), TFilterCoeff(16 + (deltaFract >> 1)), TFilterCoeff(deltaFract >> 1)};
+          const TFilterCoeff* const f                       = (useCubicFilter) ? InterpolationFilter::getChromaFilterTable(deltaFract) : intraSmoothingFilter;
 
-          for( int x = 0; x < width; x++, refMainIndex++ )
+          for (int x = 0; x < width; x++)
           {
-            p[0] = refMain[refMainIndex - 1];
-            p[1] = refMain[refMainIndex];
-            p[2] = refMain[refMainIndex + 1];
-            p[3] = f[3] != 0 ? refMain[refMainIndex + 2] : 0;
+            Pel p[4];
 
-            pDstBuf[y*dstStride + x] = static_cast<Pel>((static_cast<int>(f[0] * p[0]) + static_cast<int>(f[1] * p[1]) + static_cast<int>(f[2] * p[2]) + static_cast<int>(f[3] * p[3]) + 32) >> 6);
+            p[0] = refMain[deltaInt + x];
+            p[1] = refMain[deltaInt + x + 1];
+            p[2] = refMain[deltaInt + x + 2];
+            p[3] = refMain[deltaInt + x + 3];
 
-            if( useCubicFilter ) // only cubic filter has negative coefficients and requires clipping
-            {
-              pDstBuf[y*dstStride + x] = ClipPel( pDstBuf[y*dstStride + x], clpRng );
-            }
+            Pel val = (f[0] * p[0] + f[1] * p[1] + f[2] * p[2] + f[3] * p[3] + 32) >> 6;
+
+            pDsty[x] = ClipPel(val, clpRng);   // always clip even though not always needed
           }
         }
         else
         {
           // Do linear filtering
-          const Pel *pRM = refMain + deltaInt + 1;
-          int lastRefMainPel = *pRM++;
-          for( int x = 0; x < width; pRM++, x++ )
+          for (int x = 0; x < width; x++)
           {
-            int thisRefMainPel = *pRM;
-            pDsty[x + 0] = ( Pel ) ( ( ( 32 - deltaFract )*lastRefMainPel + deltaFract*thisRefMainPel + 16 ) >> 5 );
-            lastRefMainPel = thisRefMainPel;
+            Pel p[2];
+
+            p[0] = refMain[deltaInt + x + 1];
+            p[1] = refMain[deltaInt + x + 2];
+
+            pDsty[x] = p[0] + ((deltaFract * (p[1] - p[0]) + 16) >> 5);
           }
         }
       }
@@ -726,61 +606,21 @@ void IntraPrediction::xPredIntraAng( const CPelBuf &pSrc, PelBuf &pDst, const Ch
           pDsty[x] = refMain[x + deltaInt + 1];
         }
       }
-      const int numModes = 8;
-      const int scale = ((g_aucLog2[width] - 2 + g_aucLog2[height] - 2 + 2) >> 2);
-      CHECK(scale < 0 || scale > 31, "PDPC: scale < 0 || scale > 31");
-      if( !useISP )
+      if (m_ipaParam.applyPDPC)
       {
-      if ((predMode == 2 || predMode == VDIA_IDX) && multiRefIdx == 0)
-      {
-        int wT = 16 >> std::min(31, ((y << 1) >> scale));
-
-        for (int x = 0; x < width; x++)
-        {
-          int wL = 16 >> std::min(31, ((x << 1) >> scale));
-          if (wT + wL == 0) break;
+        const int scale       = m_ipaParam.angularScale;
+        int       invAngleSum = 256;
 
-          int c = x + y + 1;
-          if (c >= 2 * height) { wL = 0; }
-          if (c >= 2 * width)  { wT = 0; }
-          const Pel left = (wL != 0) ? refSide[c + 1] : 0;
-          const Pel top  = (wT != 0) ? refMain[c + 1] : 0;
-
-          pDsty[x] = ClipPel((wL * left + wT * top + (64 - wL - wT) * pDsty[x] + 32) >> 6, clpRng);
-        }
-      }
-      else if (((predMode >= VDIA_IDX - numModes && predMode != VDIA_IDX) || (predMode != 2 && predMode <= (2 + numModes))) && multiRefIdx == 0)
-      {
-        int invAngleSum0 = 2;
-        for (int x = 0; x < width; x++)
+        for (int x = 0; x < std::min(3 << scale, width); x++)
         {
-          invAngleSum0 += invAngle;
-          int deltaPos0 = invAngleSum0 >> 2;
-          int deltaFrac0 = deltaPos0 & 63;
-          int deltaInt0 = deltaPos0 >> 6;
-
-          int deltay = y + deltaInt0 + 1;
-          if (deltay >(bIsModeVer ? m_leftRefLength : m_topRefLength) - 1) break;
-
-          int wL = 32 >> std::min(31, ((x << 1) >> scale));
-          if (wL == 0) break;
-          Pel *p = refSide + deltay;
+          invAngleSum += invAngle;
 
-          Pel left = p[deltaFrac0 >> 5];
-          pDsty[x] = ClipPel((wL * left + (64 - wL) * pDsty[x] + 32) >> 6, clpRng);
+          int wL   = 32 >> (2 * x >> scale);
+          Pel left = refSide[y + (invAngleSum >> 9) + 1];
+          pDsty[x] = pDsty[x] + ((wL * (left - pDsty[x]) + 32) >> 6);
         }
       }
-      }
-    }
-#if HEVC_USE_HOR_VER_PREDFILTERING
-    if( edgeFilter && absAng <= 1 )
-    {
-      for( int y = 0; y < height; y++ )
-      {
-        pDstBuf[y*dstStride] = ClipPel( pDstBuf[y*dstStride] + ((refSide[y + 1] - refSide[0]) >> 2), clpRng );
-      }
     }
-#endif
   }
 
   // Flip the block if this is the horizontal mode
@@ -797,91 +637,81 @@ void IntraPrediction::xPredIntraAng( const CPelBuf &pSrc, PelBuf &pDst, const Ch
   }
 }
 
-
-bool IntraPrediction::useDPCMForFirstPassIntraEstimation(const PredictionUnit &pu, const uint32_t &uiDirMode)
+void IntraPrediction::xPredIntraBDPCM(const CPelBuf &pSrc, PelBuf &pDst, const uint32_t dirMode, const ClpRng& clpRng )
 {
-  return CU::isRDPCMEnabled(*pu.cu) && pu.cu->transQuantBypass && (uiDirMode == HOR_IDX || uiDirMode == VER_IDX);
+  const int wdt = pDst.width;
+  const int hgt = pDst.height;
+
+  const int strideP = pDst.stride;
+  const int strideS = pSrc.stride;
+
+  CHECK( !( dirMode == 1 || dirMode == 2 ), "Incorrect BDPCM mode parameter." );
+
+  Pel* pred = &pDst.buf[0];
+  if( dirMode == 1 )
+  {
+    Pel  val;
+    for( int y = 0; y < hgt; y++ )
+    {
+      val = pSrc.buf[(y + 1) + strideS];
+      for( int x = 0; x < wdt; x++ )
+      {
+        pred[x] = val;
+      }
+      pred += strideP;
+    }
+  }
+  else
+  {
+    for( int y = 0; y < hgt; y++ )
+    {
+      for( int x = 0; x < wdt; x++ )
+      {
+        pred[x] = pSrc.buf[x + 1];
+      }
+      pred += strideP;
+    }
+  }
 }
 
 void IntraPrediction::geneWeightedPred(const ComponentID compId, PelBuf &pred, const PredictionUnit &pu, Pel *srcBuf)
 {
   const int            width = pred.width;
+  CHECK(width == 2, "Width of 2 is not supported");
   const int            height = pred.height;
   const int            srcStride = width;
   const int            dstStride = pred.stride;
 
-  const uint32_t       dirMode = PU::getFinalIntraMode(pu, toChannelType(compId));
-  const ClpRng&        clpRng(pu.cu->cs->slice->clpRng(compId));
   Pel*                 dstBuf = pred.buf;
-  int                  k, l;
+  int wIntra, wMerge;
 
-  bool                 modeDC = (dirMode <= DC_IDX);
-  Pel                  wIntra1 = 6, wInter1 = 2, wIntra2 = 5, wInter2 = 3, wIntra3 = 3, wInter3 = 5, wIntra4 = 2, wInter4 = 6;
+  const Position posBL = pu.Y().bottomLeft();
+  const Position posTR = pu.Y().topRight();
+  const PredictionUnit *neigh0 = pu.cs->getPURestricted(posBL.offset(-1, 0), pu, CHANNEL_TYPE_LUMA);
+  const PredictionUnit *neigh1 = pu.cs->getPURestricted(posTR.offset(0, -1), pu, CHANNEL_TYPE_LUMA);
+  bool isNeigh0Intra = neigh0 && (CU::isIntra(*neigh0->cu));
+  bool isNeigh1Intra = neigh1 && (CU::isIntra(*neigh1->cu));
 
-  if (modeDC || width < 4 || height < 4)
+  if (isNeigh0Intra && isNeigh1Intra)
   {
-    for (k = 0; k<height; k++)
-    {
-      for (l = 0; l<width; l++)
-      {
-        dstBuf[k*dstStride + l] = ClipPel((((dstBuf[k*dstStride + l] * 4) + (srcBuf[k*srcStride + l] * 4)) >> 3), clpRng);
-      }
-    }
+    wIntra = 3; wMerge = 1;
   }
   else
   {
-    if (dirMode <= DIA_IDX)
+    if (!isNeigh0Intra && !isNeigh1Intra)
     {
-      int interval = (width >> 2);
-
-      for (k = 0; k<height; k++)
-      {
-        for (l = 0; l<width; l++)
-        {
-          if (l<interval)
-          {
-            dstBuf[k*dstStride + l] = ClipPel((((dstBuf[k*dstStride + l] * wInter1) + (srcBuf[k*srcStride + l] * wIntra1)) >> 3), clpRng);
-          }
-          else if (l >= interval && l < (2 * interval))
-          {
-            dstBuf[k*dstStride + l] = ClipPel((((dstBuf[k*dstStride + l] * wInter2) + (srcBuf[k*srcStride + l] * wIntra2)) >> 3), clpRng);
-          }
-          else if (l >= (interval * 2) && l < (3 * interval))
-          {
-            dstBuf[k*dstStride + l] = ClipPel((((dstBuf[k*dstStride + l] * wInter3) + (srcBuf[k*srcStride + l] * wIntra3)) >> 3), clpRng);
-          }
-          else
-          {
-            dstBuf[k*dstStride + l] = ClipPel((((dstBuf[k*dstStride + l] * wInter4) + (srcBuf[k*srcStride + l] * wIntra4)) >> 3), clpRng);
-          }
-        }
-      }
+      wIntra = 1; wMerge = 3;
     }
     else
     {
-      int interval = (height >> 2);
-      for (k = 0; k<height; k++)
-      {
-        for (l = 0; l<width; l++)
-        {
-          if (k<interval)
-          {
-            dstBuf[k*dstStride + l] = ClipPel((((dstBuf[k*dstStride + l] * wInter1) + (srcBuf[k*srcStride + l] * wIntra1)) >> 3), clpRng);
-          }
-          else if (k >= interval && k < (2 * interval))
-          {
-            dstBuf[k*dstStride + l] = ClipPel((((dstBuf[k*dstStride + l] * wInter2) + (srcBuf[k*srcStride + l] * wIntra2)) >> 3), clpRng);
-          }
-          else if (k >= (interval * 2) && k < (3 * interval))
-          {
-            dstBuf[k*dstStride + l] = ClipPel((((dstBuf[k*dstStride + l] * wInter3) + (srcBuf[k*srcStride + l] * wIntra3)) >> 3), clpRng);
-          }
-          else
-          {
-            dstBuf[k*dstStride + l] = ClipPel((((dstBuf[k*dstStride + l] * wInter4) + (srcBuf[k*srcStride + l] * wIntra4)) >> 3), clpRng);
-          }
-        }
-      }
+      wIntra = 2; wMerge = 2;
+    }
+  }
+  for (int y = 0; y < height; y++)
+  {
+    for (int x = 0; x < width; x++)
+    {
+      dstBuf[y*dstStride + x] = (wMerge * dstBuf[y*dstStride + x] + wIntra * srcBuf[y*srcStride + x] + 2) >> 2;
     }
   }
 }
@@ -900,25 +730,27 @@ void IntraPrediction::switchBuffer(const PredictionUnit &pu, ComponentID compID,
 
 void IntraPrediction::geneIntrainterPred(const CodingUnit &cu)
 {
-  if (!cu.firstPU->mhIntraFlag)
+  if (!cu.firstPU->ciipFlag)
   {
     return;
   }
 
   const PredictionUnit* pu = cu.firstPU;
 
-  bool isUseFilter = IntraPrediction::useFilteredIntraRefSamples(COMPONENT_Y, *pu, true, *pu);
-  initIntraPatternChType(cu, pu->Y(), isUseFilter);
-  predIntraAng(COMPONENT_Y, cu.cs->getPredBuf(*pu).Y(), *pu, isUseFilter);
-  isUseFilter = IntraPrediction::useFilteredIntraRefSamples(COMPONENT_Cb, *pu, true, *pu);
-  initIntraPatternChType(cu, pu->Cb(), isUseFilter);
-  predIntraAng(COMPONENT_Cb, cu.cs->getPredBuf(*pu).Cb(), *pu, isUseFilter);
-  isUseFilter = IntraPrediction::useFilteredIntraRefSamples(COMPONENT_Cr, *pu, true, *pu);
-  initIntraPatternChType(cu, pu->Cr(), isUseFilter);
-  predIntraAng(COMPONENT_Cr, cu.cs->getPredBuf(*pu).Cr(), *pu, isUseFilter);
+  initIntraPatternChType(cu, pu->Y());
+  predIntraAng(COMPONENT_Y, cu.cs->getPredBuf(*pu).Y(), *pu);
+  if (pu->chromaSize().width > 2)
+  {
+    initIntraPatternChType(cu, pu->Cb());
+    predIntraAng(COMPONENT_Cb, cu.cs->getPredBuf(*pu).Cb(), *pu);
 
+    initIntraPatternChType(cu, pu->Cr());
+    predIntraAng(COMPONENT_Cr, cu.cs->getPredBuf(*pu).Cr(), *pu);
+  }
   for (int currCompID = 0; currCompID < 3; currCompID++)
   {
+    if (pu->chromaSize().width <= 2 && currCompID > 0)
+      continue;
     ComponentID currCompID2 = (ComponentID)currCompID;
     PelBuf tmpBuf = currCompID == 0 ? cu.cs->getPredBuf(*pu).Y() : (currCompID == 1 ? cu.cs->getPredBuf(*pu).Cb() : cu.cs->getPredBuf(*pu).Cr());
     switchBuffer(*pu, currCompID2, tmpBuf, getPredictorPtr2(currCompID2, 0));
@@ -931,26 +763,149 @@ inline int  isLeftAvailable       ( const CodingUnit &cu, const ChannelType &chT
 inline int  isAboveRightAvailable ( const CodingUnit &cu, const ChannelType &chType, const Position &posRT, const uint32_t uiNumUnitsInPU, const uint32_t unitHeight, bool *validFlags );
 inline int  isBelowLeftAvailable  ( const CodingUnit &cu, const ChannelType &chType, const Position &posLB, const uint32_t uiNumUnitsInPU, const uint32_t unitHeight, bool *validFlags );
 
-void IntraPrediction::initIntraPatternChType(const CodingUnit &cu, const CompArea &area, const bool bFilterRefSamples)
+void IntraPrediction::initIntraPatternChType(const CodingUnit &cu, const CompArea &area, const bool forceRefFilterFlag)
 {
+  CHECK(area.width == 2, "Width of 2 is not supported");
   const CodingStructure& cs   = *cu.cs;
 
-  Pel *refBufUnfiltered   = m_piYuvExt[area.compID][PRED_BUF_UNFILTERED];
-  Pel *refBufFiltered     = m_piYuvExt[area.compID][PRED_BUF_FILTERED];
+  if (!forceRefFilterFlag)
+  {
+    initPredIntraParams(*cu.firstPU, area, *cs.sps);
+  }
+
+  Pel *refBufUnfiltered = m_refBuffer[area.compID][PRED_BUF_UNFILTERED];
+  Pel *refBufFiltered   = m_refBuffer[area.compID][PRED_BUF_FILTERED];
 
-  setReferenceArrayLengths( cu.ispMode && isLuma( area.compID ) ? cu.blocks[area.compID] : area );
+  setReferenceArrayLengths( area );
 
   // ----- Step 1: unfiltered reference samples -----
   xFillReferenceSamples( cs.picture->getRecoBuf( area ), refBufUnfiltered, area, cu );
   // ----- Step 2: filtered reference samples -----
-  if( bFilterRefSamples )
+  if( m_ipaParam.refFilterFlag || forceRefFilterFlag )
+  {
+    xFilterReferenceSamples( refBufUnfiltered, refBufFiltered, area, *cs.sps, cu.firstPU->multiRefIdx );
+  }
+}
+
+void IntraPrediction::initIntraPatternChTypeISP(const CodingUnit& cu, const CompArea& area, PelBuf& recBuf, const bool forceRefFilterFlag)
+{
+  const CodingStructure& cs = *cu.cs;
+
+  if (!forceRefFilterFlag)
+  {
+    initPredIntraParams(*cu.firstPU, area, *cs.sps);
+  }
+
+  const Position posLT = area;
+  bool           isLeftAvail  = (cs.getCURestricted(posLT.offset(-1, 0), cu, CHANNEL_TYPE_LUMA) != NULL) && cs.isDecomp(posLT.offset(-1, 0), CHANNEL_TYPE_LUMA);
+  bool           isAboveAvail = (cs.getCURestricted(posLT.offset(0, -1), cu, CHANNEL_TYPE_LUMA) != NULL) && cs.isDecomp(posLT.offset(0, -1), CHANNEL_TYPE_LUMA);
+  // ----- Step 1: unfiltered reference samples -----
+  if (cu.blocks[area.compID].x == area.x && cu.blocks[area.compID].y == area.y)
+  {
+    Pel *refBufUnfiltered = m_refBuffer[area.compID][PRED_BUF_UNFILTERED];
+    // With the first subpartition all the CU reference samples are fetched at once in a single call to xFillReferenceSamples
+    if (cu.ispMode == HOR_INTRA_SUBPARTITIONS)
+    {
+      m_leftRefLength = cu.Y().height << 1;
+      m_topRefLength = cu.Y().width + area.width;
+    }
+    else //if (cu.ispMode == VER_INTRA_SUBPARTITIONS)
+    {
+      m_leftRefLength = cu.Y().height + area.height;
+      m_topRefLength = cu.Y().width << 1;
+    }
+
+
+    xFillReferenceSamples(cs.picture->getRecoBuf(cu.Y()), refBufUnfiltered, cu.Y(), cu);
+
+    // After having retrieved all the CU reference samples, the number of reference samples is now adjusted for the current subpartition
+    m_topRefLength = cu.blocks[area.compID].width + area.width;
+    m_leftRefLength = cu.blocks[area.compID].height + area.height;
+  }
+  else
+  {
+
+    m_topRefLength = cu.blocks[area.compID].width + area.width;
+    m_leftRefLength = cu.blocks[area.compID].height + area.height;
+
+    const int predSizeHor = m_topRefLength;
+    const int predSizeVer = m_leftRefLength;
+    if (cu.ispMode == HOR_INTRA_SUBPARTITIONS)
+    {
+      Pel* src = recBuf.bufAt(0, -1);
+      Pel *ref = m_refBuffer[area.compID][PRED_BUF_UNFILTERED] + m_refBufferStride[area.compID];
+      if (isLeftAvail)
+      {
+        for (int i = 0; i <= 2 * cu.blocks[area.compID].height - area.height; i++)
+        {
+          ref[i] = ref[i + area.height];
+        }
+      }
+      else
+      {
+        for (int i = 0; i <= predSizeVer; i++)
+        {
+          ref[i] = src[0];
+        }
+      }
+      Pel *dst = m_refBuffer[area.compID][PRED_BUF_UNFILTERED] + 1;
+      dst[-1]  = ref[0];
+      for (int i = 0; i < area.width; i++)
+      {
+        dst[i] = src[i];
+      }
+      Pel sample = src[area.width - 1];
+      dst += area.width;
+      for (int i = 0; i < predSizeHor - area.width; i++)
+      {
+        dst[i] = sample;
+      }
+    }
+    else
+    {
+      Pel* src = recBuf.bufAt(-1, 0);
+      Pel *ref = m_refBuffer[area.compID][PRED_BUF_UNFILTERED];
+      if (isAboveAvail)
+      {
+        for (int i = 0; i <= 2 * cu.blocks[area.compID].width - area.width; i++)
+        {
+          ref[i] = ref[i + area.width];
+        }
+      }
+      else
+      {
+        for (int i = 0; i <= predSizeHor; i++)
+        {
+          ref[i] = src[0];
+        }
+      }
+      Pel *dst = m_refBuffer[area.compID][PRED_BUF_UNFILTERED] + m_refBufferStride[area.compID] + 1;
+      dst[-1]  = ref[0];
+      for (int i = 0; i < area.height; i++)
+      {
+        *dst = *src;
+        src += recBuf.stride;
+        dst++;
+      }
+      Pel sample = src[-recBuf.stride];
+      for (int i = 0; i < predSizeVer - area.height; i++)
+      {
+        *dst = sample;
+        dst++;
+      }
+
+    }
+  }
+  // ----- Step 2: filtered reference samples -----
+  if (m_ipaParam.refFilterFlag || forceRefFilterFlag)
   {
-    xFilterReferenceSamples( refBufUnfiltered, refBufFiltered, area, *cs.sps
-      , cu.firstPU->multiRefIdx
-    );
+    Pel *refBufUnfiltered = m_refBuffer[area.compID][PRED_BUF_UNFILTERED];
+    Pel *refBufFiltered   = m_refBuffer[area.compID][PRED_BUF_FILTERED];
+    xFilterReferenceSamples(refBufUnfiltered, refBufFiltered, area, *cs.sps, cu.firstPU->multiRefIdx);
   }
 }
 
+
 void IntraPrediction::xFillReferenceSamples( const CPelBuf &recoBuf, Pel* refBufUnfiltered, const CompArea &area, const CodingUnit &cu )
 {
   const ChannelType      chType = toChannelType( area.compID );
@@ -964,11 +919,8 @@ void IntraPrediction::xFillReferenceSamples( const CPelBuf &recoBuf, Pel* refBuf
   const int  tuHeight           = area.height;
   const int  predSize           = m_topRefLength;
   const int  predHSize          = m_leftRefLength;
-  const int  cuWidth            = cu.blocks[area.compID].width;
-  const int  cuHeight           = cu.blocks[area.compID].height;
-  const int  whRatio            = cu.ispMode && isLuma(area.compID) ? std::max(1, cuWidth / cuHeight) : std::max(1, tuWidth / tuHeight);
-  const int  hwRatio            = cu.ispMode && isLuma(area.compID) ? std::max(1, cuHeight / cuWidth) : std::max(1, tuHeight / tuWidth);
-  const int  predStride         = predSize + 1 + (whRatio + 1) * multiRefIdx;
+  const int predStride = predSize + 1 + multiRefIdx;
+  m_refBufferStride[area.compID] = predStride;
 
   const bool noShift            = pcv.noChroma2x2 && area.width == 4; // don't shift on the lowest level (chroma not-split)
   const int  unitWidth          = tuWidth  <= 2 && cu.ispMode && isLuma(area.compID) ? tuWidth  : pcv.minCUWidth  >> (noShift ? 0 : getComponentScaleX(area.compID, sps.getChromaFormatIdc()));
@@ -1002,7 +954,6 @@ void IntraPrediction::xFillReferenceSamples( const CPelBuf &recoBuf, Pel* refBuf
   numIntraNeighbor += isBelowLeftAvailable ( cu, chType, posLB, numLeftBelowUnits,  unitHeight, (neighborFlags + totalLeftUnits - 1 - numLeftUnits) );
 
   // ----- Step 2: fill reference samples (depending on neighborhood) -----
-  CHECK((predHSize + 1) * predStride > m_iYuvExtSize, "Reference sample area not supported");
 
   const Pel*  srcBuf    = recoBuf.buf;
   const int   srcStride = recoBuf.stride;
@@ -1015,15 +966,20 @@ void IntraPrediction::xFillReferenceSamples( const CPelBuf &recoBuf, Pel* refBuf
   {
     // Fill border with DC value
     for (int j = 0; j <= predSize + multiRefIdx; j++) { ptrDst[j] = valueDC; }
-    for (int i = 1; i <= predHSize + multiRefIdx; i++) { ptrDst[i*predStride] = valueDC; }
+    for (int i = 0; i <= predHSize + multiRefIdx; i++)
+    {
+      ptrDst[i + predStride] = valueDC;
+    }
   }
   else if( numIntraNeighbor == totalUnits )
   {
     // Fill top-left border and top and top right with rec. samples
     ptrSrc = srcBuf - (1 + multiRefIdx) * srcStride - (1 + multiRefIdx);
     for (int j = 0; j <= predSize + multiRefIdx; j++) { ptrDst[j] = ptrSrc[j]; }
-    ptrSrc = srcBuf - multiRefIdx * srcStride - (1 + multiRefIdx);
-    for (int i = 1; i <= predHSize + multiRefIdx; i++) { ptrDst[i*predStride] = *(ptrSrc); ptrSrc += srcStride; }
+    for (int i = 0; i <= predHSize + multiRefIdx; i++)
+    {
+      ptrDst[i + predStride] = ptrSrc[i * srcStride];
+    }
   }
   else // reference samples are partially available
   {
@@ -1033,27 +989,28 @@ void IntraPrediction::xFillReferenceSamples( const CPelBuf &recoBuf, Pel* refBuf
     if (neighborFlags[totalLeftUnits])
     {
       ptrDst[0] = ptrSrc[0];
+      ptrDst[predStride] = ptrSrc[0];
       for (int i = 1; i <= multiRefIdx; i++)
       {
         ptrDst[i] = ptrSrc[i];
-        ptrDst[i*predStride] = ptrSrc[i*srcStride];
+        ptrDst[i + predStride] = ptrSrc[i * srcStride];
       }
     }
 
     // Fill left & below-left samples if available (downwards)
     ptrSrc += (1 + multiRefIdx) * srcStride;
-    ptrDst += (1 + multiRefIdx) * predStride;
+    ptrDst += (1 + multiRefIdx) + predStride;
     for (int unitIdx = totalLeftUnits - 1; unitIdx > 0; unitIdx--)
     {
       if (neighborFlags[unitIdx])
       {
         for (int i = 0; i < unitHeight; i++)
         {
-          ptrDst[i*predStride] = ptrSrc[i*srcStride];
+          ptrDst[i] = ptrSrc[i * srcStride];
         }
       }
       ptrSrc += unitHeight * srcStride;
-      ptrDst += unitHeight * predStride;
+      ptrDst += unitHeight;
     }
     // Fill last below-left sample(s)
     if (neighborFlags[0])
@@ -1061,7 +1018,7 @@ void IntraPrediction::xFillReferenceSamples( const CPelBuf &recoBuf, Pel* refBuf
       int lastSample = (predHSize % unitHeight == 0) ? unitHeight : predHSize % unitHeight;
       for (int i = 0; i < lastSample; i++)
       {
-        ptrDst[i*predStride] = ptrSrc[i*srcStride];
+        ptrDst[i] = ptrSrc[i * srcStride];
       }
     }
 
@@ -1102,7 +1059,7 @@ void IntraPrediction::xFillReferenceSamples( const CPelBuf &recoBuf, Pel* refBuf
       }
 
       // first available sample
-      int firstAvailRow = 0;
+      int firstAvailRow = -1;
       int firstAvailCol = 0;
       if (firstAvailUnit < totalLeftUnits)
       {
@@ -1116,7 +1073,7 @@ void IntraPrediction::xFillReferenceSamples( const CPelBuf &recoBuf, Pel* refBuf
       {
         firstAvailCol = (firstAvailUnit - totalLeftUnits - 1) * unitWidth + 1 + multiRefIdx;
       }
-      const Pel firstAvailSample = ptrDst[firstAvailCol + firstAvailRow * predStride];
+      const Pel firstAvailSample = ptrDst[firstAvailRow < 0 ? firstAvailCol : firstAvailRow + predStride];
 
       // last sample below-left (n.a.)
       int lastRow = predHSize + multiRefIdx;
@@ -1124,7 +1081,7 @@ void IntraPrediction::xFillReferenceSamples( const CPelBuf &recoBuf, Pel* refBuf
       // fill left column
       for (int i = lastRow; i > firstAvailRow; i--)
       {
-        ptrDst[i*predStride] = firstAvailSample;
+        ptrDst[i + predStride] = firstAvailSample;
       }
       // fill top row
       if (firstAvailCol > 0)
@@ -1144,7 +1101,7 @@ void IntraPrediction::xFillReferenceSamples( const CPelBuf &recoBuf, Pel* refBuf
       if (!neighborFlags[currUnit]) // samples not available
       {
         // last available sample
-        int lastAvailRow = 0;
+        int lastAvailRow = -1;
         int lastAvailCol = 0;
         if (lastAvailUnit < totalLeftUnits)
         {
@@ -1158,21 +1115,21 @@ void IntraPrediction::xFillReferenceSamples( const CPelBuf &recoBuf, Pel* refBuf
         {
           lastAvailCol = (lastAvailUnit - totalLeftUnits) * unitWidth + multiRefIdx;
         }
-        const Pel lastAvailSample = ptrDst[lastAvailCol + lastAvailRow * predStride];
+        const Pel lastAvailSample = ptrDst[lastAvailRow < 0 ? lastAvailCol : lastAvailRow + predStride];
 
         // fill current unit with last available sample
         if (currUnit < totalLeftUnits)
         {
           for (int i = lastAvailRow - 1; i >= lastAvailRow - unitHeight; i--)
           {
-            ptrDst[i*predStride] = lastAvailSample;
+            ptrDst[i + predStride] = lastAvailSample;
           }
         }
         else if (currUnit == totalLeftUnits)
         {
-          for (int i = 1; i < multiRefIdx + 1; i++)
+          for (int i = 0; i < multiRefIdx + 1; i++)
           {
-            ptrDst[i*predStride] = lastAvailSample;
+            ptrDst[i + predStride] = lastAvailSample;
           }
           for (int j = 0; j < multiRefIdx + 1; j++)
           {
@@ -1191,265 +1148,166 @@ void IntraPrediction::xFillReferenceSamples( const CPelBuf &recoBuf, Pel* refBuf
       lastAvailUnit = currUnit;
       currUnit++;
     }
-}
-  // padding of extended samples above right with the last sample
-  int lastSample = multiRefIdx + predSize;
-  for (int j = 1; j <= whRatio * multiRefIdx; j++) { ptrDst[lastSample + j] = ptrDst[lastSample]; }
-  // padding of extended samples below left with the last sample
-  lastSample = multiRefIdx + predHSize;
-  for (int i = 1; i <= hwRatio * multiRefIdx; i++) { ptrDst[(lastSample + i)*predStride] = ptrDst[lastSample*predStride]; }
+  }
 }
 
-void IntraPrediction::xFilterReferenceSamples( const Pel* refBufUnfiltered, Pel* refBufFiltered, const CompArea &area, const SPS &sps
-  , int multiRefIdx
+void IntraPrediction::xFilterReferenceSamples(const Pel *refBufUnfiltered, Pel *refBufFiltered, const CompArea &area,
+                                              const SPS &sps, int multiRefIdx
 )
 {
   if (area.compID != COMPONENT_Y)
   {
     multiRefIdx = 0;
   }
-  int whRatio          = std::max(1, int(area.width  / area.height));
-  int hwRatio          = std::max(1, int(area.height / area.width));
-  const int  predSize  = m_topRefLength  + (whRatio + 1) * multiRefIdx;
-  const int  predHSize = m_leftRefLength + (hwRatio + 1) * multiRefIdx;
-  const int  predStride = predSize + 1;
-
-
-#if HEVC_USE_INTRA_SMOOTHING_T32 || HEVC_USE_INTRA_SMOOTHING_T64
-  // Strong intra smoothing
-  ChannelType chType = toChannelType( area.compID );
-  if( sps.getUseStrongIntraSmoothing() && isLuma( chType ) )
-  {
-    const Pel bottomLeft = refBufUnfiltered[predStride * predHSize];
-    const Pel topLeft    = refBufUnfiltered[0];
-    const Pel topRight   = refBufUnfiltered[predSize];
-
-    const int  threshold     = 1 << (sps.getBitDepth( chType ) - 5);
-    const bool bilinearLeft  = abs( (bottomLeft + topLeft)  - (2 * refBufUnfiltered[predStride * tuHeight]) ) < threshold; //difference between the
-    const bool bilinearAbove = abs( (topLeft    + topRight) - (2 * refBufUnfiltered[             tuWidth ]) ) < threshold; //ends and the middle
-
-    if( tuWidth >= 32 && tuHeight >= 32 && bilinearLeft && bilinearAbove )
-#if !HEVC_USE_INTRA_SMOOTHING_T32
-    if( tuWidth > 32 && tuHeight > 32 )
-#endif
-#if !HEVC_USE_INTRA_SMOOTHING_T64
-    if( tuWidth < 64 && tuHeight < 64 )
-#endif
-    {
-      Pel *piDestPtr = refBufFiltered + (predStride * predHSize); // bottom left
-
-      // apply strong intra smoothing
-      for (int i = 0; i < predHSize; i++, piDestPtr -= predStride) //left column (bottom to top)
-      {
-        *piDestPtr = (((predHSize - i) * bottomLeft) + (i * topLeft) + predHSize / 2) / predHSize;
-      }
-      for( uint32_t i = 0; i <= predSize; i++, piDestPtr++ )            //full top row (left-to-right)
-      {
-        *piDestPtr = (((predSize - i) * topLeft) + (i * topRight) + predSize / 2) / predSize;
-      }
+  const int predSize = m_topRefLength + multiRefIdx;
+  const int predHSize = m_leftRefLength + multiRefIdx;
+  const size_t predStride = m_refBufferStride[area.compID];
 
-      return;
-    }
-  }
-#endif
+  const Pel topLeft =
+    (refBufUnfiltered[0] + refBufUnfiltered[1] + refBufUnfiltered[predStride] + refBufUnfiltered[predStride + 1] + 2)
+    >> 2;
 
-  // Regular reference sample filter
-  const Pel *piSrcPtr  = refBufUnfiltered + (predStride * predHSize); // bottom left
-        Pel *piDestPtr = refBufFiltered   + (predStride * predHSize); // bottom left
+  refBufFiltered[0] = topLeft;
 
-  // bottom left (not filtered)
-  *piDestPtr = *piSrcPtr;
-  piDestPtr -= predStride;
-  piSrcPtr  -= predStride;
-  //left column (bottom to top)
-  for( int i = 1; i < predHSize; i++, piDestPtr -= predStride, piSrcPtr -= predStride)
-  {
-    *piDestPtr = (piSrcPtr[predStride] + 2 * piSrcPtr[0] + piSrcPtr[-predStride] + 2) >> 2;
-  }
-  //top-left
-  *piDestPtr = (piSrcPtr[predStride] + 2 * piSrcPtr[0] + piSrcPtr[1] + 2) >> 2;
-  piDestPtr++;
-  piSrcPtr++;
-  //top row (left-to-right)
-  for( uint32_t i=1; i < predSize; i++, piDestPtr++, piSrcPtr++ )
+  for (int i = 1; i < predSize; i++)
   {
-    *piDestPtr = (piSrcPtr[1] + 2 * piSrcPtr[0] + piSrcPtr[-1] + 2) >> 2;
+    refBufFiltered[i] = (refBufUnfiltered[i - 1] + 2 * refBufUnfiltered[i] + refBufUnfiltered[i + 1] + 2) >> 2;
   }
-  // top right (not filtered)
-  *piDestPtr=*piSrcPtr;
-}
-
-bool IntraPrediction::useFilteredIntraRefSamples( const ComponentID &compID, const PredictionUnit &pu, bool modeSpecific, const UnitArea &tuArea )
-{
-  const SPS         &sps    = *pu.cs->sps;
-  const ChannelType  chType = toChannelType( compID );
-
-  // high level conditions
-  if( sps.getSpsRangeExtension().getIntraSmoothingDisabledFlag() )                                       { return false; }
-  if( !isLuma( chType ) && pu.chromaFormat != CHROMA_444 )                                               { return false; }
-
-  if( pu.cu->ispMode && isLuma(compID) )                                                                 { return false; }
+  refBufFiltered[predSize] = refBufUnfiltered[predSize];
 
-  if( !modeSpecific )                                                                                    { return true; }
+  refBufFiltered += predStride;
+  refBufUnfiltered += predStride;
 
-  if (pu.multiRefIdx)                                                                                    { return false; }
+  refBufFiltered[0] = topLeft;
 
-  // pred. mode related conditions
-  const int dirMode = PU::getFinalIntraMode( pu, chType );
-  int predMode = getWideAngle(tuArea.blocks[compID].width, tuArea.blocks[compID].height, dirMode);
-  if (predMode != dirMode )                                                                              { return true; }
-  if (dirMode == DC_IDX)                                                                                 { return false; }
-  if (dirMode == PLANAR_IDX)
+  for (int i = 1; i < predHSize; i++)
   {
-    return tuArea.blocks[compID].width * tuArea.blocks[compID].height > 32 ? true : false;
+    refBufFiltered[i] = (refBufUnfiltered[i - 1] + 2 * refBufUnfiltered[i] + refBufUnfiltered[i + 1] + 2) >> 2;
   }
-
-  int diff = std::min<int>( abs( dirMode - HOR_IDX ), abs( dirMode - VER_IDX ) );
-  int log2Size = ((g_aucLog2[tuArea.blocks[compID].width] + g_aucLog2[tuArea.blocks[compID].height]) >> 1);
-  CHECK( log2Size >= MAX_INTRA_FILTER_DEPTHS, "Size not supported" );
-  return (diff > m_aucIntraFilter[chType][log2Size]);
+  refBufFiltered[predHSize] = refBufUnfiltered[predHSize];
 }
 
-
 bool isAboveLeftAvailable(const CodingUnit &cu, const ChannelType &chType, const Position &posLT)
 {
   const CodingStructure& cs = *cu.cs;
   const Position refPos = posLT.offset(-1, -1);
-  const CodingUnit* pcCUAboveLeft = cs.isDecomp( refPos, chType ) ? cs.getCURestricted( refPos, cu, chType ) : nullptr;
-  const bool isConstrained = cs.pps->getConstrainedIntraPred();
-  bool bAboveLeftFlag;
 
-  if (isConstrained)
-  {
-    bAboveLeftFlag = pcCUAboveLeft && CU::isIntra(*pcCUAboveLeft);
-  }
-  else
+  if (!cs.isDecomp(refPos, chType))
   {
-    bAboveLeftFlag = (pcCUAboveLeft ? true : false);
+    return false;
   }
 
-  return bAboveLeftFlag;
+  return (cs.getCURestricted(refPos, cu, chType) != NULL);
 }
 
 int isAboveAvailable(const CodingUnit &cu, const ChannelType &chType, const Position &posLT, const uint32_t uiNumUnitsInPU, const uint32_t unitWidth, bool *bValidFlags)
 {
   const CodingStructure& cs = *cu.cs;
-  const bool isConstrained = cs.pps->getConstrainedIntraPred();
-  bool *pbValidFlags = bValidFlags;
-  int iNumIntra = 0;
-  int maxDx = uiNumUnitsInPU * unitWidth;
 
-  for (uint32_t dx = 0; dx < maxDx; dx += unitWidth)
+  bool *    validFlags = bValidFlags;
+  int       numIntra   = 0;
+  const int maxDx      = uiNumUnitsInPU * unitWidth;
+
+  for (int dx = 0; dx < maxDx; dx += unitWidth)
   {
     const Position refPos = posLT.offset(dx, -1);
 
-    const CodingUnit* pcCUAbove = cs.isDecomp(refPos, chType) ? cs.getCURestricted(refPos, cu, chType) : nullptr;
-
-    if( pcCUAbove && ( ( isConstrained && CU::isIntra( *pcCUAbove ) ) || !isConstrained ) )
-    {
-      iNumIntra++;
-      *pbValidFlags = true;
-    }
-    else if( !pcCUAbove )
+    if (!cs.isDecomp(refPos, chType))
     {
-      return iNumIntra;
+      break;
     }
 
-    pbValidFlags++;
+    const bool valid = (cs.getCURestricted(refPos, cu, chType) != NULL);
+    numIntra += valid ? 1 : 0;
+    *validFlags = valid;
+
+    validFlags++;
   }
-  return iNumIntra;
+
+  return numIntra;
 }
 
 int isLeftAvailable(const CodingUnit &cu, const ChannelType &chType, const Position &posLT, const uint32_t uiNumUnitsInPU, const uint32_t unitHeight, bool *bValidFlags)
 {
   const CodingStructure& cs = *cu.cs;
-  const bool isConstrained = cs.pps->getConstrainedIntraPred();
-  bool *pbValidFlags = bValidFlags;
-  int iNumIntra = 0;
-  int maxDy = uiNumUnitsInPU * unitHeight;
 
-  for (uint32_t dy = 0; dy < maxDy; dy += unitHeight)
+  bool *    validFlags = bValidFlags;
+  int       numIntra   = 0;
+  const int maxDy      = uiNumUnitsInPU * unitHeight;
+
+  for (int dy = 0; dy < maxDy; dy += unitHeight)
   {
     const Position refPos = posLT.offset(-1, dy);
 
-    const CodingUnit* pcCULeft = cs.isDecomp(refPos, chType) ? cs.getCURestricted(refPos, cu, chType) : nullptr;
-
-    if( pcCULeft && ( ( isConstrained && CU::isIntra( *pcCULeft ) ) || !isConstrained ) )
-    {
-      iNumIntra++;
-      *pbValidFlags = true;
-    }
-    else if( !pcCULeft )
+    if (!cs.isDecomp(refPos, chType))
     {
-      return iNumIntra;
+      break;
     }
 
-    pbValidFlags--; // opposite direction
+    const bool valid = (cs.getCURestricted(refPos, cu, chType) != NULL);
+    numIntra += valid ? 1 : 0;
+    *validFlags = valid;
+
+    validFlags--;
   }
 
-  return iNumIntra;
+  return numIntra;
 }
 
 int isAboveRightAvailable(const CodingUnit &cu, const ChannelType &chType, const Position &posRT, const uint32_t uiNumUnitsInPU, const uint32_t unitWidth, bool *bValidFlags )
 {
   const CodingStructure& cs = *cu.cs;
-  const bool isConstrained = cs.pps->getConstrainedIntraPred();
-  bool *pbValidFlags = bValidFlags;
-  int iNumIntra = 0;
 
-  uint32_t maxDx = uiNumUnitsInPU * unitWidth;
+  bool *    validFlags = bValidFlags;
+  int       numIntra   = 0;
+  const int maxDx      = uiNumUnitsInPU * unitWidth;
 
-  for (uint32_t dx = 0; dx < maxDx; dx += unitWidth)
+  for (int dx = 0; dx < maxDx; dx += unitWidth)
   {
     const Position refPos = posRT.offset(unitWidth + dx, -1);
 
-    const CodingUnit* pcCUAbove = cs.isDecomp(refPos, chType) ? cs.getCURestricted(refPos, cu, chType) : nullptr;
-
-    if( pcCUAbove && ( ( isConstrained && CU::isIntra( *pcCUAbove ) ) || !isConstrained ) )
+    if (!cs.isDecomp(refPos, chType))
     {
-      iNumIntra++;
-      *pbValidFlags = true;
-    }
-    else if( !pcCUAbove )
-    {
-      return iNumIntra;
+      break;
     }
 
-    pbValidFlags++;
+    const bool valid = (cs.getCURestricted(refPos, cu, chType) != NULL);
+    numIntra += valid ? 1 : 0;
+    *validFlags = valid;
+
+    validFlags++;
   }
 
-  return iNumIntra;
+  return numIntra;
 }
 
 int isBelowLeftAvailable(const CodingUnit &cu, const ChannelType &chType, const Position &posLB, const uint32_t uiNumUnitsInPU, const uint32_t unitHeight, bool *bValidFlags )
 {
   const CodingStructure& cs = *cu.cs;
-  const bool isConstrained = cs.pps->getConstrainedIntraPred();
-  bool *pbValidFlags = bValidFlags;
-  int iNumIntra = 0;
-  int maxDy = uiNumUnitsInPU * unitHeight;
 
-  for (uint32_t dy = 0; dy < maxDy; dy += unitHeight)
+  bool *    validFlags = bValidFlags;
+  int       numIntra   = 0;
+  const int maxDy      = uiNumUnitsInPU * unitHeight;
+
+  for (int dy = 0; dy < maxDy; dy += unitHeight)
   {
     const Position refPos = posLB.offset(-1, unitHeight + dy);
 
-    const CodingUnit* pcCULeft = cs.isDecomp(refPos, chType) ? cs.getCURestricted(refPos, cu, chType) : nullptr;
-
-    if( pcCULeft && ( ( isConstrained && CU::isIntra( *pcCULeft ) ) || !isConstrained ) )
-    {
-      iNumIntra++;
-      *pbValidFlags = true;
-    }
-    else if ( !pcCULeft )
+    if (!cs.isDecomp(refPos, chType))
     {
-      return iNumIntra;
+      break;
     }
 
-    pbValidFlags--; // opposite direction
+    const bool valid = (cs.getCURestricted(refPos, cu, chType) != NULL);
+    numIntra += valid ? 1 : 0;
+    *validFlags = valid;
+
+    validFlags--;
   }
 
-  return iNumIntra;
+  return numIntra;
 }
+
 // LumaRecPixels
 void IntraPrediction::xGetLumaRecPixels(const PredictionUnit &pu, CompArea chromaArea)
 {
@@ -1463,15 +1321,14 @@ void IntraPrediction::xGetLumaRecPixels(const PredictionUnit &pu, CompArea chrom
   }
   else
   {
-  iDstStride = MAX_CU_SIZE + 1;
-  pDst0 = m_piTemp + iDstStride + 1; //MMLM_SAMPLE_NEIGHBOR_LINES;
+    iDstStride = MAX_CU_SIZE + 1;
+    pDst0 = m_piTemp + iDstStride + 1; //MMLM_SAMPLE_NEIGHBOR_LINES;
   }
   //assert 420 chroma subsampling
   CompArea lumaArea = CompArea( COMPONENT_Y, pu.chromaFormat, chromaArea.lumaPos(), recalcSize( pu.chromaFormat, CHANNEL_TYPE_CHROMA, CHANNEL_TYPE_LUMA, chromaArea.size() ) );//needed for correct pos/size (4x4 Tus)
 
-
-  CHECK( lumaArea.width  == chromaArea.width, "" );
-  CHECK( lumaArea.height == chromaArea.height, "" );
+  CHECK(lumaArea.width == chromaArea.width && CHROMA_444 != pu.chromaFormat, "");
+  CHECK(lumaArea.height == chromaArea.height && CHROMA_444 != pu.chromaFormat && CHROMA_422 != pu.chromaFormat, "");
 
   const SizeType uiCWidth = chromaArea.width;
   const SizeType uiCHeight = chromaArea.height;
@@ -1479,7 +1336,11 @@ void IntraPrediction::xGetLumaRecPixels(const PredictionUnit &pu, CompArea chrom
   const CPelBuf Src = pu.cs->picture->getRecoBuf( lumaArea );
   Pel const* pRecSrc0   = Src.bufAt( 0, 0 );
   int iRecStride        = Src.stride;
-  int iRecStride2       = iRecStride << 1;
+  int logSubWidthC  = getChannelTypeScaleX(CHANNEL_TYPE_CHROMA, pu.chromaFormat);
+  int logSubHeightC = getChannelTypeScaleY(CHANNEL_TYPE_CHROMA, pu.chromaFormat);
+
+  int iRecStride2       = iRecStride << logSubHeightC;
+  const int mult        =          1 << logSubWidthC ;
 
   const CodingUnit& lumaCU = isChroma( pu.chType ) ? *pu.cs->picture->cs->getCU( lumaArea.pos(), CH_L ) : *pu.cu;
   const CodingUnit&     cu = *pu.cu;
@@ -1492,13 +1353,14 @@ void IntraPrediction::xGetLumaRecPixels(const PredictionUnit &pu, CompArea chrom
   int iBaseUnitSize = ( 1 << MIN_CU_LOG2 );
 
   const int  iUnitWidth       = iBaseUnitSize >> getComponentScaleX( area.compID, area.chromaFormat );
-  const int  iUnitHeight      = iBaseUnitSize >> getComponentScaleX( area.compID, area.chromaFormat );
-  const int  iTUWidthInUnits  = uiTuWidth  / iUnitWidth;
+  const int  iUnitHeight = iBaseUnitSize >> getComponentScaleY(area.compID, area.chromaFormat);
+
+  const int  iTUWidthInUnits = uiTuWidth / iUnitWidth;
   const int  iTUHeightInUnits = uiTuHeight / iUnitHeight;
   const int  iAboveUnits      = iTUWidthInUnits;
   const int  iLeftUnits       = iTUHeightInUnits;
   const int  chromaUnitWidth = iBaseUnitSize >> getComponentScaleX(COMPONENT_Cb, area.chromaFormat);
-  const int  chromaUnitHeight = iBaseUnitSize >> getComponentScaleX(COMPONENT_Cb, area.chromaFormat);
+  const int  chromaUnitHeight = iBaseUnitSize >> getComponentScaleY(COMPONENT_Cb, area.chromaFormat);
   const int  topTemplateSampNum = 2 * uiCWidth; // for MDLM, the number of template samples is 2W or 2H.
   const int  leftTemplateSampNum = 2 * uiCHeight;
   assert(m_topRefLength >= topTemplateSampNum);
@@ -1538,7 +1400,33 @@ void IntraPrediction::xGetLumaRecPixels(const PredictionUnit &pu, CompArea chrom
   Pel*       pDst  = nullptr;
   Pel const* piSrc = nullptr;
 
-  bool isFirstRowOfCtu = ((pu.block(COMPONENT_Cb).y)&(((pu.cs->sps)->getMaxCUWidth() >> 1) - 1)) == 0;
+  bool isFirstRowOfCtu = ( lumaArea.y & ((pu.cs->sps)->getCTUSize() - 1) ) == 0;
+  const int strOffset = (CHROMA_444 == pu.chromaFormat) ? 0 : iRecStride;
+
+  int c0_2tap = 1, c1_2tap = 1,                                                     offset_2tap = 1, shift_2tap = 1; //sum = 2
+  int c0_3tap = 2, c1_3tap = 1, c2_3tap = 1,                                        offset_3tap = 2, shift_3tap = 2; //sum = 4
+  int c0_5tap = 1, c1_5tap = 4, c2_5tap = 1, c3_5tap = 1, c4_5tap = 1,              offset_5tap = 4, shift_5tap = 3; //sum = 8
+  int c0_6tap = 2, c1_6tap = 1, c2_6tap = 1, c3_6tap = 2, c4_6tap = 1, c5_6tap = 1, offset_6tap = 4, shift_6tap = 3; //sum = 8
+
+  switch (pu.chromaFormat)
+  {
+    case CHROMA_422: //overwrite filter coefficient values for 422
+      c0_2tap = 1, c1_2tap = 0,                                                     offset_2tap = 0, shift_2tap = 0; //sum = 1
+      c0_3tap = 2, c1_3tap = 1, c2_3tap = 1,                                        offset_3tap = 2, shift_3tap = 2; //sum = 4
+      c0_5tap = 0, c1_5tap = 1, c2_5tap = 0, c3_5tap = 0, c4_5tap = 0,              offset_5tap = 0, shift_5tap = 0; //sum = 1
+      c0_6tap = 2, c1_6tap = 1, c2_6tap = 1, c3_6tap = 0, c4_6tap = 0, c5_6tap = 0, offset_6tap = 2, shift_6tap = 2; //sum = 4
+      break;
+
+    case CHROMA_444:  //overwrite filter coefficient values for 444
+      c0_2tap = 1, c1_2tap = 0,                                                     offset_2tap = 0, shift_2tap = 0; //sum = 1
+      c0_3tap = 1, c1_3tap = 0, c2_3tap = 0,                                        offset_3tap = 0, shift_3tap = 0; //sum = 1
+      c0_5tap = 0, c1_5tap = 1, c2_5tap = 0, c3_5tap = 0, c4_5tap = 0,              offset_5tap = 0, shift_5tap = 0; //sum = 1
+      c0_6tap = 1, c1_6tap = 0, c2_6tap = 0, c3_6tap = 0, c4_6tap = 0, c5_6tap = 0, offset_6tap = 0, shift_6tap = 0; //sum = 1
+      break;
+
+    default:
+      break;
+  }
 
   if( bAboveAvaillable )
   {
@@ -1554,44 +1442,44 @@ void IntraPrediction::xGetLumaRecPixels(const PredictionUnit &pu, CompArea chrom
       {
         piSrc = pRecSrc0 - iRecStride;
 
-        if (i == 0 && !bLeftAvaillable)
+        if ((i == 0 && !bLeftAvaillable) || (i == uiCWidth + addedAboveRight - 1 + logSubWidthC))
         {
-          pDst[i] = piSrc[2 * i];
+          pDst[i] = piSrc[mult * i];
         }
         else
         {
-          pDst[i] = ( piSrc[2 * i] * 2 + piSrc[2 * i - 1] + piSrc[2 * i + 1] + 2 ) >> 2;
+          pDst[i] = (piSrc[mult * i] * c0_3tap + piSrc[mult * i - 1] * c1_3tap + piSrc[mult * i + 1] * c2_3tap + offset_3tap) >> shift_3tap;
         }
       }
       else if( pu.cs->sps->getCclmCollocatedChromaFlag() )
       {
         piSrc = pRecSrc0 - iRecStride2;
 
-        if( i == 0 && !bLeftAvaillable )
+        if ((i == 0 && !bLeftAvaillable) || (i == uiCWidth + addedAboveRight - 1 + logSubWidthC))
         {
-          pDst[i] = ( piSrc[2 * i] * 2 + piSrc[2 * i - iRecStride] + piSrc[2 * i + iRecStride] + 2 ) >> 2;
+          pDst[i] = (piSrc[mult * i] * c0_3tap + piSrc[mult * i - strOffset] * c1_3tap + piSrc[mult * i + strOffset] * c2_3tap + offset_3tap) >> shift_3tap;
         }
         else
         {
-          pDst[i] = ( piSrc[2 * i - iRecStride]
-                    + piSrc[2 * i             ] * 4 + piSrc[2 * i - 1] + piSrc[2 * i + 1]
-                    + piSrc[2 * i + iRecStride]
-                    + 4 ) >> 3;
+          pDst[i] = (piSrc[mult * i - strOffset] * c0_5tap
+                  +  piSrc[mult * i]             * c1_5tap + piSrc[mult * i - 1] * c2_5tap + piSrc[mult * i + 1] * c3_5tap
+                  +  piSrc[mult * i + strOffset] * c4_5tap
+                  +  offset_5tap) >> shift_5tap;
         }
       }
       else
       {
         piSrc = pRecSrc0 - iRecStride2;
 
-        if (i == 0 && !bLeftAvaillable)
+        if ((i == 0 && !bLeftAvaillable) || (i == uiCWidth + addedAboveRight - 1 + logSubWidthC))
         {
-          pDst[i] = ( piSrc[2 * i] + piSrc[2 * i + iRecStride] + 1 ) >> 1;
+          pDst[i] = (piSrc[mult * i] * c0_2tap + piSrc[mult * i + strOffset] * c1_2tap + offset_2tap) >> shift_2tap;
         }
         else
         {
-          pDst[i] = ( ( ( piSrc[2 * i             ] * 2 ) + piSrc[2 * i - 1             ] + piSrc[2 * i + 1             ] )
-                    + ( ( piSrc[2 * i + iRecStride] * 2 ) + piSrc[2 * i - 1 + iRecStride] + piSrc[2 * i + 1 + iRecStride] )
-                    + 4 ) >> 3;
+          pDst[i] = ((piSrc[mult * i]            * c0_6tap + piSrc[mult * i - 1]             * c1_6tap + piSrc[mult * i + 1]             * c2_6tap)
+                  + (piSrc[mult * i + strOffset] * c3_6tap + piSrc[mult * i - 1 + strOffset] * c4_6tap + piSrc[mult * i + 1 + strOffset] * c5_6tap)
+                  + offset_6tap) >> shift_6tap;
         }
       }
     }
@@ -1600,33 +1488,36 @@ void IntraPrediction::xGetLumaRecPixels(const PredictionUnit &pu, CompArea chrom
   if( bLeftAvaillable )
   {
     pDst  = pDst0    - 1;
-    piSrc = pRecSrc0 - 3;
+
+    piSrc = pRecSrc0 - 2 - logSubWidthC;
+
     int addedLeftBelow = 0;
     if ((curChromaMode == MDLM_L_IDX) || (curChromaMode == MDLM_T_IDX))
     {
       addedLeftBelow = avaiLeftBelowUnits*chromaUnitHeight;
     }
+
     for (int j = 0; j < uiCHeight + addedLeftBelow; j++)
     {
       if( pu.cs->sps->getCclmCollocatedChromaFlag() )
       {
-        if( j == 0 && !bAboveAvaillable )
+        if ((j == 0 && !bAboveAvaillable) || (j == uiCHeight + addedLeftBelow - 1 + logSubWidthC))
         {
-          pDst[0] = ( piSrc[1] * 2 + piSrc[0] + piSrc[2] + 2 ) >> 2;
+          pDst[0] = ( piSrc[1] * c0_3tap + piSrc[0] * c1_3tap + piSrc[2] * c2_3tap + offset_3tap) >> shift_3tap;
         }
         else
         {
-          pDst[0] = ( piSrc[1 - iRecStride]
-                    + piSrc[1             ] * 4 + piSrc[0] + piSrc[2]
-                    + piSrc[1 + iRecStride]
-                    + 4 ) >> 3;
+          pDst[0] = ( piSrc[1 - strOffset] * c0_5tap
+                    + piSrc[1            ] * c1_5tap + piSrc[0] * c2_5tap + piSrc[2] * c3_5tap
+                    + piSrc[1 + strOffset] * c4_5tap
+                    + offset_5tap ) >> shift_5tap;
         }
       }
       else
       {
-        pDst[0] = ( ( piSrc[1             ] * 2 + piSrc[0         ] + piSrc[2             ] )
-                  + ( piSrc[1 + iRecStride] * 2 + piSrc[iRecStride] + piSrc[2 + iRecStride] )
-                  + 4 ) >> 3;
+        pDst[0] = ((piSrc[1]             * c0_6tap + piSrc[0]         * c1_6tap + piSrc[2]             * c2_6tap)
+                +  (piSrc[1 + strOffset] * c3_6tap + piSrc[strOffset] * c4_6tap + piSrc[2 + strOffset] * c5_6tap)
+                +   offset_6tap) >> shift_6tap;
       }
 
       piSrc += iRecStride2;
@@ -1634,7 +1525,6 @@ void IntraPrediction::xGetLumaRecPixels(const PredictionUnit &pu, CompArea chrom
     }
   }
 
-
   // inner part from reconstructed picture buffer
   for( int j = 0; j < uiCHeight; j++ )
   {
@@ -1644,38 +1534,47 @@ void IntraPrediction::xGetLumaRecPixels(const PredictionUnit &pu, CompArea chrom
       {
         if( i == 0 && !bLeftAvaillable )
         {
-          if( j == 0 && !bAboveAvaillable )
+          if ( j == 0 && !bAboveAvaillable )
           {
-            pDst0[i] = pRecSrc0[2 * i];
+            pDst0[i] = pRecSrc0[mult * i];
           }
           else
           {
-            pDst0[i] = ( pRecSrc0[2 * i] * 2 + pRecSrc0[2 * i - iRecStride] + pRecSrc0[2 * i + iRecStride] + 2 ) >> 2;
+            pDst0[i] = (pRecSrc0[mult * i] * c0_3tap + pRecSrc0[mult * i - strOffset] * c1_3tap + pRecSrc0[mult * i + strOffset] * c2_3tap + offset_3tap) >> shift_3tap;
           }
         }
-        else if( j == 0 && !bAboveAvaillable )
+        else if ( j == 0 && !bAboveAvaillable )
         {
-          pDst0[i] = ( pRecSrc0[2 * i] * 2 + pRecSrc0[2 * i - 1] + pRecSrc0[2 * i + 1] + 2 ) >> 2;
+          pDst0[i] = (pRecSrc0[mult * i] * c0_3tap + pRecSrc0[mult * i - 1] * c1_3tap + pRecSrc0[mult * i + 1] * c2_3tap + offset_3tap) >> shift_3tap;
         }
         else
         {
-          pDst0[i] = ( pRecSrc0[2 * i - iRecStride]
-                     + pRecSrc0[2 * i             ] * 4 + pRecSrc0[2 * i - 1] + pRecSrc0[2 * i + 1]
-                     + pRecSrc0[2 * i + iRecStride]
-                     + 4 ) >> 3;
+          pDst0[i] = (pRecSrc0[mult * i - strOffset] * c0_5tap
+                   +  pRecSrc0[mult * i]             * c1_5tap + pRecSrc0[mult * i - 1] * c2_5tap + pRecSrc0[mult * i + 1] * c3_5tap
+                   +  pRecSrc0[mult * i + strOffset] * c4_5tap
+                   +  offset_5tap) >> shift_5tap;
         }
       }
       else
       {
-        if( i == 0 && !bLeftAvaillable )
+
+        if ((i == 0 && !bLeftAvaillable) || (i == uiCWidth - 1 + logSubWidthC))
         {
-          pDst0[i] = ( pRecSrc0[2 * i] + pRecSrc0[2 * i + iRecStride] + 1 ) >> 1;
+          pDst0[i] = (pRecSrc0[mult * i] * c0_2tap + pRecSrc0[mult * i + strOffset] * c1_2tap + offset_2tap) >> shift_2tap;
         }
         else
         {
-          pDst0[i] = ( pRecSrc0[2 * i             ] * 2 + pRecSrc0[2 * i + 1             ] + pRecSrc0[2 * i - 1             ]
-                     + pRecSrc0[2 * i + iRecStride] * 2 + pRecSrc0[2 * i + 1 + iRecStride] + pRecSrc0[2 * i - 1 + iRecStride]
-                     + 4 ) >> 3;
+          int s = offset_6tap;
+          s += pRecSrc0[mult * i] * c0_6tap;
+          s += pRecSrc0[mult * i + 1] * c1_6tap;
+          s += pRecSrc0[mult * i - 1] * c2_6tap;
+          if (pu.chromaFormat == CHROMA_420)
+          {
+            s += pRecSrc0[mult * i + strOffset] * c3_6tap;
+            s += pRecSrc0[mult * i + 1 + strOffset] * c4_6tap;
+            s += pRecSrc0[mult * i - 1 + strOffset] * c5_6tap;
+          }
+          pDst0[i] = s >> shift_6tap;
         }
       }
     }
@@ -1705,7 +1604,7 @@ void IntraPrediction::xGetLMParameters(const PredictionUnit &pu, const Component
 
   const int baseUnitSize = 1 << MIN_CU_LOG2;
   const int unitWidth    = baseUnitSize >> getComponentScaleX(chromaArea.compID, nChromaFormat);
-  const int unitHeight   = baseUnitSize >> getComponentScaleX(chromaArea.compID, nChromaFormat);
+  const int unitHeight   = baseUnitSize >> getComponentScaleY(chromaArea.compID, nChromaFormat);
 
   const int tuWidthInUnits  = tuWidth / unitWidth;
   const int tuHeightInUnits = tuHeight / unitHeight;
@@ -1751,7 +1650,7 @@ void IntraPrediction::xGetLMParameters(const PredictionUnit &pu, const Component
     avaiAboveRightUnits = isAboveRightAvailable(cu, CHANNEL_TYPE_CHROMA, chromaArea.topRightComp(chromaArea.compID), aboveRightUnits, unitWidth, (neighborFlags + leftUnits + leftBelowUnits + aboveUnits + 1));
   }
   Pel *srcColor0, *curChroma0;
-  int  srcStride, curStride;
+  int srcStride;
 
   PelBuf temp;
   if ((curChromaMode == MDLM_L_IDX) || (curChromaMode == MDLM_T_IDX))
@@ -1761,15 +1660,12 @@ void IntraPrediction::xGetLMParameters(const PredictionUnit &pu, const Component
   }
   else
   {
-  srcStride = MAX_CU_SIZE + 1;
-  temp        = PelBuf(m_piTemp + srcStride + 1, srcStride, Size(chromaArea));
+    srcStride = MAX_CU_SIZE + 1;
+    temp        = PelBuf(m_piTemp + srcStride + 1, srcStride, Size(chromaArea));
   }
   srcColor0 = temp.bufAt(0, 0);
   curChroma0 = getPredictorPtr(compID);
 
-  curStride = m_topRefLength + 1;
-
-  curChroma0 += curStride + 1;
 
   unsigned internalBitDepth = sps.getBitDepth(CHANNEL_TYPE_CHROMA);
 
@@ -1777,71 +1673,89 @@ void IntraPrediction::xGetLMParameters(const PredictionUnit &pu, const Component
   int maxLuma[2] = { -MAX_INT, 0 };
 
   Pel *src = srcColor0 - srcStride;
-  Pel *cur = curChroma0 - curStride;
-  int minDim = 1;
   int actualTopTemplateSampNum = 0;
   int actualLeftTemplateSampNum = 0;
   if (curChromaMode == MDLM_T_IDX)
   {
     leftAvailable = 0;
+    avaiAboveRightUnits = avaiAboveRightUnits > (cHeight/unitWidth) ?  cHeight/unitWidth : avaiAboveRightUnits;
     actualTopTemplateSampNum = unitWidth*(avaiAboveUnits + avaiAboveRightUnits);
-    minDim = actualTopTemplateSampNum;
   }
   else if (curChromaMode == MDLM_L_IDX)
   {
     aboveAvailable = 0;
+    avaiLeftBelowUnits = avaiLeftBelowUnits > (cWidth/unitHeight) ? cWidth/unitHeight : avaiLeftBelowUnits;
     actualLeftTemplateSampNum = unitHeight*(avaiLeftUnits + avaiLeftBelowUnits);
-    minDim = actualLeftTemplateSampNum;
   }
   else if (curChromaMode == LM_CHROMA_IDX)
   {
     actualTopTemplateSampNum = cWidth;
     actualLeftTemplateSampNum = cHeight;
-    minDim = leftAvailable && aboveAvailable ? 1 << g_aucPrevLog2[std::min(actualLeftTemplateSampNum, actualTopTemplateSampNum)]
-      : 1 << g_aucPrevLog2[leftAvailable ? actualLeftTemplateSampNum : actualTopTemplateSampNum];
   }
-  int numSteps = minDim;
+  int startPos[2]; // index 0: above template, index 1: left template
+  int pickStep[2];
+
+  int aboveIs4 = leftAvailable  ? 0 : 1;
+  int leftIs4 =  aboveAvailable ? 0 : 1;
+
+  startPos[0] = actualTopTemplateSampNum >> (2 + aboveIs4);
+  pickStep[0] = std::max(1, actualTopTemplateSampNum >> (1 + aboveIs4));
+
+  startPos[1] = actualLeftTemplateSampNum >> (2 + leftIs4);
+  pickStep[1] = std::max(1, actualLeftTemplateSampNum >> (1 + leftIs4));
 
+  Pel selectLumaPix[4] = { 0, 0, 0, 0 };
+  Pel selectChromaPix[4] = { 0, 0, 0, 0 };
+
+  int cntT, cntL;
+  cntT = cntL = 0;
+  int cnt = 0;
   if (aboveAvailable)
   {
-    for (int j = 0; j < numSteps; j++)
+    cntT = std::min(actualTopTemplateSampNum, (1 + aboveIs4) << 1);
+    src = srcColor0 - srcStride;
+    const Pel *cur = curChroma0 + 1;
+    for (int pos = startPos[0]; cnt < cntT; pos += pickStep[0], cnt++)
     {
-      int idx = (j * actualTopTemplateSampNum) / minDim;
-
-      if (minLuma[0] > src[idx])
-      {
-        minLuma[0] = src[idx];
-        minLuma[1] = cur[idx];
-      }
-      if (maxLuma[0] < src[idx])
-      {
-        maxLuma[0] = src[idx];
-        maxLuma[1] = cur[idx];
-      }
+      selectLumaPix[cnt] = src[pos];
+      selectChromaPix[cnt] = cur[pos];
     }
   }
 
   if (leftAvailable)
   {
+    cntL = std::min(actualLeftTemplateSampNum, ( 1 + leftIs4 ) << 1 );
     src = srcColor0 - 1;
-    cur = curChroma0 - 1;
-
-    for (int i = 0; i < numSteps; i++)
+    const Pel *cur = curChroma0 + m_refBufferStride[compID] + 1;
+    for (int pos = startPos[1], cnt = 0; cnt < cntL; pos += pickStep[1], cnt++)
     {
-      int idx = (i * actualLeftTemplateSampNum) / minDim;
-
-      if (minLuma[0] > src[srcStride * idx])
-      {
-        minLuma[0] = src[srcStride * idx];
-        minLuma[1] = cur[curStride * idx];
-      }
-      if (maxLuma[0] < src[srcStride * idx])
-      {
-        maxLuma[0] = src[srcStride * idx];
-        maxLuma[1] = cur[curStride * idx];
-      }
+      selectLumaPix[cnt + cntT] = src[pos * srcStride];
+      selectChromaPix[cnt + cntT] = cur[pos];
     }
   }
+  cnt = cntL + cntT;
+
+  if (cnt == 2)
+  {
+    selectLumaPix[3] = selectLumaPix[0]; selectChromaPix[3] = selectChromaPix[0];
+    selectLumaPix[2] = selectLumaPix[1]; selectChromaPix[2] = selectChromaPix[1];
+    selectLumaPix[0] = selectLumaPix[1]; selectChromaPix[0] = selectChromaPix[1];
+    selectLumaPix[1] = selectLumaPix[3]; selectChromaPix[1] = selectChromaPix[3];
+  }
+
+  int minGrpIdx[2] = { 0, 2 };
+  int maxGrpIdx[2] = { 1, 3 };
+  int *tmpMinGrp = minGrpIdx;
+  int *tmpMaxGrp = maxGrpIdx;
+  if (selectLumaPix[tmpMinGrp[0]] > selectLumaPix[tmpMinGrp[1]]) std::swap(tmpMinGrp[0], tmpMinGrp[1]);
+  if (selectLumaPix[tmpMaxGrp[0]] > selectLumaPix[tmpMaxGrp[1]]) std::swap(tmpMaxGrp[0], tmpMaxGrp[1]);
+  if (selectLumaPix[tmpMinGrp[0]] > selectLumaPix[tmpMaxGrp[1]]) std::swap(tmpMinGrp, tmpMaxGrp);
+  if (selectLumaPix[tmpMinGrp[1]] > selectLumaPix[tmpMaxGrp[0]]) std::swap(tmpMinGrp[1], tmpMaxGrp[0]);
+
+  minLuma[0] = (selectLumaPix[tmpMinGrp[0]] + selectLumaPix[tmpMinGrp[1]] + 1 )>>1;
+  minLuma[1] = (selectChromaPix[tmpMinGrp[0]] + selectChromaPix[tmpMinGrp[1]] + 1) >> 1;
+  maxLuma[0] = (selectLumaPix[tmpMaxGrp[0]] + selectLumaPix[tmpMaxGrp[1]] + 1 )>>1;
+  maxLuma[1] = (selectChromaPix[tmpMaxGrp[0]] + selectChromaPix[tmpMaxGrp[1]] + 1) >> 1;
 
   if (leftAvailable || aboveAvailable)
   {
@@ -1862,7 +1776,8 @@ void IntraPrediction::xGetLMParameters(const PredictionUnit &pu, const Component
       int add = 1 << y >> 1;
       a = (diffC * v + add) >> y;
       iShift = 3 + x - y;
-      if ( iShift < 1 ) {
+      if ( iShift < 1 )
+      {
         iShift = 1;
         a = ( (a == 0)? 0: (a < 0)? -15 : 15 );   // a=Sign(a)*15
       }
@@ -1885,4 +1800,108 @@ void IntraPrediction::xGetLMParameters(const PredictionUnit &pu, const Component
   }
 }
 
+void IntraPrediction::initIntraMip( const PredictionUnit &pu, const CompArea &area )
+{
+  CHECK( area.width > MIP_MAX_WIDTH || area.height > MIP_MAX_HEIGHT, "Error: block size not supported for MIP" );
+
+  // prepare input (boundary) data for prediction
+  CHECK( m_ipaParam.refFilterFlag, "ERROR: unfiltered refs expected for MIP" );
+  Pel *ptrSrc = getPredictorPtr( COMPONENT_Y );
+  const int srcStride  = m_refBufferStride[COMPONENT_Y];
+  const int srcHStride = 2;
+
+  m_matrixIntraPred.prepareInputForPred( CPelBuf( ptrSrc, srcStride, srcHStride ), area, pu.cu->slice->getSPS()->getBitDepth( CHANNEL_TYPE_LUMA ) );
+}
+
+void IntraPrediction::predIntraMip( const ComponentID compId, PelBuf &piPred, const PredictionUnit &pu )
+{
+  CHECK( compId != COMPONENT_Y, "Error: chroma not supported" );
+  CHECK( piPred.width > MIP_MAX_WIDTH || piPred.height > MIP_MAX_HEIGHT, "Error: block size not supported for MIP" );
+  CHECK( piPred.width != (1 << floorLog2(piPred.width)) || piPred.height != (1 << floorLog2(piPred.height)), "Error: expecting blocks of size 2^M x 2^N" );
+
+  // generate mode-specific prediction
+  const int bitDepth = pu.cu->slice->getSPS()->getBitDepth( CHANNEL_TYPE_LUMA );
+
+  static_vector<int, MIP_MAX_WIDTH* MIP_MAX_HEIGHT> predMip( piPred.width * piPred.height );
+  m_matrixIntraPred.predBlock( predMip.data(), pu.intraDir[CHANNEL_TYPE_LUMA], pu.mipTransposedFlag, bitDepth );
+
+  for( int y = 0; y < piPred.height; y++ )
+  {
+    for( int x = 0; x < piPred.width; x++ )
+    {
+      piPred.at( x, y ) = Pel(predMip[y * piPred.width + x]);
+    }
+  }
+}
+void IntraPrediction::reorderPLT(CodingStructure& cs, Partitioner& partitioner, ComponentID compBegin, uint32_t numComp)
+{
+  CodingUnit &cu = *cs.getCU(partitioner.chType);
+
+  uint8_t        reusePLTSizetmp = 0;
+  uint8_t        pltSizetmp = 0;
+  Pel            curPLTtmp[MAX_NUM_COMPONENT][MAXPLTSIZE];
+  bool           curPLTpred[MAXPLTPREDSIZE];
+
+  for (int idx = 0; idx < MAXPLTPREDSIZE; idx++)
+  {
+    curPLTpred[idx] = false;
+    cu.reuseflag[compBegin][idx] = false;
+  }
+  for (int idx = 0; idx < MAXPLTSIZE; idx++)
+  {
+    curPLTpred[idx] = false;
+  }
+
+  for (int predidx = 0; predidx < cs.prevPLT.curPLTSize[compBegin]; predidx++)
+  {
+    bool match = false;
+    int curidx = 0;
+
+    for (curidx = 0; curidx < cu.curPLTSize[compBegin]; curidx++)
+    {
+      bool matchTmp = true;
+      for (int comp = compBegin; comp < (compBegin + numComp); comp++)
+      {
+        matchTmp = matchTmp && (cu.curPLT[comp][curidx] == cs.prevPLT.curPLT[comp][predidx]);
+      }
+      if (matchTmp)
+      {
+        match = true;
+        break;
+      }
+    }
+
+    if (match)
+    {
+      cu.reuseflag[compBegin][predidx] = true;
+      curPLTpred[curidx] = true;
+      for (int comp = compBegin; comp < (compBegin + numComp); comp++)
+      {
+        curPLTtmp[comp][reusePLTSizetmp] = cs.prevPLT.curPLT[comp][predidx];
+      }
+      reusePLTSizetmp++;
+      pltSizetmp++;
+    }
+  }
+  cu.reusePLTSize[compBegin] = reusePLTSizetmp;
+  for (int curidx = 0; curidx < cu.curPLTSize[compBegin]; curidx++)
+  {
+    if (!curPLTpred[curidx])
+    {
+      for (int comp = compBegin; comp < (compBegin + numComp); comp++)
+      {
+        curPLTtmp[comp][pltSizetmp] = cu.curPLT[comp][curidx];
+      }
+      pltSizetmp++;
+    }
+  }
+  assert(pltSizetmp == cu.curPLTSize[compBegin]);
+  for (int curidx = 0; curidx < cu.curPLTSize[compBegin]; curidx++)
+  {
+    for (int comp = compBegin; comp < (compBegin + numComp); comp++)
+    {
+      cu.curPLT[comp][curidx] = curPLTtmp[comp][curidx];
+    }
+  }
+}
 //! \}
diff --git a/source/Lib/CommonLib/IntraPrediction.h b/source/Lib/CommonLib/IntraPrediction.h
index 22a2af108d4bcd2c6549a2758e2d4b08322a3c69..d8a8f4a232ffe07dcb85d3512efa62d6b4c85f93 100644
--- a/source/Lib/CommonLib/IntraPrediction.h
+++ b/source/Lib/CommonLib/IntraPrediction.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -44,6 +44,7 @@
 #include "Buffer.h"
 #include "Picture.h"
 
+#include "MatrixIntraPrediction.h"
 
 //! \ingroup CommonLib
 //! \{
@@ -64,55 +65,80 @@ static const uint32_t MAX_INTRA_FILTER_DEPTHS=8;
 
 class IntraPrediction
 {
-private:
+protected:
+  Pel      m_refBuffer[MAX_NUM_COMPONENT][NUM_PRED_BUF][(MAX_CU_SIZE * 2 + 1 + MAX_REF_LINE_IDX) * 2];
+  uint32_t m_refBufferStride[MAX_NUM_COMPONENT];
 
-  Pel* m_piYuvExt[MAX_NUM_COMPONENT][NUM_PRED_BUF];
-  int  m_iYuvExtSize;
+private:
 
   Pel* m_yuvExt2[MAX_NUM_COMPONENT][4];
   int  m_yuvExtSize2;
 
-  static const uint8_t m_aucIntraFilter[MAX_NUM_CHANNEL_TYPE][MAX_INTRA_FILTER_DEPTHS];
+  static const uint8_t m_aucIntraFilter[MAX_INTRA_FILTER_DEPTHS];
+
+  struct IntraPredParam //parameters of Intra Prediction
+  {
+    bool refFilterFlag;
+    bool applyPDPC;
+    bool isModeVer;
+    int  multiRefIndex;
+    int  intraPredAngle;
+    int  invAngle;
+    bool interpolationFlag;
+    int  angularScale;
+
+    // clang-format off
+    IntraPredParam()
+      : refFilterFlag(false)
+      , applyPDPC(false)
+      , isModeVer(false)
+      , multiRefIndex(-1)
+      , intraPredAngle(std::numeric_limits<int>::max())
+      , invAngle(std::numeric_limits<int>::max())
+      , interpolationFlag(false)
+      , angularScale(-1)
+    // clang-format on
+    {
+    }
+  };
+
+  IntraPredParam m_ipaParam;
 
   Pel* m_piTemp;
   Pel* m_pMdlmTemp; // for MDLM mode
-protected:
+  MatrixIntraPrediction m_matrixIntraPred;
+
+
 
+protected:
   ChromaFormat  m_currChromaFormat;
 
   int m_topRefLength;
   int m_leftRefLength;
+  ScanElement* m_scanOrder;
+  bool         m_bestScanRotationMode;
   // prediction
-  void xPredIntraPlanar           ( const CPelBuf &pSrc, PelBuf &pDst,                                                                                                         const SPS& sps );
-  void xPredIntraDc               ( const CPelBuf &pSrc, PelBuf &pDst, const ChannelType channelType,                                                                                          const bool enableBoundaryFilter = true );
-#if HEVC_USE_HOR_VER_PREDFILTERING
-  void xPredIntraAng              ( const CPelBuf &pSrc, PelBuf &pDst, const ChannelType channelType, const uint32_t dirMode, const ClpRng& clpRng, const bool bEnableEdgeFilters, const SPS& sps
-    , int multiRefIdx
-    , const bool enableBoundaryFilter = true );
-#else
-  void xPredIntraAng              ( const CPelBuf &pSrc, PelBuf &pDst, const ChannelType channelType, const uint32_t dirMode, const ClpRng& clpRng, const SPS& sps,
-                                          int  multiRefIdx,
-                                    const bool useFilteredPredSamples,
-                                    const bool useISP = false,
-                                    const Size cuSize = Size( 0, 0 ) );
-#endif
+  void xPredIntraPlanar           ( const CPelBuf &pSrc, PelBuf &pDst );
+  void xPredIntraDc               ( const CPelBuf &pSrc, PelBuf &pDst, const ChannelType channelType, const bool enableBoundaryFilter = true );
+  void xPredIntraAng              ( const CPelBuf &pSrc, PelBuf &pDst, const ChannelType channelType, const ClpRng& clpRng);
+
+  void initPredIntraParams        ( const PredictionUnit & pu,  const CompArea compArea, const SPS& sps );
+
+  static bool isIntegerSlope(const int absAng) { return (0 == (absAng & 0x1F)); }
+
+  void xPredIntraBDPCM            ( const CPelBuf &pSrc, PelBuf &pDst, const uint32_t dirMode, const ClpRng& clpRng );
   Pel  xGetPredValDc              ( const CPelBuf &pSrc, const Size &dstSize );
 
   void xFillReferenceSamples      ( const CPelBuf &recoBuf,      Pel* refBufUnfiltered, const CompArea &area, const CodingUnit &cu );
-  void xFilterReferenceSamples    ( const Pel* refBufUnfiltered, Pel* refBufFiltered, const CompArea &area, const SPS &sps
-    , int multiRefIdx
+  void xFilterReferenceSamples(const Pel *refBufUnfiltered, Pel *refBufFiltered, const CompArea &area, const SPS &sps,
+                               int multiRefIdx
   );
 
-#if HEVC_USE_DC_PREDFILTERING
-  // dc filtering
-  void xDCPredFiltering           ( const CPelBuf &pSrc, PelBuf &pDst, const ChannelType &channelType );
-#endif
   static int getWideAngle         ( int width, int height, int predMode );
   void setReferenceArrayLengths   ( const CompArea &area );
 
   void destroy                    ();
 
-  void xFilterGroup               ( Pel* pMulDst[], int i, Pel const* const piSrc, int iRecStride, bool bAboveAvaillable, bool bLeftAvaillable);
   void xGetLMParameters(const PredictionUnit &pu, const ComponentID compID, const CompArea& chromaArea, int& a, int& b, int& iShift);
 public:
   IntraPrediction();
@@ -121,21 +147,28 @@ public:
   void init                       (ChromaFormat chromaFormatIDC, const unsigned bitDepthY);
 
   // Angular Intra
-  void predIntraAng               ( const ComponentID compId, PelBuf &piPred, const PredictionUnit &pu, const bool useFilteredPredSamples );
-  Pel*  getPredictorPtr           (const ComponentID compID, const bool bUseFilteredPredictions = false) { return m_piYuvExt[compID][bUseFilteredPredictions?PRED_BUF_FILTERED:PRED_BUF_UNFILTERED]; }
+  void predIntraAng               ( const ComponentID compId, PelBuf &piPred, const PredictionUnit &pu);
+  Pel *getPredictorPtr(const ComponentID compId)
+  {
+    return m_refBuffer[compId][m_ipaParam.refFilterFlag ? PRED_BUF_FILTERED : PRED_BUF_UNFILTERED];
+  }
+
   // Cross-component Chroma
   void predIntraChromaLM(const ComponentID compID, PelBuf &piPred, const PredictionUnit &pu, const CompArea& chromaArea, int intraDir);
   void xGetLumaRecPixels(const PredictionUnit &pu, CompArea chromaArea);
   /// set parameters from CU data for accessing intra data
-  void initIntraPatternChType     (const CodingUnit &cu, const CompArea &area, const bool bFilterRefSamples = false );
+  void initIntraPatternChType     (const CodingUnit &cu, const CompArea &area, const bool forceRefFilterFlag = false); // use forceRefFilterFlag to get both filtered and unfiltered buffers
+  void initIntraPatternChTypeISP  (const CodingUnit& cu, const CompArea& area, PelBuf& piReco, const bool forceRefFilterFlag = false); // use forceRefFilterFlag to get both filtered and unfiltered buffers
 
-static bool useFilteredIntraRefSamples( const ComponentID &compID, const PredictionUnit &pu, bool modeSpecific, const UnitArea &tuArea );
-  static bool useDPCMForFirstPassIntraEstimation(const PredictionUnit &pu, const uint32_t &uiDirMode);
+  // Matrix-based intra prediction
+  void initIntraMip               (const PredictionUnit &pu, const CompArea &area);
+  void predIntraMip               (const ComponentID compId, PelBuf &piPred, const PredictionUnit &pu);
 
   void geneWeightedPred           (const ComponentID compId, PelBuf &pred, const PredictionUnit &pu, Pel *srcBuf);
   Pel* getPredictorPtr2           (const ComponentID compID, uint32_t idx) { return m_yuvExt2[compID][idx]; }
   void switchBuffer               (const PredictionUnit &pu, ComponentID compID, PelBuf srcBuff, Pel *dst);
   void geneIntrainterPred         (const CodingUnit &cu);
+  void reorderPLT                 (CodingStructure& cs, Partitioner& partitioner, ComponentID compBegin, uint32_t numComp);
 };
 
 //! \}
diff --git a/source/Lib/CommonLib/LoopFilter.cpp b/source/Lib/CommonLib/LoopFilter.cpp
index 2cd1e89ce9fb82af3f2a73b8290912f958be06a9..f2ca851eae904fbfd67aee1d9ff870f6a0b0566f 100644
--- a/source/Lib/CommonLib/LoopFilter.cpp
+++ b/source/Lib/CommonLib/LoopFilter.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -63,12 +63,10 @@
 // Tables
 // ====================================================================================================================
 
-const uint8_t LoopFilter::sm_tcTable[MAX_QP + 1 + DEFAULT_INTRA_TC_OFFSET] =
+const uint16_t LoopFilter::sm_tcTable[MAX_QP + 1 + DEFAULT_INTRA_TC_OFFSET] =
 {
-  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,5,5,6,6,7,8,9,10,11,13,14,16,18,20,22,25
-  , 28, 31, 35, 39, 44, 50, 56, 63, 70, 79, 88, 99
+  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,4,4,4,4,5,5,5,5,7,7,8,9,10,10,11,13,14,15,17,19,21,24,25,29,33,36,41,45,51,57,64,71,80,89,100,112,125,141,157,177,198,222,250,280,314,352,395
 };
-
 const uint8_t LoopFilter::sm_betaTable[MAX_QP + 1] =
 {
   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,7,8,9,10,11,12,13,14,15,16,17,18,20,22,24,26,28,30,32,34,36,38,40,42,44,46,48,50,52,54,56,58,60,62,64
@@ -84,7 +82,6 @@ inline static uint32_t getRasterIdx(const Position& pos, const PreCalcValues& pc
 // utility functions
 // ====================================================================================================================
 
-#if HEVC_TILES_WPP
 static bool isAvailableLeft( const CodingUnit& cu, const CodingUnit& cu2, const bool bEnforceSliceRestriction, const bool bEnforceTileRestriction )
 {
   return ( ( !bEnforceSliceRestriction || CU::isSameSlice( cu, cu2 ) ) && ( !bEnforceTileRestriction || CU::isSameTile( cu, cu2 ) ) );
@@ -94,12 +91,6 @@ static bool isAvailableAbove( const CodingUnit& cu, const CodingUnit& cu2, const
 {
   return ( !bEnforceSliceRestriction || CU::isSameSlice( cu, cu2 ) ) && ( !bEnforceTileRestriction || CU::isSameTile( cu, cu2 ) );
 }
-#else
-static bool isAvailable( const CodingUnit& cu, const CodingUnit& cu2, const bool bEnforceSliceRestriction )
-{
-  return ( !bEnforceSliceRestriction || CU::isSameSlice( cu, cu2 ) );
-}
-#endif
 
 
 // ====================================================================================================================
@@ -155,6 +146,8 @@ void LoopFilter::loopFilterPic( CodingStructure& cs
                                 )
 {
   const PreCalcValues& pcv = *cs.pcv;
+  m_shiftHor = ::getComponentScaleX( COMPONENT_Cb, cs.pcv->chrFormat );
+  m_shiftVer = ::getComponentScaleY( COMPONENT_Cb, cs.pcv->chrFormat );
 
   DTRACE_UPDATE( g_trace_ctx, ( std::make_pair( "poc", cs.slice->getPOC() ) ) );
 #if ENABLE_TRACING
@@ -175,8 +168,15 @@ void LoopFilter::loopFilterPic( CodingStructure& cs
     {
       memset( m_aapucBS       [EDGE_VER].data(), 0,     m_aapucBS       [EDGE_VER].byte_size() );
       memset( m_aapbEdgeFilter[EDGE_VER].data(), false, m_aapbEdgeFilter[EDGE_VER].byte_size() );
+      memset( m_maxFilterLengthP, 0, sizeof(m_maxFilterLengthP) );
+      memset( m_maxFilterLengthQ, 0, sizeof(m_maxFilterLengthQ) );
+      memset( m_transformEdge, false, sizeof(m_transformEdge) );
+      m_ctuXLumaSamples = x << pcv.maxCUWidthLog2;
+      m_ctuYLumaSamples = y << pcv.maxCUHeightLog2;
 
       const UnitArea ctuArea( pcv.chrFormat, Area( x << pcv.maxCUWidthLog2, y << pcv.maxCUHeightLog2, pcv.maxCUWidth, pcv.maxCUWidth ) );
+      CodingUnit* firstCU = cs.getCU( ctuArea.lumaPos(), CH_L);
+      cs.slice = firstCU->slice;
 
       // CU-based deblocking
       for( auto &currCU : cs.traverseCUs( CS::getArea( cs, ctuArea, CH_L ), CH_L ) )
@@ -188,6 +188,9 @@ void LoopFilter::loopFilterPic( CodingStructure& cs
       {
         memset( m_aapucBS       [EDGE_VER].data(), 0,     m_aapucBS       [EDGE_VER].byte_size() );
         memset( m_aapbEdgeFilter[EDGE_VER].data(), false, m_aapbEdgeFilter[EDGE_VER].byte_size() );
+        memset( m_maxFilterLengthP, 0, sizeof(m_maxFilterLengthP) );
+        memset( m_maxFilterLengthQ, 0, sizeof(m_maxFilterLengthQ) );
+        memset( m_transformEdge, false, sizeof(m_transformEdge) );
 
         for( auto &currCU : cs.traverseCUs( CS::getArea( cs, ctuArea, CH_C ), CH_C ) )
         {
@@ -204,8 +207,15 @@ void LoopFilter::loopFilterPic( CodingStructure& cs
     {
       memset( m_aapucBS       [EDGE_HOR].data(), 0,     m_aapucBS       [EDGE_HOR].byte_size() );
       memset( m_aapbEdgeFilter[EDGE_HOR].data(), false, m_aapbEdgeFilter[EDGE_HOR].byte_size() );
+      memset( m_maxFilterLengthP, 0, sizeof(m_maxFilterLengthP) );
+      memset( m_maxFilterLengthQ, 0, sizeof(m_maxFilterLengthQ) );
+      memset( m_transformEdge, false, sizeof(m_transformEdge) );
+      m_ctuXLumaSamples = x << pcv.maxCUWidthLog2;
+      m_ctuYLumaSamples = y << pcv.maxCUHeightLog2;
 
       const UnitArea ctuArea( pcv.chrFormat, Area( x << pcv.maxCUWidthLog2, y << pcv.maxCUHeightLog2, pcv.maxCUWidth, pcv.maxCUWidth ) );
+      CodingUnit* firstCU = cs.getCU( ctuArea.lumaPos(), CH_L);
+      cs.slice = firstCU->slice;
 
       // CU-based deblocking
       for( auto &currCU : cs.traverseCUs( CS::getArea( cs, ctuArea, CH_L ), CH_L ) )
@@ -217,6 +227,9 @@ void LoopFilter::loopFilterPic( CodingStructure& cs
       {
         memset( m_aapucBS       [EDGE_HOR].data(), 0,     m_aapucBS       [EDGE_HOR].byte_size() );
         memset( m_aapbEdgeFilter[EDGE_HOR].data(), false, m_aapbEdgeFilter[EDGE_HOR].byte_size() );
+        memset( m_maxFilterLengthP, 0, sizeof(m_maxFilterLengthP) );
+        memset( m_maxFilterLengthQ, 0, sizeof(m_maxFilterLengthQ) );
+        memset( m_transformEdge, false, sizeof(m_transformEdge) );
 
         for( auto &currCU : cs.traverseCUs( CS::getArea( cs, ctuArea, CH_C ), CH_C ) )
         {
@@ -250,23 +263,49 @@ void LoopFilter::xDeblockCU( CodingUnit& cu, const DeblockEdgeDir edgeDir )
   const PreCalcValues& pcv = *cu.cs->pcv;
   const Area area          = cu.Y().valid() ? cu.Y() : Area( recalcPosition( cu.chromaFormat, cu.chType, CHANNEL_TYPE_LUMA, cu.blocks[cu.chType].pos() ), recalcSize( cu.chromaFormat, cu.chType, CHANNEL_TYPE_LUMA, cu.blocks[cu.chType].size() ) );
 
+  bool horEdgeFilter = false, verEdgeFilter = false;
+  int  numHorVirBndry = 0, numVerVirBndry = 0;
+  int  horVirBndryPos[] = { 0, 0, 0 };
+  int  verVirBndryPos[] = { 0, 0, 0 };
+
+  bool isCuCrossedByVirtualBoundaries = isCrossedByVirtualBoundaries( area.x, area.y, area.width, area.height, numHorVirBndry, numVerVirBndry, horVirBndryPos, verVirBndryPos, cu.cs->picHeader );
+
   xSetLoopfilterParam( cu );
-  bool implicitTU = false;
-  for( auto &currTU : CU::traverseTUs( cu ) )
+  static_vector<int, 2*MAX_CU_SIZE> edgeIdx;
+  edgeIdx.clear();
+
+  if (m_enc)
   {
-    const Area& areaTu    = cu.Y().valid() ? currTU.block( COMPONENT_Y ) : area;
-    const bool xOff = currTU.blocks[cu.chType].x != cu.blocks[cu.chType].x;
-    const bool yOff = currTU.blocks[cu.chType].y != cu.blocks[cu.chType].y;
-    if ((yOff != 0) && (edgeDir == EDGE_HOR))
+    m_shiftHor = ::getComponentScaleX(COMPONENT_Cb, cu.chromaFormat);
+    m_shiftVer = ::getComponentScaleY(COMPONENT_Cb, cu.chromaFormat);
+    int x, y;
+    if (cu.Y().valid())
     {
-      implicitTU = true;
+      x = cu.block(COMPONENT_Y).x;
+      y = cu.block(COMPONENT_Y).y;
     }
-    if ((xOff != 0) && (edgeDir == EDGE_VER))
+    else
+    {
+      x = cu.block(COMPONENT_Cb).x << m_shiftHor;
+      y = cu.block(COMPONENT_Cb).y << m_shiftVer;
+    }
+    m_ctuXLumaSamples = x & ~(cu.slice->getSPS()->getMaxCUWidth()  - 1);
+    m_ctuYLumaSamples = y & ~(cu.slice->getSPS()->getMaxCUHeight() - 1);
+  }
+
+  for( auto &currTU : CU::traverseTUs( cu ) )
+  {
+    const Area& areaTu    = cu.Y().valid() ? currTU.block( COMPONENT_Y ) : area;
+    verEdgeFilter = m_stLFCUParam.internalEdge;
+    horEdgeFilter = m_stLFCUParam.internalEdge;
+    if( isCuCrossedByVirtualBoundaries )
     {
-      implicitTU = true;
+      xDeriveEdgefilterParam( areaTu.x, areaTu.y, numVerVirBndry, numHorVirBndry, verVirBndryPos, horVirBndryPos, verEdgeFilter, horEdgeFilter );
     }
-    xSetEdgefilterMultiple( cu, EDGE_VER, areaTu, m_stLFCUParam.internalEdge );
-    xSetEdgefilterMultiple( cu, EDGE_HOR, areaTu, m_stLFCUParam.internalEdge );
+    xSetEdgefilterMultiple( cu, EDGE_VER, areaTu, verEdgeFilter );
+    xSetEdgefilterMultiple( cu, EDGE_HOR, areaTu, horEdgeFilter );
+    xSetMaxFilterLengthPQFromTransformSizes( edgeDir, cu, currTU );
+    edgeIdx.push_back( ( edgeDir == EDGE_HOR ) ? ( currTU.blocks[cu.chType].y - cu.blocks[cu.chType].y ) / 4 : ( currTU.blocks[cu.chType].x - cu.blocks[cu.chType].x ) / 4 );
   }
 
   bool mvSubBlocks = false;
@@ -277,8 +316,16 @@ void LoopFilter::xDeblockCU( CodingUnit& cu, const DeblockEdgeDir edgeDir )
     const bool xOff    = currPU.blocks[cu.chType].x != cu.blocks[cu.chType].x;
     const bool yOff    = currPU.blocks[cu.chType].y != cu.blocks[cu.chType].y;
 
-    xSetEdgefilterMultiple( cu, EDGE_VER, areaPu, (xOff ? m_stLFCUParam.internalEdge : m_stLFCUParam.leftEdge), xOff );
-    xSetEdgefilterMultiple( cu, EDGE_HOR, areaPu, (yOff ? m_stLFCUParam.internalEdge : m_stLFCUParam.topEdge),  yOff );
+    verEdgeFilter = (xOff ? m_stLFCUParam.internalEdge : m_stLFCUParam.leftEdge);
+    horEdgeFilter = (yOff ? m_stLFCUParam.internalEdge : m_stLFCUParam.topEdge);
+    if( isCuCrossedByVirtualBoundaries )
+    {
+      xDeriveEdgefilterParam( areaPu.x, areaPu.y, numVerVirBndry, numHorVirBndry, verVirBndryPos, horVirBndryPos, verEdgeFilter, horEdgeFilter );
+    }
+
+    xSetEdgefilterMultiple( cu, EDGE_VER, areaPu, verEdgeFilter, xOff );
+    xSetEdgefilterMultiple( cu, EDGE_HOR, areaPu, horEdgeFilter, yOff );
+    edgeIdx.push_back( ( edgeDir == EDGE_HOR ) ? ( currPU.blocks[cu.chType].y - cu.blocks[cu.chType].y ) / 4 : ( currPU.blocks[cu.chType].x - cu.blocks[cu.chType].x ) / 4 );
 
     if ((currPU.mergeFlag && (currPU.mergeType == MRG_TYPE_SUBPU_ATMVP)) || cu.affine)
     {
@@ -288,7 +335,14 @@ void LoopFilter::xDeblockCU( CodingUnit& cu, const DeblockEdgeDir edgeDir )
         for (uint32_t off = subBlockSize; off < areaPu.height; off += subBlockSize)
         {
           const Area mvBlockH(cu.Y().x, cu.Y().y + off, cu.Y().width, pcv.minCUHeight);
-          xSetEdgefilterMultiple(cu, EDGE_HOR, mvBlockH, m_stLFCUParam.internalEdge, 1);
+          horEdgeFilter = m_stLFCUParam.internalEdge;
+          if( isCuCrossedByVirtualBoundaries )
+          {
+            xDeriveEdgefilterParam( mvBlockH.x, mvBlockH.y, 0, numHorVirBndry, verVirBndryPos, horVirBndryPos, verEdgeFilter, horEdgeFilter );
+          }
+
+          xSetEdgefilterMultiple(cu, EDGE_HOR, mvBlockH, horEdgeFilter, 1);
+          edgeIdx.push_back( ( currPU.blocks[cu.chType].y + off - cu.blocks[cu.chType].y ) / 4 );
         }
       }
       else
@@ -296,34 +350,19 @@ void LoopFilter::xDeblockCU( CodingUnit& cu, const DeblockEdgeDir edgeDir )
         for (uint32_t off = subBlockSize; off < areaPu.width; off += subBlockSize)
         {
           const Area mvBlockV(cu.Y().x + off, cu.Y().y, pcv.minCUWidth, cu.Y().height);
-          xSetEdgefilterMultiple(cu, EDGE_VER, mvBlockV, m_stLFCUParam.internalEdge, 1);
+          verEdgeFilter = m_stLFCUParam.internalEdge;
+          if( isCuCrossedByVirtualBoundaries )
+          {
+            xDeriveEdgefilterParam( mvBlockV.x, mvBlockV.y, numVerVirBndry, 0, verVirBndryPos, horVirBndryPos, verEdgeFilter, horEdgeFilter );
+          }
+
+          xSetEdgefilterMultiple(cu, EDGE_VER, mvBlockV, verEdgeFilter, 1);
+          edgeIdx.push_back( ( currPU.blocks[cu.chType].x + off - cu.blocks[cu.chType].x ) / 4 );
         }
       }
     }
-  }
-  if (cu.firstPU->mhIntraFlag)
-  {
-    const uint32_t dirMode = PU::getFinalIntraMode(*(cu.firstPU), cu.chType);
-    if (edgeDir == EDGE_VER && dirMode == HOR_IDX)
-    {
-      mvSubBlocks = true;
-      subBlockSize = std::max(8u, (area.width >> 2));
-      for (uint32_t off = subBlockSize; off < area.width; off += subBlockSize)
-      {
-        const Area mvBlockV(cu.Y().x + off, cu.Y().y, pcv.minCUWidth, cu.Y().height);
-        xSetEdgefilterMultiple(cu, EDGE_VER, mvBlockV, m_stLFCUParam.internalEdge, 1);
-      }
-    }
-    else if (edgeDir == EDGE_HOR && dirMode == VER_IDX)
-    {
-      mvSubBlocks = true;
-      subBlockSize = std::max(8u, (area.height >> 2));
-      for (uint32_t off = subBlockSize; off < area.height; off += subBlockSize)
-      {
-        const Area mvBlockH(cu.Y().x, cu.Y().y + off, cu.Y().width, pcv.minCUHeight);
-        xSetEdgefilterMultiple(cu, EDGE_HOR, mvBlockH, m_stLFCUParam.internalEdge, 1);
-      }
-    }
+
+    xSetMaxFilterLengthPQForCodingSubBlocks( edgeDir, cu, currPU, mvSubBlocks, subBlockSize, areaPu );
   }
 
   const unsigned uiPelsInPart = pcv.minCUWidth;
@@ -343,124 +382,248 @@ void LoopFilter::xDeblockCU( CodingUnit& cu, const DeblockEdgeDir edgeDir )
     }
   }
 
-  if (edgeDir == EDGE_HOR)
-  {
-    if (!((cu.block(COMPONENT_Y).y % 8) == 0))
-      return;
-  }
-  else
-  {
-    if (!((cu.block(COMPONENT_Y).x % 8) == 0))
-      return;
-  }
 
-  unsigned int orthogonalLength = 1;
-  unsigned int orthogonalIncrement = 1;
-#if FIX_DB_MAX_TRANSFORM_SIZE
-#if MAX_TB_SIZE_SIGNALLING
-  const int maxTsize = cu.slice->getSPS()->getMaxTbSize();
-#else
-  const int maxTsize = MAX_TB_SIZEY;
-#endif
-#endif
-#if FIX_DB_MAX_TRANSFORM_SIZE
-  int maxFilterLengthQ = 7;
-  int maxFilterLengthP = 7;
-  if (implicitTU && maxTsize < 32)
-  {
-    maxFilterLengthQ = 3;
-    maxFilterLengthP = 3;
-  }
-#else
-  int maxFilterLength = 7;
-#endif
-  if (cu.blocks[COMPONENT_Y].valid())
+  std::sort( edgeIdx.begin(), edgeIdx.end() );
+  int prevEdgeIdx = -1;
+  for ( const int& edge : edgeIdx )
   {
-    if (mvSubBlocks)
+    if ( edge == prevEdgeIdx ) // skip duplicate edgeIdx marked by both transform and coding subblock processes
     {
-#if FIX_DB_MAX_TRANSFORM_SIZE
-      maxFilterLengthQ = std::min(maxFilterLengthQ, 5);
-#else
-      maxFilterLength = 5;
-#endif
-      orthogonalIncrement = subBlockSize / 4;
-      orthogonalLength = (edgeDir == EDGE_HOR) ? cu.blocks[COMPONENT_Y].height / 4 : cu.blocks[COMPONENT_Y].width / 4;
+      continue;
     }
-#if FIX_DB_MAX_TRANSFORM_SIZE
-    if ((cu.blocks[COMPONENT_Y].height > maxTsize) && (edgeDir == EDGE_HOR) && !mvSubBlocks)
+    prevEdgeIdx = edge;
+
+    if ( cu.blocks[COMPONENT_Y].valid() )
     {
-      orthogonalIncrement = maxTsize / 4;
-      orthogonalLength = cu.blocks[COMPONENT_Y].height / 4;
+      xEdgeFilterLuma( cu, edgeDir, edge );
     }
-    if ((cu.blocks[COMPONENT_Y].width > maxTsize) && (edgeDir == EDGE_VER) && !mvSubBlocks)
+    if ( cu.blocks[COMPONENT_Cb].valid() && pcv.chrFormat != CHROMA_400 )
     {
-      orthogonalIncrement = maxTsize / 4;
-      orthogonalLength = cu.blocks[COMPONENT_Y].width / 4;
+      if ( !cu.ispMode || edge == 0 )
+      {
+        xEdgeFilterChroma( cu, edgeDir, edge );
+      }
+    }
+  }
+}
 
+inline bool LoopFilter::isCrossedByVirtualBoundaries(const int xPos, const int yPos, const int width, const int height, int& numHorVirBndry, int& numVerVirBndry, int horVirBndryPos[], int verVirBndryPos[], const PicHeader* picHeader )
+{
+  numHorVirBndry = 0; numVerVirBndry = 0;
+  if (picHeader->getLoopFilterAcrossVirtualBoundariesDisabledFlag())
+  {
+    for (int i = 0; i < picHeader->getNumHorVirtualBoundaries(); i++)
+    {
+      if (yPos <= picHeader->getVirtualBoundariesPosY(i) && picHeader->getVirtualBoundariesPosY(i) < yPos + height)
+      {
+        horVirBndryPos[numHorVirBndry++] = picHeader->getVirtualBoundariesPosY(i);
+      }
     }
-#else
-    if ((cu.blocks[COMPONENT_Y].height > 64) && (edgeDir == EDGE_HOR) && !mvSubBlocks)
+    for (int i = 0; i < picHeader->getNumVerVirtualBoundaries(); i++)
     {
-      orthogonalIncrement = 64 / 4;
-      orthogonalLength = cu.blocks[COMPONENT_Y].height / 4;
+      if (xPos <= picHeader->getVirtualBoundariesPosX(i) && picHeader->getVirtualBoundariesPosX(i) < xPos + width)
+      {
+        verVirBndryPos[numVerVirBndry++] = picHeader->getVirtualBoundariesPosX(i);
+      }
     }
-    if ((cu.blocks[COMPONENT_Y].width > 64) && (edgeDir == EDGE_VER) && !mvSubBlocks)
+  }
+  return numHorVirBndry > 0 || numVerVirBndry > 0;
+}
+
+inline void LoopFilter::xDeriveEdgefilterParam( const int xPos, const int yPos, const int numVerVirBndry, const int numHorVirBndry, const int verVirBndryPos[], const int horVirBndryPos[], bool &verEdgeFilter, bool &horEdgeFilter )
+{
+  for (int i = 0; i < numVerVirBndry; i++)
+  {
+    if (verVirBndryPos[i] == xPos)
     {
-      orthogonalIncrement = 64 / 4;
-      orthogonalLength = cu.blocks[COMPONENT_Y].width / 4;
+      verEdgeFilter = false;
+      break;
+    }
+  }
 
+  for (int i = 0; i < numHorVirBndry; i++)
+  {
+    if (horVirBndryPos[i] == yPos)
+    {
+      horEdgeFilter = false;
+      break;
     }
-#endif
   }
+}
 
-  for (int edge = 0; edge < orthogonalLength; edge += orthogonalIncrement)
+void LoopFilter::xSetMaxFilterLengthPQFromTransformSizes( const DeblockEdgeDir edgeDir, const CodingUnit& cu, const TransformUnit& currTU )
+{
+  const TransformUnit& tuQ = currTU;
+
+  if ( edgeDir == EDGE_HOR )
   {
-    if (cu.blocks[COMPONENT_Y].valid())
+    for ( int cIdx = 0; cIdx < MAX_NUM_COMPONENT; cIdx++ ) // per component
     {
-      if (edge == 0)
-      {
-#if FIX_DB_MAX_TRANSFORM_SIZE
-        xEdgeFilterLuma(cu, edgeDir, edge, maxFilterLengthP, maxFilterLengthQ);
-#else
-        xEdgeFilterLuma(cu, edgeDir, edge, 7, maxFilterLength);
-#endif
-      }
-      else
+      const ComponentID comp = ComponentID(cIdx);
+      const ChannelType ch   = toChannelType(comp);
+      const int shiftHor     = ( ( ch == CH_L ) ? 0 : m_shiftHor );
+      const int shiftVer     = ( ( ch == CH_L ) ? 0 : m_shiftVer );
+      const int ctuXOff      = currTU.block(comp).x - ( m_ctuXLumaSamples >> shiftHor ); // x offset from left edge of CTU in respective channel sample units
+      const int ctuYOff      = currTU.block(comp).y - ( m_ctuYLumaSamples >> shiftVer ); // y offset from top edge of CTU in respective channel sample units
+      const int minCUWidth   = cu.cs->pcv->minCUWidth >> shiftHor;
+      if ( currTU.block(comp).valid() && ( ( currTU.block(comp).y == cu.block(comp).y ) ? m_stLFCUParam.topEdge : m_stLFCUParam.internalEdge ) ) // Edge deblocking needs to be recomputed since ISP contains whole CU chroma transforms in last TU of the CU
       {
-#if FIX_DB_MAX_TRANSFORM_SIZE
-        if (implicitTU && ((edge % (maxTsize / 4)) == 0))
-#else
-        if ( implicitTU && (edge == (64 / 4)) )
-#endif
+        for ( int x = 0; x < currTU.blocks[cIdx].width; x += minCUWidth )
         {
-#if FIX_DB_MAX_TRANSFORM_SIZE
-          xEdgeFilterLuma(cu, edgeDir, edge, maxFilterLengthQ, maxFilterLengthQ);
-#else
-          xEdgeFilterLuma(cu, edgeDir, edge, maxFilterLength, maxFilterLength);
-#endif
+          const Position  posQ     = Position( currTU.blocks[ch].x + x, currTU.blocks[ch].y );
+          const Position  posP     = posQ.offset( 0, -1 );
+          const int sizeQSide      = tuQ.block(comp).height;
+          const TransformUnit& tuP = *cu.cs->getTU( posP, ch );
+          const int sizePSide      = tuP.block(comp).height;
+          m_transformEdge[cIdx][ctuXOff+x][ctuYOff] = true;
+
+          if ( comp == COMPONENT_Y )
+          {
+            bool smallBlock = (sizePSide <= 4) || (sizeQSide <= 4);
+            if (smallBlock)
+            {
+              m_maxFilterLengthQ[cIdx][ctuXOff + x][ctuYOff] = 1;
+              m_maxFilterLengthP[cIdx][ctuXOff + x][ctuYOff] = 1;
+            }
+            else
+            {
+              m_maxFilterLengthQ[cIdx][ctuXOff + x][ctuYOff] = (sizeQSide >= 32) ? 7 : 3;
+              m_maxFilterLengthP[cIdx][ctuXOff + x][ctuYOff] = (sizePSide >= 32) ? 7 : 3;
+            }
+          }
+          else
+          {
+            m_maxFilterLengthQ[cIdx][ctuXOff+x][ctuYOff] = ( sizeQSide >= 8 && sizePSide >= 8 ) ? 3 : 1;
+            m_maxFilterLengthP[cIdx][ctuXOff+x][ctuYOff] = ( sizeQSide >= 8 && sizePSide >= 8 ) ? 3 : 1;
+          }
         }
-#if FIX_DB_MAX_TRANSFORM_SIZE
-        else if ((edge == 2 || edge == (orthogonalLength - 2)) || (implicitTU && (((edge - 2) % ((maxTsize) / 4) == 0) || ((edge + 2) % ((maxTsize) / 4) == 0))))
-#else
-        else if ( (edge == 2 || edge == (orthogonalLength - 2)) || (implicitTU && (edge == (56 / 4) || edge == (72 / 4))) )
-#endif
+      }
+    }
+  }
+  if ( edgeDir == EDGE_VER )
+  {
+    for ( int cIdx = 0; cIdx < MAX_NUM_COMPONENT; cIdx++ ) // per component
+    {
+      const ComponentID comp = ComponentID(cIdx);
+      const ChannelType ch   = toChannelType(comp);
+      const int shiftHor     = ( ( ch == CH_L ) ? 0 : m_shiftHor );
+      const int shiftVer     = ( ( ch == CH_L ) ? 0 : m_shiftVer );
+      const int ctuXOff      = currTU.block(comp).x - ( m_ctuXLumaSamples >> shiftHor ); // x offset from left edge of CTU in respective channel sample units
+      const int ctuYOff      = currTU.block(comp).y - ( m_ctuYLumaSamples >> shiftVer ); // y offset from top edge of CTU in respective channel sample units
+      const int minCUHeight  = cu.cs->pcv->minCUHeight >> shiftVer;
+      if ( currTU.block(comp).valid() && ( ( currTU.block(comp).x == cu.block(comp).x ) ? m_stLFCUParam.leftEdge : m_stLFCUParam.internalEdge ) ) // Edge deblocking needs to be recomputed since ISP contains whole CU chroma transforms in last TU of the CU
+      {
+        for ( int y = 0; y < currTU.blocks[cIdx].height; y += minCUHeight )
         {
-          xEdgeFilterLuma(cu, edgeDir, edge, 2, 2);
+          const Position  posQ     = Position( currTU.blocks[ch].x, currTU.blocks[ch].y + y );
+          const Position  posP     = posQ.offset( -1, 0 );
+          const int sizeQSide      = tuQ.block(comp).width;
+          const TransformUnit& tuP = *cu.cs->getTU( posP, ch );
+          const int sizePSide      = tuP.block(comp).width;
+          m_transformEdge[cIdx][ctuXOff][ctuYOff+y] = true;
+
+          if ( comp == COMPONENT_Y )
+          {
+            bool smallBlock = (sizePSide <= 4) || (sizeQSide <= 4);
+            if (smallBlock)
+            {
+              m_maxFilterLengthQ[cIdx][ctuXOff][ctuYOff + y] = 1;
+              m_maxFilterLengthP[cIdx][ctuXOff][ctuYOff + y] = 1;
+            }
+            else
+            {
+              m_maxFilterLengthQ[cIdx][ctuXOff][ctuYOff + y] = (sizeQSide >= 32) ? 7 : 3;
+              m_maxFilterLengthP[cIdx][ctuXOff][ctuYOff + y] = (sizePSide >= 32) ? 7 : 3;
+            }
+          }
+          else
+          {
+            m_maxFilterLengthQ[cIdx][ctuXOff][ctuYOff+y] = ( sizeQSide >= 8 && sizePSide >= 8 ) ? 3 : 1;
+            m_maxFilterLengthP[cIdx][ctuXOff][ctuYOff+y] = ( sizeQSide >= 8 && sizePSide >= 8 ) ? 3 : 1;
+          }
         }
-        else
+      }
+    }
+  }
+}
+
+void LoopFilter::xSetMaxFilterLengthPQForCodingSubBlocks( const DeblockEdgeDir edgeDir, const CodingUnit& cu, const PredictionUnit& currPU, const bool& mvSubBlocks, const int& subBlockSize, const Area& areaPu )
+{
+  if ( mvSubBlocks && currPU.Y().valid() )
+  {
+    const int cIdx         = 0;
+    const ComponentID comp = ComponentID(cIdx);
+    const int ctuYOff      = currPU.block(comp).y - m_ctuYLumaSamples; // y offset from top edge of CTU in luma samples
+    const int ctuXOff      = currPU.block(comp).x - m_ctuXLumaSamples; // x offset from left edge of CTU in luma samples
+    const int minCUWidth   = cu.cs->pcv->minCUWidth;
+    const int minCUHeight  = cu.cs->pcv->minCUHeight;
+    if ( edgeDir == EDGE_HOR )
+    {
+      for ( int y = 0; y < areaPu.height; y += subBlockSize )
+      {
+        for ( int x = 0; x < areaPu.width; x += minCUWidth )
         {
-          xEdgeFilterLuma(cu, edgeDir, edge, 3, 3);
+          if ( m_transformEdge[cIdx][ctuXOff+x][ctuYOff+y] )
+          {
+            m_maxFilterLengthQ[cIdx][ctuXOff+x][ctuYOff+y] = std::min<int>(m_maxFilterLengthQ[cIdx][ctuXOff+x][ctuYOff+y], 5);
+            if ( y > 0 )
+            {
+              m_maxFilterLengthP[cIdx][ctuXOff+x][ctuYOff+y] = std::min<int>(m_maxFilterLengthP[cIdx][ctuXOff+x][ctuYOff+y], 5);
+            }
+          }
+          else if (y > 0 && (m_transformEdge[cIdx][ctuXOff + x][ctuYOff + y - 4] || ((y + 4) >= areaPu.height) || m_transformEdge[cIdx][ctuXOff + x][ctuYOff + y + 4])) // adjacent to transform edge +/- 4
+          {
+            m_maxFilterLengthQ[cIdx][ctuXOff + x][ctuYOff + y] = 1;
+            m_maxFilterLengthP[cIdx][ctuXOff + x][ctuYOff + y] = 1;
+          }
+          else if (y > 0 && ( m_transformEdge[cIdx][ctuXOff+x][ctuYOff+y-8] || (( y + 8 ) >= areaPu.height) || m_transformEdge[cIdx][ctuXOff+x][ctuYOff+y+8] )) // adjacent to transform edge on 8x8 grid
+          {
+            m_maxFilterLengthQ[cIdx][ctuXOff+x][ctuYOff+y] = 2;
+            m_maxFilterLengthP[cIdx][ctuXOff+x][ctuYOff+y] = 2;
+          }
+          else
+          {
+            m_maxFilterLengthQ[cIdx][ctuXOff+x][ctuYOff+y] = 3;
+            m_maxFilterLengthP[cIdx][ctuXOff+x][ctuYOff+y] = 3;
+          }
         }
       }
     }
-    if (cu.blocks[COMPONENT_Cb].valid() && pcv.chrFormat != CHROMA_400)
+    else // edgeDir == EDGE_VER
     {
-      xEdgeFilterChroma(cu, edgeDir, edge);
+      for ( int x = 0; x < areaPu.width; x += subBlockSize )
+      {
+        for ( int y = 0; y < areaPu.height; y += minCUHeight )
+        {
+          if ( m_transformEdge[cIdx][ctuXOff+x][ctuYOff+y] )
+          {
+            m_maxFilterLengthQ[cIdx][ctuXOff+x][ctuYOff+y] = std::min<int>(m_maxFilterLengthQ[cIdx][ctuXOff+x][ctuYOff+y], 5);
+            if ( x > 0 )
+            {
+              m_maxFilterLengthP[cIdx][ctuXOff+x][ctuYOff+y] = std::min<int>(m_maxFilterLengthP[cIdx][ctuXOff+x][ctuYOff+y], 5);
+            }
+          }
+          else if (x > 0 && (m_transformEdge[cIdx][ctuXOff + x - 4][ctuYOff + y] || ((x + 4) >= areaPu.width) || m_transformEdge[cIdx][ctuXOff + x + 4][ctuYOff + y])) // adjacent to transform edge +/- 4
+          {
+            m_maxFilterLengthQ[cIdx][ctuXOff + x][ctuYOff + y] = 1;
+            m_maxFilterLengthP[cIdx][ctuXOff + x][ctuYOff + y] = 1;
+          }
+          else if ( x > 0 && ( m_transformEdge[cIdx][ctuXOff+x-8][ctuYOff+y] || ( (x + 8) >= areaPu.width ) || m_transformEdge[cIdx][ctuXOff+x+8][ctuYOff+y] ) ) // adjacent to transform edge on 8x8 grid
+          {
+            m_maxFilterLengthQ[cIdx][ctuXOff+x][ctuYOff+y] = 2;
+            m_maxFilterLengthP[cIdx][ctuXOff+x][ctuYOff+y] = 2;
+          }
+          else
+          {
+            m_maxFilterLengthQ[cIdx][ctuXOff+x][ctuYOff+y] = 3;
+            m_maxFilterLengthP[cIdx][ctuXOff+x][ctuYOff+y] = 3;
+          }
+        }
+      }
     }
   }
 }
 
-
 void LoopFilter::xSetEdgefilterMultiple( const CodingUnit&    cu,
                                          const DeblockEdgeDir edgeDir,
                                          const Area&          area,
@@ -476,9 +639,16 @@ void LoopFilter::xSetEdgefilterMultiple( const CodingUnit&    cu,
   for( int ui = 0; ui < uiNumElem; ui++ )
   {
     m_aapbEdgeFilter[edgeDir][uiBsIdx] = bValue;
-    if( ! EdgeIdx )
+    if ( m_aapucBS[edgeDir][uiBsIdx] && bValue ) 
     {
-      m_aapucBS[edgeDir][uiBsIdx] = bValue;
+      m_aapucBS[edgeDir][uiBsIdx] = 3;  // edge is both a TU edge and a PU edge
+    }
+    else 
+    {
+      if( ! EdgeIdx )
+      {
+        m_aapucBS[edgeDir][uiBsIdx] = bValue;
+      }
     }
     uiBsIdx += uiAdd;
   }
@@ -486,9 +656,7 @@ void LoopFilter::xSetEdgefilterMultiple( const CodingUnit&    cu,
 void LoopFilter::xSetLoopfilterParam( const CodingUnit& cu )
 {
   const Slice& slice = *cu.slice;
-#if HEVC_TILES_WPP
   const PPS&   pps   = *cu.cs->pps;
-#endif
 
   if( slice.getDeblockingFilterDisable() )
   {
@@ -499,14 +667,8 @@ void LoopFilter::xSetLoopfilterParam( const CodingUnit& cu )
   const Position& pos = cu.blocks[cu.chType].pos();
 
   m_stLFCUParam.internalEdge = true;
-#if HEVC_TILES_WPP
-  m_stLFCUParam.leftEdge     = ( 0 < pos.x ) && isAvailableLeft ( cu, *cu.cs->getCU( pos.offset( -1,  0 ), cu.chType ), !slice.getLFCrossSliceBoundaryFlag(), !pps.getLoopFilterAcrossTilesEnabledFlag() );
-  m_stLFCUParam.topEdge      = ( 0 < pos.y ) && isAvailableAbove( cu, *cu.cs->getCU( pos.offset(  0, -1 ), cu.chType ), !slice.getLFCrossSliceBoundaryFlag(), !pps.getLoopFilterAcrossTilesEnabledFlag() );
-#else
-  m_stLFCUParam.leftEdge     = ( 0 < pos.x ) && isAvailable ( cu, *cu.cs->getCU( pos.offset( -1,  0 ), cu.chType ), !slice.getLFCrossSliceBoundaryFlag());
-  m_stLFCUParam.topEdge      = ( 0 < pos.y ) && isAvailable ( cu, *cu.cs->getCU( pos.offset(  0, -1 ), cu.chType ), !slice.getLFCrossSliceBoundaryFlag());
-#endif
-  m_stLFCUParam.internalEdge &= !cu.ispMode;
+  m_stLFCUParam.leftEdge     = ( 0 < pos.x ) && isAvailableLeft ( cu, *cu.cs->getCU( pos.offset( -1,  0 ), cu.chType ), !pps.getLoopFilterAcrossSlicesEnabledFlag(), !pps.getLoopFilterAcrossTilesEnabledFlag() );
+  m_stLFCUParam.topEdge      = ( 0 < pos.y ) && isAvailableAbove( cu, *cu.cs->getCU( pos.offset(  0, -1 ), cu.chType ), !pps.getLoopFilterAcrossSlicesEnabledFlag(), !pps.getLoopFilterAcrossTilesEnabledFlag() );
 }
 
 unsigned LoopFilter::xGetBoundaryStrengthSingle ( const CodingUnit& cu, const DeblockEdgeDir edgeDir, const Position& localPos ) const
@@ -523,17 +685,20 @@ unsigned LoopFilter::xGetBoundaryStrengthSingle ( const CodingUnit& cu, const De
   const CodingUnit& cuQ = cu;
   const CodingUnit& cuP = *cu.cs->getCU( posP, cu.chType );
 
+
   //-- Set BS for Intra MB : BS = 4 or 3
   if( ( MODE_INTRA == cuP.predMode ) || ( MODE_INTRA == cuQ.predMode ) )
   {
-    return (BsSet(2, COMPONENT_Y) + BsSet(2, COMPONENT_Cb) + BsSet(2, COMPONENT_Cr));
+    int bsY = (MODE_INTRA == cuP.predMode && cuP.bdpcmMode) && (MODE_INTRA == cuQ.predMode && cuQ.bdpcmMode) ? 0 : 2;
+    int bsC = (MODE_INTRA == cuP.predMode && cuP.bdpcmModeChroma) && (MODE_INTRA == cuQ.predMode && cuQ.bdpcmModeChroma) ? 0 : 2;
+    return (BsSet(bsY, COMPONENT_Y) + BsSet(bsC, COMPONENT_Cb) + BsSet(bsC, COMPONENT_Cr));
   }
 
   const TransformUnit& tuQ = *cuQ.cs->getTU(posQ, cuQ.chType);
-  const TransformUnit& tuP = *cuP.cs->getTU(posP, cuP.chType);
+  const TransformUnit& tuP = *cuP.cs->getTU(posP, cuQ.chType); 
   const PreCalcValues& pcv = *cu.cs->pcv;
   const unsigned rasterIdx = getRasterIdx( Position{ localPos.x,  localPos.y }, pcv );
-  if (m_aapucBS[edgeDir][rasterIdx] && (cuP.firstPU->mhIntraFlag || cuQ.firstPU->mhIntraFlag))
+  if (m_aapucBS[edgeDir][rasterIdx] && (cuP.firstPU->ciipFlag || cuQ.firstPU->ciipFlag))
   {
      return (BsSet(2, COMPONENT_Y) + BsSet(2, COMPONENT_Cb) + BsSet(2, COMPONENT_Cr));
   }
@@ -546,12 +711,12 @@ unsigned LoopFilter::xGetBoundaryStrengthSingle ( const CodingUnit& cu, const De
     tmpBs += BsSet(1, COMPONENT_Y);
   }
   // U
-  if (m_aapucBS[edgeDir][rasterIdx] && (TU::getCbf(tuQ, COMPONENT_Cb) || TU::getCbf(tuP, COMPONENT_Cb)))
+  if (m_aapucBS[edgeDir][rasterIdx] && (TU::getCbf(tuQ, COMPONENT_Cb) || TU::getCbf(tuP, COMPONENT_Cb) || tuQ.jointCbCr || tuP.jointCbCr))
   {
     tmpBs += BsSet(1, COMPONENT_Cb);
   }
   // V
-  if (m_aapucBS[edgeDir][rasterIdx] && (TU::getCbf(tuQ, COMPONENT_Cr) || TU::getCbf(tuP, COMPONENT_Cr)))
+  if (m_aapucBS[edgeDir][rasterIdx] && (TU::getCbf(tuQ, COMPONENT_Cr) || TU::getCbf(tuP, COMPONENT_Cr) || tuQ.jointCbCr || tuP.jointCbCr))
   {
     tmpBs += BsSet(1, COMPONENT_Cr);
   }
@@ -559,7 +724,7 @@ unsigned LoopFilter::xGetBoundaryStrengthSingle ( const CodingUnit& cu, const De
   {
     return tmpBs;
   }
-  if ((cuP.firstPU->mhIntraFlag || cuQ.firstPU->mhIntraFlag))
+  if ((cuP.firstPU->ciipFlag || cuQ.firstPU->ciipFlag))
   {
     return 1;
   }
@@ -570,6 +735,7 @@ unsigned LoopFilter::xGetBoundaryStrengthSingle ( const CodingUnit& cu, const De
   }
 
   // and now the pred
+  if ( m_aapucBS[edgeDir][rasterIdx] != 0 && m_aapucBS[edgeDir][rasterIdx] != 3 ) return tmpBs;
   const Position& lumaPosQ  = Position{ localPos.x,  localPos.y };
   const Position  lumaPosP  = ( edgeDir == EDGE_VER ) ? lumaPosQ.offset( -1, 0 ) : lumaPosQ.offset( 0, -1 );
   const MotionInfo&     miQ = cuQ.cs->getMotionInfo( lumaPosQ );
@@ -589,7 +755,7 @@ unsigned LoopFilter::xGetBoundaryStrengthSingle ( const CodingUnit& cu, const De
     if( 0 <= miQ.refIdx[0] ) { mvQ0 = miQ.mv[0]; }
     if( 0 <= miQ.refIdx[1] ) { mvQ1 = miQ.mv[1]; }
 
-    int nThreshold = 1 << MV_FRACTIONAL_BITS_INTERNAL;
+    int nThreshold = (1 << MV_FRACTIONAL_BITS_INTERNAL) >> 1;
     unsigned uiBs = 0;
 
     //th can be optimized
@@ -641,7 +807,7 @@ unsigned LoopFilter::xGetBoundaryStrengthSingle ( const CodingUnit& cu, const De
   Mv mvP0 = miP.mv[0];
   Mv mvQ0 = miQ.mv[0];
 
-  int nThreshold = 1 << MV_FRACTIONAL_BITS_INTERNAL;
+  int nThreshold = (1 << MV_FRACTIONAL_BITS_INTERNAL) >> 1;
   return ( ( abs( mvQ0.getHor() - mvP0.getHor() ) >= nThreshold ) || ( abs( mvQ0.getVer() - mvP0.getVer() ) >= nThreshold ) ) ? (tmpBs + 1) : tmpBs;
 }
 
@@ -675,7 +841,7 @@ void LoopFilter::deriveLADFShift( const Pel* src, const int stride, int& shift,
 }
 #endif
 
-void LoopFilter::xEdgeFilterLuma(const CodingUnit& cu, const DeblockEdgeDir edgeDir, const int iEdge, const int initialMaxFilterLengthP, const int initialMaxFilterLengthQ)
+void LoopFilter::xEdgeFilterLuma( const CodingUnit& cu, const DeblockEdgeDir edgeDir, const int iEdge )
 {
   const CompArea&  lumaArea = cu.block(COMPONENT_Y);
   const PreCalcValues& pcv = *cu.cs->pcv;
@@ -687,7 +853,7 @@ void LoopFilter::xEdgeFilterLuma(const CodingUnit& cu, const DeblockEdgeDir edge
   const PPS     &pps      = *(cu.cs->pps);
   const SPS     &sps      = *(cu.cs->sps);
   const Slice   &slice    = *(cu.slice);
-  const bool    ppsTransquantBypassEnabledFlag = pps.getTransquantBypassEnabledFlag();
+  const bool    spsPaletteEnabledFlag          = sps.getPLTMode();
   const int     bitDepthLuma                   = sps.getBitDepth(CHANNEL_TYPE_LUMA);
   const ClpRng& clpRng( cu.cs->slice->clpRng(COMPONENT_Y) );
 
@@ -697,7 +863,6 @@ void LoopFilter::xEdgeFilterLuma(const CodingUnit& cu, const DeblockEdgeDir edge
   unsigned     uiBsAbsIdx   = 0, uiBs = 0;
   int          iOffset, iSrcStep;
 
-  bool  bPCMFilter      = (sps.getPCMEnabledFlag() && sps.getPCMFilterDisableFlag()) ? true : false;
   bool  bPartPNoFilter  = false;
   bool  bPartQNoFilter  = false;
   int   betaOffsetDiv2  = slice.getDeblockingFilterBetaOffsetDiv2();
@@ -733,6 +898,15 @@ void LoopFilter::xEdgeFilterLuma(const CodingUnit& cu, const DeblockEdgeDir edge
     pos.x += xoffset;
     pos.y += yoffset;
 
+    // Deblock luma boundaries on 4x4 grid only
+    if (edgeDir == EDGE_HOR && (pos.y % 4) != 0)
+    {
+      continue;
+    }
+    if (edgeDir == EDGE_VER && (pos.x % 4) != 0)
+    {
+      continue;
+    }
     uiBsAbsIdx = getRasterIdx( pos, pcv );
     uiBs = BsGet(m_aapucBS[edgeDir][uiBsAbsIdx], COMPONENT_Y);
 
@@ -743,19 +917,19 @@ void LoopFilter::xEdgeFilterLuma(const CodingUnit& cu, const DeblockEdgeDir edge
       // Derive neighboring PU index
       if (edgeDir == EDGE_VER)
       {
-#if HEVC_TILES_WPP
-        CHECK( !isAvailableLeft( cu, cuP, !slice.getLFCrossSliceBoundaryFlag(), !pps.getLoopFilterAcrossTilesEnabledFlag() ), "Neighbour not available" );
-#else
-        CHECK( !isAvailable( cu, cuP, !slice.getLFCrossSliceBoundaryFlag() ), "Neighbour not available" );
-#endif
+        if (!isAvailableLeft(cu, cuP, !pps.getLoopFilterAcrossSlicesEnabledFlag(), !pps.getLoopFilterAcrossTilesEnabledFlag()))
+        {
+          m_aapucBS[edgeDir][uiBsAbsIdx] = uiBs = 0;
+          continue;
+        }
       }
       else  // (iDir == EDGE_HOR)
       {
-#if HEVC_TILES_WPP
-        CHECK( !isAvailableAbove( cu, cuP, !slice.getLFCrossSliceBoundaryFlag(), !pps.getLoopFilterAcrossTilesEnabledFlag() ), "Neighbour not available" );
-#else
-        CHECK( !isAvailable( cu, cuP, !slice.getLFCrossSliceBoundaryFlag() ), "Neighbour not available" );
-#endif
+        if (!isAvailableAbove(cu, cuP, !pps.getLoopFilterAcrossSlicesEnabledFlag(), !pps.getLoopFilterAcrossTilesEnabledFlag()))
+        {
+          m_aapucBS[edgeDir][uiBsAbsIdx] = uiBs = 0;
+          continue;
+        }
       }
 
       iQP = (cuP.qp + cuQ.qp + 1) >> 1;
@@ -771,23 +945,15 @@ void LoopFilter::xEdgeFilterLuma(const CodingUnit& cu, const DeblockEdgeDir edge
 
       bool sidePisLarge   = false;
       bool sideQisLarge   = false;
-      int maxFilterLengthP = initialMaxFilterLengthP;
-      int maxFilterLengthQ = initialMaxFilterLengthQ;
+      int maxFilterLengthP = m_maxFilterLengthP[COMPONENT_Y][pos.x-m_ctuXLumaSamples][pos.y-m_ctuYLumaSamples];
+      int maxFilterLengthQ = m_maxFilterLengthQ[COMPONENT_Y][pos.x-m_ctuXLumaSamples][pos.y-m_ctuYLumaSamples];
       if (maxFilterLengthP > 3)
       {
-        sidePisLarge = (edgeDir == EDGE_VER && cuP.block(COMPONENT_Y).width >= 32)
-          || (edgeDir == EDGE_HOR && cuP.block(COMPONENT_Y).height >= 32);
-
-        if (sidePisLarge && maxFilterLengthP > 5)
+        sidePisLarge = true;
+        if ( maxFilterLengthP > 5 )
         {
           // restrict filter length if sub-blocks are used (e.g affine or ATMVP)
-          bool ciipSubBlock = false;
-          if (cuP.firstPU->mhIntraFlag)
-          {
-            const uint32_t dirMode = PU::getFinalIntraMode(*(cuP.firstPU), cuP.chType);
-            ciipSubBlock = edgeDir == EDGE_HOR ? dirMode == VER_IDX : dirMode == HOR_IDX;
-          }
-          if (cuP.affine || ciipSubBlock)
+          if (cuP.affine)
           {
             maxFilterLengthP = std::min(maxFilterLengthP, 5);
           }
@@ -795,8 +961,7 @@ void LoopFilter::xEdgeFilterLuma(const CodingUnit& cu, const DeblockEdgeDir edge
       }
       if (maxFilterLengthQ > 3)
       {
-        sideQisLarge = (edgeDir == EDGE_VER && cuQ.block(COMPONENT_Y).width >= 32)
-          || (edgeDir == EDGE_HOR && cuQ.block(COMPONENT_Y).height >= 32);
+        sideQisLarge = true;
       }
 
       if (edgeDir == EDGE_HOR && pos.y % slice.getSPS()->getCTUSize() == 0)
@@ -806,7 +971,7 @@ void LoopFilter::xEdgeFilterLuma(const CodingUnit& cu, const DeblockEdgeDir edge
       const int iIndexTC  = Clip3(0, MAX_QP + DEFAULT_INTRA_TC_OFFSET, int(iQP + DEFAULT_INTRA_TC_OFFSET*(uiBs - 1) + (tcOffsetDiv2 << 1)));
       const int iIndexB   = Clip3(0, MAX_QP, iQP + (betaOffsetDiv2 << 1));
 
-      const int iTc       = sm_tcTable  [iIndexTC] * iBitdepthScale;
+      const int iTc = bitDepthLuma < 10 ? ((sm_tcTable[iIndexTC] + 2) >> (10 - bitDepthLuma)) : ((sm_tcTable[iIndexTC]) << (bitDepthLuma - 10));
       const int iBeta     = sm_betaTable[iIndexB ] * iBitdepthScale;
       const int iSideThreshold = ( iBeta + ( iBeta >> 1 ) ) >> 3;
       const int iThrCut   = iTc * 10;
@@ -846,17 +1011,11 @@ void LoopFilter::xEdgeFilterLuma(const CodingUnit& cu, const DeblockEdgeDir edge
           int dL = d0L + d3L;
 
           bPartPNoFilter = bPartQNoFilter = false;
-          if (bPCMFilter)
-          {
-            // Check if each of PUs is I_PCM with LF disabling
-            bPartPNoFilter = cuP.ipcm;
-            bPartQNoFilter = cuQ.ipcm;
-          }
-          if (ppsTransquantBypassEnabledFlag)
+          if (spsPaletteEnabledFlag)
           {
-            // check if each of PUs is lossless coded
-            bPartPNoFilter = bPartPNoFilter || cuP.transQuantBypass;
-            bPartQNoFilter = bPartQNoFilter || cuQ.transQuantBypass;
+            // check if each of PUs is palette coded
+            bPartPNoFilter = bPartPNoFilter || CU::isPLT(cuP);
+            bPartQNoFilter = bPartQNoFilter || CU::isPLT(cuQ);
           }
 
           if (dL < iBeta)
@@ -891,23 +1050,22 @@ void LoopFilter::xEdgeFilterLuma(const CodingUnit& cu, const DeblockEdgeDir edge
         const int d  = d0  + d3;
 
         bPartPNoFilter = bPartQNoFilter = false;
-        if( bPCMFilter )
-        {
-          // Check if each of PUs is I_PCM with LF disabling
-          bPartPNoFilter = cuP.ipcm;
-          bPartQNoFilter = cuQ.ipcm;
-        }
-        if( ppsTransquantBypassEnabledFlag )
+        if( spsPaletteEnabledFlag)
         {
-          // check if each of PUs is lossless coded
-          bPartPNoFilter = bPartPNoFilter || cuP.transQuantBypass;
-          bPartQNoFilter = bPartQNoFilter || cuQ.transQuantBypass;
+          // check if each of PUs is palette coded
+          bPartPNoFilter = bPartPNoFilter || CU::isPLT(cuP);
+          bPartQNoFilter = bPartQNoFilter || CU::isPLT(cuQ);
         }
 
         if( d < iBeta )
         {
-          const bool bFilterP = (dp < iSideThreshold);
-          const bool bFilterQ = (dq < iSideThreshold);
+          bool bFilterP = false;
+          bool bFilterQ = false;
+          if (maxFilterLengthP > 1 && maxFilterLengthQ > 1)
+          {
+            bFilterP = (dp < iSideThreshold);
+            bFilterQ = (dq < iSideThreshold);
+          }
           bool sw = false;
           if (maxFilterLengthP > 2 && maxFilterLengthQ > 2)
           {
@@ -948,7 +1106,6 @@ void LoopFilter::xEdgeFilterChroma(const CodingUnit& cu, const DeblockEdgeDir ed
   int       iOffset, iSrcStep;
   unsigned  uiLoopLength;
 
-  bool      bPCMFilter      = (sps.getPCMEnabledFlag() && sps.getPCMFilterDisableFlag()) ? true : false;
   bool      bPartPNoFilter  = false;
   bool      bPartQNoFilter  = false;
   const int tcOffsetDiv2    = slice.getDeblockingFilterTcOffsetDiv2();
@@ -1018,49 +1175,38 @@ void LoopFilter::xEdgeFilterChroma(const CodingUnit& cu, const DeblockEdgeDir ed
     if (bS[0] > 0 || bS[1] > 0)
     {
       const CodingUnit& cuQ =  cu;
-      const CodingUnit& cuP = *cu.cs->getCU( recalcPosition( cu.chromaFormat, CHANNEL_TYPE_LUMA, cu.chType, pos.offset( xoffset - uiNumPelsLuma, yoffset - uiNumPelsLuma ) ), cu.chType );
+      CodingUnit& cuP1 = *cu.cs->getCU( recalcPosition( cu.chromaFormat, CHANNEL_TYPE_LUMA, cu.chType, pos.offset( xoffset - uiNumPelsLuma, yoffset - uiNumPelsLuma ) ), cu.chType );
+      CodingUnit& cuP  = *cu.cs->getCU( recalcPosition( cu.chromaFormat, CHANNEL_TYPE_LUMA, (cuP1.isSepTree() ? CHANNEL_TYPE_CHROMA : cu.chType), pos.offset( xoffset - uiNumPelsLuma, yoffset - uiNumPelsLuma ) ), (cuP1.isSepTree() ? CHANNEL_TYPE_CHROMA : cu.chType));
 
       if (edgeDir == EDGE_VER)
       {
-#if HEVC_TILES_WPP
-        CHECK(!isAvailableLeft(cu, cuP, !slice.getLFCrossSliceBoundaryFlag(), !pps.getLoopFilterAcrossTilesEnabledFlag()), "Neighbour not available");
-#else
-        CHECK(!isAvailable(cu, cuP, !slice.getLFCrossSliceBoundaryFlag()), "Neighbour not available");
-#endif
+        CHECK(!isAvailableLeft(cu, cuP, !pps.getLoopFilterAcrossSlicesEnabledFlag(), !pps.getLoopFilterAcrossTilesEnabledFlag()), "Neighbour not available");
       }
       else  // (iDir == EDGE_HOR)
       {
-#if HEVC_TILES_WPP
-        CHECK(!isAvailableAbove(cu, cuP, !slice.getLFCrossSliceBoundaryFlag(), !pps.getLoopFilterAcrossTilesEnabledFlag()), "Neighbour not available");
-#else
-        CHECK(!isAvailable(cu, cuP, !slice.getLFCrossSliceBoundaryFlag()), "Neighbour not available");
-#endif
+        CHECK(!isAvailableAbove(cu, cuP, !pps.getLoopFilterAcrossSlicesEnabledFlag(), !pps.getLoopFilterAcrossTilesEnabledFlag()), "Neighbour not available");
       }
 
       bPartPNoFilter = bPartQNoFilter = false;
-      if (bPCMFilter)
+      if ( sps.getPLTMode())
       {
-        // Check if each of PUs is I_PCM with LF disabling
-        bPartPNoFilter = cuP.ipcm;
-        bPartQNoFilter = cuQ.ipcm;
+        // check if each of PUs is palette coded
+        bPartPNoFilter = bPartPNoFilter || CU::isPLT(cuP);
+        bPartQNoFilter = bPartQNoFilter || CU::isPLT(cuQ);
       }
-      if( pps.getTransquantBypassEnabledFlag() )
+
+      const int maxFilterLengthP = m_maxFilterLengthP[COMPONENT_Cb][(pos.x-m_ctuXLumaSamples)>>m_shiftHor][(pos.y-m_ctuYLumaSamples)>>m_shiftVer];
+      const int maxFilterLengthQ = m_maxFilterLengthQ[COMPONENT_Cb][(pos.x-m_ctuXLumaSamples)>>m_shiftHor][(pos.y-m_ctuYLumaSamples)>>m_shiftVer];
+      bool largeBoundary         = false;
+      bool isChromaHorCTBBoundary = false;
+      if ( maxFilterLengthP >= 3 && maxFilterLengthQ >= 3 )
       {
-        // check if each of PUs is lossless coded
-        bPartPNoFilter = bPartPNoFilter || cuP.transQuantBypass;
-        bPartQNoFilter = bPartQNoFilter || cuQ.transQuantBypass;
+        largeBoundary = true;
       }
 
-      const unsigned cuPWidth  = cuP.block(COMPONENT_Cb).width;
-      const unsigned cuPHeight = cuP.block(COMPONENT_Cb).height;
-      const unsigned cuQWidth  = cuQ.block(COMPONENT_Cb).width;
-      const unsigned cuQHeight = cuQ.block(COMPONENT_Cb).height;
-
-      bool largeBoundary = ((edgeDir == EDGE_VER && cuPWidth >= 8 && cuQWidth >= 8) || (edgeDir == EDGE_HOR && cuPHeight >= 8 && cuQHeight >= 8));
-
       if (edgeDir == EDGE_HOR && pos.y % cuP.slice->getSPS()->getCTUSize() == 0)
       {
-        largeBoundary = false;
+        isChromaHorCTBBoundary = true;
       }
 
       for( int chromaIdx = 0; chromaIdx < 2; chromaIdx++ )
@@ -1068,53 +1214,54 @@ void LoopFilter::xEdgeFilterChroma(const CodingUnit& cu, const DeblockEdgeDir ed
         if ((bS[chromaIdx] == 2) || (largeBoundary && (bS[chromaIdx] == 1)))
         {
         const ClpRng& clpRng( cu.cs->slice->clpRng( ComponentID( chromaIdx + 1 )) );
-        const int chromaQPOffset = pps.getQpOffset( ComponentID( chromaIdx + 1 ) );
         Pel* piTmpSrcChroma = (chromaIdx == 0) ? piTmpSrcCb : piTmpSrcCr;
 
-        int iQP = ( ( cuP.qp + cuQ.qp + 1 ) >> 1 ) + chromaQPOffset;
-        if (iQP >= chromaQPMappingTableSize)
-        {
-          if( sps.getChromaFormatIdc() == CHROMA_420 )
-          {
-            iQP -= 6;
-          }
-          else if( iQP > MAX_QP )
-          {
-            iQP = MAX_QP;
-          }
-        }
-        else if( iQP >= 0 )
-        {
-          iQP = getScaledChromaQP(iQP, sps.getChromaFormatIdc());
-        }
+        int shiftHorP = cuP.Y().valid() ? 0 : ::getComponentScaleX(COMPONENT_Cb, cuP.firstPU->chromaFormat);
+        int shiftVerP = cuP.Y().valid() ? 0 : ::getComponentScaleY(COMPONENT_Cb, cuP.firstPU->chromaFormat);
+        int shiftHorQ = cuQ.Y().valid() ? 0 : ::getComponentScaleX(COMPONENT_Cb, cuQ.firstPU->chromaFormat);
+        int shiftVerQ = cuQ.Y().valid() ? 0 : ::getComponentScaleY(COMPONENT_Cb, cuQ.firstPU->chromaFormat);
+        const Position& posQ = Position{ pos.x >> shiftHorQ,  pos.y >> shiftVerQ };
+        const Position& posP1 = Position{ pos.x >> shiftHorP,  pos.y >> shiftVerP };
+        const Position  posP = (edgeDir == EDGE_VER) ? posP1.offset(-1, 0) : posP1.offset(0, -1);
 
-        const int iIndexTC = Clip3<int>(0, MAX_QP + DEFAULT_INTRA_TC_OFFSET, iQP + DEFAULT_INTRA_TC_OFFSET * (bS[chromaIdx] - 1) + (tcOffsetDiv2 << 1));
-        const int iTc      = sm_tcTable[iIndexTC] * iBitdepthScale;
+        const TransformUnit& tuQ = *cuQ.cs->getTU(posQ, cuQ.chType);
+        const TransformUnit& tuP = *cuP.cs->getTU(posP, cuP.chType); 
+
+        const QpParam cQP(tuP, ComponentID(chromaIdx + 1));
+        const QpParam cQQ(tuQ, ComponentID(chromaIdx + 1));
+        const int qpBdOffset = tuP.cs->sps->getQpBDOffset(toChannelType(ComponentID(chromaIdx + 1)));
+        int baseQp_P = cQP.Qp(0) - qpBdOffset;
+        int baseQp_Q = cQQ.Qp(0) - qpBdOffset;
+        int iQP = ((baseQp_Q + baseQp_P + 1) >> 1);
 
+
+        const int iIndexTC = Clip3<int>(0, MAX_QP + DEFAULT_INTRA_TC_OFFSET, iQP + DEFAULT_INTRA_TC_OFFSET * (bS[chromaIdx] - 1) + (tcOffsetDiv2 << 1));
+        const int iTc = sps.getBitDepth(CHANNEL_TYPE_CHROMA) < 10 ? ((sm_tcTable[iIndexTC] + 2) >> (10 - sps.getBitDepth(CHANNEL_TYPE_CHROMA))) : ((sm_tcTable[iIndexTC]) << (sps.getBitDepth(CHANNEL_TYPE_CHROMA) - 10));
         bool useLongFilter = false;
         if (largeBoundary)
         {
         const int indexB = Clip3<int>(0, MAX_QP, iQP + (betaOffsetDiv2 << 1));
         const int beta = sm_betaTable[indexB] * iBitdepthScale;
 
-        const int dp0 = xCalcDP(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + 0), iOffset);
+        const int dp0 = xCalcDP(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + 0), iOffset, isChromaHorCTBBoundary);
         const int dq0 = xCalcDQ(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + 0), iOffset);
-        const int dp1 = xCalcDP(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + 1), iOffset);
-        const int dq1 = xCalcDQ(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + 1), iOffset);
+        const int subSamplingShift = ( edgeDir == EDGE_VER ) ? m_shiftVer : m_shiftHor;
+        const int dp3 = (subSamplingShift == 1) ? xCalcDP(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + 1), iOffset, isChromaHorCTBBoundary) : xCalcDP(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + 3), iOffset, isChromaHorCTBBoundary);
+        const int dq3 = ( subSamplingShift == 1 ) ? xCalcDQ(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + 1), iOffset) : xCalcDQ(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + 3), iOffset);
 
         const int d0 = dp0 + dq0;
-        const int d1 = dp1 + dq1;
-        const int d = d0 + d1;
+        const int d3 = dp3 + dq3;
+        const int d = d0 + d3;
 
           if (d < beta)
           {
             useLongFilter = true;
-            const bool sw = xUseStrongFiltering(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + 0), iOffset, 2 * d0, beta, iTc)
-                && xUseStrongFiltering(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + 1), iOffset, 2 * d1, beta, iTc);
+            const bool sw = xUseStrongFiltering(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + 0), iOffset, 2 * d0, beta, iTc, false, false, 7, 7, isChromaHorCTBBoundary)
+              && xUseStrongFiltering(piTmpSrcChroma + iSrcStep*(iIdx*uiLoopLength + ((subSamplingShift == 1) ? 1 : 3)), iOffset, 2 * d3, beta, iTc, false, false, 7, 7, isChromaHorCTBBoundary);
 
             for (unsigned step = 0; step < uiLoopLength; step++)
             {
-              xPelFilterChroma(piTmpSrcChroma + iSrcStep*(step + iIdx*uiLoopLength), iOffset, iTc, sw, bPartPNoFilter, bPartQNoFilter, clpRng, largeBoundary);
+              xPelFilterChroma(piTmpSrcChroma + iSrcStep*(step + iIdx*uiLoopLength), iOffset, iTc, sw, bPartPNoFilter, bPartQNoFilter, clpRng, largeBoundary, isChromaHorCTBBoundary);
             }
           }
         }
@@ -1122,7 +1269,7 @@ void LoopFilter::xEdgeFilterChroma(const CodingUnit& cu, const DeblockEdgeDir ed
         {
           for (unsigned step = 0; step < uiLoopLength; step++)
           {
-            xPelFilterChroma(piTmpSrcChroma + iSrcStep*(step + iIdx*uiLoopLength), iOffset, iTc, false, bPartPNoFilter, bPartQNoFilter, clpRng, largeBoundary);
+            xPelFilterChroma(piTmpSrcChroma + iSrcStep*(step + iIdx*uiLoopLength), iOffset, iTc, false, bPartPNoFilter, bPartQNoFilter, clpRng, largeBoundary, isChromaHorCTBBoundary);
           }
         }
         }
@@ -1342,7 +1489,7 @@ inline void LoopFilter::xPelFilterLuma(Pel* piSrc, const int iOffset, const int
  \param bPartQNoFilter  indicator to disable filtering on partQ
  \param bitDepthChroma  chroma bit depth
  */
-inline void LoopFilter::xPelFilterChroma( Pel* piSrc, const int iOffset, const int tc, const bool sw, const bool bPartPNoFilter, const bool bPartQNoFilter, const ClpRng& clpRng, const bool largeBoundary ) const
+inline void LoopFilter::xPelFilterChroma(Pel* piSrc, const int iOffset, const int tc, const bool sw, const bool bPartPNoFilter, const bool bPartQNoFilter, const ClpRng& clpRng, const bool largeBoundary, const bool isChromaHorCTBBoundary) const
 {
   int delta;
 
@@ -1357,12 +1504,22 @@ inline void LoopFilter::xPelFilterChroma( Pel* piSrc, const int iOffset, const i
 
   if (sw)
   {
+    if (isChromaHorCTBBoundary)
+    {
+      piSrc[-iOffset * 1] = Clip3(m3 - tc, m3 + tc, ((3 * m2 + 2 * m3 + m4 + m5 + m6 + 4) >> 3)); // p0
+      piSrc[0] = Clip3(m4 - tc, m4 + tc, ((2 * m2 + m3 + 2 * m4 + m5 + m6 + m7 + 4) >> 3)); // q0
+      piSrc[iOffset * 1] = Clip3(m5 - tc, m5 + tc, ((m2 + m3 + m4 + 2 * m5 + m6 + 2 * m7 + 4) >> 3));  // q1
+      piSrc[iOffset * 2] = Clip3(m6 - tc, m6 + tc, ((m3 + m4 + m5 + 2 * m6 + 3 * m7 + 4) >> 3));       // q2
+    }
+    else
+    {
       piSrc[-iOffset * 3] = Clip3(m1 - tc, m1 + tc, ((3 * m0 + 2 * m1 + m2 + m3 + m4 + 4) >> 3));       // p2
       piSrc[-iOffset * 2] = Clip3(m2 - tc, m2 + tc, ((2 * m0 + m1 + 2 * m2 + m3 + m4 + m5 + 4) >> 3));  // p1
       piSrc[-iOffset * 1] = Clip3(m3 - tc, m3 + tc, ((m0 + m1 + m2 + 2 * m3 + m4 + m5 + m6 + 4) >> 3)); // p0
-      piSrc[0]            = Clip3(m4 - tc, m4 + tc, ((m1 + m2 + m3 + 2 * m4 + m5 + m6 + m7 + 4) >> 3)); // q0
-      piSrc[iOffset * 1]  = Clip3(m5 - tc, m5 + tc, ((m2 + m3 + m4 + 2 * m5 + m6 + 2 * m7 + 4) >> 3));  // q1
-      piSrc[iOffset * 2]  = Clip3(m6 - tc, m6 + tc, ((m3 + m4 + m5 + 2 * m6 + 3 * m7 + 4) >> 3));       // q2
+      piSrc[0] = Clip3(m4 - tc, m4 + tc, ((m1 + m2 + m3 + 2 * m4 + m5 + m6 + m7 + 4) >> 3)); // q0
+      piSrc[iOffset * 1] = Clip3(m5 - tc, m5 + tc, ((m2 + m3 + m4 + 2 * m5 + m6 + 2 * m7 + 4) >> 3));  // q1
+      piSrc[iOffset * 2] = Clip3(m6 - tc, m6 + tc, ((m3 + m4 + m5 + 2 * m6 + 3 * m7 + 4) >> 3));       // q2
+    }
   }
   else
   {
@@ -1401,13 +1558,18 @@ inline void LoopFilter::xPelFilterChroma( Pel* piSrc, const int iOffset, const i
  \param tc              tc value
  \param piSrc           pointer to picture data
  */
-inline bool LoopFilter::xUseStrongFiltering( Pel* piSrc, const int iOffset, const int d, const int beta, const int tc, bool sidePisLarge, bool sideQisLarge, int maxFilterLengthP, int maxFilterLengthQ ) const
+inline bool LoopFilter::xUseStrongFiltering(Pel* piSrc, const int iOffset, const int d, const int beta, const int tc, bool sidePisLarge, bool sideQisLarge, int maxFilterLengthP, int maxFilterLengthQ, bool isChromaHorCTBBoundary) const
 {
   const Pel m4 = piSrc[ 0          ];
   const Pel m3 = piSrc[-iOffset    ];
   const Pel m7 = piSrc[ iOffset * 3];
   const Pel m0 = piSrc[-iOffset * 4];
-  int       sp3      = abs(m0 - m3);
+  const Pel m2 = piSrc[-iOffset * 2];
+  int       sp3 = abs(m0 - m3);
+  if (isChromaHorCTBBoundary)
+  {
+    sp3 = abs(m2 - m3);
+  }
   int       sq3      = abs(m7 - m4);
   const int d_strong = sp3 + sq3;
 
@@ -1415,6 +1577,41 @@ inline bool LoopFilter::xUseStrongFiltering( Pel* piSrc, const int iOffset, cons
   {
     Pel mP4;
     Pel m11;
+#if JVET_Q0054
+    if (sidePisLarge)
+    {
+      if (maxFilterLengthP == 7)
+      {
+        const Pel mP5 = piSrc[-iOffset * 5];
+        const Pel mP6 = piSrc[-iOffset * 6];
+        const Pel mP7 = piSrc[-iOffset * 7];;
+        mP4 = piSrc[-iOffset * 8];
+        sp3 = sp3 + abs(mP5 - mP6 - mP7 + mP4);
+      }
+      else
+      {
+        mP4 = piSrc[-iOffset * 6];
+      }
+      sp3 = (sp3 + abs(m0 - mP4) + 1) >> 1;
+    }
+    if (sideQisLarge)
+    {
+      if (maxFilterLengthQ == 7)
+      {
+        const Pel m8 = piSrc[iOffset * 4];
+        const Pel m9 = piSrc[iOffset * 5];
+        const Pel m10 = piSrc[iOffset * 6];;
+        m11 = piSrc[iOffset * 7];
+        sq3 = sq3 + abs(m8 - m9 - m10 + m11);
+      }
+      else
+      {
+        m11 = piSrc[iOffset * 5];
+      }
+      sq3 = (sq3 + abs(m11 - m7) + 1) >> 1;
+  }
+  return ((sp3 + sq3) < (beta*3 >> 5)) && (d < (beta >> 4)) && (abs(m3 - m4) < ((tc * 5 + 1) >> 1));
+#else
     if (maxFilterLengthP == 5)
     {
       mP4 = piSrc[-iOffset * 6];
@@ -1441,14 +1638,22 @@ inline bool LoopFilter::xUseStrongFiltering( Pel* piSrc, const int iOffset, cons
       sq3 = (sq3 + abs(m11 - m7) + 1) >> 1;
     }
     return ((sp3 + sq3) < (beta*3 >> 5)) && (d < (beta >> 2)) && (abs(m3 - m4) < ((tc * 5 + 1) >> 1));
+#endif
   }
   else
   return ( ( d_strong < ( beta >> 3 ) ) && ( d < ( beta >> 2 ) ) && ( abs( m3 - m4 ) < ( ( tc * 5 + 1 ) >> 1 ) ) );
 }
 
-inline int LoopFilter::xCalcDP( Pel* piSrc, const int iOffset ) const
+inline int LoopFilter::xCalcDP(Pel* piSrc, const int iOffset, const bool isChromaHorCTBBoundary) const
 {
-  return abs( piSrc[-iOffset * 3] - 2 * piSrc[-iOffset * 2] + piSrc[-iOffset] );
+  // Second-difference magnitude over the three samples at -3, -2 and -1 times
+  // iOffset from piSrc. When isChromaHorCTBBoundary is set, the sample at
+  // -3 * iOffset is not read; the one at -2 * iOffset is used in its place.
+  const Pel nearSample = piSrc[-iOffset];
+  const Pel midSample  = piSrc[-iOffset * 2];
+  const Pel farSample  = isChromaHorCTBBoundary ? midSample : piSrc[-iOffset * 3];
+
+  return abs(farSample - 2 * midSample + nearSample);
 }
 
 inline int LoopFilter::xCalcDQ( Pel* piSrc, const int iOffset ) const
diff --git a/source/Lib/CommonLib/LoopFilter.h b/source/Lib/CommonLib/LoopFilter.h
index 6ff62b0ad50a49c1558287c041ff436440e2fcc4..b3c916544ef91e1089e8c7a614a0c4822eb2d74c 100644
--- a/source/Lib/CommonLib/LoopFilter.h
+++ b/source/Lib/CommonLib/LoopFilter.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -58,6 +58,11 @@ private:
   static_vector<char, MAX_NUM_PARTS_IN_CTU> m_aapucBS       [NUM_EDGE_DIR];         ///< Bs for [Ver/Hor][Y/U/V][Blk_Idx]
   static_vector<bool, MAX_NUM_PARTS_IN_CTU> m_aapbEdgeFilter[NUM_EDGE_DIR];
   LFCUParam m_stLFCUParam;                   ///< status structure
+  int     m_ctuXLumaSamples, m_ctuYLumaSamples;                            // location of left-edge and top-edge of CTU
+  int     m_shiftHor, m_shiftVer;                                          // shift values to convert location from luma sample units to chroma sample units
+  uint8_t m_maxFilterLengthP[MAX_NUM_COMPONENT][MAX_CU_SIZE][MAX_CU_SIZE]; // maxFilterLengthP for [component][luma/chroma sample distance from left edge of CTU][luma/chroma sample distance from top edge of CTU]
+  uint8_t m_maxFilterLengthQ[MAX_NUM_COMPONENT][MAX_CU_SIZE][MAX_CU_SIZE]; // maxFilterLengthQ for [component][luma/chroma sample distance from left edge of CTU][luma/chroma sample distance from top edge of CTU]
+  bool    m_transformEdge[MAX_NUM_COMPONENT][MAX_CU_SIZE][MAX_CU_SIZE];    // transform edge flag for [component][luma/chroma sample distance from left edge of CTU][luma/chroma sample distance from top edge of CTU]
   PelStorage                   m_encPicYuvBuffer;
   bool                         m_enc;
 private:
@@ -74,24 +79,29 @@ private:
                                     const Area&           area,
                                     const bool            bValue,
                                     const bool            EdgeIdx = false );
-  void xEdgeFilterLuma            ( const CodingUnit& cu, const DeblockEdgeDir edgeDir, const int iEdge, const int initialMaxFilterLengthP, const int initialMaxFilterLengthQ );
+  void xEdgeFilterLuma( const CodingUnit& cu, const DeblockEdgeDir edgeDir, const int iEdge );
   void xEdgeFilterChroma(const CodingUnit& cu, const DeblockEdgeDir edgeDir, const int iEdge);
 
 #if LUMA_ADAPTIVE_DEBLOCKING_FILTER_QP_OFFSET
   void deriveLADFShift( const Pel* src, const int stride, int& shift, const DeblockEdgeDir edgeDir, const SPS sps );
 #endif
+  void xSetMaxFilterLengthPQFromTransformSizes( const DeblockEdgeDir edgeDir, const CodingUnit& cu, const TransformUnit& currTU );
+  void xSetMaxFilterLengthPQForCodingSubBlocks( const DeblockEdgeDir edgeDir, const CodingUnit& cu, const PredictionUnit& currPU, const bool& mvSubBlocks, const int& subBlockSize, const Area& areaPu );
 
   inline void xBilinearFilter     ( Pel* srcP, Pel* srcQ, int offset, int refMiddle, int refP, int refQ, int numberPSide, int numberQSide, const int* dbCoeffsP, const int* dbCoeffsQ, int tc ) const;
   inline void xFilteringPandQ     ( Pel* src, int offset, int numberPSide, int numberQSide, int tc ) const;
   inline void xPelFilterLuma      ( Pel* piSrc, const int iOffset, const int tc, const bool sw, const bool bPartPNoFilter, const bool bPartQNoFilter, const int iThrCut, const bool bFilterSecondP, const bool bFilterSecondQ, const ClpRng& clpRng, bool sidePisLarge = false, bool sideQisLarge = false, int maxFilterLengthP = 7, int maxFilterLengthQ = 7 ) const;
-  inline void xPelFilterChroma    ( Pel* piSrc, const int iOffset, const int tc, const bool sw, const bool bPartPNoFilter, const bool bPartQNoFilter, const ClpRng& clpRng, const bool largeBoundary ) const;
-  inline bool xUseStrongFiltering ( Pel* piSrc, const int iOffset, const int d, const int beta, const int tc, bool sidePisLarge = false, bool sideQisLarge = false, int maxFilterLengthP = 7, int maxFilterLengthQ = 7 ) const;//move the computation outside the function
+  inline void xPelFilterChroma(Pel* piSrc, const int iOffset, const int tc, const bool sw, const bool bPartPNoFilter, const bool bPartQNoFilter, const ClpRng& clpRng, const bool largeBoundary, const bool isChromaHorCTBBoundary) const;
+  inline bool xUseStrongFiltering(Pel* piSrc, const int iOffset, const int d, const int beta, const int tc, bool sidePisLarge = false, bool sideQisLarge = false, int maxFilterLengthP = 7, int maxFilterLengthQ = 7, bool isChromaHorCTBBoundary = false) const;//move the computation outside the function
   inline unsigned BsSet(unsigned val, const ComponentID compIdx) const;
   inline unsigned BsGet(unsigned val, const ComponentID compIdx) const;
 
-  inline int xCalcDP              ( Pel* piSrc, const int iOffset ) const;
+  inline bool isCrossedByVirtualBoundaries ( const int xPos, const int yPos, const int width, const int height, int& numHorVirBndry, int& numVerVirBndry, int horVirBndryPos[], int verVirBndryPos[], const PicHeader* picHeader );
+  inline void xDeriveEdgefilterParam       ( const int xPos, const int yPos, const int numVerVirBndry, const int numHorVirBndry, const int verVirBndryPos[], const int horVirBndryPos[], bool &verEdgeFilter, bool &horEdgeFilter );
+
+  inline int xCalcDP(Pel* piSrc, const int iOffset, const bool isChromaHorCTBBoundary = false) const;
   inline int xCalcDQ              ( Pel* piSrc, const int iOffset ) const;
-  static const uint8_t sm_tcTable[MAX_QP + 3];
+  static const uint16_t sm_tcTable[MAX_QP + 3];
   static const uint8_t sm_betaTable[MAX_QP + 1];
 
 public:
diff --git a/source/Lib/CommonLib/MCTS.cpp b/source/Lib/CommonLib/MCTS.cpp
index dd87fb11bdc1c50df48be97763be4e8ba6522ebb..6af615f59d214e42dd0af09f52e0bafc94eb3df1 100644
--- a/source/Lib/CommonLib/MCTS.cpp
+++ b/source/Lib/CommonLib/MCTS.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -90,22 +90,20 @@ void MCTSHelper::clipMvToArea( Mv& rcMv, const Area& block, const Area& clipArea
 
 Area MCTSHelper::getTileArea( const CodingStructure* cs, const int ctuAddr )
 {
-  const TileMap* tileMap = cs->picture->tileMap;
-  const int      tileIdx = tileMap->getTileIdxMap( ctuAddr );
-  const Tile&    currentTile = tileMap->tiles[tileIdx];
-
-  const int      frameWidthInCtus = cs->pcv->widthInCtus;
-  const int  firstCtuRsAddrOfTile = currentTile.getFirstCtuRsAddr();
-
-  const int tileXPosInCtus = firstCtuRsAddrOfTile % frameWidthInCtus;
-  const int tileYPosInCtus = firstCtuRsAddrOfTile / frameWidthInCtus;
-  const int tileWidthtInCtus = currentTile.getTileWidthInCtus();
-  const int tileHeightInCtus = currentTile.getTileHeightInCtus();
-
+  const PPS *pps = cs->pps;
   const int  maxCUWidth  = cs->pcv->maxCUWidth;
   const int  maxCUHeight = cs->pcv->maxCUHeight;
 
-  const int tileLeftTopPelPosX = maxCUWidth  * tileXPosInCtus;
+  const uint32_t tileIdx = pps->getTileIdx( (uint32_t)ctuAddr );
+  const uint32_t tileX = tileIdx % pps->getNumTileColumns();
+  const uint32_t tileY = tileIdx / pps->getNumTileColumns();
+  
+  const int tileWidthtInCtus = pps->getTileColumnWidth( tileX );
+  const int tileHeightInCtus = pps->getTileRowHeight  ( tileY );  
+  const int tileXPosInCtus   = pps->getTileColumnBd( tileX );
+  const int tileYPosInCtus   = pps->getTileRowBd( tileY );
+
+  const int tileLeftTopPelPosX = maxCUWidth * tileXPosInCtus;
   const int tileLeftTopPelPosY = maxCUHeight * tileYPosInCtus;
   const int tileRightBottomPelPosX = std::min<int>( ( ( tileWidthtInCtus + tileXPosInCtus ) * maxCUWidth ), (int)cs->picture->lwidth() ) - 1;
   const int tileRightBottomPelPosY = std::min<int>( ( ( tileHeightInCtus + tileYPosInCtus ) * maxCUHeight ), (int)cs->picture->lheight() ) - 1;
diff --git a/source/Lib/CommonLib/MCTS.h b/source/Lib/CommonLib/MCTS.h
index ed651655c3c02c7200bc4a4aa0c0594bd3edf457..3d9d3bf52e4207673645aabed5160ecc2fc41c5a 100644
--- a/source/Lib/CommonLib/MCTS.h
+++ b/source/Lib/CommonLib/MCTS.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/MatrixIntraPrediction.cpp b/source/Lib/CommonLib/MatrixIntraPrediction.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f3376aa25da490108aa96b72541b597bebead49c
--- /dev/null
+++ b/source/Lib/CommonLib/MatrixIntraPrediction.cpp
@@ -0,0 +1,340 @@
+/* The copyright in this software is being made available under the BSD
+* License, included below. This software may be subject to other third party
+* and contributor rights, including patent rights, and no such rights are
+* granted under this license.
+*
+* Copyright (c) 2010-2020, ITU/ISO/IEC
+* All rights reserved.
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are met:
+*
+*  * Redistributions of source code must retain the above copyright notice,
+*    this list of conditions and the following disclaimer.
+*  * Redistributions in binary form must reproduce the above copyright notice,
+*    this list of conditions and the following disclaimer in the documentation
+*    and/or other materials provided with the distribution.
+*  * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
+*    be used to endorse or promote products derived from this software without
+*    specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
+* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+* THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/** \file     MatrixIntraPrediction.cpp
+\brief    matrix-based intra prediction class
+*/
+
+#include "MatrixIntraPrediction.h"
+#include "dtrace_next.h"
+
+#include "UnitTools.h"
+#include "MipData.h"
+
+
+// Construct the sample buffers with their maximum element counts and zero every block parameter.
+MatrixIntraPrediction::MatrixIntraPrediction()
+  : m_reducedBoundary( MIP_MAX_INPUT_SIZE )
+  , m_reducedBoundaryTransposed( MIP_MAX_INPUT_SIZE )
+  , m_inputOffset( 0 )
+  , m_inputOffsetTransp( 0 )
+  , m_refSamplesTop( MIP_MAX_WIDTH )
+  , m_refSamplesLeft( MIP_MAX_HEIGHT )
+  , m_blockSize( 0, 0 )
+  , m_sizeId( 0 )
+  , m_reducedBdrySize( 0 )
+  , m_reducedPredSize( 0 )
+  , m_upsmpFactorHor( 0 )
+  , m_upsmpFactorVer( 0 )
+{}
+
+
+void MatrixIntraPrediction::prepareInputForPred(const CPelBuf &pSrc, const Area& block, const int bitDepth)
+{
+  // Derive the size id, the reduced sizes and the upsampling factors of this block.
+  initPredBlockParams(block);
+
+  // Collect the reference samples: top samples come from pSrc.at(i + 1, 0),
+  // left samples from pSrc.at(i + 1, 1).
+  m_refSamplesTop.resize(block.width);
+  m_refSamplesLeft.resize(block.height);
+  for (int col = 0; col < block.width; col++)
+  {
+    m_refSamplesTop[col] = pSrc.at(col + 1, 0);
+  }
+  for (int row = 0; row < block.height; row++)
+  {
+    m_refSamplesLeft[row] = pSrc.at(row + 1, 1);
+  }
+
+  // Reduced boundary = [top | left]; the transposed variant stores [left | top].
+  const int numInputs = 2 * m_reducedBdrySize;
+  m_reducedBoundary          .resize( numInputs );
+  m_reducedBoundaryTransposed.resize( numInputs );
+
+  int* const redTop  = m_reducedBoundary.data();
+  int* const redLeft = redTop + m_reducedBdrySize;
+  boundaryDownsampling1D( redTop,  m_refSamplesTop.data(),  block.width,  m_reducedBdrySize );
+  boundaryDownsampling1D( redLeft, m_refSamplesLeft.data(), block.height, m_reducedBdrySize );
+
+  int* const transpLeft = m_reducedBoundaryTransposed.data();
+  int* const transpTop  = transpLeft + m_reducedBdrySize;
+  for( int k = 0; k < m_reducedBdrySize; k++ )
+  {
+    transpTop [k] = redTop [k];
+    transpLeft[k] = redLeft[k];
+  }
+
+  // Rebase: entry 0 is remembered as the offset, all other entries become differences to it.
+  m_inputOffset       = m_reducedBoundary[0];
+  m_inputOffsetTransp = m_reducedBoundaryTransposed[0];
+
+  // Entry 0 itself is only needed for size ids below 2 (first matrix column); otherwise it is zeroed.
+  m_reducedBoundary          [0] = 0;
+  m_reducedBoundaryTransposed[0] = 0;
+  if (m_sizeId < 2)
+  {
+    m_reducedBoundary          [0] = m_inputOffset       - (1 << (bitDepth - 1));
+    m_reducedBoundaryTransposed[0] = m_inputOffsetTransp - (1 << (bitDepth - 1));
+  }
+  for (int k = 1; k < numInputs; k++)
+  {
+    m_reducedBoundary          [k] -= m_inputOffset;
+    m_reducedBoundaryTransposed[k] -= m_inputOffsetTransp;
+  }
+}
+
+// Predict the block set up by prepareInputForPred() with the matrix of mode modeIdx and write the samples to result.
+void MatrixIntraPrediction::predBlock(int* const result, const int modeIdx, const bool transpose, const int bitDepth)
+{
+  const uint8_t* weights;
+  int matrixShift = 0, matrixOffset = 0;
+  getMatrixData(weights, matrixShift, matrixOffset, modeIdx);
+  // The reduced prediction only takes the detour via a scratch buffer if it still has to be upsampled.
+  const bool upsample = ( m_upsmpFactorHor > 1 ) || ( m_upsmpFactorVer > 1 );
+  static_vector<int, MIP_MAX_REDUCED_OUTPUT_SAMPLES> scratch( m_reducedPredSize * m_reducedPredSize );
+  int* const       target   = upsample ? scratch.data() : result;
+  const int* const boundary = transpose ? m_reducedBoundaryTransposed.data() : m_reducedBoundary.data();
+  computeReducedPred( target, boundary, weights, matrixShift, matrixOffset, transpose, bitDepth );
+  if( upsample )
+  {
+    predictionUpsampling( result, target );
+  }
+}
+
+
+// Store the block size and derive size id, reduced boundary/prediction sizes and upsampling factors from it.
+void MatrixIntraPrediction::initPredBlockParams(const Size& block)
+{
+  m_blockSize = block;
+  m_sizeId    = getMipSizeId( m_blockSize );
+
+  // Reduced boundary length per side: 2 samples for size id 0, 4 samples otherwise.
+  m_reducedBdrySize = ( m_sizeId != 0 ) ? 4 : 2;
+
+  // Side length of the reduced prediction: 4 for size ids below 2, 8 otherwise.
+  m_reducedPredSize = ( m_sizeId >= 2 ) ? 8 : 4;
+
+  // Upsampling factors from the reduced prediction to the full block.
+  m_upsmpFactorHor = m_blockSize.width  / m_reducedPredSize;
+  m_upsmpFactorVer = m_blockSize.height / m_reducedPredSize;
+
+  // Each factor has to be a non-zero power of two.
+  CHECKD( (m_upsmpFactorHor < 1) || ((m_upsmpFactorHor & (m_upsmpFactorHor - 1)) != 0), "Need power of two horizontal upsampling factor." );
+  CHECKD( (m_upsmpFactorVer < 1) || ((m_upsmpFactorVer & (m_upsmpFactorVer - 1)) != 0), "Need power of two vertical upsampling factor." );
+}
+
+
+
+// Reduce fullSrc from srcLen to dstLen samples by averaging groups of srcLen / dstLen neighbours (with rounding).
+void MatrixIntraPrediction::boundaryDownsampling1D(int* reducedDst, const int* const fullSrc, const SizeType srcLen, const SizeType dstLen)
+{
+  if (dstLen >= srcLen)
+  {
+    // Nothing to average: take over the first dstLen samples unchanged.
+    for (SizeType i = 0; i < dstLen; ++i)
+    {
+      reducedDst[i] = fullSrc[i];
+    }
+    return;
+  }
+
+  // Average each group of groupSize consecutive samples; the shift assumes a power-of-two group size.
+  const SizeType groupSize     = srcLen / dstLen;
+  const int      log2GroupSize = floorLog2(groupSize);
+  const int      rounding      = (1 << (log2GroupSize - 1));
+
+  const int* srcPos = fullSrc;
+  for( SizeType dstIdx = 0; dstIdx < dstLen; dstIdx++ )
+  {
+    int acc = 0;
+    for( int k = 0; k < groupSize; k++ )
+    {
+      acc += *srcPos++;
+    }
+    reducedDst[dstIdx] = (acc + rounding) >> log2GroupSize;
+  }
+}
+
+
+// One-dimensional linear upsampling of a reduced prediction.
+//   src / srcStep / srcStride: source samples, step along the upsampled dimension, step between lines
+//   dst / dstStep / dstStride: destination samples, addressed the same way
+//   bndry / bndryStep:         boundary samples; line n starts from bndry[(n + 1) * bndryStep - 1]
+//   srcSizeUpsmpDim:           source samples per line; srcSizeOrthDim: number of lines
+// Every source sample yields upsmpFactor outputs interpolated between its predecessor and itself.
+void MatrixIntraPrediction::predictionUpsampling1D(int* const dst, const int* const src, const int* const bndry,
+                                                   const SizeType srcSizeUpsmpDim, const SizeType srcSizeOrthDim,
+                                                   const SizeType srcStep, const SizeType srcStride,
+                                                   const SizeType dstStep, const SizeType dstStride,
+                                                   const SizeType bndryStep,
+                                                   const unsigned int upsmpFactor )
+{
+  const int log2Factor = floorLog2( upsmpFactor );
+  CHECKD( upsmpFactor <= 1, "Upsampling factor must be at least 2." );
+  const int rounding = 1 << (log2Factor - 1);
+
+  const int* srcRow   = src;
+  int*       dstRow   = dst;
+  const int* bndryRow = bndry + bndryStep - 1;
+  for( SizeType line = 0; line < srcSizeOrthDim; line++ )
+  {
+    const int* left  = bndryRow; // sample in front of the current interval
+    const int* right = srcRow;   // sample closing the current interval
+    int*       out   = dstRow;
+    for( SizeType seg = 0; seg < srcSizeUpsmpDim; seg++ )
+    {
+      // Start at upsmpFactor * left and move one step towards right per output sample.
+      int wLeft  = ( *left ) << log2Factor;
+      int wRight = 0;
+      for( SizeType pos = 1; pos <= upsmpFactor; pos++ )
+      {
+        wLeft  -= *left;
+        wRight += *right;
+        *out = (wLeft + wRight + rounding) >> log2Factor;
+        out += dstStep;
+      }
+
+      left   = right;
+      right += srcStep;
+    }
+
+    srcRow   += srcStride;
+    dstRow   += dstStride;
+    bndryRow += bndryStep;
+  }
+}
+
+
+// Upsample the reduced prediction src to the full block in dst: horizontally first (if needed), then vertically.
+void MatrixIntraPrediction::predictionUpsampling( int* const dst, const int* const src ) const
+{
+  const int* vertInput     = src;
+  SizeType   vertInputStep = m_blockSize.width;
+  if( m_upsmpFactorHor > 1 )
+  {
+    // Horizontal output fills every m_upsmpFactorVer-th row of dst, beginning with row m_upsmpFactorVer - 1.
+    vertInputStep *= m_upsmpFactorVer;
+    int* const horOutput = dst + (m_upsmpFactorVer - 1) * m_blockSize.width;
+    vertInput = horOutput;
+    predictionUpsampling1D( horOutput, src, m_refSamplesLeft.data(),
+                            m_reducedPredSize, m_reducedPredSize,
+                            1, m_reducedPredSize, 1, vertInputStep,
+                            m_upsmpFactorVer, m_upsmpFactorHor );
+  }
+
+  if( m_upsmpFactorVer > 1 )
+  {
+    predictionUpsampling1D( dst, vertInput, m_refSamplesTop.data(),
+                            m_reducedPredSize, m_blockSize.width,
+                            vertInputStep, 1, m_blockSize.width, 1,
+                            1, m_upsmpFactorVer );
+  }
+}
+
+void MatrixIntraPrediction::getMatrixData(const uint8_t*& matrix, int &shiftMatrix, int &offsetMatrix, const int modeIdx) const
+{
+  if( m_sizeId == 0 )                      // size id 0: 4x4 tables
+  {
+    matrix       = &mipMatrix4x4      [modeIdx][0][0];
+    shiftMatrix  =  mipShiftMatrix4x4 [modeIdx];
+    offsetMatrix =  mipOffsetMatrix4x4[modeIdx];
+  }
+  else if( m_sizeId == 1 )                 // size id 1: 8x8 tables
+  {
+    matrix       = &mipMatrix8x8      [modeIdx][0][0];
+    shiftMatrix  =  mipShiftMatrix8x8 [modeIdx];
+    offsetMatrix =  mipOffsetMatrix8x8[modeIdx];
+  }
+  else if( m_sizeId == 2 )                 // size id 2: 16x16 tables
+  {
+    matrix       = &mipMatrix16x16      [modeIdx][0][0];
+    shiftMatrix  =  mipShiftMatrix16x16 [modeIdx];
+    offsetMatrix =  mipOffsetMatrix16x16[modeIdx];
+  }
+  else { THROW( "Invalid mipSizeId" ); }   // no tables exist for any other size id
+}
+
+// Compute the reduced prediction as clipped matrix-vector product of the rebased boundary (input) with the mode matrix.
+void MatrixIntraPrediction::computeReducedPred( int*const result, const int* const input,
+                                                const uint8_t*matrix, const int shiftMatrix, const int offsetMatrix,
+                                                const bool transpose, const int bitDepth )
+{
+  const int inputSize = 2 * m_reducedBdrySize;
+
+  // use local buffer for transposed result
+  static_vector<int, MIP_MAX_REDUCED_OUTPUT_SAMPLES> resBufTransposed( m_reducedPredSize * m_reducedPredSize );
+  int*const resPtr = (transpose) ? resBufTransposed.data() : result;
+
+  int sum = 0;
+  for( int i = 0; i < inputSize; i++ ) { sum += input[i]; }
+  const int offset = (1 << (shiftMatrix - 1)) - offsetMatrix * sum;
+  CHECK( inputSize != 4 * (inputSize >> 2), "Error, input size not divisible by four" );
+  const int inputOffset = transpose ? m_inputOffsetTransp : m_inputOffset;
+
+  // For size id 2 a matrix row holds no weight for input[0], so the weight of input[i] sits at index i - 1.
+  // Shift the index instead of stepping the row pointer back in front of the table (undefined pointer arithmetic).
+  const int firstCol = (m_sizeId == 2) ? 1 : 0;
+  const uint8_t *weight = matrix;
+  int posRes = 0;
+  for( int y = 0; y < m_reducedPredSize; y++ )
+  {
+    for( int x = 0; x < m_reducedPredSize; x++ )
+    {
+      int tmp0 = firstCol ? 0 : (input[0] * weight[0]);
+      int tmp1 = input[1] * weight[1 - firstCol];
+      int tmp2 = input[2] * weight[2 - firstCol];
+      int tmp3 = input[3] * weight[3 - firstCol];
+      for (int i = 4; i < inputSize; i += 4)
+      {
+        tmp0 += input[i]     * weight[i     - firstCol];
+        tmp1 += input[i + 1] * weight[i + 1 - firstCol];
+        tmp2 += input[i + 2] * weight[i + 2 - firstCol];
+        tmp3 += input[i + 3] * weight[i + 3 - firstCol];
+      }
+      resPtr[posRes++] = ClipBD<int>( ((tmp0 + tmp1 + tmp2 + tmp3 + offset) >> shiftMatrix) + inputOffset, bitDepth );
+      weight += inputSize - firstCol; // advance to the next row (one entry shorter for size id 2)
+    }
+  }
+
+  if( transpose )
+  {
+    for( int y = 0; y < m_reducedPredSize; y++ )
+    {
+      for( int x = 0; x < m_reducedPredSize; x++ )
+      {
+        result[ y * m_reducedPredSize + x ] = resPtr[ x * m_reducedPredSize + y ];
+      }
+    }
+  }
+}
diff --git a/source/Lib/CommonLib/MatrixIntraPrediction.h b/source/Lib/CommonLib/MatrixIntraPrediction.h
new file mode 100644
index 0000000000000000000000000000000000000000..bf90ae11d4ea414386fe60c855787d8d8f11ae86
--- /dev/null
+++ b/source/Lib/CommonLib/MatrixIntraPrediction.h
@@ -0,0 +1,91 @@
+/* The copyright in this software is being made available under the BSD
+* License, included below. This software may be subject to other third party
+* and contributor rights, including patent rights, and no such rights are
+* granted under this license.
+*
+* Copyright (c) 2010-2020, ITU/ISO/IEC
+* All rights reserved.
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are met:
+*
+*  * Redistributions of source code must retain the above copyright notice,
+*    this list of conditions and the following disclaimer.
+*  * Redistributions in binary form must reproduce the above copyright notice,
+*    this list of conditions and the following disclaimer in the documentation
+*    and/or other materials provided with the distribution.
+*  * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
+*    be used to endorse or promote products derived from this software without
+*    specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
+* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+* THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/** \file     MatrixIntraPrediction.h
+\brief    matrix-based intra prediction class (header)
+*/
+
+#ifndef __MATRIXINTRAPPREDICTION__
+#define __MATRIXINTRAPPREDICTION__
+
+
+#include "Unit.h"
+
+static const int MIP_MAX_INPUT_SIZE             =  8; // capacity of the reduced boundary buffers (top + left part)
+static const int MIP_MAX_REDUCED_OUTPUT_SAMPLES = 64; // capacity of the reduced prediction buffers
+
+
+class MatrixIntraPrediction // matrix-based intra prediction (MIP) of a single block
+{
+public:
+  MatrixIntraPrediction();
+  // predBlock() relies on the state that prepareInputForPred() sets up for the same block.
+  void prepareInputForPred(const CPelBuf &pSrc, const Area& block, const int bitDepth);
+  void predBlock(int* const result, const int modeIdx, const bool transpose, const int bitDepth);
+
+  private:
+    static_vector<int, MIP_MAX_INPUT_SIZE> m_reducedBoundary;           // downsampled             boundary of a block
+    static_vector<int, MIP_MAX_INPUT_SIZE> m_reducedBoundaryTransposed; // downsampled, transposed boundary of a block
+    int                                    m_inputOffset;               // first reduced boundary sample; removed before and re-added after the matrix product
+    int                                    m_inputOffsetTransp;         // same offset for the transposed boundary
+    static_vector<int, MIP_MAX_WIDTH>      m_refSamplesTop;             // top  reference samples for upsampling
+    static_vector<int, MIP_MAX_HEIGHT>     m_refSamplesLeft;            // left reference samples for upsampling
+
+    Size m_blockSize;               // size of the block passed to initPredBlockParams()
+    int  m_sizeId;                  // result of getMipSizeId( m_blockSize ); selects the matrix tables
+    int  m_reducedBdrySize;         // reduced boundary samples per side (2 or 4)
+    int  m_reducedPredSize;         // side length of the reduced prediction (4 or 8)
+    unsigned int m_upsmpFactorHor;  // block width  / m_reducedPredSize
+    unsigned int m_upsmpFactorVer;  // block height / m_reducedPredSize
+    // Stores the block size and derives size id, reduced sizes and upsampling factors from it.
+    void initPredBlockParams(const Size& block);
+    // Reduces srcLen samples to dstLen samples by averaging; plain copy when dstLen >= srcLen.
+    static void boundaryDownsampling1D(int* reducedDst, const int* const fullSrc, const SizeType srcLen, const SizeType dstLen);
+    // Upsamples a reduced prediction to the full block; the 1D helper handles one dimension per call.
+    void predictionUpsampling( int* const dst, const int* const src ) const;
+    static void predictionUpsampling1D( int* const dst, const int* const src, const int* const bndry,
+                                        const SizeType srcSizeUpsmpDim, const SizeType srcSizeOrthDim,
+                                        const SizeType srcStep, const SizeType srcStride,
+                                        const SizeType dstStep, const SizeType dstStride,
+                                        const SizeType bndryStep,
+                                        const unsigned int upsmpFactor );
+    // Returns weight matrix, shift and offset of mode modeIdx for the current size id.
+    void getMatrixData(const uint8_t*& matrix, int &shiftMatrix, int &offsetMatrix, const int modeIdx) const;
+
+    // Matrix-vector product yielding the reduced prediction (written transposed when 'transpose' is set).
+    void computeReducedPred( int*const result, const int* const input, 
+                             const uint8_t*matrix, const int shiftMatrix, const int offsetMatrix,
+                             const bool transpose, const int bitDepth );
+  };
+
+#endif //__MATRIXINTRAPPREDICTION__
diff --git a/source/Lib/CommonLib/MipData.h b/source/Lib/CommonLib/MipData.h
new file mode 100644
index 0000000000000000000000000000000000000000..487565eb11e846bbc5f4e9e279f855e54f786d11
--- /dev/null
+++ b/source/Lib/CommonLib/MipData.h
@@ -0,0 +1,894 @@
+/* The copyright in this software is being made available under the BSD
+* License, included below. This software may be subject to other third party
+* and contributor rights, including patent rights, and no such rights are
+* granted under this license.
+*
+* Copyright (c) 2010-2020, ITU/ISO/IEC
+* All rights reserved.
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are met:
+*
+*  * Redistributions of source code must retain the above copyright notice,
+*    this list of conditions and the following disclaimer.
+*  * Redistributions in binary form must reproduce the above copyright notice,
+*    this list of conditions and the following disclaimer in the documentation
+*    and/or other materials provided with the distribution.
+*  * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
+*    be used to endorse or promote products derived from this software without
+*    specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
+* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+* THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/** \file     MipData.h
+\brief    weight and bias data for matrix-based intra prediction (MIP)
+*/
+
+ALIGN_DATA(MEMORY_ALIGN_DEF_SIZE, const uint8_t mipMatrix4x4[16][16][4]) = // [mode][output sample][input sample]; weights used for size id 0
+{
+  {
+    {    5,   16,   51,    2},
+    {    5,   22,   18,   36},
+    {    5,   15,    5,   55},
+    {    5,   10,    6,   59},
+    {    4,    6,   12,   59},
+    {    5,    3,    4,   66},
+    {    7,    0,    5,   67},
+    {    8,    1,    7,   65},
+    {    6,    2,    3,   67},
+    {    7,    1,    5,   66},
+    {    9,    1,    6,   66},
+    {   10,    2,    6,   66},
+    {    7,    3,    5,   64},
+    {    9,    2,    6,   64},
+    {   10,    2,    6,   65},
+    {   10,    3,    7,   64}
+  },
+  {
+    {   28,   27,   58,   26},
+    {   28,   27,   52,   27},
+    {   29,   39,   43,   28},
+    {   28,   70,   38,   27},
+    {   28,   28,   61,   28},
+    {   28,   27,   62,   26},
+    {   29,   30,   60,   25},
+    {   25,   61,   49,   23},
+    {   29,   28,   35,   54},
+    {   29,   28,   42,   47},
+    {   28,   26,   50,   36},
+    {   16,   42,   46,   19},
+    {   28,   28,   23,   64},
+    {   29,   28,   24,   64},
+    {   25,   24,   23,   58},
+    {    0,   19,   21,   31}
+  },
+  {
+    {   28,   27,   39,   26},
+    {   29,   32,   29,   27},
+    {   29,   60,   31,   27},
+    {   27,   68,   31,   26},
+    {   28,   27,   51,   27},
+    {   28,   39,   39,   26},
+    {   26,   64,   33,   28},
+    {   21,   64,   27,   35},
+    {   27,   28,   38,   50},
+    {   19,   44,   31,   51},
+    {   10,   57,   22,   54},
+    {    7,   53,   16,   58},
+    {   19,   31,   12,   72},
+    {    6,   47,   14,   64},
+    {    0,   50,   15,   60},
+    {    2,   48,   15,   60}
+  },
+  {
+    {   42,   40,   64,   45},
+    {   43,   41,   44,   52},
+    {   43,   67,   34,   49},
+    {   41,   76,   38,   45},
+    {   42,   41,   50,   67},
+    {   41,   39,   42,   71},
+    {   38,   52,   33,   63},
+    {   31,   70,   31,   47},
+    {   40,   41,   39,   76},
+    {   31,   36,   38,   74},
+    {   17,   30,   35,   69},
+    {    9,   47,   30,   50},
+    {   32,   37,   37,   75},
+    {   15,   29,   36,   68},
+    {    3,   22,   37,   62},
+    {    0,   26,   38,   52}
+  },
+  {
+    {   57,   53,   73,   55},
+    {   58,   75,   61,   55},
+    {   55,   91,   53,   63},
+    {   45,   87,   43,   74},
+    {   58,   60,   80,   63},
+    {   51,   71,   62,   73},
+    {   30,   65,   48,   77},
+    {   14,   50,   47,   73},
+    {   50,   56,   53,   89},
+    {   20,   43,   50,   77},
+    {    2,   32,   53,   67},
+    {    2,   30,   54,   65},
+    {   26,   41,   47,   82},
+    {    2,   28,   53,   65},
+    {    0,   27,   55,   63},
+    {    3,   29,   55,   64}
+  },
+  {
+    {   23,   19,   88,    8},
+    {   23,   44,   61,   25},
+    {   22,   64,   24,   52},
+    {   21,   56,    0,   71},
+    {   21,   25,   53,   61},
+    {   20,   26,   31,   80},
+    {   20,   22,   21,   88},
+    {   19,   20,   17,   88},
+    {   20,   22,   19,   89},
+    {   19,   21,   19,   89},
+    {   18,   20,   20,   88},
+    {   17,   19,   20,   86},
+    {   19,   20,   20,   87},
+    {   18,   21,   21,   86},
+    {   16,   21,   21,   85},
+    {   13,   19,   21,   83}
+  },
+  {
+    {    9,    0,   11,    6},
+    {    9,   19,   11,    8},
+    {    9,   70,   11,    8},
+    {    9,   76,   12,    8},
+    {    9,    0,   12,    6},
+    {    9,   19,   11,    7},
+    {    9,   70,   11,    9},
+    {    9,   76,   12,    9},
+    {    9,    0,   12,    6},
+    {    9,   20,   12,    7},
+    {    9,   70,   12,    9},
+    {    9,   75,   12,    9},
+    {    9,    1,   11,    8},
+    {    9,   20,   12,    8},
+    {    9,   70,   12,    8},
+    {    9,   75,   11,    9}
+  },
+  {
+    {    6,    3,   61,    7},
+    {    7,    0,   34,    7},
+    {    7,   13,    5,    6},
+    {    6,   56,    1,    4},
+    {    6,    7,   74,    5},
+    {    6,    4,   70,    6},
+    {    6,    0,   51,    6},
+    {    6,    8,   19,    5},
+    {    7,    7,   55,   23},
+    {    7,    7,   71,    8},
+    {    6,    5,   74,    5},
+    {    6,    3,   59,    7},
+    {    5,    5,    3,   74},
+    {    6,    6,   36,   41},
+    {    7,    7,   62,   15},
+    {    6,    7,   66,   10}
+  },
+  {
+    {   35,   26,   78,   28},
+    {   34,   39,   35,   33},
+    {   34,   96,   32,   35},
+    {   35,  102,   35,   35},
+    {   31,   30,  107,   25},
+    {   29,   34,   66,   24},
+    {   31,   93,   32,   31},
+    {   35,  101,   34,   35},
+    {   31,   31,   64,   72},
+    {   19,   28,   86,   30},
+    {   18,   85,   47,   20},
+    {   31,   99,   34,   33},
+    {   32,   32,   27,  106},
+    {   12,   26,   52,   71},
+    {    0,   72,   60,   19},
+    {   20,   93,   37,   26}
+  },
+  {
+    {   10,    6,   60,    0},
+    {    7,   14,   39,    4},
+    {    4,   65,   21,   11},
+    {    1,   80,   12,   16},
+    {   10,   11,   52,   46},
+    {    6,    9,   36,   57},
+    {    2,   18,   20,   65},
+    {    0,   23,   11,   65},
+    {   13,   13,   13,   80},
+    {   12,   13,   12,   80},
+    {   12,   12,   13,   80},
+    {   11,   12,   14,   79},
+    {   15,   14,   12,   79},
+    {   15,   14,   13,   79},
+    {   16,   16,   15,   78},
+    {   16,   16,   17,   76}
+  },
+  {
+    {   50,   48,   85,   42},
+    {   47,   44,   59,   44},
+    {   43,   52,   51,   43},
+    {   14,  107,   51,   29},
+    {   51,   49,   92,   77},
+    {   47,   44,   70,   81},
+    {   35,   32,   47,   79},
+    {    0,   47,   35,   60},
+    {   50,   49,   49,  116},
+    {   49,   47,   49,  117},
+    {   45,   41,   49,  115},
+    {   30,   32,   47,  106},
+    {   50,   49,   46,  117},
+    {   49,   49,   48,  115},
+    {   50,   48,   49,  115},
+    {   47,   45,   50,  113}
+  },
+  {
+    {   67,   65,   96,   61},
+    {   66,   65,   90,   62},
+    {   68,   83,   79,   64},
+    {   59,  102,   70,   61},
+    {   67,   67,   93,   74},
+    {   68,   67,   93,   74},
+    {   64,   75,   88,   69},
+    {   30,   85,   69,   49},
+    {   67,   67,   68,   97},
+    {   68,   67,   68,   98},
+    {   43,   60,   69,   80},
+    {    4,   68,   65,   40},
+    {   66,   66,   63,  101},
+    {   62,   63,   62,   99},
+    {   22,   49,   60,   76},
+    {    0,   65,   64,   41}
+  },
+  {
+    {   27,   18,   53,   22},
+    {   26,   35,   22,   28},
+    {   27,   90,   27,   27},
+    {   28,   97,   29,   28},
+    {   22,   19,   80,   29},
+    {   15,   24,   42,   22},
+    {   17,   72,   27,   20},
+    {   22,   89,   28,   23},
+    {   19,   22,   40,   82},
+    {    6,   13,   36,   68},
+    {    0,   19,   14,   53},
+    {    8,   33,   10,   38},
+    {   22,   26,   24,   95},
+    {   13,   19,   25,   91},
+    {    6,   14,   22,   87},
+    {    4,   14,   13,   79}
+  },
+  {
+    {   50,   48,   80,   49},
+    {   50,   45,   73,   49},
+    {   50,   48,   62,   49},
+    {   49,   61,   54,   48},
+    {   50,   50,   84,   48},
+    {   50,   49,   84,   48},
+    {   50,   48,   82,   49},
+    {   50,   47,   76,   49},
+    {   50,   50,   69,   64},
+    {   51,   50,   77,   56},
+    {   48,   48,   82,   50},
+    {   37,   43,   81,   45},
+    {   49,   48,   44,   87},
+    {   40,   43,   50,   78},
+    {   17,   32,   58,   58},
+    {    0,   27,   64,   44}
+  },
+  {
+    {   29,   26,   37,   36},
+    {   24,   42,   16,   40},
+    {   25,   62,   35,   28},
+    {   28,   54,   51,   22},
+    {   24,   29,   22,   52},
+    {    9,   44,   21,   31},
+    {   16,   50,   54,   14},
+    {   27,   40,   65,   19},
+    {   17,   36,   21,   49},
+    {    0,   40,   41,   10},
+    {   15,   37,   65,    9},
+    {   28,   34,   66,   21},
+    {   16,   41,   36,   33},
+    {    6,   37,   54,    3},
+    {   19,   32,   64,   12},
+    {   28,   33,   62,   24}
+  },
+  {
+    {   19,   20,   50,   19},
+    {   19,   21,   49,   20},
+    {   19,   27,   47,   19},
+    {   19,   34,   43,   19},
+    {   19,   22,   54,   18},
+    {   19,   22,   55,   18},
+    {   19,   22,   55,   18},
+    {   19,   22,   53,   18},
+    {   21,   22,   45,   27},
+    {   19,   22,   47,   24},
+    {   18,   21,   48,   23},
+    {   18,   21,   47,   24},
+    {    9,   14,    2,   65},
+    {    3,   12,    2,   62},
+    {    0,   11,    4,   59},
+    {    0,   12,    6,   57}
+  }
+};
+
+ALIGN_DATA(MEMORY_ALIGN_DEF_SIZE, const uint8_t  mipOffsetMatrix4x4[16]) = // per-mode offset for size id 0; multiplied by the input sum and subtracted before the shift
+{    1,   28,   28,   42,   56,   22,    9,    6,   35,   14,   50,   66,   29,   50,   31,   19};
+
+ALIGN_DATA(MEMORY_ALIGN_DEF_SIZE, const uint8_t mipShiftMatrix4x4[16]) = // per-mode right shift applied to the weighted sum for size id 0
+{    6,    5,    5,    5,    5,    6,    6,    6,    6,    6,    6,    5,    6,    5,    5,    5};
+
+ALIGN_DATA(MEMORY_ALIGN_DEF_SIZE, const uint8_t mipMatrix8x8[8][16][8]) =
+{
+  {
+    {   18,   77,   44,   26,    0,   17,   18,   19},
+    {   19,   70,   83,   26,   16,   13,   16,   17},
+    {   20,   40,   98,   36,   16,   16,   15,   16},
+    {   20,   29,   75,   68,   15,   17,   16,   16},
+    {   19,   82,   61,   29,   14,   10,   12,   22},
+    {   21,   59,   88,   30,   17,   13,   14,   17},
+    {   23,   46,   93,   36,   16,   17,   14,   16},
+    {   23,   33,   96,   43,   16,   18,   15,   17},
+    {   19,   82,   64,   30,   16,   17,    8,   17},
+    {   22,   61,   89,   29,   17,   17,   12,   15},
+    {   24,   44,   94,   38,   17,   17,   15,   16},
+    {   25,   34,   90,   47,   16,   19,   15,   17},
+    {   19,   77,   61,   30,   15,   17,   20,   10},
+    {   22,   63,   83,   30,   17,   16,   18,   10},
+    {   24,   44,   89,   39,   17,   18,   16,   16},
+    {   24,   35,   82,   50,   16,   19,   16,   17}
+  },
+  {
+    {   14,   15,   10,   12,   99,   11,   12,   13},
+    {   12,   61,    6,    9,   32,    8,   14,   13},
+    {   12,  104,   54,    2,    9,   17,   11,   14},
+    {   12,   24,  110,   33,   11,   18,   10,   14},
+    {   14,   14,   15,   11,   69,  102,    6,   12},
+    {   13,   13,   11,   10,  105,   30,    5,   14},
+    {   12,   45,    6,    8,   55,    5,   13,   12},
+    {   12,   72,   38,    6,   18,   14,   12,   13},
+    {   14,   12,   18,   10,    2,   77,  102,    3},
+    {   13,   12,   19,    8,   41,  122,   22,    7},
+    {   13,    4,   18,    8,   97,   60,    0,   13},
+    {   12,   16,   18,    9,   73,   17,   11,   12},
+    {   14,   12,   19,    9,   10,    8,   67,   98},
+    {   13,   12,   20,    8,    4,   57,  104,   20},
+    {   13,   10,   22,    7,   29,  109,   37,   12},
+    {   12,    5,   24,    9,   66,   70,   12,   17}
+  },
+  {
+    {   23,   42,   18,   23,   18,   41,   20,   23},
+    {   23,   86,   33,   20,   20,   33,   21,   23},
+    {   23,   18,   90,   25,   22,   32,   20,   23},
+    {   23,   25,   12,   94,   22,   33,   21,   23},
+    {   23,   36,   21,   23,    0,   79,   31,   21},
+    {   23,   68,   29,   21,    0,   67,   29,   21},
+    {   23,   29,   70,   24,    5,   58,   28,   21},
+    {   23,   22,   21,   78,   11,   50,   29,   22},
+    {   22,   28,   23,   23,   18,    9,   97,   25},
+    {   21,   35,   25,   22,   16,    8,   99,   22},
+    {   19,   30,   36,   22,   15,    6,  100,   21},
+    {   18,   22,   22,   45,   16,    5,   92,   23},
+    {   19,   24,   23,   21,   20,   28,    4,  101},
+    {   16,   23,   23,   20,   18,   28,    6,   99},
+    {   11,   24,   22,   18,   16,   28,    8,   97},
+    {    8,   21,   21,   23,   16,   25,   11,   93}
+  },
+  {
+    {   45,   61,   48,   48,   60,   81,   44,   44},
+    {   45,   46,   72,   53,   41,   84,   46,   43},
+    {   45,   42,   60,   78,   38,   76,   50,   43},
+    {   44,   46,   43,   94,   39,   63,   55,   45},
+    {   44,   44,   53,   51,   43,   83,   68,   44},
+    {   45,   33,   59,   63,   39,   68,   77,   44},
+    {   44,   43,   42,   79,   38,   54,   85,   46},
+    {   41,   46,   40,   82,   39,   44,   80,   52},
+    {   44,   41,   49,   52,   43,   39,   98,   60},
+    {   41,   40,   46,   63,   43,   33,   92,   66},
+    {   34,   43,   39,   70,   41,   30,   80,   75},
+    {   26,   40,   41,   68,   35,   36,   62,   83},
+    {   39,   42,   45,   52,   41,   43,   35,  117},
+    {   26,   44,   41,   56,   38,   44,   30,  115},
+    {    8,   36,   44,   53,   28,   51,   23,  108},
+    {    0,   26,   48,   51,   22,   56,   21,  103}
+  },
+  {
+    {   10,   15,   10,   11,   79,   18,    7,   10},
+    {   10,   18,   15,   10,   79,   14,    8,   10},
+    {   10,   15,   24,   13,   72,   12,    8,    9},
+    {   10,   12,    8,   40,   59,   13,    8,    9},
+    {   10,   10,   11,   10,    0,   80,   17,    7},
+    {   10,    9,   11,   11,    4,   82,   12,    6},
+    {    9,   11,   11,   11,    9,   81,   10,    6},
+    {    9,   10,   12,   15,   15,   72,   12,    6},
+    {   10,   11,   10,   10,   12,    2,   77,   14},
+    {   10,   12,   11,   11,   11,    8,   76,   10},
+    {    9,   11,   12,   11,    9,   15,   73,    7},
+    {    9,   11,   11,   14,    9,   24,   63,    8},
+    {   10,   11,   10,   11,    8,   12,    1,   82},
+    {   10,   12,   11,   11,    9,   10,    8,   76},
+    {   10,   11,   12,   12,   10,    7,   17,   69},
+    {    9,   11,   10,   15,   10,    8,   25,   60}
+  },
+  {
+    {   14,   34,   30,   13,   20,   58,    8,   14},
+    {   13,    1,   44,   32,    7,   32,   33,   13},
+    {   13,   12,    2,   56,   11,   11,   36,   33},
+    {   12,   17,    5,   38,   13,    7,   23,   58},
+    {   13,    7,   20,   22,   10,   21,   65,   18},
+    {   11,   10,    6,   29,   13,    5,   39,   56},
+    {    9,   17,    4,   20,   13,    9,   12,   83},
+    {    8,   14,   15,   11,   12,   13,    9,   85},
+    {   12,   14,    9,   19,   15,    7,   23,   74},
+    {    8,   15,   10,   14,   13,   13,    0,   93},
+    {    6,   14,   15,    9,   11,   15,    0,   93},
+    {    6,   14,   16,    9,   10,   15,    5,   87},
+    {   11,   14,   12,   15,   13,   15,    2,   89},
+    {    8,   13,   15,   12,   11,   15,    3,   88},
+    {    6,   14,   15,   11,   11,   16,    4,   87},
+    {    6,   14,   15,   12,   11,   16,    6,   83}
+  },
+  {
+    {   10,    6,    9,   11,   70,   11,    8,    9},
+    {    9,    8,    6,   11,   49,    4,   10,    8},
+    {    9,   38,    4,   11,   25,    6,   11,    8},
+    {    9,   41,   22,   14,   15,    9,   11,    8},
+    {    9,    8,    9,   11,   21,   68,   11,    7},
+    {    9,    6,    7,   11,   49,   49,    4,    8},
+    {    8,    9,    4,   11,   64,   23,    6,    8},
+    {    9,   16,    7,    9,   52,   11,   11,    7},
+    {    9,   10,    8,   10,    7,   19,   73,    5},
+    {    8,    9,    7,   11,    4,   50,   51,    0},
+    {    8,    9,    7,   11,   15,   66,   24,    3},
+    {    9,   10,    6,   12,   33,   53,   14,    6},
+    {    8,    9,    8,   10,   11,    5,   21,   67},
+    {    7,   10,    6,   11,   11,    1,   55,   37},
+    {    7,   11,    6,   11,    8,   13,   69,   15},
+    {    8,   12,    6,   12,   11,   31,   52,    9}
+  },
+  {
+    {   26,   43,   21,   27,   21,   46,   23,   26},
+    {   26,   66,   67,   19,   25,   27,   28,   26},
+    {   27,   18,   64,   63,   27,   27,   27,   27},
+    {   28,   28,   16,  101,   28,   27,   27,   28},
+    {   25,   42,   27,   24,    0,   81,   39,   21},
+    {   25,   39,   84,   23,   23,   32,   28,   25},
+    {   27,   22,   40,   84,   28,   25,   27,   27},
+    {   29,   28,   15,  102,   28,   28,   27,   28},
+    {   24,   34,   37,   22,   21,   19,   87,   30},
+    {   25,   22,   82,   38,   25,   29,   33,   25},
+    {   28,   26,   27,   95,   27,   28,   25,   27},
+    {   30,   27,   17,  101,   28,   28,   27,   28},
+    {   24,   24,   42,   27,   27,   27,   12,   95},
+    {   26,   19,   64,   55,   29,   28,   23,   39},
+    {   29,   28,   26,   94,   27,   28,   27,   26},
+    {   30,   28,   20,   98,   28,   28,   27,   30}
+  }
+};
+
+ALIGN_DATA(MEMORY_ALIGN_DEF_SIZE, const short mipOffsetMatrix8x8[8]) = // 8 offsets, presumably one per 8x8 MIP matrix (TODO confirm); NOTE(review): element type is short here but uint8_t for mipOffsetMatrix16x16
+{   15,   14,   23,   45,   10,   14,   10,   27};
+
+ALIGN_DATA(MEMORY_ALIGN_DEF_SIZE, const short mipShiftMatrix8x8[8]) = // 8 shift amounts, presumably one per 8x8 MIP matrix (TODO confirm); NOTE(review): element type is short here but uint8_t for mipShiftMatrix16x16
+{    7,    7,    6,    6,    6,    6,    6,    6};
+
+ALIGN_DATA(MEMORY_ALIGN_DEF_SIZE, const uint8_t mipMatrix16x16[6][64][7]) =
+{
+  {
+    {   22,   13,   15,   50,   16,   17,   14},
+    {   55,    5,   15,   25,   22,   16,   14},
+    {   83,    7,   14,   18,   22,   17,   14},
+    {   58,   47,    7,   15,   21,   17,   15},
+    {   15,   81,    9,   12,   20,   18,   14},
+    {    5,   61,   38,   12,   19,   18,   14},
+    {   14,   20,   73,   13,   17,   18,   15},
+    {   18,    4,   84,   16,   16,   19,   16},
+    {   30,   12,   15,   57,   36,   15,   14},
+    {   54,    8,   14,   44,   33,   18,   13},
+    {   69,   14,   12,   29,   32,   20,   12},
+    {   48,   46,    8,   18,   30,   22,   13},
+    {   17,   68,   14,   11,   26,   24,   13},
+    {   10,   47,   43,   10,   22,   24,   13},
+    {   16,   16,   70,   12,   18,   24,   14},
+    {   20,    6,   77,   15,   16,   24,   16},
+    {   24,   15,   15,   28,   67,   15,   15},
+    {   38,   14,   14,   34,   52,   23,   13},
+    {   46,   21,   13,   29,   45,   27,   11},
+    {   36,   38,   13,   19,   38,   31,   11},
+    {   21,   46,   22,   13,   31,   34,   11},
+    {   18,   32,   43,   10,   26,   34,   12},
+    {   21,   13,   62,   11,   21,   33,   14},
+    {   22,    7,   66,   13,   18,   31,   17},
+    {   18,   17,   15,   12,   58,   40,   12},
+    {   22,   19,   15,   18,   50,   42,   12},
+    {   24,   23,   15,   19,   44,   45,   11},
+    {   25,   27,   19,   16,   37,   47,   11},
+    {   24,   26,   27,   13,   31,   48,   12},
+    {   25,   18,   39,   11,   27,   46,   13},
+    {   25,   12,   48,   10,   23,   43,   16},
+    {   25,   11,   51,   11,   20,   40,   19},
+    {   16,   17,   16,   14,   24,   71,   13},
+    {   14,   19,   16,   15,   27,   67,   14},
+    {   14,   20,   17,   15,   26,   66,   13},
+    {   18,   18,   21,   15,   24,   65,   13},
+    {   24,   14,   27,   15,   22,   64,   14},
+    {   27,   12,   31,   13,   20,   61,   16},
+    {   27,   13,   35,   12,   19,   56,   19},
+    {   26,   14,   37,   12,   18,   50,   23},
+    {   15,   16,   16,   17,    8,   68,   31},
+    {   13,   17,   17,   16,    8,   72,   26},
+    {   13,   17,   18,   17,    8,   74,   23},
+    {   16,   14,   21,   17,    8,   73,   22},
+    {   21,   11,   24,   17,    9,   72,   22},
+    {   25,   11,   24,   16,    9,   69,   24},
+    {   26,   14,   25,   15,   10,   64,   26},
+    {   25,   15,   28,   14,   12,   56,   30},
+    {   14,   15,   16,   16,    9,   37,   62},
+    {   14,   15,   17,   18,    4,   52,   51},
+    {   15,   14,   18,   19,    1,   60,   44},
+    {   17,   13,   19,   20,    0,   64,   41},
+    {   20,   11,   20,   19,    1,   63,   40},
+    {   22,   12,   20,   18,    3,   61,   40},
+    {   23,   14,   20,   17,    5,   58,   40},
+    {   23,   15,   22,   16,    8,   52,   42},
+    {   16,   14,   17,   16,   12,   15,   81},
+    {   17,   13,   17,   17,    6,   29,   71},
+    {   18,   13,   18,   19,    3,   38,   64},
+    {   18,   13,   18,   19,    1,   44,   60},
+    {   19,   13,   17,   20,    2,   45,   58},
+    {   20,   14,   17,   19,    3,   45,   56},
+    {   21,   14,   18,   18,    5,   45,   55},
+    {   22,   15,   19,   18,    7,   43,   53}
+  },
+  {
+    {   42,   21,   26,   96,   39,   29,   20},
+    {   42,   32,   28,   49,   66,   35,   21},
+    {   30,   41,   30,   29,   73,   40,   23},
+    {   23,   42,   35,   23,   72,   42,   26},
+    {   25,   34,   44,   22,   68,   46,   26},
+    {   26,   32,   49,   22,   65,   47,   25},
+    {   24,   33,   53,   21,   62,   47,   25},
+    {   22,   30,   62,   21,   57,   47,   25},
+    {   27,   25,   27,   46,   97,   33,   22},
+    {   11,   31,   31,   22,   94,   53,   22},
+    {    0,   29,   35,   16,   89,   60,   26},
+    {    6,   17,   41,   15,   86,   62,   28},
+    {   20,    4,   42,   16,   84,   63,   28},
+    {   26,    8,   35,   17,   84,   61,   28},
+    {   23,   24,   25,   18,   83,   60,   27},
+    {   21,   31,   27,   19,   77,   58,   28},
+    {   16,   22,   27,   14,  107,   48,   24},
+    {   11,   21,   32,   13,   84,   70,   26},
+    {   11,   17,   36,   14,   77,   73,   29},
+    {   13,   15,   35,   15,   77,   71,   31},
+    {   16,   14,   34,   15,   80,   68,   30},
+    {   17,   16,   31,   15,   84,   64,   30},
+    {   19,   19,   29,   16,   85,   61,   29},
+    {   19,   23,   30,   18,   81,   60,   29},
+    {   17,   18,   28,   14,   81,   76,   24},
+    {   15,   17,   31,   15,   71,   77,   33},
+    {   15,   16,   33,   14,   70,   74,   36},
+    {   15,   17,   32,   14,   73,   71,   36},
+    {   15,   18,   32,   13,   77,   67,   36},
+    {   15,   18,   32,   13,   81,   65,   34},
+    {   16,   17,   32,   13,   82,   64,   32},
+    {   18,   19,   32,   15,   78,   63,   32},
+    {   18,   17,   28,   18,   52,   97,   31},
+    {   16,   16,   31,   15,   64,   76,   41},
+    {   14,   17,   31,   14,   66,   72,   43},
+    {   15,   17,   32,   13,   70,   70,   42},
+    {   16,   18,   33,   11,   72,   70,   38},
+    {   16,   18,   33,   11,   74,   71,   35},
+    {   17,   17,   34,   13,   73,   72,   32},
+    {   18,   19,   33,   14,   70,   72,   32},
+    {   17,   19,   26,   16,   39,   87,   55},
+    {   14,   19,   29,   13,   57,   78,   49},
+    {   15,   18,   31,   12,   61,   76,   45},
+    {   15,   18,   32,   13,   63,   76,   42},
+    {   16,   17,   33,   13,   63,   79,   38},
+    {   17,   15,   35,   13,   62,   82,   34},
+    {   18,   15,   36,   13,   62,   82,   33},
+    {   19,   18,   34,   15,   59,   81,   33},
+    {   17,   19,   25,   15,   34,   59,   89},
+    {   14,   19,   29,   13,   47,   80,   56},
+    {   14,   19,   31,   12,   54,   84,   44},
+    {   16,   18,   32,   13,   57,   83,   41},
+    {   17,   17,   33,   14,   55,   85,   38},
+    {   18,   15,   35,   14,   53,   87,   36},
+    {   19,   14,   36,   15,   53,   86,   35},
+    {   20,   17,   36,   17,   52,   81,   37},
+    {   19,   19,   26,   17,   32,   42,  105},
+    {   17,   19,   28,   15,   40,   75,   65},
+    {   16,   19,   30,   14,   45,   84,   49},
+    {   17,   18,   32,   14,   50,   83,   45},
+    {   18,   17,   33,   16,   50,   82,   43},
+    {   19,   16,   35,   16,   49,   84,   40},
+    {   20,   15,   38,   17,   48,   83,   40},
+    {   21,   17,   38,   18,   48,   78,   41}
+  },
+  {
+    {   52,   46,   46,   55,   50,   46,   47},
+    {   65,   45,   45,   42,   52,   47,   47},
+    {   74,   48,   45,   40,   51,   48,   47},
+    {   67,   61,   44,   39,   50,   48,   48},
+    {   52,   74,   45,   39,   48,   48,   49},
+    {   44,   75,   53,   39,   47,   48,   50},
+    {   44,   57,   71,   40,   46,   46,   51},
+    {   50,   31,   92,   42,   45,   41,   55},
+    {   55,   45,   45,   48,   61,   48,   46},
+    {   65,   46,   45,   36,   57,   53,   46},
+    {   72,   50,   44,   31,   54,   54,   48},
+    {   66,   61,   44,   31,   51,   54,   49},
+    {   54,   73,   45,   32,   48,   52,   52},
+    {   45,   76,   52,   33,   46,   49,   55},
+    {   45,   59,   70,   36,   44,   44,   59},
+    {   50,   30,   93,   40,   43,   37,   64},
+    {   54,   46,   45,   39,   66,   53,   46},
+    {   65,   46,   45,   31,   57,   60,   47},
+    {   71,   50,   44,   26,   52,   61,   50},
+    {   67,   60,   44,   26,   48,   58,   53},
+    {   56,   72,   45,   27,   45,   53,   58},
+    {   47,   76,   51,   29,   43,   47,   63},
+    {   46,   59,   69,   33,   41,   39,   69},
+    {   51,   29,   93,   37,   40,   30,   76},
+    {   53,   46,   46,   35,   60,   64,   47},
+    {   64,   47,   45,   27,   53,   65,   50},
+    {   71,   50,   45,   22,   49,   62,   55},
+    {   68,   59,   44,   22,   46,   57,   61},
+    {   58,   71,   45,   24,   43,   50,   67},
+    {   48,   76,   51,   27,   41,   41,   74},
+    {   46,   59,   69,   32,   39,   31,   82},
+    {   51,   30,   93,   36,   38,   21,   88},
+    {   52,   46,   46,   35,   51,   71,   50},
+    {   63,   47,   45,   24,   51,   63,   58},
+    {   70,   51,   45,   19,   49,   56,   65},
+    {   68,   59,   44,   19,   45,   50,   72},
+    {   58,   71,   45,   22,   42,   41,   80},
+    {   48,   76,   51,   26,   39,   31,   88},
+    {   46,   59,   69,   30,   37,   21,   95},
+    {   51,   30,   93,   34,   36,   13,  100},
+    {   52,   46,   46,   35,   47,   65,   61},
+    {   62,   47,   45,   24,   49,   56,   69},
+    {   69,   51,   45,   19,   47,   47,   77},
+    {   68,   60,   44,   19,   44,   38,   85},
+    {   59,   71,   45,   22,   40,   29,   94},
+    {   49,   75,   51,   25,   37,   19,  102},
+    {   47,   59,   69,   28,   35,   11,  108},
+    {   52,   31,   93,   33,   34,    6,  110},
+    {   52,   47,   46,   36,   46,   50,   77},
+    {   61,   48,   45,   26,   46,   42,   84},
+    {   67,   52,   45,   21,   44,   34,   92},
+    {   66,   60,   44,   21,   41,   26,  100},
+    {   59,   70,   46,   22,   38,   17,  108},
+    {   50,   73,   53,   25,   35,   10,  114},
+    {   48,   57,   70,   28,   33,    3,  118},
+    {   52,   31,   92,   32,   32,    1,  118},
+    {   52,   47,   46,   38,   45,   32,   93},
+    {   59,   49,   46,   30,   43,   27,   99},
+    {   65,   53,   46,   25,   41,   21,  106},
+    {   64,   60,   46,   23,   38,   15,  111},
+    {   58,   67,   48,   24,   36,    9,  117},
+    {   51,   68,   56,   25,   34,    4,  121},
+    {   49,   55,   72,   27,   32,    0,  123},
+    {   52,   32,   92,   31,   30,    0,  123}
+  },
+  {
+    {   18,   16,   16,   71,   13,   16,   16},
+    {   36,   14,   16,   63,   18,   15,   17},
+    {   64,   11,   16,   60,   20,   13,   17},
+    {   64,   30,   13,   55,   22,   13,   17},
+    {   33,   63,    9,   50,   25,   12,   18},
+    {   15,   64,   24,   44,   27,   11,   18},
+    {   19,   27,   59,   38,   30,   10,   18},
+    {   24,    0,   81,   32,   31,   12,   18},
+    {   22,   16,   16,   65,   46,   10,   17},
+    {   25,   17,   16,   63,   50,    8,   18},
+    {   35,   16,   16,   58,   54,    6,   19},
+    {   41,   19,   16,   53,   58,    6,   19},
+    {   36,   30,   15,   48,   62,    5,   19},
+    {   28,   35,   20,   43,   63,    6,   19},
+    {   26,   24,   35,   37,   62,    8,   18},
+    {   26,   10,   50,   31,   59,   11,   19},
+    {   18,   17,   16,   22,   81,   10,   18},
+    {   18,   18,   16,   22,   81,   12,   19},
+    {   19,   18,   16,   21,   81,   13,   19},
+    {   22,   16,   17,   20,   80,   15,   18},
+    {   25,   16,   17,   20,   77,   20,   17},
+    {   26,   19,   17,   20,   73,   25,   16},
+    {   25,   19,   20,   20,   68,   29,   16},
+    {   24,   16,   27,   21,   60,   31,   17},
+    {   16,   17,   16,    9,   59,   45,   14},
+    {   16,   17,   16,   10,   55,   49,   14},
+    {   16,   17,   16,   12,   50,   54,   14},
+    {   17,   16,   16,   13,   45,   59,   13},
+    {   19,   15,   16,   15,   39,   63,   13},
+    {   21,   16,   16,   17,   35,   66,   14},
+    {   22,   17,   16,   18,   33,   65,   16},
+    {   22,   17,   19,   18,   33,   59,   21},
+    {   17,   16,   16,   15,   20,   78,   16},
+    {   17,   16,   16,   16,   18,   77,   19},
+    {   17,   16,   15,   17,   15,   77,   22},
+    {   17,   16,   15,   17,   13,   76,   24},
+    {   18,   16,   15,   18,   11,   75,   27},
+    {   18,   16,   15,   19,   11,   71,   31},
+    {   19,   17,   15,   19,   13,   65,   35},
+    {   19,   17,   16,   18,   16,   57,   39},
+    {   17,   16,   16,   18,    9,   61,   42},
+    {   17,   16,   15,   18,   10,   55,   47},
+    {   18,   16,   15,   18,    9,   51,   52},
+    {   18,   17,   15,   18,    9,   47,   56},
+    {   18,   17,   15,   18,   10,   43,   60},
+    {   18,   17,   14,   18,   11,   39,   63},
+    {   18,   18,   14,   17,   12,   36,   65},
+    {   19,   17,   15,   18,   13,   35,   65},
+    {   16,   16,   15,   16,   14,   24,   75},
+    {   17,   17,   15,   17,   14,   21,   78},
+    {   17,   17,   15,   17,   14,   19,   80},
+    {   18,   17,   15,   17,   13,   17,   82},
+    {   18,   18,   14,   17,   13,   16,   84},
+    {   18,   18,   15,   17,   13,   15,   84},
+    {   19,   17,   15,   17,   13,   16,   84},
+    {   19,   17,   16,   17,   14,   18,   81},
+    {   16,   16,   16,   16,   16,    8,   89},
+    {   17,   16,   16,   16,   16,    8,   89},
+    {   18,   17,   15,   16,   15,    8,   90},
+    {   18,   18,   15,   16,   15,    8,   90},
+    {   19,   18,   15,   17,   15,    8,   90},
+    {   19,   18,   15,   17,   15,    7,   90},
+    {   19,   17,   16,   17,   14,    8,   90},
+    {   19,   17,   17,   17,   15,   10,   87}
+  },
+  {
+    {   29,   12,   13,   53,   18,   11,   15},
+    {   56,   12,   13,   24,   32,    7,   16},
+    {   54,   32,   12,   12,   33,   11,   15},
+    {   21,   61,   15,    8,   27,   18,   13},
+    {    5,   54,   36,    8,   20,   22,   13},
+    {   10,   21,   64,    9,   15,   24,   16},
+    {   16,    2,   75,   11,   11,   23,   23},
+    {   16,    2,   68,   12,    9,   19,   33},
+    {   29,   16,   14,   41,   56,    2,   17},
+    {   28,   29,   15,   18,   56,   10,   15},
+    {   14,   42,   21,    7,   42,   24,   13},
+    {    6,   36,   37,    7,   25,   35,   14},
+    {   11,   14,   55,   10,   13,   37,   21},
+    {   16,    0,   61,   11,    8,   33,   31},
+    {   16,    0,   56,   11,    6,   26,   43},
+    {   15,    6,   46,   12,    7,   20,   53},
+    {   13,   22,   16,    9,   73,   17,   14},
+    {    8,   28,   22,    9,   46,   40,   11},
+    {    6,   23,   32,   11,   21,   53,   15},
+    {   11,    9,   43,   13,    9,   50,   27},
+    {   16,    0,   46,   14,    4,   40,   42},
+    {   17,    2,   41,   14,    5,   28,   55},
+    {   15,    7,   34,   13,    6,   20,   65},
+    {   14,   11,   29,   12,    9,   15,   69},
+    {   10,   18,   19,    8,   35,   62,    9},
+    {    9,   16,   25,   15,   13,   67,   18},
+    {   12,    9,   32,   17,    4,   54,   36},
+    {   15,    5,   34,   17,    4,   36,   53},
+    {   16,    4,   31,   15,    7,   22,   67},
+    {   16,    8,   27,   15,    9,   14,   75},
+    {   14,   12,   22,   13,   10,   11,   79},
+    {   12,   14,   21,   12,   12,   10,   78},
+    {   12,   13,   19,   16,    6,   73,   23},
+    {   13,   10,   24,   17,    4,   49,   46},
+    {   14,    7,   26,   17,    7,   27,   65},
+    {   15,    7,   25,   16,   10,   13,   77},
+    {   15,    9,   22,   15,   12,    7,   83},
+    {   14,   12,   19,   14,   13,    5,   85},
+    {   13,   14,   18,   13,   13,    5,   85},
+    {   12,   14,   18,   12,   14,    7,   82},
+    {   14,   12,   19,   16,    7,   40,   56},
+    {   14,    9,   22,   15,   11,   17,   74},
+    {   14,    8,   22,   15,   13,    6,   84},
+    {   14,    9,   21,   15,   14,    2,   87},
+    {   13,   11,   18,   14,   14,    2,   88},
+    {   13,   13,   17,   14,   15,    2,   88},
+    {   12,   14,   16,   13,   16,    3,   87},
+    {   12,   15,   16,   12,   15,    6,   83},
+    {   14,   11,   18,   14,   14,   10,   81},
+    {   14,    9,   19,   14,   15,    3,   87},
+    {   14,   10,   19,   14,   15,    0,   90},
+    {   13,   11,   17,   14,   15,    0,   90},
+    {   13,   12,   17,   14,   15,    1,   89},
+    {   13,   14,   16,   13,   16,    2,   88},
+    {   12,   15,   15,   12,   16,    3,   86},
+    {   12,   14,   16,   13,   16,    6,   83},
+    {   13,   11,   17,   14,   16,    2,   87},
+    {   14,   11,   18,   14,   16,    1,   88},
+    {   14,   11,   17,   14,   15,    2,   88},
+    {   13,   13,   17,   14,   15,    3,   87},
+    {   13,   13,   17,   14,   15,    3,   86},
+    {   13,   14,   16,   13,   16,    4,   85},
+    {   12,   15,   16,   12,   16,    5,   83},
+    {   12,   15,   16,   13,   15,    7,   81}
+  },
+  {
+    {   11,   11,   11,   53,    9,   12,   11},
+    {   18,   10,   10,   32,   13,   10,   11},
+    {   40,    8,   11,   20,   13,   11,   11},
+    {   56,   14,    9,   16,   13,   12,   10},
+    {   47,   37,    6,   13,   12,   12,   10},
+    {   23,   60,   10,   12,   13,   11,   11},
+    {   10,   50,   33,   11,   12,   11,   11},
+    {   13,   15,   66,   12,   12,   11,   11},
+    {   15,   11,   11,   62,   26,    9,   10},
+    {   13,   12,   10,   57,   17,   11,   10},
+    {   17,   12,   10,   43,   15,   11,   10},
+    {   34,    8,   11,   30,   14,   12,   10},
+    {   49,   12,   10,   22,   13,   12,   10},
+    {   45,   31,    7,   17,   13,   12,   10},
+    {   28,   49,   11,   14,   13,   12,   10},
+    {   17,   41,   31,   13,   13,   11,   10},
+    {   14,   11,   11,   33,   59,    6,   11},
+    {   13,   11,   11,   48,   42,    8,   10},
+    {   12,   12,   11,   51,   30,    9,   10},
+    {   18,   10,   11,   45,   22,   11,   10},
+    {   31,    8,   11,   37,   17,   12,   10},
+    {   41,   13,   10,   28,   15,   12,    9},
+    {   39,   27,    8,   21,   15,   12,   10},
+    {   29,   36,   15,   17,   15,   12,   10},
+    {   11,   12,   11,   11,   63,   26,    8},
+    {   12,   11,   11,   23,   61,   16,   10},
+    {   12,   12,   11,   34,   53,   10,   10},
+    {   13,   12,   11,   41,   43,    9,   10},
+    {   18,   10,   11,   42,   33,    9,   10},
+    {   27,    9,   11,   37,   26,   10,   10},
+    {   33,   14,   10,   31,   22,   11,   10},
+    {   32,   23,   12,   24,   20,   11,   10},
+    {   11,   12,   11,   10,   32,   61,    5},
+    {   11,   12,   11,   11,   49,   42,    6},
+    {   11,   12,   11,   17,   58,   26,    8},
+    {   11,   12,   11,   25,   58,   16,    9},
+    {   13,   11,   11,   32,   52,   11,    9},
+    {   17,   10,   11,   35,   43,   10,   10},
+    {   23,   11,   10,   34,   35,   10,   10},
+    {   27,   15,   11,   29,   29,   12,    9},
+    {   11,   12,   11,   13,    9,   70,   17},
+    {   11,   11,   11,   12,   23,   65,   10},
+    {   11,   12,   10,   12,   38,   53,    6},
+    {   11,   12,   10,   14,   51,   37,    7},
+    {   11,   11,   11,   20,   56,   24,    8},
+    {   12,   11,   11,   26,   53,   17,    9},
+    {   16,   11,   11,   30,   47,   14,    9},
+    {   20,   13,   11,   30,   38,   14,   10},
+    {   11,   11,   10,   13,    9,   35,   52},
+    {   11,   11,   10,   13,   11,   51,   34},
+    {   11,   11,   10,   13,   20,   57,   20},
+    {   11,   12,   10,   13,   33,   52,   12},
+    {   10,   12,   10,   14,   44,   42,   10},
+    {   11,   12,   10,   18,   49,   32,    9},
+    {   12,   12,   11,   23,   49,   24,   10},
+    {   15,   13,   11,   25,   43,   21,   10},
+    {   11,   11,   11,   11,   16,    0,   81},
+    {   10,   12,   10,   12,   16,   16,   65},
+    {   11,   11,   10,   13,   17,   33,   47},
+    {   11,   12,   10,   13,   22,   43,   31},
+    {   11,   12,   10,   13,   29,   45,   21},
+    {   11,   12,   11,   15,   37,   40,   16},
+    {   11,   12,   11,   18,   41,   34,   14},
+    {   12,   13,   12,   20,   40,   28,   14}
+  }
+};
+
+ALIGN_DATA(MEMORY_ALIGN_DEF_SIZE, const uint8_t mipOffsetMatrix16x16[6]) = // 6 entries, matching the first dimension of mipMatrix16x16[6][64][7]; presumably one offset per matrix (how it is applied is not visible here)
+{   15,   19,   46,   16,   14,   11};
+
+ALIGN_DATA(MEMORY_ALIGN_DEF_SIZE, const uint8_t mipShiftMatrix16x16[6]) = // 6 entries, matching the first dimension of mipMatrix16x16[6][64][7]; presumably one shift amount per matrix (how it is applied is not visible here)
+{    6,    7,    5,    6,    6,    6};
diff --git a/source/Lib/CommonLib/MotionInfo.h b/source/Lib/CommonLib/MotionInfo.h
index 020323575ff4e0d2cb70e25c9e827457aa7a29cb..20059c29ce0e150cb974be3494775170ce0383b1 100644
--- a/source/Lib/CommonLib/MotionInfo.h
+++ b/source/Lib/CommonLib/MotionInfo.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -103,14 +103,15 @@ struct MotionInfo
   bool     isInter;
   bool     isIBCmot;
   char     interDir;
+  bool     useAltHpelIf;
   uint16_t   sliceIdx;
   Mv      mv     [ NUM_REF_PIC_LIST_01 ];
   int16_t   refIdx [ NUM_REF_PIC_LIST_01 ];
-  uint8_t         GBiIdx;
+  uint8_t         BcwIdx;
   Mv      bv;
-  MotionInfo() : isInter(false), isIBCmot(false), interDir(0), sliceIdx(0), refIdx{ NOT_VALID, NOT_VALID }, GBiIdx(0) { }
+  MotionInfo() : isInter(false), isIBCmot(false), interDir(0), useAltHpelIf(false), sliceIdx(0), refIdx{ NOT_VALID, NOT_VALID }, BcwIdx(0) { }
   // ensure that MotionInfo(0) produces '\x000....' bit pattern - needed to work with AreaBuf - don't use this constructor for anything else
-  MotionInfo(int i) : isInter(i != 0), isIBCmot(false), interDir(0), sliceIdx(0), refIdx{ 0,         0 }, GBiIdx(0) { CHECKD(i != 0, "The argument for this constructor has to be '0'"); }
+  MotionInfo(int i) : isInter(i != 0), isIBCmot(false), interDir(0), useAltHpelIf(false), sliceIdx(0), refIdx{ 0,         0 }, BcwIdx(0) { CHECKD(i != 0, "The argument for this constructor has to be '0'"); }
 
   bool operator==( const MotionInfo& mi ) const
   {
@@ -143,7 +144,7 @@ struct MotionInfo
   }
 };
 
-class GBiMotionParam
+class BcwMotionParam
 {
   bool       m_readOnly[2][33];       // 2 RefLists, 33 RefFrams
   Mv         m_mv[2][33];
@@ -221,7 +222,10 @@ struct LutMotionCand
 {
   static_vector<MotionInfo, MAX_NUM_HMVP_CANDS> lut;
   static_vector<MotionInfo, MAX_NUM_HMVP_CANDS> lutIbc;
-  static_vector<MotionInfo, MAX_NUM_HMVP_CANDS> lutShare;
-  static_vector<MotionInfo, MAX_NUM_HMVP_CANDS> lutShareIbc;
+};
+struct PatentBvCand // plain aggregate: an array of IBC_NUM_CANDIDATES Mv entries plus a counter
+{
+  Mv m_bvCands[IBC_NUM_CANDIDATES]; // candidate storage; presumably IBC block vectors, judging by the names (TODO confirm)
+  int currCnt; // presumably the number of valid entries in m_bvCands (TODO confirm); NOTE(review): no initializer here, so users must set it before reading
 };
 #endif // __MOTIONINFO__
diff --git a/source/Lib/CommonLib/Mv.cpp b/source/Lib/CommonLib/Mv.cpp
index 732e756b5ff21ed995286ef3344c52670bdc348a..386d0874680f79432ae174c44271c0a8cc11491b 100644
--- a/source/Lib/CommonLib/Mv.cpp
+++ b/source/Lib/CommonLib/Mv.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -40,45 +40,64 @@
 #include "Common.h"
 #include "Slice.h"
 
-const MvPrecision Mv::m_amvrPrecision[3] = { MV_PRECISION_QUARTER, MV_PRECISION_INT, MV_PRECISION_4PEL }; // for cu.imv=0, 1 and 2
+const MvPrecision Mv::m_amvrPrecision[4] = { MV_PRECISION_QUARTER, MV_PRECISION_INT, MV_PRECISION_4PEL, MV_PRECISION_HALF }; // for cu.imv=0, 1, 2 and 3
+const MvPrecision Mv::m_amvrPrecAffine[3] = { MV_PRECISION_QUARTER, MV_PRECISION_SIXTEENTH, MV_PRECISION_INT }; // for cu.imv=0, 1 and 2
+const MvPrecision Mv::m_amvrPrecIbc[3] = { MV_PRECISION_INT, MV_PRECISION_INT, MV_PRECISION_4PEL }; // for cu.imv=0, 1 and 2
 
 void roundAffineMv( int& mvx, int& mvy, int nShift )
 {
   const int nOffset = 1 << (nShift - 1);
-  mvx = mvx >= 0 ? (mvx + nOffset) >> nShift : -((-mvx + nOffset) >> nShift);
-  mvy = mvy >= 0 ? (mvy + nOffset) >> nShift : -((-mvy + nOffset) >> nShift);
+  mvx = (mvx + nOffset - (mvx >= 0)) >> nShift; // divide by 2^nShift, round to nearest with ties toward zero (the removed form rounded ties away from zero); relies on arithmetic >> for negative values and on nShift >= 1 for nOffset
+  mvy = (mvy + nOffset - (mvy >= 0)) >> nShift; // same rounding for the vertical component
 }
 
-void clipMv( Mv& rcMv, const Position& pos,
-             const struct Size& size,
-             const SPS& sps )
+void clipMv( Mv& rcMv, const Position& pos, const struct Size& size, const SPS& sps, const PPS& pps ) // clamps rcMv in place against limits derived from pos, the picture size (now read from pps) and the max CU size (from sps)
 {
+  if (sps.getWrapAroundEnabledFlag()) // wrap-around is handled entirely by wrapClipMv; its bool result is ignored here
+  {
+    wrapClipMv(rcMv, pos, size, &sps, &pps);
+    return;
+  }
+
   int iMvShift = MV_FRACTIONAL_BITS_INTERNAL;
   int iOffset = 8;
-  int iHorMax = ( sps.getPicWidthInLumaSamples() + iOffset - ( int ) pos.x - 1 ) << iMvShift;
+  int iHorMax = ( pps.getPicWidthInLumaSamples() + iOffset - (int)pos.x - 1 ) << iMvShift; // picture width now comes from the PPS instead of the SPS
   int iHorMin = ( -( int ) sps.getMaxCUWidth()   - iOffset - ( int ) pos.x + 1 ) << iMvShift;
 
-  int iVerMax = ( sps.getPicHeightInLumaSamples() + iOffset - ( int ) pos.y - 1 ) << iMvShift;
+  int iVerMax = ( pps.getPicHeightInLumaSamples() + iOffset - (int)pos.y - 1 ) << iMvShift; // picture height now comes from the PPS instead of the SPS
   int iVerMin = ( -( int ) sps.getMaxCUHeight()   - iOffset - ( int ) pos.y + 1 ) << iMvShift;
 
-  if( sps.getWrapAroundEnabledFlag() )
+  rcMv.setHor( std::min( iHorMax, std::max( iHorMin, rcMv.getHor() ) ) ); // plain clamp to [iHorMin, iHorMax]
+  rcMv.setVer( std::min( iVerMax, std::max( iVerMin, rcMv.getVer() ) ) ); // plain clamp to [iVerMin, iVerMax]
+}
+
+bool wrapClipMv( Mv& rcMv, const Position& pos, const struct Size& size, const SPS *sps, const PPS *pps ) // wrap-around variant of clipMv: updates rcMv in place; returns false if the horizontal component was out of range and had to be wrapped, true otherwise
+{
+  bool wrapRef = true; // stays true only if mvX already lies inside [iHorMin, iHorMax]
+  int iMvShift = MV_FRACTIONAL_BITS_INTERNAL;
+  int iOffset = 8;
+  int iHorMax = ( pps->getPicWidthInLumaSamples() + sps->getMaxCUWidth() - size.width + iOffset - (int)pos.x - 1 ) << iMvShift; // unlike clipMv, the right limit is extended by (max CU width - block width)
+  int iHorMin = ( -( int ) sps->getMaxCUWidth()                                      - iOffset - ( int ) pos.x + 1 ) << iMvShift;
+  int iVerMax = ( pps->getPicHeightInLumaSamples() + iOffset - (int)pos.y - 1 ) << iMvShift;
+  int iVerMin = ( -( int ) sps->getMaxCUHeight()   - iOffset - ( int ) pos.y + 1 ) << iMvShift;
+  int mvX = rcMv.getHor();
+
+  if(mvX > iHorMax) // beyond the right limit: subtract one wrap-around offset, then clamp (replaces the removed while-loop)
   {
-    int iHorMax = ( sps.getPicWidthInLumaSamples() + sps.getMaxCUWidth() - size.width + iOffset - ( int ) pos.x - 1 ) << iMvShift;
-    int iHorMin = ( -( int ) sps.getMaxCUWidth()                                      - iOffset - ( int ) pos.x + 1 ) << iMvShift;
-    int mvX = rcMv.getHor();
-    while( mvX > iHorMax ) {
-      mvX -= ( sps.getWrapAroundOffset() << iMvShift );
-    }
-    while( mvX < iHorMin ) {
-      mvX += ( sps.getWrapAroundOffset() << iMvShift );
-    }
-    rcMv.setHor( mvX );
-    rcMv.setVer( std::min( iVerMax, std::max( iVerMin, rcMv.getVer() ) ) );
-    return;
+    mvX -= ( sps->getWrapAroundOffset() << iMvShift );
+    mvX = std::min( iHorMax, std::max( iHorMin, mvX ) ); // a single wrap step may still be out of range, so clamp
+    wrapRef = false;
+  }
+  if(mvX < iHorMin) // beyond the left limit: add one wrap-around offset, then clamp
+  {
+    mvX += ( sps->getWrapAroundOffset() << iMvShift );
+    mvX = std::min( iHorMax, std::max( iHorMin, mvX ) );
+    wrapRef = false;
   }
 
-  rcMv.setHor( std::min( iHorMax, std::max( iHorMin, rcMv.getHor() ) ) );
+  rcMv.setHor( mvX ); // horizontal component is wrapped/clamped above; the vertical one is only clamped
   rcMv.setVer( std::min( iVerMax, std::max( iVerMin, rcMv.getVer() ) ) );
+  return wrapRef;
 }
 
 //! \}
diff --git a/source/Lib/CommonLib/Mv.h b/source/Lib/CommonLib/Mv.h
index 51d08d6822ae026c4a8f1725d8b66824cb6e2fdb..e06db56a94c67556308ff24f48bc10e77eb4f55c 100644
--- a/source/Lib/CommonLib/Mv.h
+++ b/source/Lib/CommonLib/Mv.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -53,14 +53,20 @@ enum MvPrecision
   MV_PRECISION_INT      = 2,      // 1-pel, shift 2 bits from 4-pel
   MV_PRECISION_HALF     = 3,      // 1/2-pel
   MV_PRECISION_QUARTER  = 4,      // 1/4-pel (the precision of regular MV difference signaling), shift 4 bits from 4-pel
-  MV_PRECISION_INTERNAL = 6,      // 1/16-pel (the precision of internal MV), shift 6 bits from 4-pel
+  MV_PRECISION_SIXTEENTH = 6,     // 1/16-pel (the precision of internal MV), shift 6 bits from 4-pel
+  MV_PRECISION_INTERNAL = 2 + MV_FRACTIONAL_BITS_INTERNAL,
 };
 
 /// basic motion vector class
 class Mv
 {
 private:
-  static const MvPrecision m_amvrPrecision[3];
+  static const MvPrecision m_amvrPrecision[4]; // grown from 3 to 4 entries in this change
+  static const MvPrecision m_amvrPrecAffine[3]; // separate 3-entry table; presumably the affine AMVR precisions, judging by the name
+  static const MvPrecision m_amvrPrecIbc[3]; // separate 3-entry table; presumably the IBC AMVR precisions, judging by the name
+
+  static const int mvClipPeriod = (1 << MV_BITS); // 2^MV_BITS
+  static const int halMvClipPeriod = (1 << (MV_BITS - 1)); // 2^(MV_BITS-1), i.e. half of mvClipPeriod ("hal" is presumably short for "half")
 
 public:
   int   hor;     ///< horizontal component of motion vector
@@ -121,13 +127,12 @@ public:
   //! shift right with rounding
   void divideByPowerOf2 (const int i)
   {
-#if ME_ENABLE_ROUNDING_OF_MVS
-    const int offset = (i == 0) ? 0 : 1 << (i - 1);
-    hor += offset;
-    ver += offset;
-#endif
-    hor >>= i;
-    ver >>= i;
+    if (i != 0)  // i == 0 leaves the vector untouched (and avoids evaluating 1 << -1 below)
+    {
+      const int offset = (1 << (i - 1));       // half of the divisor 2^i
+      hor = (hor + offset - (hor >= 0)) >> i;  // round to nearest, exact halves towards zero (relies on arithmetic >> for negatives)
+      ver = (ver + offset - (ver >= 0)) >> i;  // same rounding for the vertical component
+    }
   }
 
   const Mv& operator<<= (const int i)
@@ -139,8 +144,12 @@ public:
 
   const Mv& operator>>= ( const int i )
   {
-    hor >>= i;
-    ver >>= i;
+    if (i != 0)  // NOTE: unlike the built-in >>=, this operator rounds; i == 0 is a no-op
+    {
+      const int offset = (1 << (i - 1));       // half of the divisor 2^i
+      hor = (hor + offset - (hor >= 0)) >> i;  // round to nearest, exact halves towards zero (relies on arithmetic >> for negatives)
+      ver = (ver + offset - (ver >= 0)) >> i;  // same rounding for the vertical component
+    }
     return  *this;
   }
 
@@ -166,8 +175,8 @@ public:
 
   const Mv scaleMv( int iScale ) const
   {
-    const int mvx = Clip3( -131072, 131071, (iScale * getHor() + 127 + (iScale * getHor() < 0)) >> 8 );
-    const int mvy = Clip3( -131072, 131071, (iScale * getVer() + 127 + (iScale * getVer() < 0)) >> 8 );
+    const int mvx = Clip3(MV_MIN, MV_MAX, (iScale * getHor() + 128 - (iScale * getHor() >= 0)) >> 8);  // iScale/256 scaling, rounded to nearest (ties towards zero), clipped to [MV_MIN, MV_MAX]
+    const int mvy = Clip3(MV_MIN, MV_MAX, (iScale * getVer() + 128 - (iScale * getVer() >= 0)) >> 8);  // 128 - (x >= 0) equals 127 + (x < 0), so only the clip bounds differ from the previous form
     return Mv( mvx, mvy );
   }
 
@@ -182,27 +191,66 @@ public:
     {
       const int rightShift = -shift;
       const int nOffset = 1 << (rightShift - 1);
-      hor = hor >= 0 ? (hor + nOffset) >> rightShift : -((-hor + nOffset) >> rightShift);
-      ver = ver >= 0 ? (ver + nOffset) >> rightShift : -((-ver + nOffset) >> rightShift);
+      hor = hor >= 0 ? (hor + nOffset - 1) >> rightShift : (hor + nOffset) >> rightShift;
+      ver = ver >= 0 ? (ver + nOffset - 1) >> rightShift : (ver + nOffset) >> rightShift;
     }
   }
 
-  void changePrecisionAmvr(const int amvr, const MvPrecision& dst)
-  {
-    changePrecision(m_amvrPrecision[amvr], dst);
-  }
-
   void roundToPrecision(const MvPrecision& src, const MvPrecision& dst)
   {
     changePrecision(src, dst);
     changePrecision(dst, src);
   }
 
-  void roundToAmvrSignalPrecision(const MvPrecision& src, const int amvr)
+  // translational MV: conversions between MV_PRECISION_INTERNAL and m_amvrPrecision[amvr]
+  void changeTransPrecInternal2Amvr(const int amvr)  // amvr indexes m_amvrPrecision directly; no range check is done here
+  {
+    changePrecision(MV_PRECISION_INTERNAL, m_amvrPrecision[amvr]);
+  }
+
+  void changeTransPrecAmvr2Internal(const int amvr)  // opposite direction of changeTransPrecInternal2Amvr()
+  {
+    changePrecision(m_amvrPrecision[amvr], MV_PRECISION_INTERNAL);
+  }
+
+  void roundTransPrecInternal2Amvr(const int amvr)  // converts to the AMVR precision and straight back (see roundToPrecision)
   {
-    roundToPrecision(src, m_amvrPrecision[amvr]);
+    roundToPrecision(MV_PRECISION_INTERNAL, m_amvrPrecision[amvr]);
   }
 
+  // affine MV: conversions between MV_PRECISION_INTERNAL and m_amvrPrecAffine[amvr]
+  void changeAffinePrecInternal2Amvr(const int amvr)  // amvr indexes m_amvrPrecAffine directly; no range check is done here
+  {
+    changePrecision(MV_PRECISION_INTERNAL, m_amvrPrecAffine[amvr]);
+  }
+
+  void changeAffinePrecAmvr2Internal(const int amvr)  // opposite direction of changeAffinePrecInternal2Amvr()
+  {
+    changePrecision(m_amvrPrecAffine[amvr], MV_PRECISION_INTERNAL);
+  }
+
+  void roundAffinePrecInternal2Amvr(const int amvr)  // converts to the affine AMVR precision and straight back (see roundToPrecision)
+  {
+    roundToPrecision(MV_PRECISION_INTERNAL, m_amvrPrecAffine[amvr]);
+  }
+
+  // IBC block vector: conversions between MV_PRECISION_INTERNAL and m_amvrPrecIbc[amvr]
+  void changeIbcPrecInternal2Amvr(const int amvr)  // amvr indexes m_amvrPrecIbc directly; no range check is done here
+  {
+    changePrecision(MV_PRECISION_INTERNAL, m_amvrPrecIbc[amvr]);
+  }
+
+  void changeIbcPrecAmvr2Internal(const int amvr)  // opposite direction of changeIbcPrecInternal2Amvr()
+  {
+    changePrecision(m_amvrPrecIbc[amvr], MV_PRECISION_INTERNAL);
+  }
+
+  void roundIbcPrecInternal2Amvr(const int amvr)  // converts to the IBC AMVR precision and straight back (see roundToPrecision)
+  {
+    roundToPrecision(MV_PRECISION_INTERNAL, m_amvrPrecIbc[amvr]);
+  }
+
+
   Mv getSymmvdMv(const Mv& curMvPred, const Mv& tarMvPred)
   {
     return Mv(tarMvPred.hor - hor + curMvPred.hor, tarMvPred.ver - ver + curMvPred.ver);
@@ -213,6 +261,13 @@ public:
     hor = Clip3( -(1 << 17), (1 << 17) - 1, hor );
     ver = Clip3( -(1 << 17), (1 << 17) - 1, ver );
   }
+  void mvCliptoStorageBitDepth()  // periodic clipping: wraps each component into [-halMvClipPeriod, halMvClipPeriod - 1] instead of saturating
+  {
+    hor = (hor + mvClipPeriod) & (mvClipPeriod - 1);              // keep the low MV_BITS bits: value in [0, mvClipPeriod - 1]
+    hor = (hor >= halMvClipPeriod) ? (hor - mvClipPeriod) : hor;  // upper half of the period maps back to negative values
+    ver = (ver + mvClipPeriod) & (mvClipPeriod - 1);              // same wrap for the vertical component
+    ver = (ver >= halMvClipPeriod) ? (ver - mvClipPeriod) : ver;
+  }
 };// END CLASS DEFINITION MV
 
 namespace std
@@ -228,7 +283,15 @@ namespace std
 };
 void clipMv ( Mv& rcMv, const struct Position& pos,
               const struct Size& size,
-              const class SPS& sps );
+              const class SPS& sps
+            , const class PPS& pps
+);
+
+bool wrapClipMv( Mv& rcMv, const Position& pos,
+                 const struct Size& size,
+                 const SPS *sps
+               , const PPS* pps
+);
 
 void roundAffineMv( int& mvx, int& mvy, int nShift );
 
diff --git a/source/Lib/CommonLib/NAL.h b/source/Lib/CommonLib/NAL.h
index 57f98f7812e751f783c99c28c97d7bfbebfa5374..9e167bc790e85c3c2fc1cf49af324a887f54442c 100644
--- a/source/Lib/CommonLib/NAL.h
+++ b/source/Lib/CommonLib/NAL.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -50,20 +50,32 @@ struct NALUnit
   NalUnitType m_nalUnitType; ///< nal_unit_type
   uint32_t        m_temporalId;  ///< temporal_id
   uint32_t        m_nuhLayerId;  ///< nuh_layer_id
+  uint32_t        m_forbiddenZeroBit;
+  uint32_t        m_nuhReservedZeroBit;
 
   NALUnit(const NALUnit &src)
   :m_nalUnitType (src.m_nalUnitType)
   ,m_temporalId  (src.m_temporalId)
   ,m_nuhLayerId  (src.m_nuhLayerId)
+  , m_forbiddenZeroBit(src.m_forbiddenZeroBit)      // copied unconditionally, so src must have these two header bits initialised
+  , m_nuhReservedZeroBit(src.m_nuhReservedZeroBit)  // listed in declaration order, after m_nuhLayerId
   { }
   /** construct an NALunit structure with given header values. */
   NALUnit(
     NalUnitType nalUnitType,
     int         temporalId = 0,
+    uint32_t nuhReservedZeroBit = 0,  // NOTE(review): sits before nuhLayerId, so an existing positional third argument now binds here - confirm all callers
+    uint32_t forbiddenZeroBit = 0,    // value stored in m_forbiddenZeroBit
     int         nuhLayerId = 0)
     :m_nalUnitType (nalUnitType)
     ,m_temporalId  (temporalId)
     ,m_nuhLayerId  (nuhLayerId)
+    // Always initialised (no JVET_O0179_PROPOSALB guard): both members are declared and copy-constructed unconditionally.
+    , m_forbiddenZeroBit(forbiddenZeroBit)
+    , m_nuhReservedZeroBit(nuhReservedZeroBit)
+
+
+
   {}
 
   /** default constructor - no initialization; must be performed by user */
@@ -74,32 +86,14 @@ struct NALUnit
   /** returns true if the NALunit is a slice NALunit */
   bool isSlice()
   {
-#if JVET_M0101_HLS
     return m_nalUnitType == NAL_UNIT_CODED_SLICE_TRAIL
         || m_nalUnitType == NAL_UNIT_CODED_SLICE_STSA
         || m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_W_RADL
         || m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_N_LP
         || m_nalUnitType == NAL_UNIT_CODED_SLICE_CRA
+        || m_nalUnitType == NAL_UNIT_CODED_SLICE_GDR  // GDR is reported as a slice type too; isVcl() tests the same eight types
         || m_nalUnitType == NAL_UNIT_CODED_SLICE_RADL
         || m_nalUnitType == NAL_UNIT_CODED_SLICE_RASL;
-#else
-    return m_nalUnitType == NAL_UNIT_CODED_SLICE_TRAIL_R
-        || m_nalUnitType == NAL_UNIT_CODED_SLICE_TRAIL_N
-        || m_nalUnitType == NAL_UNIT_CODED_SLICE_TSA_R
-        || m_nalUnitType == NAL_UNIT_CODED_SLICE_TSA_N
-        || m_nalUnitType == NAL_UNIT_CODED_SLICE_STSA_R
-        || m_nalUnitType == NAL_UNIT_CODED_SLICE_STSA_N
-        || m_nalUnitType == NAL_UNIT_CODED_SLICE_BLA_W_LP
-        || m_nalUnitType == NAL_UNIT_CODED_SLICE_BLA_W_RADL
-        || m_nalUnitType == NAL_UNIT_CODED_SLICE_BLA_N_LP
-        || m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_W_RADL
-        || m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_N_LP
-        || m_nalUnitType == NAL_UNIT_CODED_SLICE_CRA
-        || m_nalUnitType == NAL_UNIT_CODED_SLICE_RADL_N
-        || m_nalUnitType == NAL_UNIT_CODED_SLICE_RADL_R
-        || m_nalUnitType == NAL_UNIT_CODED_SLICE_RASL_N
-        || m_nalUnitType == NAL_UNIT_CODED_SLICE_RASL_R;
-#endif
   }
   bool isSei()
   {
@@ -109,7 +103,14 @@ struct NALUnit
 
   bool isVcl()
   {
-    return ( (uint32_t)m_nalUnitType < 32 );
+    return m_nalUnitType == NAL_UNIT_CODED_SLICE_TRAIL  // explicit list of types replaces the former numeric "< 32" test
+        || m_nalUnitType == NAL_UNIT_CODED_SLICE_STSA   // the eight types below are the same set isSlice() accepts
+        || m_nalUnitType == NAL_UNIT_CODED_SLICE_RADL
+        || m_nalUnitType == NAL_UNIT_CODED_SLICE_RASL
+        || m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_W_RADL
+        || m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_N_LP
+        || m_nalUnitType == NAL_UNIT_CODED_SLICE_CRA
+        || m_nalUnitType == NAL_UNIT_CODED_SLICE_GDR;
   }
 };
 
@@ -148,6 +149,7 @@ struct NALUnitEBSP : public NALUnit
 class AccessUnit : public std::list<NALUnitEBSP*> // NOTE: Should not inherit from STL.
 {
 public:
+  int temporalId;
   ~AccessUnit()
   {
     for (AccessUnit::iterator it = this->begin(); it != this->end(); it++)
diff --git a/source/Lib/CommonLib/PicYuvMD5.cpp b/source/Lib/CommonLib/PicYuvMD5.cpp
index 323b3d888b6c6cada8bfc0479a781f6a32a08d62..febcddaa654c5577514d2f4fb339d8981e30a4c0 100644
--- a/source/Lib/CommonLib/PicYuvMD5.cpp
+++ b/source/Lib/CommonLib/PicYuvMD5.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/Picture.cpp b/source/Lib/CommonLib/Picture.cpp
index 3736daac0c35d20f22b3ef186b867dce329e2888..387ec60c54ce03b303447138e5c0345850268eed 100644
--- a/source/Lib/CommonLib/Picture.cpp
+++ b/source/Lib/CommonLib/Picture.cpp
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2019, ITU/ISO/IEC
+* Copyright (c) 2010-2020, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -38,93 +38,10 @@
 #include "Picture.h"
 #include "SEI.h"
 #include "ChromaFormat.h"
-#if ENABLE_WPP_PARALLELISM
-#if ENABLE_WPP_STATIC_LINK
-#include <atomic>
-#else
-#include <condition_variable>
-#endif
-#endif
-
-
-#if ENABLE_WPP_PARALLELISM || ENABLE_SPLIT_PARALLELISM
-#if ENABLE_WPP_PARALLELISM
-#if ENABLE_WPP_STATIC_LINK
-class SyncObj
-{
-public:
-  SyncObj() : m_Val(-1) {}
-  ~SyncObj()            {}
-
-  void reset()
-  {
-    m_Val = -1;
-  }
-
-  bool isReady( int64_t val ) const
-  {
-//    std::cout << "is ready m_Val " << m_Val << " val " << val << std::endl;
-    return m_Val >= val;
-  }
+#include "CommonLib/InterpolationFilter.h"
 
-  void wait( int64_t idx, int ctuPosY  )
-  {
-    while( ! isReady( idx ) )
-    {
-    }
-  }
-
-  void set( int64_t val, int ctuPosY)
-  {
-    m_Val = val;
-  }
-
-private:
-  std::atomic<int>         m_Val;
-};
-#else
-class SyncObj
-{
-public:
-  SyncObj() : m_Val(-1) {}
-  ~SyncObj()            {}
 
-  void reset()
-  {
-    std::unique_lock< std::mutex > lock( m_mutex );
-
-    m_Val = -1;
-  }
-
-  bool isReady( int64_t val ) const
-  {
-    return m_Val >= val;
-  }
-
-  void wait( int64_t idx, int ctuPosY  )
-  {
-    std::unique_lock< std::mutex > lock( m_mutex );
-
-    while( ! isReady( idx ) )
-    {
-      m_cv.wait( lock );
-    }
-  }
-
-  void set( int64_t val, int ctuPosY)
-  {
-    std::unique_lock< std::mutex > lock( m_mutex );
-    m_Val = val;
-    m_cv.notify_all();
-  }
-
-private:
-  int64_t                 m_Val;
-  std::condition_variable m_cv;
-  std::mutex              m_mutex;
-};
-#endif
-#endif
+#if ENABLE_SPLIT_PARALLELISM
 
 int g_wppThreadId( 0 );
 #pragma omp threadprivate(g_wppThreadId)
@@ -138,13 +55,6 @@ int g_splitJobId( 0 );
 #endif
 
 Scheduler::Scheduler() :
-#if ENABLE_WPP_PARALLELISM
-  m_numWppThreads( 1 ),
-  m_numWppDataInstances( 1 )
-#endif
-#if ENABLE_SPLIT_PARALLELISM && ENABLE_WPP_PARALLELISM
-  ,
-#endif
 #if ENABLE_SPLIT_PARALLELISM
   m_numSplitThreads( 1 )
 #endif
@@ -153,13 +63,6 @@ Scheduler::Scheduler() :
 
 Scheduler::~Scheduler()
 {
-#if ENABLE_WPP_PARALLELISM
-  for( auto & so : m_SyncObjs )
-  {
-    delete so;
-  }
-  m_SyncObjs.clear();
-#endif
 }
 
 #if ENABLE_SPLIT_PARALLELISM
@@ -227,37 +130,6 @@ void Scheduler::setSplitThreadId( const int tId )
 #endif
 
 
-#if ENABLE_WPP_PARALLELISM
-unsigned Scheduler::getWppDataId( int lID ) const
-{
-  const int tId = lID == CURR_THREAD_ID ? g_wppThreadId : lID;
-
-#if ENABLE_SPLIT_PARALLELISM
-  if( m_numSplitThreads > 1 )
-  {
-    return tId * NUM_RESERVERD_SPLIT_JOBS;
-  }
-  else
-  {
-    return tId;
-  }
-#else
-  return tId;
-#endif
-}
-
-unsigned Scheduler::getWppThreadId() const
-{
-  return g_wppThreadId;
-}
-
-void Scheduler::setWppThreadId( const int tId )
-{
-  g_wppThreadId = tId == CURR_THREAD_ID ? omp_get_thread_num() : tId;
-
-  CHECK( g_wppThreadId >= PARL_WPP_MAX_NUM_THREADS, "The WPP thread ID " << g_wppThreadId << " is invalid!" );
-}
-#endif
 
 unsigned Scheduler::getDataId() const
 {
@@ -266,12 +138,6 @@ unsigned Scheduler::getDataId() const
   {
     return getSplitDataId();
   }
-#endif
-#if ENABLE_WPP_PARALLELISM
-  if( m_numWppThreads > 1 )
-  {
-    return getWppDataId();
-  }
 #endif
   return 0;
 }
@@ -281,44 +147,6 @@ bool Scheduler::init( const int ctuYsize, const int ctuXsize, const int numWppTh
 #if ENABLE_SPLIT_PARALLELISM
   m_numSplitThreads = numSplitThreads;
 #endif
-#if ENABLE_WPP_PARALLELISM
-  m_firstNonFinishedLine    = 0;
-  m_numWppThreadsRunning    = 1;
-  m_numWppDataInstances     = numWppThreadsRunning+numWppExtraLines;
-  m_numWppThreads           = numWppThreadsRunning;
-  m_ctuYsize                = ctuYsize;
-  m_ctuXsize                = ctuXsize;
-
-  if( m_SyncObjs.size() == 0 )
-  {
-    m_SyncObjs.reserve( ctuYsize );
-    for( int i = (int)m_SyncObjs.size(); i < ctuYsize; i++ )
-    {
-      m_SyncObjs.push_back( new SyncObj );
-    }
-  }
-  else
-  {
-    CHECK( m_SyncObjs.size() != ctuYsize, "");
-  }
-
-  for( int i = 0; i < ctuYsize; i++ )
-  {
-    m_SyncObjs[i]->reset();
-  }
-
-  if( m_numWppThreads != m_numWppDataInstances )
-  {
-    m_LineDone.clear();
-    m_LineDone.resize(ctuYsize, -1);
-
-    m_LineProc.clear();
-    m_LineProc.resize(ctuYsize, false);
-
-    m_SyncObjs[0]->set(0,0);
-    m_LineProc[0]=true;
-  }
-#endif
 
   return true;
 }
@@ -328,107 +156,11 @@ int Scheduler::getNumPicInstances() const
 {
 #if !ENABLE_SPLIT_PARALLELISM
   return 1;
-#elif !ENABLE_WPP_PARALLELISM
-  return ( m_numSplitThreads > 1 ? m_numSplitThreads : 1 );
 #else
-  return m_numSplitThreads > 1 ? m_numWppDataInstances * m_numSplitThreads : 1;
+  return ( m_numSplitThreads > 1 ? m_numSplitThreads : 1 );
 #endif
 }
 
-#if ENABLE_WPP_PARALLELISM
-void Scheduler::wait( const int ctuPosX, const int ctuPosY )
-{
-  if( m_numWppThreads == m_numWppDataInstances )
-  {
-    if( ctuPosY > 0 && ctuPosX+1 < m_ctuXsize)
-    {
-      m_SyncObjs[ctuPosY-1]->wait( ctuPosX+1, ctuPosY-1 );
-    }
-    return;
-  }
-
-  m_SyncObjs[ctuPosY]->wait( ctuPosX, ctuPosY );
-}
-
-void Scheduler::setReady(const int ctuPosX, const int ctuPosY)
-{
-  if( m_numWppThreads == m_numWppDataInstances )
-  {
-    m_SyncObjs[ctuPosY]->set( ctuPosX, ctuPosY);
-    return;
-  }
-
-  std::unique_lock< std::mutex > lock( m_mutex );
-
-  if( ctuPosX+1 == m_ctuXsize )
-  {
-    m_LineProc[ctuPosY] = true; //prevent line from be further evaluated
-    m_LineDone[ctuPosY] = std::numeric_limits<int>::max();
-    m_firstNonFinishedLine = ctuPosY+1;
-  }
-  else
-  {
-    m_LineDone[ctuPosY] = ctuPosX;
-    m_LineProc[ctuPosY] = false;    // mark currently not processed
-  }
-
-  int lastLine = m_firstNonFinishedLine + m_numWppDataInstances;
-  lastLine = std::min( m_ctuYsize, lastLine )-1-m_firstNonFinishedLine;
-
-  m_numWppThreadsRunning--;
-
-  Position pos;
-  //if the current encoder is the last
-  const bool c1 = (ctuPosY == m_firstNonFinishedLine + m_numWppThreads - 1);
-  const bool c2 = (ctuPosY+1 <= m_firstNonFinishedLine+lastLine);
-  const bool c3 = (ctuPosX >= m_ctuXsize/4);
-  if( c1 && c2 && c3 && getNextCtu( pos, ctuPosY+1, 4 ) )
-  {
-    //  try to continue in the next row
-    // go on in the current line
-    m_SyncObjs[pos.y]->set(pos.x, pos.y);
-    m_numWppThreadsRunning++;
-  }
-  else if( getNextCtu( pos, ctuPosY, 1 ) )
-  {
-    //  try to continue in the same row
-    // go on in the current line
-    m_SyncObjs[pos.y]->set(pos.x, pos.y);
-    m_numWppThreadsRunning++;
-  }
-  for( int i = m_numWppThreadsRunning; i < m_numWppThreads; i++ )
-  {
-   // just go and get a job
-    for( int y = 0; y <= lastLine; y++ )
-    {
-      if( getNextCtu( pos, m_firstNonFinishedLine+y, 1 ))
-      {
-        m_SyncObjs[pos.y]->set(pos.x, pos.y);
-        m_numWppThreadsRunning++;
-        break;
-      }
-    }
-  }
-}
-
-
-bool Scheduler::getNextCtu( Position& pos, int ctuLine, int offset)
-{
-  int x = m_LineDone[ctuLine] + 1;
-  if( ! m_LineProc[ctuLine] )
-  {
-    int maxXOffset = x+offset >= m_ctuXsize ? m_ctuXsize-1 : x+offset;
-    if( (ctuLine == 0 || m_LineDone[ctuLine-1]>=maxXOffset) && (x==0 || m_LineDone[ctuLine]>=+x-1))
-    {
-      m_LineProc[ctuLine] = true;
-      pos.x = x; pos.y = ctuLine;
-      return true;
-    }
-  }
-  return false;
-}
-
-#endif
 #endif
 
 
@@ -436,288 +168,10 @@ bool Scheduler::getNextCtu( Position& pos, int ctuLine, int offset)
 // picture methods
 // ---------------------------------------------------------------------------
 
-#if HEVC_TILES_WPP
-
-Tile::Tile()
-: m_tileWidthInCtus     (0)
-, m_tileHeightInCtus    (0)
-, m_rightEdgePosInCtus  (0)
-, m_bottomEdgePosInCtus (0)
-, m_firstCtuRsAddr      (0)
-{
-}
-
-Tile::~Tile()
-{
-}
-
-
-TileMap::TileMap()
-  : pcv(nullptr)
-  , tiles(0)
-  , numTiles(0)
-  , numTileColumns(0)
-  , numTileRows(0)
-  , tileIdxMap(nullptr)
-  , ctuTsToRsAddrMap(nullptr)
-  , ctuRsToTsAddrMap(nullptr)
-{
-}
-
-void TileMap::create( const SPS& sps, const PPS& pps )
-{
-  pcv = pps.pcv;
-
-  numTileColumns = pps.getNumTileColumnsMinus1() + 1;
-  numTileRows    = pps.getNumTileRowsMinus1() + 1;
-  numTiles       = numTileColumns * numTileRows;
-  tiles.resize( numTiles );
-
-  const uint32_t numCtusInFrame = pcv->sizeInCtus;
-  tileIdxMap       = new uint32_t[numCtusInFrame];
-  ctuTsToRsAddrMap = new uint32_t[numCtusInFrame+1];
-  ctuRsToTsAddrMap = new uint32_t[numCtusInFrame+1];
-
-  initTileMap( sps, pps );
-  initCtuTsRsAddrMap();
-}
-
-void TileMap::destroy()
-{
-  tiles.clear();
-
-  if ( tileIdxMap )
-  {
-    delete[] tileIdxMap;
-    tileIdxMap = nullptr;
-  }
-
-  if ( ctuTsToRsAddrMap )
-  {
-    delete[] ctuTsToRsAddrMap;
-    ctuTsToRsAddrMap = nullptr;
-  }
-
-  if ( ctuRsToTsAddrMap )
-  {
-    delete[] ctuRsToTsAddrMap;
-    ctuRsToTsAddrMap = nullptr;
-  }
-}
-
-void TileMap::initTileMap( const SPS& sps, const PPS& pps )
-{
-  const uint32_t frameWidthInCtus  = pcv->widthInCtus;
-  const uint32_t frameHeightInCtus = pcv->heightInCtus;
-
-  if( pps.getTileUniformSpacingFlag() )
-  {
-    //set width and height for each (uniform) tile
-    for(int row=0; row < numTileRows; row++)
-    {
-      for(int col=0; col < numTileColumns; col++)
-      {
-        const int tileIdx = row * numTileColumns + col;
-        tiles[tileIdx].setTileWidthInCtus(  (col+1)*frameWidthInCtus/numTileColumns - (col*frameWidthInCtus)/numTileColumns );
-        tiles[tileIdx].setTileHeightInCtus( (row+1)*frameHeightInCtus/numTileRows   - (row*frameHeightInCtus)/numTileRows );
-      }
-    }
-  }
-  else
-  {
-    //set the width for each tile
-    for(int row=0; row < numTileRows; row++)
-    {
-      int cumulativeTileWidth = 0;
-      for(int col=0; col < numTileColumns - 1; col++)
-      {
-        tiles[row * numTileColumns + col].setTileWidthInCtus( pps.getTileColumnWidth(col) );
-        cumulativeTileWidth += pps.getTileColumnWidth(col);
-      }
-      tiles[row * numTileColumns + numTileColumns - 1].setTileWidthInCtus( frameWidthInCtus-cumulativeTileWidth );
-    }
-
-    //set the height for each tile
-    for(int col=0; col < numTileColumns; col++)
-    {
-      int cumulativeTileHeight = 0;
-      for(int row=0; row < numTileRows - 1; row++)
-      {
-        tiles[row * numTileColumns + col].setTileHeightInCtus( pps.getTileRowHeight(row) );
-        cumulativeTileHeight += pps.getTileRowHeight(row);
-      }
-      tiles[(numTileRows - 1) * numTileColumns + col].setTileHeightInCtus( frameHeightInCtus-cumulativeTileHeight );
-    }
-  }
-
-  // Tile size check
-  int minWidth  = 1;
-  int minHeight = 1;
-#if !JVET_M0101_HLS
-  const int profileIdc = sps.getPTL()->getGeneralPTL()->getProfileIdc();
-#else
-  const int profileIdc = sps.getProfileTierLevel()->getProfileIdc();
-#endif
-  if (  profileIdc == Profile::MAIN || profileIdc == Profile::MAIN10)
-  {
-    if (pps.getTilesEnabledFlag())
-    {
-      minHeight = 64  / sps.getMaxCUHeight();
-      minWidth  = 256 / sps.getMaxCUWidth();
-    }
-  }
-  for(int row=0; row < numTileRows; row++)
-  {
-    for(int col=0; col < numTileColumns; col++)
-    {
-      const int tileIdx = row * numTileColumns + col;
-      if(tiles[tileIdx].getTileWidthInCtus() < minWidth)   { THROW("Invalid tile size"); }
-      if(tiles[tileIdx].getTileHeightInCtus() < minHeight) { THROW("Invalid tile size"); }
-    }
-  }
-
-  //initialize each tile of the current picture
-  for( int row=0; row < numTileRows; row++ )
-  {
-    for( int col=0; col < numTileColumns; col++ )
-    {
-      const int tileIdx = row * numTileColumns + col;
-
-      //initialize the RightEdgePosInCU for each tile
-      int rightEdgePosInCTU = 0;
-      for( int i=0; i <= col; i++ )
-      {
-        rightEdgePosInCTU += tiles[row * numTileColumns + i].getTileWidthInCtus();
-      }
-      tiles[tileIdx].setRightEdgePosInCtus(rightEdgePosInCTU-1);
-
-      //initialize the BottomEdgePosInCU for each tile
-      int bottomEdgePosInCTU = 0;
-      for( int i=0; i <= row; i++ )
-      {
-        bottomEdgePosInCTU += tiles[i * numTileColumns + col].getTileHeightInCtus();
-      }
-      tiles[tileIdx].setBottomEdgePosInCtus(bottomEdgePosInCTU-1);
-
-      //initialize the FirstCUAddr for each tile
-      tiles[tileIdx].setFirstCtuRsAddr( (tiles[tileIdx].getBottomEdgePosInCtus() - tiles[tileIdx].getTileHeightInCtus() + 1) * frameWidthInCtus +
-                                         tiles[tileIdx].getRightEdgePosInCtus()  - tiles[tileIdx].getTileWidthInCtus()  + 1);
-    }
-  }
-
-  int  columnIdx = 0;
-  int  rowIdx = 0;
-
-  //initialize the TileIdxMap
-  const uint32_t numCtusInFrame = pcv->sizeInCtus;
-  for( int i=0; i<numCtusInFrame; i++)
-  {
-    for( int col=0; col < numTileColumns; col++)
-    {
-      if(i % frameWidthInCtus <= tiles[col].getRightEdgePosInCtus())
-      {
-        columnIdx = col;
-        break;
-      }
-    }
-    for(int row=0; row < numTileRows; row++)
-    {
-      if(i / frameWidthInCtus <= tiles[row*numTileColumns].getBottomEdgePosInCtus())
-      {
-        rowIdx = row;
-        break;
-      }
-    }
-    tileIdxMap[i] = rowIdx * numTileColumns + columnIdx;
-  }
-}
-
-void TileMap::initCtuTsRsAddrMap()
-{
-  //generate the Coding Order Map and Inverse Coding Order Map
-  const uint32_t numCtusInFrame = pcv->sizeInCtus;
-  for(int ctuTsAddr=0, ctuRsAddr=0; ctuTsAddr<numCtusInFrame; ctuTsAddr++, ctuRsAddr = calculateNextCtuRSAddr(ctuRsAddr))
-  {
-    ctuTsToRsAddrMap[ctuTsAddr] = ctuRsAddr;
-    ctuRsToTsAddrMap[ctuRsAddr] = ctuTsAddr;
-  }
-  ctuTsToRsAddrMap[numCtusInFrame] = numCtusInFrame;
-  ctuRsToTsAddrMap[numCtusInFrame] = numCtusInFrame;
-}
-
-uint32_t TileMap::calculateNextCtuRSAddr( const uint32_t currCtuRsAddr ) const
-{
-  const uint32_t frameWidthInCtus = pcv->widthInCtus;
-  uint32_t  nextCtuRsAddr;
-
-  //get the tile index for the current CTU
-  const uint32_t uiTileIdx = getTileIdxMap(currCtuRsAddr);
 
-  //get the raster scan address for the next CTU
-  if( currCtuRsAddr % frameWidthInCtus == tiles[uiTileIdx].getRightEdgePosInCtus() && currCtuRsAddr / frameWidthInCtus == tiles[uiTileIdx].getBottomEdgePosInCtus() )
-  //the current CTU is the last CTU of the tile
-  {
-    if(uiTileIdx+1 == numTiles)
-    {
-      nextCtuRsAddr = pcv->sizeInCtus;
-    }
-    else
-    {
-      nextCtuRsAddr = tiles[uiTileIdx+1].getFirstCtuRsAddr();
-    }
-  }
-  else //the current CTU is not the last CTU of the tile
-  {
-    if( currCtuRsAddr % frameWidthInCtus == tiles[uiTileIdx].getRightEdgePosInCtus() )  //the current CTU is on the rightmost edge of the tile
-    {
-      nextCtuRsAddr = currCtuRsAddr + frameWidthInCtus - tiles[uiTileIdx].getTileWidthInCtus() + 1;
-    }
-    else
-    {
-      nextCtuRsAddr = currCtuRsAddr + 1;
-    }
-  }
-
-  return nextCtuRsAddr;
-}
-
-uint32_t TileMap::getSubstreamForCtuAddr(const uint32_t ctuAddr, const bool bAddressInRaster, Slice *pcSlice) const
-{
-  const bool bWPPEnabled = pcSlice->getPPS()->getEntropyCodingSyncEnabledFlag();
-  uint32_t subStrm;
-
-  if( (bWPPEnabled && pcv->heightInCtus > 1) || (numTiles > 1) ) // wavefronts, and possibly tiles being used.
-  {
-    const uint32_t ctuRsAddr = bAddressInRaster ? ctuAddr : getCtuTsToRsAddrMap(ctuAddr);
-    const uint32_t tileIndex = getTileIdxMap(ctuRsAddr);
-
-    if (bWPPEnabled)
-    {
-      const uint32_t firstCtuRsAddrOfTile     = tiles[tileIndex].getFirstCtuRsAddr();
-      const uint32_t tileYInCtus              = firstCtuRsAddrOfTile / pcv->widthInCtus;
-      const uint32_t ctuLine                  = ctuRsAddr / pcv->widthInCtus;
-      const uint32_t startingSubstreamForTile = (tileYInCtus * numTileColumns) + (tiles[tileIndex].getTileHeightInCtus() * (tileIndex % numTileColumns));
-
-      subStrm = startingSubstreamForTile + (ctuLine - tileYInCtus);
-    }
-    else
-    {
-      subStrm = tileIndex;
-    }
-  }
-  else
-  {
-    subStrm = 0;
-  }
-  return subStrm;
-}
-#endif
 
 Picture::Picture()
 {
-#if HEVC_TILES_WPP
-  tileMap              = nullptr;
-#endif
   cs                   = nullptr;
   m_bIsBorderExtended  = false;
   usedByCurr           = false;
@@ -728,20 +182,24 @@ Picture::Picture()
   layer                = std::numeric_limits<uint32_t>::max();
   fieldPic             = false;
   topField             = false;
+  precedingDRAP        = false;
   for( int i = 0; i < MAX_NUM_CHANNEL_TYPE; i++ )
   {
     m_prevQP[i] = -1;
   }
   m_spliceIdx = NULL;
   m_ctuNums = 0;
+  layerId = NOT_VALID;
 }
 
-void Picture::create(const ChromaFormat &_chromaFormat, const Size &size, const unsigned _maxCUSize, const unsigned _margin, const bool _decoder)
+void Picture::create( const ChromaFormat &_chromaFormat, const Size &size, const unsigned _maxCUSize, const unsigned _margin, const bool _decoder, const int _layerId )
 {
+  layerId = _layerId;
   UnitArea::operator=( UnitArea( _chromaFormat, Area( Position{ 0, 0 }, size ) ) );
-  margin            =  _margin;
+  margin            =  MAX_SCALING_RATIO*_margin;
   const Area a      = Area( Position(), size );
-  M_BUFS( 0, PIC_RECONSTRUCTION ).create( _chromaFormat, a, _maxCUSize, _margin, MEMORY_ALIGN_DEF_SIZE );
+  M_BUFS( 0, PIC_RECONSTRUCTION ).create( _chromaFormat, a, _maxCUSize, margin, MEMORY_ALIGN_DEF_SIZE );
+  M_BUFS( 0, PIC_RECON_WRAP ).create( _chromaFormat, a, _maxCUSize, margin, MEMORY_ALIGN_DEF_SIZE );
 
   if( !_decoder )
   {
@@ -758,11 +216,7 @@ void Picture::create(const ChromaFormat &_chromaFormat, const Size &size, const
 void Picture::destroy()
 {
 #if ENABLE_SPLIT_PARALLELISM
-#if ENABLE_WPP_PARALLELISM
-  for( int jId = 0; jId < ( PARL_SPLIT_MAX_NUM_THREADS * PARL_WPP_MAX_NUM_THREADS ); jId++ )
-#else
   for( int jId = 0; jId < PARL_SPLIT_MAX_NUM_THREADS; jId++ )
-#endif
 #endif
   for (uint32_t t = 0; t < NUM_PIC_TYPES; t++)
   {
@@ -788,14 +242,6 @@ void Picture::destroy()
   }
   SEIs.clear();
 
-#if HEVC_TILES_WPP
-  if ( tileMap )
-  {
-    tileMap->destroy();
-    delete tileMap;
-    tileMap = nullptr;
-  }
-#endif
   if (m_spliceIdx)
   {
     delete[] m_spliceIdx;
@@ -868,16 +314,16 @@ const CPelBuf     Picture::getResiBuf(const CompArea &blk)  const { return getBu
        PelUnitBuf Picture::getResiBuf(const UnitArea &unit)       { return getBuf(unit, PIC_RESIDUAL); }
 const CPelUnitBuf Picture::getResiBuf(const UnitArea &unit) const { return getBuf(unit, PIC_RESIDUAL); }
 
-       PelBuf     Picture::getRecoBuf(const ComponentID compID)       { return getBuf(compID,                    PIC_RECONSTRUCTION); }
-const CPelBuf     Picture::getRecoBuf(const ComponentID compID) const { return getBuf(compID,                    PIC_RECONSTRUCTION); }
-       PelBuf     Picture::getRecoBuf(const CompArea &blk)            { return getBuf(blk,                       PIC_RECONSTRUCTION); }
-const CPelBuf     Picture::getRecoBuf(const CompArea &blk)      const { return getBuf(blk,                       PIC_RECONSTRUCTION); }
-       PelUnitBuf Picture::getRecoBuf(const UnitArea &unit)           { return getBuf(unit,                      PIC_RECONSTRUCTION); }
-const CPelUnitBuf Picture::getRecoBuf(const UnitArea &unit)     const { return getBuf(unit,                      PIC_RECONSTRUCTION); }
-       PelUnitBuf Picture::getRecoBuf()                               { return M_BUFS(scheduler.getSplitPicId(), PIC_RECONSTRUCTION); }
-const CPelUnitBuf Picture::getRecoBuf()                         const { return M_BUFS(scheduler.getSplitPicId(), PIC_RECONSTRUCTION); }
+       PelBuf     Picture::getRecoBuf(const ComponentID compID, bool wrap)       { return getBuf(compID,                    wrap ? PIC_RECON_WRAP : PIC_RECONSTRUCTION); } // wrap == true selects PIC_RECON_WRAP, otherwise PIC_RECONSTRUCTION
+const CPelBuf     Picture::getRecoBuf(const ComponentID compID, bool wrap) const { return getBuf(compID,                    wrap ? PIC_RECON_WRAP : PIC_RECONSTRUCTION); } // const variant of the per-component accessor
+       PelBuf     Picture::getRecoBuf(const CompArea &blk, bool wrap)            { return getBuf(blk,                       wrap ? PIC_RECON_WRAP : PIC_RECONSTRUCTION); } // same buffer selection, for one component area
+const CPelBuf     Picture::getRecoBuf(const CompArea &blk, bool wrap)      const { return getBuf(blk,                       wrap ? PIC_RECON_WRAP : PIC_RECONSTRUCTION); } // const variant of the component-area accessor
+       PelUnitBuf Picture::getRecoBuf(const UnitArea &unit, bool wrap)           { return getBuf(unit,                      wrap ? PIC_RECON_WRAP : PIC_RECONSTRUCTION); } // same buffer selection, for a unit area
+const CPelUnitBuf Picture::getRecoBuf(const UnitArea &unit, bool wrap)     const { return getBuf(unit,                      wrap ? PIC_RECON_WRAP : PIC_RECONSTRUCTION); } // const variant of the unit-area accessor
+       PelUnitBuf Picture::getRecoBuf(bool wrap)                                 { return M_BUFS(scheduler.getSplitPicId(), wrap ? PIC_RECON_WRAP : PIC_RECONSTRUCTION); } // no area argument: buffer set picked via scheduler.getSplitPicId()
+const CPelUnitBuf Picture::getRecoBuf(bool wrap)                           const { return M_BUFS(scheduler.getSplitPicId(), wrap ? PIC_RECON_WRAP : PIC_RECONSTRUCTION); } // const variant of the area-less accessor
 
-void Picture::finalInit(const SPS& sps, const PPS& pps, APS& aps)
+void Picture::finalInit( const VPS* vps, const SPS& sps, const PPS& pps, PicHeader *picHeader, APS** alfApss, APS* lmcsAps, APS* scalingListAps )
 {
   for( auto &sei : SEIs )
   {
@@ -886,18 +332,10 @@ void Picture::finalInit(const SPS& sps, const PPS& pps, APS& aps)
   SEIs.clear();
   clearSliceBuffer();
 
-#if HEVC_TILES_WPP
-  if( tileMap )
-  {
-    tileMap->destroy();
-    delete tileMap;
-    tileMap = nullptr;
-  }
-#endif
 
   const ChromaFormat chromaFormatIDC = sps.getChromaFormatIdc();
-  const int          iWidth = sps.getPicWidthInLumaSamples();
-  const int          iHeight = sps.getPicHeightInLumaSamples();
+  const int          iWidth = pps.getPicWidthInLumaSamples();
+  const int          iHeight = pps.getPicHeightInLumaSamples();
 
   if( cs )
   {
@@ -907,22 +345,23 @@ void Picture::finalInit(const SPS& sps, const PPS& pps, APS& aps)
   {
     cs = new CodingStructure( g_globalUnitCache.cuCache, g_globalUnitCache.puCache, g_globalUnitCache.tuCache );
     cs->sps = &sps;
-    cs->create( chromaFormatIDC, Area( 0, 0, iWidth, iHeight ), true );
+    cs->create(chromaFormatIDC, Area(0, 0, iWidth, iHeight), true, (bool)sps.getPLTMode());
   }
 
+  cs->vps = vps;
   cs->picture = this;
   cs->slice   = nullptr;  // the slices for this picture have not been set at this point. update cs->slice after swapSliceObject()
   cs->pps     = &pps;
-  cs->aps = &aps;
-#if HEVC_VPS
-  cs->vps     = nullptr;
-#endif
+  picHeader->setSPSId( sps.getSPSId() );
+  picHeader->setPPSId( pps.getPPSId() );
+  cs->picHeader = picHeader;
+  memcpy(cs->alfApss, alfApss, sizeof(cs->alfApss));
+  cs->lmcsAps = lmcsAps;
+  cs->scalinglistAps = scalingListAps;
   cs->pcv     = pps.pcv;
+  m_conformanceWindow = pps.getConformanceWindow();
+  m_scalingWindow = pps.getScalingWindow();
 
-#if HEVC_TILES_WPP
-  tileMap = new TileMap;
-  tileMap->create( sps, pps );
-#endif
   if (m_spliceIdx == NULL)
   {
     m_ctuNums = cs->pcv->sizeInCtus;
@@ -935,10 +374,12 @@ void Picture::allocateNewSlice()
 {
   slices.push_back(new Slice);
   Slice& slice = *slices.back();
+  memcpy(slice.getAlfAPSs(), cs->alfApss, sizeof(cs->alfApss));
+
 
-  slice.setAPS(cs->aps);
   slice.setPPS( cs->pps);
   slice.setSPS( cs->sps);
+  slice.setVPS( cs->vps);
   if(slices.size()>=2)
   {
     slice.copySliceInfo( slices[slices.size()-2] );
@@ -950,13 +391,17 @@ Slice *Picture::swapSliceObject(Slice * p, uint32_t i)
 {
   p->setSPS(cs->sps);
   p->setPPS(cs->pps);
-  p->setAPS(cs->aps);
+  p->setVPS(cs->vps);
+  p->setAlfAPSs(cs->alfApss);
+
 
   Slice * pTmp = slices[i];
   slices[i] = p;
   pTmp->setSPS(0);
   pTmp->setPPS(0);
-  pTmp->setAPS(0);
+  pTmp->setVPS(0);
+  memset(pTmp->getAlfAPSs(), 0, sizeof(*pTmp->getAlfAPSs())*ALF_CTB_MAX_NUM_APS);
+
   return pTmp;
 }
 
@@ -986,22 +431,308 @@ void Picture::finishParallelPart( const UnitArea& area )
   }
 }
 
-#if ENABLE_WPP_PARALLELISM
-void Picture::finishCtuPart( const UnitArea& ctuArea )
+
+#endif
+
+const TFilterCoeff DownsamplingFilterSRC[8][16][12] = // indexed [filter set][fractional phase][tap]; sampleRateConv selects the set from the scaling ratio when downsampling
+{
+    { // D = 1
+      {   0,   0,   0,   0,   0, 128,   0,   0,   0,   0,   0,   0 },
+      {   0,   0,   0,   2,  -6, 127,   7,  -2,   0,   0,   0,   0 },
+      {   0,   0,   0,   3, -12, 125,  16,  -5,   1,   0,   0,   0 },
+      {   0,   0,   0,   4, -16, 120,  26,  -7,   1,   0,   0,   0 },
+      {   0,   0,   0,   5, -18, 114,  36, -10,   1,   0,   0,   0 },
+      {   0,   0,   0,   5, -20, 107,  46, -12,   2,   0,   0,   0 },
+      {   0,   0,   0,   5, -21,  99,  57, -15,   3,   0,   0,   0 },
+      {   0,   0,   0,   5, -20,  89,  68, -18,   4,   0,   0,   0 },
+      {   0,   0,   0,   4, -19,  79,  79, -19,   4,   0,   0,   0 },
+      {   0,   0,   0,   4, -18,  68,  89, -20,   5,   0,   0,   0 },
+      {   0,   0,   0,   3, -15,  57,  99, -21,   5,   0,   0,   0 },
+      {   0,   0,   0,   2, -12,  46, 107, -20,   5,   0,   0,   0 },
+      {   0,   0,   0,   1, -10,  36, 114, -18,   5,   0,   0,   0 },
+      {   0,   0,   0,   1,  -7,  26, 120, -16,   4,   0,   0,   0 },
+      {   0,   0,   0,   1,  -5,  16, 125, -12,   3,   0,   0,   0 },
+      {   0,   0,   0,   0,  -2,   7, 127,  -6,   2,   0,   0,   0 }
+    },
+    { // D = 1.5
+      {   0,   2,   0, -14,  33,  86,  33, -14,   0,   2,   0,   0 },
+      {   0,   1,   1, -14,  29,  85,  38, -13,  -1,   2,   0,   0 },
+      {   0,   1,   2, -14,  24,  84,  43, -12,  -2,   2,   0,   0 },
+      {   0,   1,   2, -13,  19,  83,  48, -11,  -3,   2,   0,   0 },
+      {   0,   0,   3, -13,  15,  81,  53, -10,  -4,   3,   0,   0 },
+      {   0,   0,   3, -12,  11,  79,  57,  -8,  -5,   3,   0,   0 },
+      {   0,   0,   3, -11,   7,  76,  62,  -5,  -7,   3,   0,   0 },
+      {   0,   0,   3, -10,   3,  73,  65,  -2,  -7,   3,   0,   0 },
+      {   0,   0,   3,  -9,   0,  70,  70,   0,  -9,   3,   0,   0 },
+      {   0,   0,   3,  -7,  -2,  65,  73,   3, -10,   3,   0,   0 },
+      {   0,   0,   3,  -7,  -5,  62,  76,   7, -11,   3,   0,   0 },
+      {   0,   0,   3,  -5,  -8,  57,  79,  11, -12,   3,   0,   0 },
+      {   0,   0,   3,  -4, -10,  53,  81,  15, -13,   3,   0,   0 },
+      {   0,   0,   2,  -3, -11,  48,  83,  19, -13,   2,   1,   0 },
+      {   0,   0,   2,  -2, -12,  43,  84,  24, -14,   2,   1,   0 },
+      {   0,   0,   2,  -1, -13,  38,  85,  29, -14,   1,   1,   0 }
+    },
+    { // D = 2
+      {   0,   5,   -6,  -10,  37,  76,   37,  -10,  -6,    5,  0,   0}, //0
+      {   0,   5,   -4,  -11,  33,  76,   40,  -9,    -7,    5,  0,   0}, //1
+      //{   0,   5,   -3,  -12,  28,  75,   44,  -7,    -8,    5,  1,   0}, //2
+      {  -1,   5,   -3,  -12,  29,  75,   45,  -7,    -8,   5,  0,   0}, //2 new coefficients in m24499
+      {  -1,   4,   -2,  -13,  25,  75,   48,  -5,    -9,    5,  1,   0}, //3
+      {  -1,   4,   -1,  -13,  22,  73,   52,  -3,    -10,  4,  1,   0}, //4
+      {  -1,   4,   0,    -13,  18,  72,   55,  -1,    -11,  4,  2,  -1}, //5
+      {  -1,   4,   1,    -13,  14,  70,   59,  2,    -12,  3,  2,  -1}, //6
+      {  -1,   3,   1,    -13,  11,  68,   62,  5,    -12,  3,  2,  -1}, //7
+      {  -1,   3,   2,    -13,  8,  65,   65,  8,    -13,  2,  3,  -1}, //8
+      {  -1,   2,   3,    -12,  5,  62,   68,  11,    -13,  1,  3,  -1}, //9
+      {  -1,   2,   3,    -12,  2,  59,   70,  14,    -13,  1,  4,  -1}, //10
+      {  -1,   2,   4,    -11,  -1,  55,   72,  18,    -13,  0,  4,  -1}, //11
+      {   0,   1,   4,    -10,  -3,  52,   73,  22,    -13,  -1,  4,  -1}, //12
+      {   0,   1,   5,    -9,    -5,  48,   75,  25,    -13,  -2,  4,  -1}, //13
+      //{   0,   1,   5,    -8,    -7,  44,   75,  28,    -12,  -3,  5,   0}, //14
+      {    0,   0,   5,    -8,   -7,  45,   75,  29,    -12,  -3,  5,  -1}  , //14 new coefficients in m24499
+      {   0,   0,   5,    -7,    -9,  40,   76,  33,    -11,  -4,  5,   0}, //15
+    },
+    { // D = 2.5
+      {   2,  -3,   -9,  6,   39,  58,   39,  6,   -9,  -3,    2,    0}, // 0
+      {   2,  -3,   -9,  4,   38,  58,   43,  7,   -9,  -4,    1,    0}, // 1
+      {   2,  -2,   -9,  2,   35,  58,   44,  9,   -8,  -4,    1,    0}, // 2
+      {   1,  -2,   -9,  1,   34,  58,   46,  11,   -8,  -5,    1,    0}, // 3
+      //{   1,  -1,   -8,  -1,   31,  57,   48,  13,   -8,  -5,    1,    0}, // 4
+      {   1,  -1,   -8,  -1,   31,  57,   47,  13,   -7,  -5,    1,    0},  // 4 new coefficients in m24499
+      {   1,  -1,   -8,  -2,   29,  56,   49,  15,   -7,  -6,    1,    1}, // 5
+      {   1,  0,   -8,  -3,   26,  55,   51,  17,   -7,  -6,    1,    1}, // 6
+      {   1,  0,   -7,  -4,   24,  54,   52,  19,   -6,  -7,    1,    1}, // 7
+      {   1,  0,   -7,  -5,   22,  53,   53,  22,   -5,  -7,    0,    1}, // 8
+      {   1,  1,   -7,  -6,   19,  52,   54,  24,   -4,  -7,    0,    1}, // 9
+      {   1,  1,   -6,  -7,   17,  51,   55,  26,   -3,  -8,    0,    1}, // 10
+      {   1,  1,   -6,  -7,   15,  49,   56,  29,   -2,  -8,    -1,    1}, // 11
+      //{   0,  1,   -5,  -8,   13,  48,   57,  31,   -1,  -8,    -1,    1}, // 12 (superseded; mirror of the old phase-4 row)
+      {   0,  1,   -5,  -7,   13,  47,  57,  31,  -1,    -8,   -1,    1}, // 12 new coefficients in m24499 (mirror of the new phase-4 row)
+      {   0,  1,   -5,  -8,   11,  46,   58,  34,   1,    -9,    -2,    1}, // 13
+      {   0,  1,   -4,  -8,   9,    44,   58,  35,   2,    -9,    -2,    2}, // 14
+      {   0,  1,   -4,  -9,   7,    43,   58,  38,   4,    -9,    -3,    2}, // 15
+    },
+    { // D = 3
+      {  -2,  -7,   0,  17,  35,  43,  35,  17,   0,  -7,  -5,   2 },
+      {  -2,  -7,  -1,  16,  34,  43,  36,  18,   1,  -7,  -5,   2 },
+      {  -1,  -7,  -1,  14,  33,  43,  36,  19,   1,  -6,  -5,   2 },
+      {  -1,  -7,  -2,  13,  32,  42,  37,  20,   3,  -6,  -5,   2 },
+      {   0,  -7,  -3,  12,  31,  42,  38,  21,   3,  -6,  -5,   2 },
+      {   0,  -7,  -3,  11,  30,  42,  39,  23,   4,  -6,  -6,   1 },
+      {   0,  -7,  -4,  10,  29,  42,  40,  24,   5,  -6,  -6,   1 },
+      {   1,  -7,  -4,   9,  27,  41,  40,  25,   6,  -5,  -6,   1 },
+      {   1,  -6,  -5,   7,  26,  41,  41,  26,   7,  -5,  -6,   1 },
+      {   1,  -6,  -5,   6,  25,  40,  41,  27,   9,  -4,  -7,   1 },
+      {   1,  -6,  -6,   5,  24,  40,  42,  29,  10,  -4,  -7,   0 },
+      {   1,  -6,  -6,   4,  23,  39,  42,  30,  11,  -3,  -7,   0 },
+      {   2,  -5,  -6,   3,  21,  38,  42,  31,  12,  -3,  -7,   0 },
+      {   2,  -5,  -6,   3,  20,  37,  42,  32,  13,  -2,  -7,  -1 },
+      {   2,  -5,  -6,   1,  19,  36,  43,  33,  14,  -1,  -7,  -1 },
+      {   2,  -5,  -7,   1,  18,  36,  43,  34,  16,  -1,  -7,  -2 }
+    },
+    { // D = 3.5
+      {  -6,  -3,   5,  19,  31,  36,  31,  19,   5,  -3,  -6,   0 },
+      {  -6,  -4,   4,  18,  31,  37,  32,  20,   6,  -3,  -6,  -1 },
+      {  -6,  -4,   4,  17,  30,  36,  33,  21,   7,  -3,  -6,  -1 },
+      {  -5,  -5,   3,  16,  30,  36,  33,  22,   8,  -2,  -6,  -2 },
+      {  -5,  -5,   2,  15,  29,  36,  34,  23,   9,  -2,  -6,  -2 },
+      {  -5,  -5,   2,  15,  28,  36,  34,  24,  10,  -2,  -6,  -3 },
+      {  -4,  -5,   1,  14,  27,  36,  35,  24,  10,  -1,  -6,  -3 },
+      {  -4,  -5,   0,  13,  26,  35,  35,  25,  11,   0,  -5,  -3 },
+      {  -4,  -6,   0,  12,  26,  36,  36,  26,  12,   0,  -6,  -4 },
+      {  -3,  -5,   0,  11,  25,  35,  35,  26,  13,   0,  -5,  -4 },
+      {  -3,  -6,  -1,  10,  24,  35,  36,  27,  14,   1,  -5,  -4 },
+      {  -3,  -6,  -2,  10,  24,  34,  36,  28,  15,   2,  -5,  -5 },
+      {  -2,  -6,  -2,   9,  23,  34,  36,  29,  15,   2,  -5,  -5 },
+      {  -2,  -6,  -2,   8,  22,  33,  36,  30,  16,   3,  -5,  -5 },
+      {  -1,  -6,  -3,   7,  21,  33,  36,  30,  17,   4,  -4,  -6 },
+      {  -1,  -6,  -3,   6,  20,  32,  37,  31,  18,   4,  -4,  -6 }
+    },
+    { // D = 4
+      {  -9,   0,   9,  20,  28,  32,  28,  20,   9,   0,  -9,   0 },
+      {  -9,   0,   8,  19,  28,  32,  29,  20,  10,   0,  -4,  -5 },
+      {  -9,  -1,   8,  18,  28,  32,  29,  21,  10,   1,  -4,  -5 },
+      {  -9,  -1,   7,  18,  27,  32,  30,  22,  11,   1,  -4,  -6 },
+      {  -8,  -2,   6,  17,  27,  32,  30,  22,  12,   2,  -4,  -6 },
+      {  -8,  -2,   6,  16,  26,  32,  31,  23,  12,   2,  -4,  -6 },
+      {  -8,  -2,   5,  16,  26,  31,  31,  23,  13,   3,  -3,  -7 },
+      {  -8,  -3,   5,  15,  25,  31,  31,  24,  14,   4,  -3,  -7 },
+      {  -7,  -3,   4,  14,  25,  31,  31,  25,  14,   4,  -3,  -7 },
+      {  -7,  -3,   4,  14,  24,  31,  31,  25,  15,   5,  -3,  -8 },
+      {  -7,  -3,   3,  13,  23,  31,  31,  26,  16,   5,  -2,  -8 },
+      {  -6,  -4,   2,  12,  23,  31,  32,  26,  16,   6,  -2,  -8 },
+      {  -6,  -4,   2,  12,  22,  30,  32,  27,  17,   6,  -2,  -8 },
+      {  -6,  -4,   1,  11,  22,  30,  32,  27,  18,   7,  -1,  -9 },
+      {  -5,  -4,   1,  10,  21,  29,  32,  28,  18,   8,  -1,  -9 },
+      {  -5,  -4,   0,  10,  20,  29,  32,  28,  19,   8,   0,  -9 }
+    },
+    { // D = 5.5
+      {  -8,   7,  13,  18,  22,  24,  22,  18,  13,   7,   2, -10 },
+      {  -8,   7,  13,  18,  22,  23,  22,  19,  13,   7,   2, -10 },
+      {  -8,   6,  12,  18,  22,  23,  22,  19,  14,   8,   2, -10 },
+      {  -9,   6,  12,  17,  22,  23,  23,  19,  14,   8,   3, -10 },
+      {  -9,   6,  12,  17,  21,  23,  23,  19,  14,   9,   3, -10 },
+      {  -9,   5,  11,  17,  21,  23,  23,  20,  15,   9,   3, -10 },
+      {  -9,   5,  11,  16,  21,  23,  23,  20,  15,   9,   4, -10 },
+      {  -9,   5,  10,  16,  21,  23,  23,  20,  15,  10,   4, -10 },
+      { -10,   5,  10,  16,  20,  23,  23,  20,  16,  10,   5, -10 },
+      { -10,   4,  10,  15,  20,  23,  23,  21,  16,  10,   5,  -9 },
+      { -10,   4,   9,  15,  20,  23,  23,  21,  16,  11,   5,  -9 },
+      { -10,   3,   9,  15,  20,  23,  23,  21,  17,  11,   5,  -9 },
+      { -10,   3,   9,  14,  19,  23,  23,  21,  17,  12,   6,  -9 },
+      { -10,   3,   8,  14,  19,  23,  23,  22,  17,  12,   6,  -9 },
+      { -10,   2,   8,  14,  19,  22,  23,  22,  18,  12,   6,  -8 },
+      { -10,   2,   7,  13,  19,  22,  23,  22,  18,  13,   7,  -8 }
+    }
+};
+
+void Picture::sampleRateConv( const std::pair<int, int> scalingRatio, const std::pair<int, int> compScale,
+                              const CPelBuf& beforeScale, const int beforeScaleLeftOffset, const int beforeScaleTopOffset,
+                              const PelBuf& afterScale, const int afterScaleLeftOffset, const int afterScaleTopOffset,
+                              const int bitDepth, const bool useLumaFilter, const bool downsampling,
+                              const bool horCollocatedPositionFlag, const bool verCollocatedPositionFlag ) // resamples one plane (beforeScale -> afterScale) with a separable horizontal-then-vertical filter
 {
-  const UnitArea clipdArea = clipArea( ctuArea, *this );
-  const int      sourceID  = scheduler.getSplitPicId( 0 );
-  // distribute the reconstruction across all of the parallel workers
-  for( int dataId = 0; dataId < scheduler.getNumPicInstances(); dataId++ )
+  const Pel* orgSrc = beforeScale.buf;
+  const int orgWidth = beforeScale.width;
+  const int orgHeight = beforeScale.height;
+  const int orgStride = beforeScale.stride;
+
+  Pel* scaledSrc = afterScale.buf;
+  const int scaledWidth = afterScale.width;
+  const int scaledHeight = afterScale.height;
+  const int scaledStride = afterScale.stride;
+
+  if( orgWidth == scaledWidth && orgHeight == scaledHeight && scalingRatio == SCALE_1X && !beforeScaleLeftOffset && !beforeScaleTopOffset && !afterScaleLeftOffset && !afterScaleTopOffset )
   {
-    if( dataId == sourceID ) continue;
+    for( int j = 0; j < orgHeight; j++ )
+    {
+      memcpy( scaledSrc + j * scaledStride, orgSrc + j * orgStride, sizeof( Pel ) * orgWidth ); // same size, 1:1 ratio, no offsets: plain row-by-row copy
+    }
+
+    return;
+  }
+
+  const TFilterCoeff* filterHor = useLumaFilter ? &InterpolationFilter::m_lumaFilter[0][0] : &InterpolationFilter::m_chromaFilter[0][0]; // defaults; replaced below when downsampling
+  const TFilterCoeff* filterVer = useLumaFilter ? &InterpolationFilter::m_lumaFilter[0][0] : &InterpolationFilter::m_chromaFilter[0][0];
+  const int numFracPositions = useLumaFilter ? 15 : 31; // used as a bit mask on refPos to extract the fractional phase
+  const int numFracShift = useLumaFilter ? 4 : 5;       // refPos >> numFracShift is the integer sample position
+  const int posShiftX = SCALE_RATIO_BITS - numFracShift + compScale.first;
+  const int posShiftY = SCALE_RATIO_BITS - numFracShift + compScale.second;
+  int addX = ( 1 << ( posShiftX - 1 ) ) + ( beforeScaleLeftOffset << SCALE_RATIO_BITS ) + ( ( int( 1 - horCollocatedPositionFlag ) * 8 * ( scalingRatio.first - SCALE_1X.first ) + ( 1 << ( 2 + compScale.first ) ) ) >> ( 3 + compScale.first ) ); // rounding + source left offset + phase term (evaluates to 0 when the collocated flag is set)
+  int addY = ( 1 << ( posShiftY - 1 ) ) + ( beforeScaleTopOffset << SCALE_RATIO_BITS ) + ( ( int( 1 - verCollocatedPositionFlag ) * 8 * ( scalingRatio.second - SCALE_1X.second ) + ( 1 << ( 2 + compScale.second ) ) ) >> ( 3 + compScale.second ) );
+
+  if( downsampling )
+  {
+    int verFilter = 0;
+    int horFilter = 0;
+
+    if( scalingRatio.first > ( 15 << SCALE_RATIO_BITS ) / 4 )   horFilter = 7; // thresholds are checked from the largest ratio down; index 0 is kept when none matches
+    else if( scalingRatio.first > ( 20 << SCALE_RATIO_BITS ) / 7 )   horFilter = 6;
+    else if( scalingRatio.first > ( 5 << SCALE_RATIO_BITS ) / 2 )   horFilter = 5;
+    else if( scalingRatio.first > ( 2 << SCALE_RATIO_BITS ) )   horFilter = 4;
+    else if( scalingRatio.first > ( 5 << SCALE_RATIO_BITS ) / 3 )   horFilter = 3;
+    else if( scalingRatio.first > ( 5 << SCALE_RATIO_BITS ) / 4 )   horFilter = 2;
+    else if( scalingRatio.first > ( 20 << SCALE_RATIO_BITS ) / 19 )   horFilter = 1;
+
+    if( scalingRatio.second > ( 15 << SCALE_RATIO_BITS ) / 4 )   verFilter = 7; // same threshold ladder for the vertical ratio
+    else if( scalingRatio.second > ( 20 << SCALE_RATIO_BITS ) / 7 )   verFilter = 6;
+    else if( scalingRatio.second > ( 5 << SCALE_RATIO_BITS ) / 2 )   verFilter = 5;
+    else if( scalingRatio.second > ( 2 << SCALE_RATIO_BITS ) )   verFilter = 4;
+    else if( scalingRatio.second > ( 5 << SCALE_RATIO_BITS ) / 3 )   verFilter = 3;
+    else if( scalingRatio.second > ( 5 << SCALE_RATIO_BITS ) / 4 )   verFilter = 2;
+    else if( scalingRatio.second > ( 20 << SCALE_RATIO_BITS ) / 19 )   verFilter = 1;
+
+    filterHor = &DownsamplingFilterSRC[horFilter][0][0];
+    filterVer = &DownsamplingFilterSRC[verFilter][0][0];
+  }
+
+  const int filterLength = downsampling ? 12 : ( useLumaFilter ? NTAPS_LUMA : NTAPS_CHROMA ); // 12 matches the tap dimension of DownsamplingFilterSRC
+  const int log2Norm = downsampling ? 14 : 12; // single normalisation shift applied after both filter passes
+
+  CHECK( bitDepth > 17, "Overflow may happen!" ); // validated before shifting/allocating: if CHECK throws, nothing has been allocated yet, so buf cannot leak
+
+  const int maxVal = ( 1 << bitDepth ) - 1;
+  int *buf = new int[orgHeight * scaledWidth]; // horizontal-pass output: orgHeight rows of scaledWidth un-normalised sums; released at the end of the function
+
+  for( int i = 0; i < scaledWidth; i++ ) // horizontal pass, one output column at a time
+  {
+    const Pel* org = orgSrc;
+    int refPos = ( ( ( i << compScale.first ) - afterScaleLeftOffset ) * scalingRatio.first + addX ) >> posShiftX;
+    int integer = refPos >> numFracShift;
+    int frac = refPos & numFracPositions;
+    int* tmp = buf + i;
+
+    for( int j = 0; j < orgHeight; j++ )
+    {
+      int sum = 0;
+      const TFilterCoeff* f = filterHor + frac * filterLength;
+
+      for( int k = 0; k < filterLength; k++ )
+      {
+        int xInt = std::min<int>( std::max( 0, integer + k - filterLength / 2 + 1 ), orgWidth - 1 ); // tap position clamped to [0, orgWidth - 1]
+        sum += f[k] * org[xInt]; // postpone horizontal filtering gain removal after vertical filtering
+      }
+
+      *tmp = sum;
+
+      tmp += scaledWidth;
+      org += orgStride;
+    }
+  }
+
+  Pel* dst = scaledSrc;
+
+  for( int j = 0; j < scaledHeight; j++ ) // vertical pass over the intermediate buffer, one output row at a time
+  {
+    int refPos = ( ( ( j << compScale.second ) - afterScaleTopOffset ) * scalingRatio.second + addY ) >> posShiftY;
+    int integer = refPos >> numFracShift;
+    int frac = refPos & numFracPositions;
+
+    for( int i = 0; i < scaledWidth; i++ )
+    {
+      int sum = 0;
+      int* tmp = buf + i;
+      const TFilterCoeff* f = filterVer + frac * filterLength;
+
+      for( int k = 0; k < filterLength; k++ )
+      {
+        int yInt = std::min<int>( std::max( 0, integer + k - filterLength / 2 + 1 ), orgHeight - 1 ); // tap row clamped to [0, orgHeight - 1]
+        sum += f[k] * tmp[yInt*scaledWidth];
+      }
+
+      dst[i] = std::min<int>( std::max( 0, ( sum + ( 1 << ( log2Norm - 1 ) ) ) >> log2Norm ), maxVal ); // round, remove the gain of both passes, clip to [0, maxVal]
+    }
 
-    M_BUFS( dataId, PIC_RECONSTRUCTION ).subBuf( clipdArea ).copyFrom( M_BUFS( sourceID, PIC_RECONSTRUCTION ).subBuf( clipdArea ) );
+    dst += scaledStride;
   }
+
+  delete[] buf;
 }
-#endif
 
-#endif
+void Picture::rescalePicture( const std::pair<int, int> scalingRatio,
+                              const CPelUnitBuf& beforeScaling, const Window& scalingWindowBefore,
+                              const PelUnitBuf& afterScaling, const Window& scalingWindowAfter,
+                              const ChromaFormat chromaFormatIDC, const BitDepths& bitDepths, const bool useLumaFilter, const bool downsampling,
+                              const bool horCollocatedChromaFlag, const bool verCollocatedChromaFlag ) // resamples every component of beforeScaling into afterScaling via sampleRateConv
+{
+  for( int comp = 0; comp < ::getNumberValidComponents( chromaFormatIDC ); comp++ ) // one sampleRateConv call per component
+  {
+    ComponentID compID = ComponentID( comp );
+    const CPelBuf& beforeScale = beforeScaling.get( compID );
+    const PelBuf& afterScale = afterScaling.get( compID );
+
+    sampleRateConv( scalingRatio, std::pair<int, int>( ::getComponentScaleX( compID, chromaFormatIDC ), ::getComponentScaleY( compID, chromaFormatIDC ) ), // second argument: per-component scale, used as shift amounts inside sampleRateConv
+#if JVET_Q0487_SCALING_WINDOW_ISSUES
+                    beforeScale, scalingWindowBefore.getWindowLeftOffset() * SPS::getWinUnitX( chromaFormatIDC ), scalingWindowBefore.getWindowTopOffset() * SPS::getWinUnitY( chromaFormatIDC ), 
+                    afterScale, scalingWindowAfter.getWindowLeftOffset() * SPS::getWinUnitX( chromaFormatIDC ), scalingWindowAfter.getWindowTopOffset() * SPS::getWinUnitY( chromaFormatIDC ), 
+#else
+                    beforeScale, scalingWindowBefore.getWindowLeftOffset(), scalingWindowBefore.getWindowTopOffset(), 
+                    afterScale, scalingWindowAfter.getWindowLeftOffset(), scalingWindowAfter.getWindowTopOffset(), 
+#endif              
+                    bitDepths.recon[comp], downsampling || useLumaFilter ? true : isLuma( compID ), downsampling, // luma filter is forced when downsampling or useLumaFilter is set. NOTE(review): recon is indexed by component index here; if recon is sized per channel type, comp == 2 would read out of range - confirm against BitDepths
+                    isLuma( compID ) ? 1 : horCollocatedChromaFlag, isLuma( compID ) ? 1 : verCollocatedChromaFlag ); // luma always passes 1 for both collocated flags; chroma forwards the caller's flags
+  }
+}
 
 void Picture::extendPicBorder()
 {
@@ -1020,21 +751,6 @@ void Picture::extendPicBorder()
 
     Pel*  pi = piTxt;
     // do left and right margins
-    if (cs->sps->getWrapAroundEnabledFlag())
-    {
-      int xoffset = cs->sps->getWrapAroundOffset() >> getComponentScaleX( compID, cs->area.chromaFormat );
-      for (int y = 0; y < p.height; y++)
-      {
-        for (int x = 0; x < xmargin; x++ )
-        {
-          pi[ -x - 1       ] = pi[ -x - 1       + xoffset ];
-          pi[  p.width + x ] = pi[  p.width + x - xoffset ];
-        }
-        pi += p.stride;
-      }
-    }
-    else
-    {
       for (int y = 0; y < p.height; y++)
       {
         for (int x = 0; x < xmargin; x++ )
@@ -1044,7 +760,6 @@ void Picture::extendPicBorder()
         }
         pi += p.stride;
       }
-    }
 
     // pi is now the (0,height) (bottom left of image within bigger picture
     pi -= (p.stride + xmargin);
@@ -1061,6 +776,43 @@ void Picture::extendPicBorder()
     {
       ::memcpy( pi - (y+1)*p.stride, pi, sizeof(Pel)*(p.width + (xmargin<<1)) );
     }
+
+    // reference picture with horizontal wrapped boundary
+    if (cs->sps->getWrapAroundEnabledFlag())
+    {
+      p = M_BUFS( 0, PIC_RECON_WRAP ).get( compID );
+      p.copyFrom(M_BUFS( 0, PIC_RECONSTRUCTION ).get( compID ));
+      piTxt = p.bufAt(0,0);
+      pi = piTxt;
+      int xoffset = cs->sps->getWrapAroundOffset() >> getComponentScaleX( compID, cs->area.chromaFormat );
+      for (int y = 0; y < p.height; y++)
+      {
+        for (int x = 0; x < xmargin; x++ )
+        {
+          if( x < xoffset )
+          {
+            pi[ -x - 1 ] = pi[ -x - 1 + xoffset ];
+            pi[  p.width + x ] = pi[ p.width + x - xoffset ];
+          }
+          else
+          {
+            pi[ -x - 1 ] = pi[ 0 ];
+            pi[  p.width + x ] = pi[ p.width - 1 ];
+          }
+        }
+        pi += p.stride;
+      }
+      pi -= (p.stride + xmargin);
+      for (int y = 0; y < ymargin; y++ )
+      {
+        ::memcpy( pi + (y+1)*p.stride, pi, sizeof(Pel)*(p.width + (xmargin << 1)));
+      }
+      pi -= ((p.height-1) * p.stride);
+      for (int y = 0; y < ymargin; y++ )
+      {
+        ::memcpy( pi - (y+1)*p.stride, pi, sizeof(Pel)*(p.width + (xmargin<<1)) );
+      }
+    }
   }
 
   m_bIsBorderExtended = true;
@@ -1068,12 +820,12 @@ void Picture::extendPicBorder()
 
 PelBuf Picture::getBuf( const ComponentID compID, const PictureType &type )
 {
-  return M_BUFS( ( type == PIC_ORIGINAL || type == PIC_TRUE_ORIGINAL ) ? 0 : scheduler.getSplitPicId(), type ).getBuf( compID );
+  return M_BUFS( ( type == PIC_ORIGINAL || type == PIC_TRUE_ORIGINAL || type == PIC_ORIGINAL_INPUT || type == PIC_TRUE_ORIGINAL_INPUT ) ? 0 : scheduler.getSplitPicId(), type ).getBuf( compID ); // the four original/input picture types always use instance 0; every other type uses the scheduler's split-pic id
 }
 
 const CPelBuf Picture::getBuf( const ComponentID compID, const PictureType &type ) const
 {
-  return M_BUFS( ( type == PIC_ORIGINAL || type == PIC_TRUE_ORIGINAL ) ? 0 : scheduler.getSplitPicId(), type ).getBuf( compID );
+  return M_BUFS( ( type == PIC_ORIGINAL || type == PIC_TRUE_ORIGINAL || type == PIC_ORIGINAL_INPUT || type == PIC_TRUE_ORIGINAL_INPUT ) ? 0 : scheduler.getSplitPicId(), type ).getBuf( compID ); // const overload with the same selection: original/input types use instance 0, all others the scheduler's split-pic id
 }
 
 PelBuf Picture::getBuf( const CompArea &blk, const PictureType &type )
@@ -1084,8 +836,7 @@ PelBuf Picture::getBuf( const CompArea &blk, const PictureType &type )
   }
 
 #if ENABLE_SPLIT_PARALLELISM
-  const int jId = ( type == PIC_ORIGINAL || type == PIC_TRUE_ORIGINAL ) ? 0 : scheduler.getSplitPicId();
-
+  const int jId = ( type == PIC_ORIGINAL || type == PIC_TRUE_ORIGINAL || type == PIC_ORIGINAL_INPUT || type == PIC_TRUE_ORIGINAL_INPUT ) ? 0 : scheduler.getSplitPicId();
 #endif
 #if !KEEP_PRED_AND_RESI_SIGNALS
   if( type == PIC_RESIDUAL || type == PIC_PREDICTION )
@@ -1181,8 +932,8 @@ bool Picture::getSpliceFull()
 
 void Picture::addPictureToHashMapForInter()
 {
-  int picWidth = slices[0]->getSPS()->getPicWidthInLumaSamples();
-  int picHeight = slices[0]->getSPS()->getPicHeightInLumaSamples();
+  int picWidth = slices[0]->getPPS()->getPicWidthInLumaSamples();
+  int picHeight = slices[0]->getPPS()->getPicHeightInLumaSamples();
   uint32_t* blockHashValues[2][2];
   bool* bIsBlockSame[2][3];
 
@@ -1198,18 +949,11 @@ void Picture::addPictureToHashMapForInter()
       bIsBlockSame[i][j] = new bool[picWidth*picHeight];
     }
   }
-
-  m_hashMap.create();
+  m_hashMap.create(picWidth, picHeight);
   m_hashMap.generateBlock2x2HashValue(getOrigBuf(), picWidth, picHeight, slices[0]->getSPS()->getBitDepths(), blockHashValues[0], bIsBlockSame[0]);//2x2
   m_hashMap.generateBlockHashValue(picWidth, picHeight, 4, 4, blockHashValues[0], blockHashValues[1], bIsBlockSame[0], bIsBlockSame[1]);//4x4
   m_hashMap.addToHashMapByRowWithPrecalData(blockHashValues[1], bIsBlockSame[1][2], picWidth, picHeight, 4, 4);
 
-  m_hashMap.generateRectangleHashValue(picWidth, picHeight, 8, 4, blockHashValues[1], blockHashValues[0], bIsBlockSame[1], bIsBlockSame[0]);//8x4
-  m_hashMap.addToHashMapByRowWithPrecalData(blockHashValues[0], bIsBlockSame[0][2], picWidth, picHeight, 8, 4);
-
-  m_hashMap.generateRectangleHashValue(picWidth, picHeight, 4, 8, blockHashValues[1], blockHashValues[0], bIsBlockSame[1], bIsBlockSame[0]);//4x8
-  m_hashMap.addToHashMapByRowWithPrecalData(blockHashValues[0], bIsBlockSame[0][2], picWidth, picHeight, 4, 8);
-
   m_hashMap.generateBlockHashValue(picWidth, picHeight, 8, 8, blockHashValues[1], blockHashValues[0], bIsBlockSame[1], bIsBlockSame[0]);//8x8
   m_hashMap.addToHashMapByRowWithPrecalData(blockHashValues[0], bIsBlockSame[0][2], picWidth, picHeight, 8, 8);
 
diff --git a/source/Lib/CommonLib/Picture.h b/source/Lib/CommonLib/Picture.h
index dd7ab22326f80a9f7a3776ea526542121a477b70..1c259541cde70d2bdf5b85ed760b00cb80b6dc97 100644
--- a/source/Lib/CommonLib/Picture.h
+++ b/source/Lib/CommonLib/Picture.h
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2019, ITU/ISO/IEC
+* Copyright (c) 2010-2020, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -50,11 +50,7 @@
 #include "MCTS.h"
 #include <deque>
 
-#if ENABLE_WPP_PARALLELISM || ENABLE_SPLIT_PARALLELISM
-#if ENABLE_WPP_PARALLELISM
-#include <mutex>
-class SyncObj;
-#endif
+#if ENABLE_SPLIT_PARALLELISM
 
 #define CURR_THREAD_ID -1
 
@@ -73,35 +69,10 @@ public:
   void     finishParallel();
   void     setSplitThreadId( const int tId = CURR_THREAD_ID );
   unsigned getNumSplitThreads() const { return m_numSplitThreads; };
-#endif
-#if ENABLE_WPP_PARALLELISM
-  unsigned getWppDataId  ( int lId = CURR_THREAD_ID ) const;
-  unsigned getWppThreadId() const;
-  void     setWppThreadId( const int tId = CURR_THREAD_ID );
 #endif
   unsigned getDataId     () const;
   bool init              ( const int ctuYsize, const int ctuXsize, const int numWppThreadsRunning, const int numWppExtraLines, const int numSplitThreads );
   int  getNumPicInstances() const;
-#if ENABLE_WPP_PARALLELISM
-  void setReady          ( const int ctuPosX, const int ctuPosY );
-  void wait              ( const int ctuPosX, const int ctuPosY );
-
-private:
-  bool getNextCtu( Position& pos, int ctuLine, int offset );
-
-private:
-  int m_firstNonFinishedLine;
-  int m_numWppThreads;
-  int m_numWppThreadsRunning;
-  int m_numWppDataInstances;
-  int m_ctuYsize;
-  int m_ctuXsize;
-
-  std::vector<int>         m_LineDone;
-  std::vector<bool>        m_LineProc;
-  std::mutex               m_mutex;
-  std::vector<SyncObj*>    m_SyncObjs;
-#endif
 #if ENABLE_SPLIT_PARALLELISM
 
   int   m_numSplitThreads;
@@ -115,60 +86,7 @@ class AQpLayer;
 
 typedef std::list<SEI*> SEIMessages;
 
-#if HEVC_TILES_WPP
-class Tile
-{
-private:
-  uint32_t      m_tileWidthInCtus;
-  uint32_t      m_tileHeightInCtus;
-  uint32_t      m_rightEdgePosInCtus;
-  uint32_t      m_bottomEdgePosInCtus;
-  uint32_t      m_firstCtuRsAddr;
-
-public:
-  Tile();
-  virtual ~Tile();
-
-  void      setTileWidthInCtus     ( uint32_t i )            { m_tileWidthInCtus = i; }
-  uint32_t      getTileWidthInCtus     () const              { return m_tileWidthInCtus; }
-  void      setTileHeightInCtus    ( uint32_t i )            { m_tileHeightInCtus = i; }
-  uint32_t      getTileHeightInCtus    () const              { return m_tileHeightInCtus; }
-  void      setRightEdgePosInCtus  ( uint32_t i )            { m_rightEdgePosInCtus = i; }
-  uint32_t      getRightEdgePosInCtus  () const              { return m_rightEdgePosInCtus; }
-  void      setBottomEdgePosInCtus ( uint32_t i )            { m_bottomEdgePosInCtus = i; }
-  uint32_t      getBottomEdgePosInCtus () const              { return m_bottomEdgePosInCtus; }
-  void      setFirstCtuRsAddr      ( uint32_t i )            { m_firstCtuRsAddr = i; }
-  uint32_t      getFirstCtuRsAddr      () const              { return m_firstCtuRsAddr; }
-};
-
-
-struct TileMap
-{
-  TileMap();
-
-  void create( const SPS& sps, const PPS& pps );
-  void destroy();
 
-  uint32_t getTileIdxMap( uint32_t ctuRsAddr )       const { return *(tileIdxMap + ctuRsAddr); }
-  uint32_t getTileIdxMap( const Position& pos )  const { return getTileIdxMap( ( pos.x / pcv->maxCUWidth ) + ( pos.y / pcv->maxCUHeight ) * pcv->widthInCtus ); };
-  uint32_t getCtuTsToRsAddrMap( uint32_t ctuTsAddr ) const { return *(ctuTsToRsAddrMap + (ctuTsAddr>=pcv->sizeInCtus ? pcv->sizeInCtus : ctuTsAddr)); }
-  uint32_t getCtuRsToTsAddrMap( uint32_t ctuRsAddr ) const { return *(ctuRsToTsAddrMap + (ctuRsAddr>=pcv->sizeInCtus ? pcv->sizeInCtus : ctuRsAddr)); }
-  uint32_t getSubstreamForCtuAddr(const uint32_t ctuAddr, const bool bAddressInRaster, Slice *pcSlice) const;
-
-  const PreCalcValues* pcv;
-  std::vector<Tile> tiles;
-  uint32_t  numTiles;
-  uint32_t  numTileColumns;
-  uint32_t  numTileRows;
-  uint32_t* tileIdxMap;
-  uint32_t* ctuTsToRsAddrMap;
-  uint32_t* ctuRsToTsAddrMap;
-
-  void initTileMap( const SPS& sps, const PPS& pps );
-  void initCtuTsRsAddrMap();
-  uint32_t calculateNextCtuRSAddr( const uint32_t currCtuRsAddr ) const;
-};
-#endif
 
 #if ENABLE_SPLIT_PARALLELISM
 #define M_BUFS(JID,PID) m_bufs[JID][PID]
@@ -181,7 +99,7 @@ struct Picture : public UnitArea
   uint32_t margin;
   Picture();
 
-  void create(const ChromaFormat &_chromaFormat, const Size &size, const unsigned _maxCUSize, const unsigned margin, const bool bDecoder);
+  void create( const ChromaFormat &_chromaFormat, const Size &size, const unsigned _maxCUSize, const unsigned margin, const bool bDecoder, const int layerId );
   void destroy();
 
   void createTempBuffers( const unsigned _maxCUSize );
@@ -210,14 +128,14 @@ struct Picture : public UnitArea
          PelUnitBuf getResiBuf(const UnitArea &unit);
   const CPelUnitBuf getResiBuf(const UnitArea &unit) const;
 
-         PelBuf     getRecoBuf(const ComponentID compID);
-  const CPelBuf     getRecoBuf(const ComponentID compID) const;
-         PelBuf     getRecoBuf(const CompArea &blk);
-  const CPelBuf     getRecoBuf(const CompArea &blk) const;
-         PelUnitBuf getRecoBuf(const UnitArea &unit);
-  const CPelUnitBuf getRecoBuf(const UnitArea &unit) const;
-         PelUnitBuf getRecoBuf();
-  const CPelUnitBuf getRecoBuf() const;
+         PelBuf     getRecoBuf(const ComponentID compID, bool wrap=false);
+  const CPelBuf     getRecoBuf(const ComponentID compID, bool wrap=false) const;
+         PelBuf     getRecoBuf(const CompArea &blk, bool wrap=false);
+  const CPelBuf     getRecoBuf(const CompArea &blk, bool wrap=false) const;
+         PelUnitBuf getRecoBuf(const UnitArea &unit, bool wrap=false);
+  const CPelUnitBuf getRecoBuf(const UnitArea &unit, bool wrap=false) const;
+         PelUnitBuf getRecoBuf(bool wrap=false);
+  const CPelUnitBuf getRecoBuf(bool wrap=false) const;
 
          PelBuf     getBuf(const ComponentID compID, const PictureType &type);
   const CPelBuf     getBuf(const ComponentID compID, const PictureType &type) const;
@@ -227,7 +145,7 @@ struct Picture : public UnitArea
   const CPelUnitBuf getBuf(const UnitArea &unit,     const PictureType &type) const;
 
   void extendPicBorder();
-  void finalInit(const SPS& sps, const PPS& pps, APS& aps);
+  void finalInit( const VPS* vps, const SPS& sps, const PPS& pps, PicHeader *picHeader, APS** alfApss, APS* lmcsAps, APS* scalingListAps );
 
   int  getPOC()                               const { return poc; }
   void setBorderExtension( bool bFlag)              { m_bIsBorderExtended = bFlag;}
@@ -237,6 +155,21 @@ struct Picture : public UnitArea
   void          setSpliceIdx(uint32_t idx, int poc) { m_spliceIdx[idx] = poc; }
   void          createSpliceIdx(int nums);
   bool          getSpliceFull();
+  static void   sampleRateConv( const std::pair<int, int> scalingRatio, const std::pair<int, int> compScale,
+                                const CPelBuf& beforeScale, const int beforeScaleLeftOffset, const int beforeScaleTopOffset,
+                                const PelBuf& afterScale, const int afterScaleLeftOffset, const int afterScaleTopOffset,
+                                const int bitDepth, const bool useLumaFilter, const bool downsampling,
+                                const bool horCollocatedPositionFlag, const bool verCollocatedPositionFlag );
+
+  static void   rescalePicture( const std::pair<int, int> scalingRatio, 
+                                const CPelUnitBuf& beforeScaling, const Window& scalingWindowBefore, 
+                                const PelUnitBuf& afterScaling, const Window& scalingWindowAfter,
+                                const ChromaFormat chromaFormatIDC, const BitDepths& bitDepths, const bool useLumaFilter, const bool downsampling,
+                                const bool horCollocatedChromaFlag, const bool verCollocatedChromaFlag );
+
+private:
+  Window        m_conformanceWindow;
+  Window        m_scalingWindow;
 
 public:
   bool m_bIsBorderExtended;
@@ -248,23 +181,26 @@ public:
   bool topField;
   bool fieldPic;
   int  m_prevQP[MAX_NUM_CHANNEL_TYPE];
+  bool precedingDRAP; // preceding a DRAP picture in decoding order
 
   int  poc;
   uint32_t layer;
   uint32_t depth;
+  int      layerId;
+
+  bool subLayerNonReferencePictureDueToSTSA;
 
   int* m_spliceIdx;
   int  m_ctuNums;
 
+  bool interLayerRefPicFlag;
+
 #if ENABLE_SPLIT_PARALLELISM
-#if ENABLE_WPP_PARALLELISM
-  PelStorage m_bufs[( PARL_SPLIT_MAX_NUM_JOBS * PARL_WPP_MAX_NUM_THREADS )][NUM_PIC_TYPES];
-#else
   PelStorage m_bufs[PARL_SPLIT_MAX_NUM_JOBS][NUM_PIC_TYPES];
-#endif
 #else
   PelStorage m_bufs[NUM_PIC_TYPES];
 #endif
+  const Picture*           unscaledPic;
 
   TComHash           m_hashMap;
   TComHash*          getHashMap() { return &m_hashMap; }
@@ -275,13 +211,25 @@ public:
   std::deque<Slice*> slices;
   SEIMessages        SEIs;
 
+  uint32_t           getPicWidthInLumaSamples() const                                { return  getRecoBuf( COMPONENT_Y ).width; }
+  uint32_t           getPicHeightInLumaSamples() const                               { return  getRecoBuf( COMPONENT_Y ).height; }
+  Window&            getConformanceWindow()                                          { return  m_conformanceWindow; }
+  const Window&      getConformanceWindow() const                                    { return  m_conformanceWindow; }
+  Window&            getScalingWindow()                                              { return  m_scalingWindow; }
+  const Window&      getScalingWindow()                                        const { return  m_scalingWindow; }
+#if JVET_Q0487_SCALING_WINDOW_ISSUES
+  bool               isRefScaled( const PPS* pps ) const                             { return  getPicWidthInLumaSamples()                 != pps->getPicWidthInLumaSamples()                ||
+                                                                                               getPicHeightInLumaSamples()                != pps->getPicHeightInLumaSamples()               ||
+                                                                                               getScalingWindow().getWindowLeftOffset()   != pps->getScalingWindow().getWindowLeftOffset()  ||
+                                                                                               getScalingWindow().getWindowRightOffset()  != pps->getScalingWindow().getWindowRightOffset() ||
+                                                                                               getScalingWindow().getWindowTopOffset()    != pps->getScalingWindow().getWindowTopOffset()   ||
+                                                                                               getScalingWindow().getWindowBottomOffset() != pps->getScalingWindow().getWindowBottomOffset(); }
+#endif
+
   void         allocateNewSlice();
   Slice        *swapSliceObject(Slice * p, uint32_t i);
   void         clearSliceBuffer();
 
-#if HEVC_TILES_WPP
-  TileMap*     tileMap;
-#endif
   MCTSInfo     mctsInfo;
   std::vector<AQpLayer*> aqlayer;
 
@@ -293,11 +241,8 @@ private:
 #if ENABLE_SPLIT_PARALLELISM
 public:
   void finishParallelPart   ( const UnitArea& ctuArea );
-#if ENABLE_WPP_PARALLELISM
-  void finishCtuPart        ( const UnitArea& ctuArea );
-#endif
 #endif
-#if ENABLE_WPP_PARALLELISM || ENABLE_SPLIT_PARALLELISM
+#if ENABLE_SPLIT_PARALLELISM
 public:
   Scheduler                  scheduler;
 #endif
@@ -328,6 +273,28 @@ public:
       std::fill( m_alfCtuEnableFlag[compIdx].begin(), m_alfCtuEnableFlag[compIdx].end(), 0 );
     }
   }
+  std::vector<short> m_alfCtbFilterIndex;
+  short* getAlfCtbFilterIndex() { return m_alfCtbFilterIndex.data(); }
+  std::vector<short>& getAlfCtbFilterIndexVec() { return m_alfCtbFilterIndex; }
+  // Resize m_alfCtbFilterIndex to numEntries elements and reset all of them
+  // to 0, so no value stored before the call survives.
+  void resizeAlfCtbFilterIndex(int numEntries)
+  {
+    m_alfCtbFilterIndex.resize(numEntries);
+    // the vector now holds exactly numEntries elements, so fill all of it
+    std::fill( m_alfCtbFilterIndex.begin(), m_alfCtbFilterIndex.end(), 0 );
+  }
+  std::vector<uint8_t> m_alfCtuAlternative[MAX_NUM_COMPONENT];
+  std::vector<uint8_t>& getAlfCtuAlternative( int compIdx ) { return m_alfCtuAlternative[compIdx]; }
+  uint8_t* getAlfCtuAlternativeData( int compIdx ) { return m_alfCtuAlternative[compIdx].data(); }
+  // Give every component from index 1 upwards numEntries zeroed entries; the vector at index 0 is not touched.
+  void resizeAlfCtuAlternative( int numEntries )
+  {
+    for( int compIdx = 1; compIdx < MAX_NUM_COMPONENT; ++compIdx )
+    {
+      m_alfCtuAlternative[compIdx].assign( numEntries, uint8_t( 0 ) );
+    }
+  }
 };
 
 int calcAndPrintHashStatus(const CPelUnitBuf& pic, const class SEIDecodedPictureHash* pictureHashSEI, const BitDepths &bitDepths, const MsgLevel msgl);
diff --git a/source/Lib/CommonLib/ProfileLevelTier.cpp b/source/Lib/CommonLib/ProfileLevelTier.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..44f8a74770bd7b4fc9e05c7eec46fc95ee3a0c0d
--- /dev/null
+++ b/source/Lib/CommonLib/ProfileLevelTier.cpp
@@ -0,0 +1,127 @@
+/* The copyright in this software is being made available under the BSD
+ * License, included below. This software may be subject to other third party
+ * and contributor rights, including patent rights, and no such rights are
+ * granted under this license.
+ *
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *  * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
+ *    be used to endorse or promote products derived from this software without
+ *    specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file     ProfileLevelTier.cpp
+    \brief    Handle profile, level and tier information.
+*/
+
+
+#include "ProfileLevelTier.h"
+#include "CommonLib/Slice.h"
+#include <math.h>
+
+// Returns floor( sqrt( 8 * maxLumaPs ) ), which this level entry reports as its maximum picture width in luma samples.
+uint32_t LevelTierFeatures::getMaxPicWidthInLumaSamples() const
+{
+  return static_cast<uint32_t>( sqrt( 8.0 * maxLumaPs ) );
+}
+
+// Returns floor( sqrt( 8 * maxLumaPs ) ), which this level entry reports as its maximum picture height in luma samples.
+uint32_t LevelTierFeatures::getMaxPicHeightInLumaSamples() const
+{
+  return static_cast<uint32_t>( sqrt( 8.0 * maxLumaPs ) );
+}
+
+static const uint64_t MAX_CNFUINT64 = std::numeric_limits<uint64_t>::max(); // largest uint64_t; used as maxLumaSr in the LEVEL8_5 row below
+
+static const LevelTierFeatures mainLevelTierInfo[] = // one row per level, in LevelTierFeatures member order; two-element groups are indexed by tier (0/1)
+{
+    //  level        , maxlumaps,      maxcpb[tier],,    maxSlice,    tile rows, cols,     maxLumaSr,       maxBr[tier],,  , minCr[tier],,
+    { Level::LEVEL1  ,    36864, {      350,        0 },       16,        1,        1,     552960ULL, {     128,        0 }, { 2, 2} },
+    { Level::LEVEL2  ,   122880, {     1500,        0 },       16,        1,        1,    3686400ULL, {    1500,        0 }, { 2, 2} },
+    { Level::LEVEL2_1,   245760, {     3000,        0 },       20,        1,        1,    7372800ULL, {    3000,        0 }, { 2, 2} },
+    { Level::LEVEL3  ,   552960, {     6000,        0 },       30,        2,        2,   16588800ULL, {    6000,        0 }, { 2, 2} },
+    { Level::LEVEL3_1,   983040, {    10000,        0 },       40,        3,        3,   33177600ULL, {   10000,        0 }, { 2, 2} },
+    { Level::LEVEL4  ,  2228224, {    12000,    30000 },       75,        5,        5,   66846720ULL, {   12000,    30000 }, { 4, 4} },
+    { Level::LEVEL4_1,  2228224, {    20000,    50000 },       75,        5,        5,  133693440ULL, {   20000,    50000 }, { 4, 4} },
+    { Level::LEVEL5  ,  8912896, {    25000,   100000 },      200,       11,       10,  267386880ULL, {   25000,   100000 }, { 6, 4} },
+    { Level::LEVEL5_1,  8912896, {    40000,   160000 },      200,       11,       10,  534773760ULL, {   40000,   160000 }, { 8, 4} },
+    { Level::LEVEL5_2,  8912896, {    60000,   240000 },      200,       11,       10, 1069547520ULL, {   60000,   240000 }, { 8, 4} },
+    { Level::LEVEL6  , 35651584, {    60000,   240000 },      600,       22,       20, 1069547520ULL, {   60000,   240000 }, { 8, 4} },
+    { Level::LEVEL6_1, 35651584, {   120000,   480000 },      600,       22,       20, 2139095040ULL, {  120000,   480000 }, { 8, 4} },
+    { Level::LEVEL6_2, 35651584, {   240000,   800000 },      600,       22,       20, 4278190080ULL, {  240000,   800000 }, { 6, 4} },
+    { Level::LEVEL8_5, MAX_UINT, { MAX_UINT, MAX_UINT }, MAX_UINT, MAX_UINT, MAX_UINT, MAX_CNFUINT64, {MAX_UINT, MAX_UINT }, { 0, 0} },
+    { Level::NONE    } // terminator row: extractPTLInformation() scans this table until it reaches Level::NONE
+};
+
+static const ProfileFeatures validProfiles[] = // profiles extractPTLInformation() can resolve; columns follow ProfileFeatures member order
+{   //  profile,                   pNameString,             maxBitDepth, maxChrFmt,  lvl8.5, cpbvcl, cpbnal, fcf*1000, mincr*10, levelInfo
+    { Profile::MAIN_10,            "Main_10",                        10, CHROMA_420, false,   1000,   1100,     1875,   10    , mainLevelTierInfo },
+    { Profile::MAIN_444_10,        "Main_444_10",                    10, CHROMA_444, false,   2500,   2750,     3750,    5    , mainLevelTierInfo },
+    { Profile::NONE, 0 } // terminator entry: the profile search stops when it reaches Profile::NONE
+};
+
+// Resolves the profile and level signalled in the SPS profile_tier_level against validProfiles and the profile's
+// level table. Both cached pointers are cleared first, so they stay null when nothing (or no permitted level) matches.
+void ProfileLevelTierFeatures::extractPTLInformation(const SPS &sps)
+{
+  const ProfileTierLevel &spsPtl       = *(sps.getProfileTierLevel());
+  const Level::Name       spsLevelName = spsPtl.getLevelIdc();
+  // Drop the result of any earlier call: without this a failed lookup would keep reporting the previous SPS's rows.
+  m_pProfile   = 0;
+  m_pLevelTier = 0;
+  m_tier       = spsPtl.getTierFlag();
+
+  // Identify the profile from the profile Idc (validProfiles ends with a Profile::NONE entry).
+  for(int32_t i=0; validProfiles[i].profile != Profile::NONE; i++)
+  {
+    if (spsPtl.getProfileIdc() == validProfiles[i].profile)
+    {
+      m_pProfile = &(validProfiles[i]);
+      break;
+    }
+  }
+
+  // Now identify the level; LEVEL8_5 is only accepted for profiles that set canUseLevel8p5.
+  if (m_pProfile != 0 && (spsLevelName!=Level::LEVEL8_5 || m_pProfile->canUseLevel8p5))
+  {
+    for(const LevelTierFeatures *pLTF = m_pProfile->pLevelTiersListInfo; pLTF->level!=Level::NONE; pLTF++)
+    {
+      if (pLTF->level == spsLevelName)
+      {
+        m_pLevelTier = pLTF;
+        break;
+      }
+    }
+  }
+}
+
+double ProfileLevelTierFeatures::getMinCr() const // minCrScaleFactorx10 * minCrBase[tier] / 10, or 0.0 while no profile/level is resolved
+{
+  return (m_pProfile==0 || m_pLevelTier==0) ? 0.0 : (m_pProfile->minCrScaleFactorx10 * m_pLevelTier->minCrBase[m_tier?1:0])/10.0;
+}
+
+uint64_t ProfileLevelTierFeatures::getCpbSizeInBits() const // cpbVclFactor * maxCpb[tier] in 64-bit arithmetic, or 0 while no profile/level is resolved
+{
+  return (m_pProfile==0 || m_pLevelTier==0) ? uint64_t(0) : uint64_t(m_pProfile->cpbVclFactor) * m_pLevelTier->maxCpb[m_tier?1:0];
+}
diff --git a/source/Lib/CommonLib/ProfileLevelTier.h b/source/Lib/CommonLib/ProfileLevelTier.h
new file mode 100644
index 0000000000000000000000000000000000000000..6cd54107eff7b090a2e2b7685c8b70f5ac7ac592
--- /dev/null
+++ b/source/Lib/CommonLib/ProfileLevelTier.h
@@ -0,0 +1,102 @@
+/* The copyright in this software is being made available under the BSD
+ * License, included below. This software may be subject to other third party
+ * and contributor rights, including patent rights, and no such rights are
+ * granted under this license.
+ *
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *  * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
+ *    be used to endorse or promote products derived from this software without
+ *    specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file     ProfileLevelTier.h
+    \brief    Handle profile, level and tier information.
+*/
+
+#ifndef __PROFILE_LEVEL_TIER__
+#define __PROFILE_LEVEL_TIER__
+
+#if _MSC_VER > 1000
+#pragma once
+#endif // _MSC_VER > 1000
+
+#include "CommonLib/CommonDef.h"
+#include <stdint.h>
+
+class SPS; // Forward declaration.
+
+struct LevelTierFeatures // one row of a per-level limits table (see mainLevelTierInfo in ProfileLevelTier.cpp)
+{
+  Level::Name level;                             // key compared with the SPS level; Level::NONE marks the end of a table
+  uint32_t    maxLumaPs;                         // input to getMaxPic{Width,Height}InLumaSamples()
+  uint32_t    maxCpb[Level::NUMBER_OF_TIERS];    // in units of CpbVclFactor or CpbNalFactor bits
+  uint32_t    maxSliceSegmentsPerPicture;
+  uint32_t    maxTileRows;
+  uint32_t    maxTileCols;
+  uint64_t    maxLumaSr;
+  uint32_t    maxBr[Level::NUMBER_OF_TIERS];     // in units of BrVclFactor or BrNalFactor bits/s
+  uint32_t    minCrBase[Level::NUMBER_OF_TIERS]; // per tier; scaled by ProfileFeatures::minCrScaleFactorx10 / 10 in getMinCr()
+  uint32_t    getMaxPicWidthInLumaSamples()  const; // floor( sqrt( 8 * maxLumaPs ) )
+  uint32_t    getMaxPicHeightInLumaSamples() const; // floor( sqrt( 8 * maxLumaPs ) )
+};
+
+
+struct ProfileFeatures // one row of the supported-profile table (see validProfiles in ProfileLevelTier.cpp)
+{
+  Profile::Name            profile;                     // key compared with the SPS profile idc; Profile::NONE ends the table
+  const char              *pNameString;                 // profile name as text
+  uint32_t                 maxBitDepth;
+  ChromaFormat             maxChromaFormat;
+
+  bool                     canUseLevel8p5;              // when false, extractPTLInformation() never selects Level::LEVEL8_5
+  uint32_t                 cpbVclFactor;                // multiplied with LevelTierFeatures::maxCpb[tier] in getCpbSizeInBits()
+  uint32_t                 cpbNalFactor;
+  uint32_t                 formatCapabilityFactorx1000; // presumably the factor multiplied by 1000 - not read in ProfileLevelTier.cpp, confirm at use sites
+  uint32_t                 minCrScaleFactorx10;         // divided by 10.0 again in getMinCr()
+  const LevelTierFeatures *pLevelTiersListInfo;         // level table of this profile, terminated by a Level::NONE row
+};
+
+
+class ProfileLevelTierFeatures // caches the profile / level table rows that match an SPS, plus its tier
+{
+  private:
+    const ProfileFeatures   *m_pProfile;   // set by extractPTLInformation() to the matching validProfiles row; null (0) after construction
+    const LevelTierFeatures *m_pLevelTier; // set by extractPTLInformation() to the matching level row; null (0) after construction
+    Level::Tier              m_tier;       // tier flag of the SPS; selects index 0 or 1 of the per-tier arrays
+  public:
+    ProfileLevelTierFeatures() : m_pProfile(0), m_pLevelTier(0), m_tier(Level::MAIN) { }
+
+    void extractPTLInformation(const SPS &sps); // looks up profile, tier and level from sps.getProfileTierLevel()
+
+    const ProfileFeatures     *getProfileFeatures()   const { return m_pProfile; }   // may be null - check before dereferencing
+    const LevelTierFeatures   *getLevelTierFeatures() const { return m_pLevelTier; } // may be null - check before dereferencing
+    Level::Tier                getTier()              const { return m_tier; }
+    uint64_t getCpbSizeInBits()                       const; // 0 while profile or level is unresolved
+    double getMinCr()                                 const; // 0.0 while profile or level is unresolved
+};
+
+
+#endif
+
diff --git a/source/Lib/CommonLib/Quant.cpp b/source/Lib/CommonLib/Quant.cpp
index 04f9edb9eae2d7e52507a5be9c04476cabf2eabf..b46a46267666f9d44f13ca1d62e4ffd17e21f79a 100644
--- a/source/Lib/CommonLib/Quant.cpp
+++ b/source/Lib/CommonLib/Quant.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -63,37 +63,39 @@
 // ====================================================================================================================
 
 QpParam::QpParam(const int           qpy,
-                 const ChannelType   chType,
+                 const ComponentID   compID,
                  const int           qpBdOffset,
+                 const int           minQpPrimeTsMinus4,
                  const int           chromaQPOffset,
                  const ChromaFormat  chFmt,
-                 const int           dqp )
+                 const int           dqp
+              ,  const SPS           *sps
+)
 {
   int baseQp;
-
-  if(isLuma(chType))
+  if (isLuma(compID))
   {
     baseQp = qpy + qpBdOffset;
   }
   else
   {
-    baseQp = Clip3( -qpBdOffset, (chromaQPMappingTableSize - 1), qpy + chromaQPOffset );
-
-    if(baseQp < 0)
-    {
-      baseQp = baseQp + qpBdOffset;
-    }
-    else
-    {
-      baseQp = getScaledChromaQP(baseQp, chFmt) + qpBdOffset;
-    }
+    int qpi = Clip3(-qpBdOffset, MAX_QP, qpy);
+    baseQp = sps->getMappedChromaQpValue(compID, qpi);
+    baseQp = Clip3(-qpBdOffset, MAX_QP, baseQp + chromaQPOffset) + qpBdOffset;
   }
 
   baseQp = Clip3( 0, MAX_QP+qpBdOffset, baseQp + dqp );
 
-  Qp =baseQp;
-  per=baseQp/6;
-  rem=baseQp%6;
+  Qps[0] =baseQp;
+  pers[0]=baseQp/6;
+  rems[0]=baseQp%6;
+
+  int baseQpTS = baseQp;
+  baseQpTS = std::max(baseQpTS, 4 + minQpPrimeTsMinus4);
+
+  Qps[1]  = baseQpTS;
+  pers[1] = baseQpTS / 6;
+  rems[1] = baseQpTS % 6;
 }
 
 QpParam::QpParam(const TransformUnit& tu, const ComponentID &compIDX, const int QP /*= -MAX_INT*/)
@@ -103,18 +105,18 @@ QpParam::QpParam(const TransformUnit& tu, const ComponentID &compIDX, const int
 
   if (isChroma(compID))
   {
-    chromaQpOffset += tu.cs->pps->getQpOffset( compID );
-    chromaQpOffset += tu.cs->slice->getSliceChromaQpDelta( compID );
-    chromaQpOffset += tu.cs->pps->getPpsRangeExtension().getChromaQpOffsetListEntry( tu.cu->chromaQpAdj ).u.offset[int( compID ) - 1];
+    const bool useJQP = ( abs(TU::getICTMode(tu)) == 2 );
+
+    chromaQpOffset += tu.cs->pps->getQpOffset            ( useJQP ? JOINT_CbCr : compID );
+    chromaQpOffset += tu.cu->slice->getSliceChromaQpDelta( useJQP ? JOINT_CbCr : compID );
+
+    chromaQpOffset += tu.cs->pps->getChromaQpOffsetListEntry( tu.cu->chromaQpAdj ).u.offset[int( useJQP ? JOINT_CbCr : compID ) - 1];
   }
 
-#if HM_QTBT_AS_IN_JEM_QUANT
   int dqp = 0;
-#else
-  int dqp = ( TU::needsQP3Offset(tu, compID) ? -3 : 0 );
-#endif
 
-  *this = QpParam(QP <= -MAX_INT ? tu.cu->qp : QP, toChannelType(compID), tu.cs->sps->getQpBDOffset(toChannelType(compID)), chromaQpOffset, tu.chromaFormat, dqp);
+  const bool useJQP = isChroma(compID) && (abs(TU::getICTMode(tu)) == 2);
+  *this = QpParam(QP <= -MAX_INT ? tu.cu->qp : QP, useJQP ? JOINT_CbCr : compID, tu.cs->sps->getQpBDOffset(toChannelType(compID)), tu.cs->sps->getMinQpPrimeTsMinus4(toChannelType(compID)), chromaQpOffset, tu.chromaFormat, dqp, tu.cs->sps);
 }
 
 
@@ -124,20 +126,93 @@ QpParam::QpParam(const TransformUnit& tu, const ComponentID &compIDX, const int
 
 Quant::Quant( const Quant* other )
 {
-#if HEVC_USE_SCALING_LISTS
   xInitScalingList( other );
-#endif
 }
 
 Quant::~Quant()
 {
-#if HEVC_USE_SCALING_LISTS
   xDestroyScalingList();
-#endif
 }
 
+// Inverse residual DPCM for one component: accumulates the levels from tu.getCoeffs(compID) into dstBuf, clipping
+// every sum to the transform dynamic range. BDPCM mode 1 sums along each row, any other mode sums down each column.
+void invResDPCM( const TransformUnit &tu, const ComponentID &compID, CoeffBuf &dstBuf )
+{
+  const CCoeffBuf srcBuf = tu.getCoeffs(compID);
+  const int       width  = tu.blocks[compID].width;
+  const int       height = tu.blocks[compID].height;
+
+  // clipping bounds: [-(1 << maxLog2TrDynamicRange), (1 << maxLog2TrDynamicRange) - 1]
+  const int      maxLog2TrDynamicRange = tu.cs->sps->getMaxLog2TrDynamicRange(toChannelType(compID));
+  const TCoeff   inputMinimum   = -(1 << maxLog2TrDynamicRange);
+  const TCoeff   inputMaximum   =  (1 << maxLog2TrDynamicRange) - 1;
+  const TCoeff*  srcRow = srcBuf.buf;
+  TCoeff*        dstRow = dstBuf.buf;
+
+  const bool rowWise = isLuma(compID) ? tu.cu->bdpcmMode == 1 : tu.cu->bdpcmModeChroma == 1;
+  if( rowWise )
+  {
+    // running sum from left to right, restarted on every row
+    for( int y = 0; y < height; y++, srcRow += srcBuf.stride, dstRow += dstBuf.stride )
+    {
+      dstRow[0] = srcRow[0];
+      for( int x = 1; x < width; x++ )
+      {
+        dstRow[x] = Clip3(inputMinimum, inputMaximum, dstRow[x - 1] + srcRow[x]);
+      }
+    }
+  }
+  else
+  {
+    // first row is copied unchanged; each further row adds its levels to the reconstructed row above
+    for( int x = 0; x < width; x++ )
+    {
+      dstRow[x] = srcRow[x];
+    }
+    for( int y = 1; y < height; y++ )
+    {
+      const TCoeff* above = dstRow;
+      srcRow += srcBuf.stride;
+      dstRow += dstBuf.stride;
+      for( int x = 0; x < width; x++ ) { dstRow[x] = Clip3(inputMinimum, inputMaximum, above[x] + srcRow[x]); }
+    }
+  }
+}
+
+// In-place forward residual DPCM on the levels of one component: each level is replaced by its difference to the
+// left neighbour (BDPCM mode 1) or to the neighbour above (any other mode); the first column / first row is kept.
+void fwdResDPCM( TransformUnit &tu, const ComponentID &compID )
+{
+  CoeffBuf  levels = tu.getCoeffs(compID);
+  const int width  = tu.blocks[compID].width;
+  const int height = tu.blocks[compID].height;
+  if( isLuma(compID) ? tu.cu->bdpcmMode == 1 : tu.cu->bdpcmModeChroma == 1)
+  {
+    TCoeff* row = levels.buf;
+    for( int y = 0; y < height; y++, row += levels.stride )
+    {
+      // right to left, so the left neighbour still holds its original level when it is subtracted
+      for( int x = width - 1; x > 0; x-- )
+      {
+        row[x] -= row[x - 1];
+      }
+    }
+  }
+  else
+  {
+    // bottom to top, so the row above still holds its original levels when it is subtracted
+    for( int y = height - 1; y > 0; y-- )
+    {
+      TCoeff*       row   = levels.buf + y * levels.stride;
+      const TCoeff* above = row - levels.stride;
+      for( int x = 0; x < width; x++ )
+      {
+        row[x] -= above[x];
+      }
+    }
+  }
+}
 
-#if HEVC_USE_SIGN_HIDING
 // To minimize the distortion only. No rate is considered.
 void Quant::xSignBitHidingHDQ( TCoeff* pQCoef, const TCoeff* pCoef, TCoeff* deltaU, const CoeffCodingContext& cctx, const int maxLog2TrDynamicRange )
 {
@@ -272,7 +347,6 @@ void Quant::xSignBitHidingHDQ( TCoeff* pQCoef, const TCoeff* pCoef, TCoeff* delt
 
   return;
 }
-#endif
 
 void Quant::dequant(const TransformUnit &tu,
                              CoeffBuf      &dstCoeff,
@@ -283,48 +357,43 @@ void Quant::dequant(const TransformUnit &tu,
   const CompArea       &area               = tu.blocks[compID];
   const uint32_t            uiWidth            = area.width;
   const uint32_t            uiHeight           = area.height;
-  const TCoeff   *const piQCoef            = tu.getCoeffs(compID).buf;
         TCoeff   *const piCoef             = dstCoeff.buf;
   const uint32_t            numSamplesInBlock  = uiWidth * uiHeight;
   const int             maxLog2TrDynamicRange = sps->getMaxLog2TrDynamicRange(toChannelType(compID));
   const TCoeff          transformMinimum   = -(1 << maxLog2TrDynamicRange);
   const TCoeff          transformMaximum   =  (1 << maxLog2TrDynamicRange) - 1;
-#if HEVC_USE_SCALING_LISTS
-  const bool            enableScalingLists = getUseScalingList(uiWidth, uiHeight, (tu.transformSkip[compID] != 0));
+  const bool            isTransformSkip = (tu.mtsIdx[compID] == MTS_SKIP);
+
+  const bool            disableSMForLFNST = tu.cs->picHeader->getScalingListPresentFlag() ? tu.cs->picHeader->getScalingListAPS()->getScalingList().getDisableScalingMatrixForLfnstBlks() : false;
+  const bool            enableScalingLists = getUseScalingList(uiWidth, uiHeight, isTransformSkip, tu.cu->lfnstIdx > 0, disableSMForLFNST);
   const int             scalingListType    = getScalingListType(tu.cu->predMode, compID);
-#endif
   const int             channelBitDepth    = sps->getBitDepth(toChannelType(compID));
 
-#if HEVC_USE_SCALING_LISTS
+  const TCoeff          *coef;
+  if ((tu.cu->bdpcmMode && isLuma(compID)) || ( tu.cu->bdpcmModeChroma && isChroma(compID) ))
+  {
+    invResDPCM( tu, compID, dstCoeff );
+    coef = piCoef;
+  }
+  else
+  {
+    coef = tu.getCoeffs(compID).buf;
+  }
+  const TCoeff          *const piQCoef = coef;
   CHECK(scalingListType >= SCALING_LIST_NUM, "Invalid scaling list");
-#endif
   CHECK(uiWidth > m_uiMaxTrSize, "Unsupported transformation size");
 
   // Represents scaling through forward transform
-  const bool bClipTransformShiftTo0 = tu.mtsIdx!=1 && sps->getSpsRangeExtension().getExtendedPrecisionProcessingFlag();
+  const bool bClipTransformShiftTo0 = tu.mtsIdx[compID] != MTS_SKIP && sps->getSpsRangeExtension().getExtendedPrecisionProcessingFlag();
   const int  originalTransformShift = getTransformShift(channelBitDepth, area.size(), maxLog2TrDynamicRange);
-  const int  iTransformShift        = bClipTransformShiftTo0 ? std::max<int>(0, originalTransformShift) : originalTransformShift;
-
-  const int QP_per = cQP.per;
-  const int QP_rem = cQP.rem;
-
-#if HM_QTBT_AS_IN_JEM_QUANT
-  const bool needsScalingCorrection = TU::needsBlockSizeTrafoScale( tu, compID );
-  const int  NEScale    = TU::needsSqrt2Scale( tu, compID ) ? 181 : 1;
-#if HEVC_USE_SCALING_LISTS
-  const int  rightShift = (needsScalingCorrection ?   8 : 0 ) + (IQUANT_SHIFT - (iTransformShift + QP_per)) + (enableScalingLists ? LOG2_SCALING_LIST_NEUTRAL_VALUE : 0);
-#else
-  const int  rightShift = (needsScalingCorrection ?   8 : 0 ) + (IQUANT_SHIFT - (iTransformShift + QP_per));
-#endif
-#else
-#if HEVC_USE_SCALING_LISTS
-  const int  rightShift = (IQUANT_SHIFT - (iTransformShift + QP_per)) + (enableScalingLists ? LOG2_SCALING_LIST_NEUTRAL_VALUE : 0);
-#else
-  const int  rightShift = (IQUANT_SHIFT - (iTransformShift + QP_per));
-#endif
-#endif
+  const bool needSqrtAdjustment     = TU::needsBlockSizeTrafoScale( tu, compID );
+  const int  iTransformShift        = (bClipTransformShiftTo0 ? std::max<int>(0, originalTransformShift) : originalTransformShift) + (needSqrtAdjustment?-1:0);
+
+  const int QP_per = cQP.per(isTransformSkip);
+  const int QP_rem = cQP.rem(isTransformSkip);
+
+  const int  rightShift = (IQUANT_SHIFT - ((isTransformSkip ? 0 : iTransformShift) + QP_per)) + (enableScalingLists ? LOG2_SCALING_LIST_NEUTRAL_VALUE : 0);
 
-#if HEVC_USE_SCALING_LISTS
   if(enableScalingLists)
   {
     //from the dequantization equation:
@@ -336,9 +405,9 @@ void Quant::dequant(const TransformUnit &tu,
     const Intermediate_Int inputMinimum        = -(1 << (targetInputBitDepth - 1));
     const Intermediate_Int inputMaximum        =  (1 << (targetInputBitDepth - 1)) - 1;
 
-    const uint32_t uiLog2TrWidth  = g_aucLog2[uiWidth];
-    const uint32_t uiLog2TrHeight = g_aucLog2[uiHeight];
-    int *piDequantCoef        = getDequantCoeff(scalingListType, QP_rem, uiLog2TrWidth - 1, uiLog2TrHeight - 1);
+    const uint32_t uiLog2TrWidth  = floorLog2(uiWidth);
+    const uint32_t uiLog2TrHeight = floorLog2(uiHeight);
+    int *piDequantCoef        = getDequantCoeff(scalingListType, QP_rem, uiLog2TrWidth, uiLog2TrHeight);
 
     if(rightShift > 0)
     {
@@ -347,11 +416,7 @@ void Quant::dequant(const TransformUnit &tu,
       for( int n = 0; n < numSamplesInBlock; n++ )
       {
         const TCoeff           clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, piQCoef[n]));
-#if HM_QTBT_AS_IN_JEM_QUANT
-        const Intermediate_Int iCoeffQ   = ((Intermediate_Int(clipQCoef) * piDequantCoef[n] * NEScale) + iAdd ) >> rightShift;
-#else
         const Intermediate_Int iCoeffQ   = ((Intermediate_Int(clipQCoef) * piDequantCoef[n]) + iAdd ) >> rightShift;
-#endif
 
         piCoef[n] = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
       }
@@ -363,11 +428,7 @@ void Quant::dequant(const TransformUnit &tu,
       for( int n = 0; n < numSamplesInBlock; n++ )
       {
         const TCoeff           clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, piQCoef[n]));
-#if HM_QTBT_AS_IN_JEM_QUANT
-        const Intermediate_Int iCoeffQ   = (Intermediate_Int(clipQCoef) * piDequantCoef[n] * NEScale) << leftShift;
-#else
         const Intermediate_Int iCoeffQ   = (Intermediate_Int(clipQCoef) * piDequantCoef[n]) << leftShift;
-#endif
 
         piCoef[n] = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
       }
@@ -375,12 +436,7 @@ void Quant::dequant(const TransformUnit &tu,
   }
   else
   {
-#endif
-#if HM_QTBT_AS_IN_JEM_QUANT
-    const int scale     = g_invQuantScales[QP_rem] * NEScale;
-#else
-    const int scale     = g_invQuantScales[QP_rem];
-#endif
+    const int scale     = g_invQuantScales[needSqrtAdjustment?1:0][QP_rem];
     const int scaleBits = ( IQUANT_SHIFT + 1 );
 
     //from the dequantisation equation:
@@ -414,9 +470,7 @@ void Quant::dequant(const TransformUnit &tu,
         piCoef[n] = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
       }
     }
-#if HEVC_USE_SCALING_LISTS
   }
-#endif
 }
 
 void Quant::init( uint32_t uiMaxTrSize,
@@ -437,6 +491,7 @@ void Quant::init( uint32_t uiMaxTrSize,
 #if T0196_SELECTIVE_RDOQ
   m_useSelectiveRDOQ     = useSelectiveRDOQ;
 #endif
+  m_resetStore = true;
 }
 
 #if ENABLE_SPLIT_PARALLELISM
@@ -447,7 +502,6 @@ void Quant::copyState( const Quant& other )
 }
 #endif
 
-#if HEVC_USE_SCALING_LISTS
 /** set quantized matrix coefficient for encode
  * \param scalingList            quantized matrix address
  * \param format                 chroma format
@@ -459,14 +513,39 @@ void Quant::setScalingList(ScalingList *scalingList, const int maxLog2TrDynamicR
   const int minimumQp = 0;
   const int maximumQp = SCALING_LIST_REM_NUM;
 
-  for(uint32_t size = 0; size < SCALING_LIST_SIZE_NUM; size++)
+  int scalingListId = 0;
+  int recScalingListId = 0;
+  for(uint32_t size = SCALING_LIST_FIRST_CODED; size <= SCALING_LIST_LAST_CODED; size++) //2x2->64x64
   {
     for(uint32_t list = 0; list < SCALING_LIST_NUM; list++)
     {
+      if ((size == SCALING_LIST_2x2 && list < 4) || (size == SCALING_LIST_64x64 && list % (SCALING_LIST_NUM / SCALING_LIST_PRED_MODES) != 0))   // skip 2x2 luma
+        continue;
       for(int qp = minimumQp; qp < maximumQp; qp++)
       {
-        xSetScalingListEnc(scalingList,list,size,qp);
-        xSetScalingListDec(*scalingList,list,size,qp);
+        xSetScalingListEnc(scalingList, list, size, qp, scalingListId);
+        xSetScalingListDec(*scalingList, list, size, qp, scalingListId);
+      }
+      scalingListId++;
+    }
+  }
+  // non-square lists: based on the square results, with down-sampling applied
+  for (uint32_t sizew = 0; sizew <= SCALING_LIST_LAST_CODED; sizew++) //7
+  {
+    for (uint32_t sizeh = 0; sizeh <= SCALING_LIST_LAST_CODED; sizeh++) //7
+    {
+      if (sizew == sizeh || (sizew == SCALING_LIST_1x1 && sizeh<SCALING_LIST_4x4) || (sizeh == SCALING_LIST_1x1 && sizew<SCALING_LIST_4x4)) continue;
+      for (uint32_t list = 0; list < SCALING_LIST_NUM; list++) //9
+      {
+        int largerSide = (sizew > sizeh) ? sizew : sizeh;
+        if (largerSide == SCALING_LIST_64x64 && list % (SCALING_LIST_NUM / SCALING_LIST_PRED_MODES) != 0) continue;
+        if (largerSide < SCALING_LIST_4x4) printf("Rectangle Error !\n");
+        recScalingListId = SCALING_LIST_NUM * (largerSide - 2) + 2 + (list / ((largerSide == SCALING_LIST_64x64) ? 3 : 1));
+        for (int qp = minimumQp; qp < maximumQp; qp++)
+        {
+          xSetRecScalingListEnc(scalingList, list, sizew, sizeh, qp, recScalingListId);
+          xSetRecScalingListDec(*scalingList, list, sizew, sizeh, qp, recScalingListId);
+        }
       }
     }
   }
@@ -480,13 +559,38 @@ void Quant::setScalingListDec(const ScalingList &scalingList)
   const int minimumQp = 0;
   const int maximumQp = SCALING_LIST_REM_NUM;
 
-  for(uint32_t size = 0; size < SCALING_LIST_SIZE_NUM; size++)
+  int scalingListId = 0;
+  int recScalingListId = 0;
+  for (uint32_t size = SCALING_LIST_FIRST_CODED; size <= SCALING_LIST_LAST_CODED; size++)
   {
     for(uint32_t list = 0; list < SCALING_LIST_NUM; list++)
     {
+      if ((size == SCALING_LIST_2x2 && list < 4) || (size == SCALING_LIST_64x64 && list % (SCALING_LIST_NUM / SCALING_LIST_PRED_MODES) != 0))   // skip 2x2 luma
+        continue;
       for(int qp = minimumQp; qp < maximumQp; qp++)
       {
-        xSetScalingListDec(scalingList,list,size,qp);
+        xSetScalingListDec(scalingList, list, size, qp, scalingListId);
+      }
+      scalingListId++;
+    }
+  }
+  // non-square lists: based on the square results,
+  // with down-sampling applied
+  for (uint32_t sizew = 0; sizew <= SCALING_LIST_LAST_CODED; sizew++) //7
+  {
+    for (uint32_t sizeh = 0; sizeh <= SCALING_LIST_LAST_CODED; sizeh++) //7
+    {
+      if (sizew == sizeh || (sizew == SCALING_LIST_1x1 && sizeh<SCALING_LIST_4x4) || (sizeh == SCALING_LIST_1x1 && sizew<SCALING_LIST_4x4)) continue;
+      for (uint32_t list = 0; list < SCALING_LIST_NUM; list++) //9
+      {
+        int largerSide = (sizew > sizeh) ? sizew : sizeh;
+        if (largerSide == SCALING_LIST_64x64 && list % (SCALING_LIST_NUM / SCALING_LIST_PRED_MODES) != 0) continue;
+        if (largerSide < SCALING_LIST_4x4) printf("Rectangle Error !\n");
+        recScalingListId = SCALING_LIST_NUM * (largerSide - 2) + 2 + (list / ((largerSide == SCALING_LIST_64x64) ? 3 : 1));
+        for (int qp = minimumQp; qp < maximumQp; qp++)
+        {
+          xSetRecScalingListDec(scalingList, list, sizew, sizeh, qp, recScalingListId);
+        }
       }
     }
   }
@@ -500,23 +604,24 @@ void Quant::setScalingListDec(const ScalingList &scalingList)
  * \param qp Quantization parameter
  * \param format chroma format
  */
-void Quant::xSetScalingListEnc(ScalingList *scalingList, uint32_t listId, uint32_t sizeId, int qp)
+void Quant::xSetScalingListEnc(ScalingList *scalingList, uint32_t listId, uint32_t sizeId, int qp, uint32_t scalingListId)
 {
   uint32_t width  = g_scalingListSizeX[sizeId];
   uint32_t height = g_scalingListSizeX[sizeId];
   uint32_t ratio  = g_scalingListSizeX[sizeId]/std::min(MAX_MATRIX_SIZE_NUM,(int)g_scalingListSizeX[sizeId]);
   int *quantcoeff;
-  int *coeff  = scalingList->getScalingListAddress(sizeId,listId);
+  int *coeff = scalingList->getScalingListAddress(scalingListId);
   quantcoeff  = getQuantCoeff(listId, qp, sizeId, sizeId);
 
-  int quantScales = g_quantScales[qp];
+  const bool blockIsNotPowerOf4 = ((floorLog2(width) + floorLog2(height)) & 1) == 1;
+  int quantScales = g_quantScales[blockIsNotPowerOf4?1:0][qp];
 
   processScalingListEnc(coeff,
                         quantcoeff,
                         (quantScales << LOG2_SCALING_LIST_NEUTRAL_VALUE),
                         height, width, ratio,
                         std::min(MAX_MATRIX_SIZE_NUM, (int)g_scalingListSizeX[sizeId]),
-                        scalingList->getScalingListDC(sizeId,listId));
+                        scalingList->getScalingListDC(scalingListId));
 }
 
 /** set quantized matrix coefficient for decode
@@ -526,26 +631,81 @@ void Quant::xSetScalingListEnc(ScalingList *scalingList, uint32_t listId, uint32
  * \param qp Quantization parameter
  * \param format chroma format
  */
-void Quant::xSetScalingListDec(const ScalingList &scalingList, uint32_t listId, uint32_t sizeId, int qp)
+void Quant::xSetScalingListDec(const ScalingList &scalingList, uint32_t listId, uint32_t sizeId, int qp, uint32_t scalingListId)
 {
   uint32_t width  = g_scalingListSizeX[sizeId];
   uint32_t height = g_scalingListSizeX[sizeId];
   uint32_t ratio  = g_scalingListSizeX[sizeId]/std::min(MAX_MATRIX_SIZE_NUM,(int)g_scalingListSizeX[sizeId]);
   int *dequantcoeff;
-  const int *coeff  = scalingList.getScalingListAddress(sizeId,listId);
+  const int *coeff = scalingList.getScalingListAddress(scalingListId);
 
   dequantcoeff = getDequantCoeff(listId, qp, sizeId, sizeId);
 
-  int invQuantScale = g_invQuantScales[qp];
+  const bool blockIsNotPowerOf4 = ((floorLog2(width) + floorLog2(height)) & 1) == 1;
+  int invQuantScale = g_invQuantScales[blockIsNotPowerOf4?1:0][qp];
 
   processScalingListDec(coeff,
                         dequantcoeff,
                         invQuantScale,
                         height, width, ratio,
                         std::min(MAX_MATRIX_SIZE_NUM, (int)g_scalingListSizeX[sizeId]),
-                        scalingList.getScalingListDC(sizeId,listId));
+                        scalingList.getScalingListDC(scalingListId));
 }
 
+/** set quantized matrix coefficient for encode, non-square (width != height) blocks
+* \param scalingList quantized matrix address
+* \param listId List index
+* \param sizeIdw size index of the block width (index into g_scalingListSizeX)
+* \param sizeIdh size index of the block height (index into g_scalingListSizeX)
+* \param qp Quantization parameter (indexes g_quantScales and the quant coefficient table)
+* \param scalingListId index passed to getScalingListAddress() / getScalingListDC() */
+void Quant::xSetRecScalingListEnc(ScalingList *scalingList, uint32_t listId, uint32_t sizeIdw, uint32_t sizeIdh, int qp, uint32_t scalingListId)
+{
+  if (sizeIdw == sizeIdh) return; // nothing to do here when width and height size indices are equal
+
+  uint32_t width = g_scalingListSizeX[sizeIdw];
+  uint32_t height = g_scalingListSizeX[sizeIdh];
+  uint32_t largeSideId = (sizeIdw > sizeIdh) ? sizeIdw : sizeIdh;  // size index of the longer side
+  int *quantcoeff;
+  int *coeff = scalingList->getScalingListAddress(scalingListId);// source list; read with row stride sizuNum (8 or 4) below
+  quantcoeff = getQuantCoeff(listId, qp, sizeIdw, sizeIdh);// destination table for the width x height block
+  const bool blockIsNotPowerOf4 = ((floorLog2(width) + floorLog2(height)) & 1) == 1; // true when log2(width) + log2(height) is odd
+  int quantScales = g_quantScales[blockIsNotPowerOf4?1:0][qp]; // row 1 of g_quantScales is used when the log2 sum is odd
+
+  processScalingListEnc(coeff,
+    quantcoeff,
+    (quantScales << LOG2_SCALING_LIST_NEUTRAL_VALUE),
+    height, width,
+    ((largeSideId>3) ? 2 : 1),      // ratio argument
+    ((largeSideId >= 3) ? 8 : 4),   // sizuNum argument: side length of the source matrix
+    scalingList->getScalingListDC(scalingListId)); // dc argument
+}
+/** set quantized matrix coefficient for decode, non-square (width != height) blocks
+* \param scalingList quantized matrix address
+* \param listId List index
+* \param sizeIdw size index of the block width (index into g_scalingListSizeX)
+* \param sizeIdh size index of the block height (index into g_scalingListSizeX)
+* \param qp Quantization parameter (indexes g_invQuantScales and the dequant coefficient table)
+* \param scalingListId index passed to getScalingListAddress() / getScalingListDC() */
+void Quant::xSetRecScalingListDec(const ScalingList &scalingList, uint32_t listId, uint32_t sizeIdw, uint32_t sizeIdh, int qp, uint32_t scalingListId)
+{
+  if (sizeIdw == sizeIdh) return; // nothing to do here when width and height size indices are equal
+  uint32_t width = g_scalingListSizeX[sizeIdw];
+  uint32_t height = g_scalingListSizeX[sizeIdh];
+  uint32_t largeSideId = (sizeIdw > sizeIdh) ? sizeIdw : sizeIdh;  // size index of the longer side
+
+  const int *coeff = scalingList.getScalingListAddress(scalingListId); // source list; read with row stride sizeNum (8 or 4) below
+  int *dequantcoeff;
+  dequantcoeff = getDequantCoeff(listId, qp, sizeIdw, sizeIdh); // destination table for the width x height block
+  const bool blockIsNotPowerOf4 = ((floorLog2(width) + floorLog2(height)) & 1) == 1; // true when log2(width) + log2(height) is odd
+  int invQuantScale = g_invQuantScales[blockIsNotPowerOf4 ? 1 : 0][qp]; // row 1 of g_invQuantScales is used when the log2 sum is odd
+  processScalingListDec(coeff,
+                        dequantcoeff,
+                        invQuantScale,
+                        height, width, (largeSideId>3) ? 2 : 1, // last value is the ratio argument
+                        (largeSideId >= 3 ? 8 : 4),             // sizeNum argument: side length of the source matrix
+                        scalingList.getScalingListDC(scalingListId)); // dc argument
+}
 /** set flat matrix value to quantized coefficient
  */
 void Quant::setFlatScalingList(const int maxLog2TrDynamicRange[MAX_NUM_CHANNEL_TYPE], const BitDepths &bitDepths)
@@ -580,8 +740,9 @@ void Quant::xSetFlatScalingList(uint32_t list, uint32_t sizeX, uint32_t sizeY, i
   int *quantcoeff;
   int *dequantcoeff;
 
-  int quantScales    = g_quantScales   [qp];
-  int invQuantScales = g_invQuantScales[qp] << 4;
+  const bool blockIsNotPowerOf4 = ((floorLog2(g_scalingListSizeX[sizeX]) + floorLog2(g_scalingListSizeX[sizeY])) & 1) == 1;
+  int quantScales    = g_quantScales   [blockIsNotPowerOf4?1:0][qp];
+  int invQuantScales = g_invQuantScales[blockIsNotPowerOf4?1:0][qp] << 4;
 
   quantcoeff   = getQuantCoeff(list, qp, sizeX, sizeY);
   dequantcoeff = getDequantCoeff(list, qp, sizeX, sizeY);
@@ -605,6 +766,37 @@ void Quant::xSetFlatScalingList(uint32_t list, uint32_t sizeX, uint32_t sizeY, i
  */
 void Quant::processScalingListEnc( int *coeff, int *quantcoeff, int quantScales, uint32_t height, uint32_t width, uint32_t ratio, int sizuNum, uint32_t dc)
 {
+  if (height != width)
+  {
+    for (uint32_t j = 0; j<height; j++)
+    {
+      for (uint32_t i = 0; i<width; i++)
+      {
+        if (j >= JVET_C0024_ZERO_OUT_TH || i >= JVET_C0024_ZERO_OUT_TH)
+        {
+          quantcoeff[j*width + i] = 0;
+          continue;
+        }
+        int ratioWH = (height>width) ? height / width : width / height;
+        int ratioH = (height / sizuNum) ? (height / sizuNum) : (sizuNum / height); // 32/8 = 4
+        int ratioW = (width / sizuNum) ? (width / sizuNum) : (sizuNum / width); //16/8 = 2 //sizeNum = 8/4
+        if (height > width)
+        {
+          quantcoeff[j*width + i] = quantScales / coeff[sizuNum * (j / ratioH) + ((i * ratioWH) / ratioH)];
+        }
+        else //ratioH < ratioW
+        {
+          quantcoeff[j*width + i] = quantScales / coeff[sizuNum * ((j * ratioWH) / ratioW) + (i / ratioW)];
+        }
+        int largeOne = (width > height) ? width : height;
+        if (largeOne>8)
+        {
+          quantcoeff[0] = quantScales / dc;
+        }
+      }
+    }
+    return;
+  }
   for(uint32_t j=0;j<height;j++)
   {
     for(uint32_t i=0;i<width;i++)
@@ -619,27 +811,63 @@ void Quant::processScalingListEnc( int *coeff, int *quantcoeff, int quantScales,
   }
 }
 
-/** set quantized matrix coefficient for decode
- * \param coeff quantaized matrix address
- * \param dequantcoeff quantaized matrix address
- * \param invQuantScales IQ(QP%6))
- * \param height height
- * \param width width
- * \param ratio ratio for upscale
- * \param sizuNum matrix size
- * \param dc dc parameter
- */
-void Quant::processScalingListDec( const int *coeff, int *dequantcoeff, int invQuantScales, uint32_t height, uint32_t width, uint32_t ratio, int sizuNum, uint32_t dc)
+void Quant::processScalingListDec( const int *coeff, int *dequantcoeff, int invQuantScales, uint32_t height, uint32_t width, uint32_t ratio, int sizeNum, uint32_t dc)
 {
-  for(uint32_t j=0;j<height;j++)
+  if (height != width)
   {
-    for(uint32_t i=0;i<width;i++)
+    int ratioWH = (height > width  ) ? (height / width  ) : (width   / height);
+    int ratioH  = (height / sizeNum) ? (height / sizeNum) : (sizeNum / height);
+    int ratioW  = (width  / sizeNum) ? (width  / sizeNum) : (sizeNum / width );
+    if (height > width)
+    {
+      for (uint32_t j = 0; j < height; j++)
+      {
+        int coeffLineSep        = (j / ratioH) * sizeNum;
+        int dequantCoeffLineSep = j * width;
+        for (uint32_t i = 0; i < width; i++)
+        {
+          if (i >= JVET_C0024_ZERO_OUT_TH || j >= JVET_C0024_ZERO_OUT_TH)
+          {
+            dequantcoeff[dequantCoeffLineSep + i] = 0;
+            continue;
+          }
+          dequantcoeff[dequantCoeffLineSep + i] = invQuantScales * coeff[coeffLineSep + ((i * ratioWH) / ratioH)];
+        }
+      }
+    }
+    else  //ratioH < ratioW
+    {
+      for (uint32_t j = 0; j < height; j++)
+      {
+        int coeffLineSep        = ((j * ratioWH) / ratioW) * sizeNum;
+        int dequantCoeffLineSep = j * width;
+        for (uint32_t i = 0; i < width; i++)
+        {
+          if (i >= JVET_C0024_ZERO_OUT_TH || j >= JVET_C0024_ZERO_OUT_TH)
+          {
+            dequantcoeff[dequantCoeffLineSep + i] = 0;
+            continue;
+          }
+          dequantcoeff[dequantCoeffLineSep + i] = invQuantScales * coeff[coeffLineSep + (i / ratioW)];
+        }
+      }
+    }
+    int largeOne = (width > height) ? width : height;
+    if (largeOne > 8)
+      dequantcoeff[0] = invQuantScales * dc;
+    return;
+  }
+  for (uint32_t j = 0; j<height; j++)
+  {
+    int coeffLineSep        = (j / ratio) * sizeNum;
+    int dequantCoeffLineSep = j * width;
+    for (uint32_t i = 0; i<width; i++)
     {
-      dequantcoeff[j*width + i] = invQuantScales * coeff[sizuNum * (j / ratio) + i / ratio];
+      dequantcoeff[dequantCoeffLineSep + i] = invQuantScales * coeff[coeffLineSep + i / ratio];
     }
   }
 
-  if(ratio > 1)
+  if (ratio > 1)
   {
     dequantcoeff[0] = invQuantScales * dc;
   }
@@ -651,6 +879,29 @@ void Quant::xInitScalingList( const Quant* other )
 {
   m_isScalingListOwner = other == nullptr;
 
+  size_t numElements = 0;
+
+  for (uint32_t sizeIdX = 0; sizeIdX < SCALING_LIST_SIZE_NUM; sizeIdX++)
+  {
+    for (uint32_t sizeIdY = 0; sizeIdY < SCALING_LIST_SIZE_NUM; sizeIdY++)
+    {
+      for (uint32_t qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
+      {
+        for (uint32_t listId = 0; listId < SCALING_LIST_NUM; listId++)
+        {
+          numElements += g_scalingListSizeX[sizeIdX] * g_scalingListSizeX[sizeIdY];
+        }
+      }
+    }
+  }
+
+  if (m_isScalingListOwner)
+  {
+    m_quantCoef[0][0][0][0] = new int[2 * numElements];
+  }
+
+  size_t offset = 0;
+
   for(uint32_t sizeIdX = 0; sizeIdX < SCALING_LIST_SIZE_NUM; sizeIdX++)
   {
     for(uint32_t sizeIdY = 0; sizeIdY < SCALING_LIST_SIZE_NUM; sizeIdY++)
@@ -661,8 +912,10 @@ void Quant::xInitScalingList( const Quant* other )
         {
           if( m_isScalingListOwner )
           {
-            m_quantCoef   [sizeIdX][sizeIdY][listId][qp] = new int    [g_scalingListSizeX[sizeIdX]*g_scalingListSizeX[sizeIdY]];
-            m_dequantCoef [sizeIdX][sizeIdY][listId][qp] = new int    [g_scalingListSizeX[sizeIdX]*g_scalingListSizeX[sizeIdY]];
+            m_quantCoef[sizeIdX][sizeIdY][listId][qp] = m_quantCoef[0][0][0][0] + offset;
+            offset += g_scalingListSizeX[sizeIdX] * g_scalingListSizeX[sizeIdY];
+            m_dequantCoef[sizeIdX][sizeIdY][listId][qp] = m_quantCoef[0][0][0][0] + offset;
+            offset += g_scalingListSizeX[sizeIdX] * g_scalingListSizeX[sizeIdY];
           }
           else
           {
@@ -673,6 +926,8 @@ void Quant::xInitScalingList( const Quant* other )
       }
     }
   }
+
+  m_pairCheck = 0;
 }
 
 /** destroy quantization matrix array
@@ -681,129 +936,86 @@ void Quant::xDestroyScalingList()
 {
   if( !m_isScalingListOwner ) return;
 
-  for(uint32_t sizeIdX = 0; sizeIdX < SCALING_LIST_SIZE_NUM; sizeIdX++)
-  {
-    for(uint32_t sizeIdY = 0; sizeIdY < SCALING_LIST_SIZE_NUM; sizeIdY++)
-    {
-      for(uint32_t listId = 0; listId < SCALING_LIST_NUM; listId++)
-      {
-        for(uint32_t qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
-        {
-          if(m_quantCoef[sizeIdX][sizeIdY][listId][qp])
-          {
-            delete [] m_quantCoef[sizeIdX][sizeIdY][listId][qp];
-          }
-          if(m_dequantCoef[sizeIdX][sizeIdY][listId][qp])
-          {
-            delete [] m_dequantCoef[sizeIdX][sizeIdY][listId][qp];
-          }
-        }
-      }
-    }
-  }
+  delete[] m_quantCoef[0][0][0][0];
 }
-#endif
 
 void Quant::quant(TransformUnit &tu, const ComponentID &compID, const CCoeffBuf &pSrc, TCoeff &uiAbsSum, const QpParam &cQP, const Ctx& ctx)
 {
   const SPS &sps            = *tu.cs->sps;
   const CompArea &rect      = tu.blocks[compID];
-#if HEVC_USE_SCALING_LISTS || HEVC_USE_SIGN_HIDING
   const uint32_t uiWidth        = rect.width;
   const uint32_t uiHeight       = rect.height;
-#endif
   const int channelBitDepth = sps.getBitDepth(toChannelType(compID));
 
   const CCoeffBuf &piCoef   = pSrc;
         CoeffBuf   piQCoef  = tu.getCoeffs(compID);
 
-  const bool useTransformSkip      = tu.mtsIdx==1;
+  const bool useTransformSkip      = (tu.mtsIdx[compID] == MTS_SKIP);
   const int  maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange(toChannelType(compID));
 
   {
-#if HEVC_USE_SIGN_HIDING
-    CoeffCodingContext cctx(tu, compID, tu.cs->slice->getSignDataHidingEnabledFlag());
-#else
-    CoeffCodingContext cctx(tu, compID);
-#endif
+    CoeffCodingContext cctx(tu, compID, tu.cs->picHeader->getSignDataHidingEnabledFlag());
 
     const TCoeff entropyCodingMinimum = -(1 << maxLog2TrDynamicRange);
     const TCoeff entropyCodingMaximum =  (1 << maxLog2TrDynamicRange) - 1;
 
-#if HEVC_USE_SIGN_HIDING
     TCoeff deltaU[MAX_TB_SIZEY * MAX_TB_SIZEY];
-#endif
-#if HEVC_USE_SCALING_LISTS
     int scalingListType = getScalingListType(tu.cu->predMode, compID);
     CHECK(scalingListType >= SCALING_LIST_NUM, "Invalid scaling list");
-    const uint32_t uiLog2TrWidth = g_aucLog2[uiWidth];
-    const uint32_t uiLog2TrHeight = g_aucLog2[uiHeight];
-    int *piQuantCoeff = getQuantCoeff(scalingListType, cQP.rem, uiLog2TrWidth-1, uiLog2TrHeight-1);
+    const uint32_t uiLog2TrWidth = floorLog2(uiWidth);
+    const uint32_t uiLog2TrHeight = floorLog2(uiHeight);
+    int *piQuantCoeff = getQuantCoeff(scalingListType, cQP.rem(useTransformSkip), uiLog2TrWidth, uiLog2TrHeight);
 
-    const bool enableScalingLists             = getUseScalingList(uiWidth, uiHeight, useTransformSkip);
-#endif
-    const int  defaultQuantisationCoefficient = g_quantScales[cQP.rem];
+    const bool disableSMForLFNST = tu.cs->picHeader->getScalingListPresentFlag() ? tu.cs->picHeader->getScalingListAPS()->getScalingList().getDisableScalingMatrixForLfnstBlks() : false;
+    const bool enableScalingLists = getUseScalingList(uiWidth, uiHeight, useTransformSkip, tu.cu->lfnstIdx > 0, disableSMForLFNST);
 
-    /* for 422 chroma blocks, the effective scaling applied during transformation is not a power of 2, hence it cannot be
-     * implemented as a bit-shift (the quantised result will be sqrt(2) * larger than required). Alternatively, adjust the
-     * uiLog2TrSize applied in iTransformShift, such that the result is 1/sqrt(2) the required result (i.e. smaller)
-     * Then a QP+3 (sqrt(2)) or QP-3 (1/sqrt(2)) method could be used to get the required result
-     */
-    // Represents scaling through forward transform
-    int iTransformShift = getTransformShift(channelBitDepth, rect.size(), maxLog2TrDynamicRange);
+    // for blocks where width*height != 4^N, the effective scaling applied during transformation cannot be
+    // compensated by a bit-shift (the quantised result will be sqrt(2) * larger than required).
+    // The quantScale table and shift is used to compensate for this.
+    const bool needSqrtAdjustment= TU::needsBlockSizeTrafoScale( tu, compID );
+    const int defaultQuantisationCoefficient    = g_quantScales[needSqrtAdjustment?1:0][cQP.rem(useTransformSkip)];
+    int iTransformShift = getTransformShift(channelBitDepth, rect.size(), maxLog2TrDynamicRange) + ( needSqrtAdjustment?-1:0);
 
     if (useTransformSkip && sps.getSpsRangeExtension().getExtendedPrecisionProcessingFlag())
     {
       iTransformShift = std::max<int>(0, iTransformShift);
     }
 
-    int iWHScale = 1;
-#if HM_QTBT_AS_IN_JEM_QUANT
-    if( TU::needsBlockSizeTrafoScale( tu, compID ) )
-    {
-      iTransformShift += ADJ_QUANT_SHIFT;
-      iWHScale = 181;
-    }
-#endif
-
-    const int iQBits = QUANT_SHIFT + cQP.per + iTransformShift;
+    const int iQBits = QUANT_SHIFT + cQP.per(useTransformSkip) + (useTransformSkip ? 0 : iTransformShift);
     // QBits will be OK for any internal bit depth as the reduction in transform shift is balanced by an increase in Qp_per due to QpBDOffset
 
     const int64_t iAdd = int64_t(tu.cs->slice->isIRAP() ? 171 : 85) << int64_t(iQBits - 9);
-#if HEVC_USE_SIGN_HIDING
     const int qBits8 = iQBits - 8;
-#endif
 
-    for (int uiBlockPos = 0; uiBlockPos < piQCoef.area(); uiBlockPos++)
+    const uint32_t lfnstIdx = tu.cu->lfnstIdx;
+    const int maxNumberOfCoeffs = lfnstIdx > 0 ? ((( uiWidth == 4 && uiHeight == 4 ) || ( uiWidth == 8 && uiHeight == 8) ) ? 8 : 16) : piQCoef.area();
+    memset( piQCoef.buf, 0, sizeof(TCoeff) * piQCoef.area() );
+    for (int uiBlockPos = 0; uiBlockPos < maxNumberOfCoeffs; uiBlockPos++ )
     {
       const TCoeff iLevel   = piCoef.buf[uiBlockPos];
       const TCoeff iSign    = (iLevel < 0 ? -1: 1);
 
-#if HEVC_USE_SCALING_LISTS
       const int64_t  tmpLevel = (int64_t)abs(iLevel) * (enableScalingLists ? piQuantCoeff[uiBlockPos] : defaultQuantisationCoefficient);
-#else
-      const int64_t  tmpLevel = (int64_t)abs(iLevel) * defaultQuantisationCoefficient;
-#endif
 
-      const TCoeff quantisedMagnitude = TCoeff((tmpLevel * iWHScale + iAdd ) >> iQBits);
-#if HEVC_USE_SIGN_HIDING
-      deltaU[uiBlockPos] = (TCoeff)((tmpLevel * iWHScale - ((int64_t)quantisedMagnitude<<iQBits) )>> qBits8);
-#endif
+      const TCoeff quantisedMagnitude = TCoeff((tmpLevel + iAdd ) >> iQBits);
+      deltaU[uiBlockPos] = (TCoeff)((tmpLevel - ((int64_t)quantisedMagnitude<<iQBits) )>> qBits8);
 
       uiAbsSum += quantisedMagnitude;
       const TCoeff quantisedCoefficient = quantisedMagnitude * iSign;
 
       piQCoef.buf[uiBlockPos] = Clip3<TCoeff>( entropyCodingMinimum, entropyCodingMaximum, quantisedCoefficient );
     } // for n
-#if HEVC_USE_SIGN_HIDING
-    if( cctx.signHiding() && uiWidth>=4 && uiHeight>=4 )
+    if ((tu.cu->bdpcmMode && isLuma(compID)) || (tu.cu->bdpcmModeChroma && isChroma(compID)) )
+    {
+      fwdResDPCM( tu, compID );
+    }
+    if( cctx.signHiding() )
     {
       if(uiAbsSum >= 2) //this prevents TUs with only one coefficient of value 1 from being tested
       {
         xSignBitHidingHDQ(piQCoef.buf, piCoef.buf, deltaU, cctx, maxLog2TrDynamicRange);
       }
     }
-#endif
   } //if RDOQ
   //return;
 }
@@ -812,53 +1024,42 @@ bool Quant::xNeedRDOQ(TransformUnit &tu, const ComponentID &compID, const CCoeff
 {
   const SPS &sps            = *tu.cs->sps;
   const CompArea &rect      = tu.blocks[compID];
-#if HEVC_USE_SCALING_LISTS
   const uint32_t uiWidth        = rect.width;
   const uint32_t uiHeight       = rect.height;
-#endif
   const int channelBitDepth = sps.getBitDepth(toChannelType(compID));
 
   const CCoeffBuf piCoef    = pSrc;
 
-  const bool useTransformSkip      = tu.mtsIdx==1;
+  const bool useTransformSkip      = (tu.mtsIdx[compID] == MTS_SKIP);
   const int  maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange(toChannelType(compID));
 
-#if HEVC_USE_SCALING_LISTS
   int scalingListType = getScalingListType(tu.cu->predMode, compID);
   CHECK(scalingListType >= SCALING_LIST_NUM, "Invalid scaling list");
 
-  const uint32_t uiLog2TrWidth  = g_aucLog2[uiWidth];
-  const uint32_t uiLog2TrHeight = g_aucLog2[uiHeight];
-  int *piQuantCoeff         = getQuantCoeff(scalingListType, cQP.rem, uiLog2TrWidth-1, uiLog2TrHeight-1);
+  const uint32_t uiLog2TrWidth  = floorLog2(uiWidth);
+  const uint32_t uiLog2TrHeight = floorLog2(uiHeight);
+  int *piQuantCoeff         = getQuantCoeff(scalingListType, cQP.rem(useTransformSkip), uiLog2TrWidth, uiLog2TrHeight);
 
-  const bool enableScalingLists             = getUseScalingList(uiWidth, uiHeight, (useTransformSkip != 0));
-#endif
-  const int  defaultQuantisationCoefficient = g_quantScales[cQP.rem];
+  const bool disableSMForLFNST = tu.cs->picHeader->getScalingListPresentFlag() ? tu.cs->picHeader->getScalingListAPS()->getScalingList().getDisableScalingMatrixForLfnstBlks() : false;
+  const bool enableScalingLists = getUseScalingList(uiWidth, uiHeight, (useTransformSkip != 0), tu.cu->lfnstIdx > 0, disableSMForLFNST);
 
   /* for 422 chroma blocks, the effective scaling applied during transformation is not a power of 2, hence it cannot be
     * implemented as a bit-shift (the quantised result will be sqrt(2) * larger than required). Alternatively, adjust the
     * uiLog2TrSize applied in iTransformShift, such that the result is 1/sqrt(2) the required result (i.e. smaller)
     * Then a QP+3 (sqrt(2)) or QP-3 (1/sqrt(2)) method could be used to get the required result
     */
-
-  // Represents scaling through forward transform
-  int iTransformShift = getTransformShift(channelBitDepth, rect.size(), maxLog2TrDynamicRange);
+  const bool needSqrtAdjustment= TU::needsBlockSizeTrafoScale( tu, compID );
+  const int defaultQuantisationCoefficient    = g_quantScales[needSqrtAdjustment?1:0][cQP.rem(useTransformSkip)];
+  int iTransformShift = getTransformShift(channelBitDepth, rect.size(), maxLog2TrDynamicRange) + (needSqrtAdjustment?-1:0);
 
   if (useTransformSkip && sps.getSpsRangeExtension().getExtendedPrecisionProcessingFlag())
   {
     iTransformShift = std::max<int>(0, iTransformShift);
   }
 
-  int iWHScale = 1;
-#if HM_QTBT_AS_IN_JEM_QUANT
-  if( TU::needsBlockSizeTrafoScale( tu, compID ) )
-  {
-    iTransformShift += ADJ_QUANT_SHIFT;
-    iWHScale = 181;
-  }
-#endif
 
-  const int iQBits = QUANT_SHIFT + cQP.per + iTransformShift;
+  const int iQBits = QUANT_SHIFT + cQP.per(useTransformSkip) + iTransformShift;
+  assert(iQBits>=0);
   // QBits will be OK for any internal bit depth as the reduction in transform shift is balanced by an increase in Qp_per due to QpBDOffset
 
   // iAdd is different from the iAdd used in normal quantization
@@ -867,12 +1068,8 @@ bool Quant::xNeedRDOQ(TransformUnit &tu, const ComponentID &compID, const CCoeff
   for (int uiBlockPos = 0; uiBlockPos < rect.area(); uiBlockPos++)
   {
     const TCoeff iLevel   = piCoef.buf[uiBlockPos];
-#if HEVC_USE_SCALING_LISTS
     const int64_t  tmpLevel = (int64_t)abs(iLevel) * (enableScalingLists ? piQuantCoeff[uiBlockPos] : defaultQuantisationCoefficient);
-#else
-    const int64_t  tmpLevel = (int64_t)abs(iLevel) * defaultQuantisationCoefficient;
-#endif
-    const TCoeff quantisedMagnitude = TCoeff((tmpLevel * iWHScale + iAdd ) >> iQBits);
+    const TCoeff quantisedMagnitude = TCoeff((tmpLevel + iAdd ) >> iQBits);
 
     if (quantisedMagnitude != 0)
     {
@@ -887,34 +1084,29 @@ void Quant::transformSkipQuantOneSample(TransformUnit &tu, const ComponentID &co
 {
   const SPS           &sps = *tu.cs->sps;
   const CompArea      &rect                           = tu.blocks[compID];
-#if HEVC_USE_SCALING_LISTS
   const uint32_t           uiWidth                        = rect.width;
   const uint32_t           uiHeight                       = rect.height;
-#endif
   const int            maxLog2TrDynamicRange          = sps.getMaxLog2TrDynamicRange(toChannelType(compID));
   const int            channelBitDepth                = sps.getBitDepth(toChannelType(compID));
   const int            iTransformShift                = getTransformShift(channelBitDepth, rect.size(), maxLog2TrDynamicRange);
-#if HEVC_USE_SCALING_LISTS
   const int            scalingListType                = getScalingListType(tu.cu->predMode, compID);
-  const bool           enableScalingLists             = getUseScalingList(uiWidth, uiHeight, true);
-#endif
-  const int            defaultQuantisationCoefficient = g_quantScales[cQP.rem];
+  const bool           disableSMForLFNST = tu.cs->picHeader->getScalingListPresentFlag() ? tu.cs->picHeader->getScalingListAPS()->getScalingList().getDisableScalingMatrixForLfnstBlks() : false;
+  const bool           enableScalingLists = getUseScalingList(uiWidth, uiHeight, true, tu.cu->lfnstIdx > 0, disableSMForLFNST);
+  const bool           useTransformSkip = (tu.mtsIdx[compID] == MTS_SKIP);
+  const int            defaultQuantisationCoefficient = g_quantScales[0][cQP.rem(useTransformSkip)];
 
-#if HEVC_USE_SCALING_LISTS
   CHECK( scalingListType >= SCALING_LIST_NUM, "Invalid scaling list" );
 
-  const uint32_t uiLog2TrWidth      = g_aucLog2[uiWidth];
-  const uint32_t uiLog2TrHeight     = g_aucLog2[uiHeight];
-  const int *const piQuantCoeff = getQuantCoeff(scalingListType, cQP.rem, uiLog2TrWidth-1, uiLog2TrHeight-1);
-#endif
+  const uint32_t uiLog2TrWidth      = floorLog2(uiWidth);
+  const uint32_t uiLog2TrHeight     = floorLog2(uiHeight);
+  const int *const piQuantCoeff = getQuantCoeff(scalingListType, cQP.rem(useTransformSkip), uiLog2TrWidth, uiLog2TrHeight);
 
   /* for 422 chroma blocks, the effective scaling applied during transformation is not a power of 2, hence it cannot be
   * implemented as a bit-shift (the quantised result will be sqrt(2) * larger than required). Alternatively, adjust the
   * uiLog2TrSize applied in iTransformShift, such that the result is 1/sqrt(2) the required result (i.e. smaller)
   * Then a QP+3 (sqrt(2)) or QP-3 (1/sqrt(2)) method could be used to get the required result
   */
-
-  const int iQBits = QUANT_SHIFT + cQP.per + iTransformShift;
+  const int iQBits = QUANT_SHIFT + cQP.per(useTransformSkip) + (useTransformSkip ? 0 : iTransformShift);
   // QBits will be OK for any internal bit depth as the reduction in transform shift is balanced by an increase in Qp_per due to QpBDOffset
   const int iAdd = int64_t(bUseHalfRoundingPoint ? 256 : (tu.cs->slice->isIRAP() ? 171 : 85)) << int64_t(iQBits - 9);
   TCoeff transformedCoefficient;
@@ -934,13 +1126,9 @@ void Quant::transformSkipQuantOneSample(TransformUnit &tu, const ComponentID &co
   // quantization
   const TCoeff iSign = (transformedCoefficient < 0 ? -1: 1);
 
-#if HEVC_USE_SCALING_LISTS
   const int quantisationCoefficient = enableScalingLists ? piQuantCoeff[uiPos] : defaultQuantisationCoefficient;
 
   const int64_t tmpLevel = (int64_t)abs(transformedCoefficient) * quantisationCoefficient;
-#else
-  const int64_t tmpLevel = (int64_t)abs(transformedCoefficient) * defaultQuantisationCoefficient;
-#endif
 
   const TCoeff quantisedCoefficient = (TCoeff((tmpLevel + iAdd ) >> iQBits)) * iSign;
 
@@ -953,25 +1141,21 @@ void Quant::invTrSkipDeQuantOneSample(TransformUnit &tu, const ComponentID &comp
 {
   const SPS           &sps                    = *tu.cs->sps;
   const CompArea      &rect                   = tu.blocks[compID];
-#if HEVC_USE_SCALING_LISTS
   const uint32_t           uiWidth                = rect.width;
   const uint32_t           uiHeight               = rect.height;
-#endif
-  const int            QP_per                 = cQP.per;
-  const int            QP_rem                 = cQP.rem;
+  const int            QP_per                 = cQP.per(tu.mtsIdx[compID] == MTS_SKIP);
+  const int            QP_rem                 = cQP.rem(tu.mtsIdx[compID] == MTS_SKIP);
   const int            maxLog2TrDynamicRange  = sps.getMaxLog2TrDynamicRange(toChannelType(compID));
   const int            channelBitDepth        = sps.getBitDepth(toChannelType(compID));
   const int            iTransformShift        = getTransformShift(channelBitDepth, rect.size(), maxLog2TrDynamicRange);
-#if HEVC_USE_SCALING_LISTS
   const int            scalingListType        = getScalingListType(tu.cu->predMode, compID);
-  const bool           enableScalingLists     = getUseScalingList(uiWidth, uiHeight, true);
+  const bool           disableSMForLFNST = tu.cs->picHeader->getScalingListPresentFlag() ? tu.cs->picHeader->getScalingListAPS()->getScalingList().getDisableScalingMatrixForLfnstBlks() : false;
+  const bool           enableScalingLists = getUseScalingList(uiWidth, uiHeight, true, tu.cu->lfnstIdx > 0, disableSMForLFNST);
 
   CHECK(scalingListType >= SCALING_LIST_NUM, "Invalid scaling list");
 
-  const int rightShift = (IQUANT_SHIFT - (iTransformShift + QP_per)) + (enableScalingLists ? LOG2_SCALING_LIST_NEUTRAL_VALUE : 0);
-#else
-  const int rightShift = (IQUANT_SHIFT - (iTransformShift + QP_per));
-#endif
+  const bool isTransformSkip = (tu.mtsIdx[compID] == MTS_SKIP);
+  const int rightShift = (IQUANT_SHIFT - ((isTransformSkip ? 0 : iTransformShift) + QP_per)) + (enableScalingLists ? LOG2_SCALING_LIST_NEUTRAL_VALUE : 0);
 
   const TCoeff transformMinimum = -(1 << maxLog2TrDynamicRange);
   const TCoeff transformMaximum =  (1 << maxLog2TrDynamicRange) - 1;
@@ -980,7 +1164,6 @@ void Quant::invTrSkipDeQuantOneSample(TransformUnit &tu, const ComponentID &comp
 
   TCoeff dequantisedSample;
 
-#if HEVC_USE_SCALING_LISTS
   if (enableScalingLists)
   {
     const uint32_t             dequantCoefBits = 1 + IQUANT_SHIFT + SCALING_LIST_BITS;
@@ -989,9 +1172,9 @@ void Quant::invTrSkipDeQuantOneSample(TransformUnit &tu, const ComponentID &comp
     const Intermediate_Int inputMinimum = -(1 << (targetInputBitDepth - 1));
     const Intermediate_Int inputMaximum =  (1 << (targetInputBitDepth - 1)) - 1;
 
-    const uint32_t uiLog2TrWidth  = g_aucLog2[uiWidth];
-    const uint32_t uiLog2TrHeight = g_aucLog2[uiHeight];
-    int *piDequantCoef        = getDequantCoeff(scalingListType,QP_rem,uiLog2TrWidth-1, uiLog2TrHeight-1);
+    const uint32_t uiLog2TrWidth  = floorLog2(uiWidth);
+    const uint32_t uiLog2TrHeight = floorLog2(uiHeight);
+    int *piDequantCoef        = getDequantCoeff(scalingListType, QP_rem, uiLog2TrWidth, uiLog2TrHeight);
 
     if (rightShift > 0)
     {
@@ -1012,8 +1195,7 @@ void Quant::invTrSkipDeQuantOneSample(TransformUnit &tu, const ComponentID &comp
   }
   else
   {
-#endif
-    const int scale = g_invQuantScales[QP_rem];
+    const int scale = g_invQuantScales[0][QP_rem];
     const int scaleBits = (IQUANT_SHIFT + 1);
 
     const uint32_t             targetInputBitDepth = std::min<uint32_t>((maxLog2TrDynamicRange + 1), (((sizeof(Intermediate_Int) * 8) + rightShift) - scaleBits));
@@ -1036,23 +1218,42 @@ void Quant::invTrSkipDeQuantOneSample(TransformUnit &tu, const ComponentID &comp
 
       dequantisedSample = TCoeff(Clip3<Intermediate_Int>(transformMinimum, transformMaximum, iCoeffQ));
     }
-#if HEVC_USE_SCALING_LISTS
   }
-#endif
 
   // Inverse transform-skip
+  reconSample = Pel(dequantisedSample);
+}
 
-  if (iTransformShift >= 0)
+void Quant::lambdaAdjustColorTrans(bool forward)
+{
+  if (m_resetStore)
   {
-    const TCoeff offset = iTransformShift == 0 ? 0 : (1 << (iTransformShift - 1));
-    reconSample = Pel((dequantisedSample + offset) >> iTransformShift);
+    for (uint8_t component = 0; component < MAX_NUM_COMPONENT; component++)
+    {
+      ComponentID compID = (ComponentID)component;
+      int       delta_QP = (compID == COMPONENT_Cr ? DELTA_QP_FOR_Co : DELTA_QP_FOR_Y_Cg);
+      double lamdbaAdjustRate = pow(2.0, delta_QP / 3.0);
+
+      m_lambdasStore[0][component] = m_lambdas[component];
+      m_lambdasStore[1][component] = m_lambdas[component] * lamdbaAdjustRate;
+    }
+    m_resetStore = false;
   }
-  else //for very high bit depths
+  
+  if (forward)
   {
-    const int iTrShiftNeg = -iTransformShift;
-    reconSample = Pel(dequantisedSample << iTrShiftNeg);
+    CHECK(m_pairCheck == 1, "lambda has been already adjusted");
+    m_pairCheck = 1;
+  }
+  else
+  {
+    CHECK(m_pairCheck == 0, "lambda has not been adjusted");
+    m_pairCheck = 0;
   }
-}
-
 
+  for (uint8_t component = 0; component < MAX_NUM_COMPONENT; component++)
+  {
+    m_lambdas[component] = m_lambdasStore[m_pairCheck][component];
+  }
+}
 //! \}
diff --git a/source/Lib/CommonLib/Quant.h b/source/Lib/CommonLib/Quant.h
index 4877a59aa20e58887ec4f1ca797005dfbe31261e..d2ffa8bbf336cd14f2ccbaf77ccd0ca0813bf71c 100644
--- a/source/Lib/CommonLib/Quant.h
+++ b/source/Lib/CommonLib/Quant.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -58,27 +58,40 @@
 // ====================================================================================================================
 // Class definition
 // ====================================================================================================================
+struct TrQuantParams
+{
+  int     rightShift;
+  int     qScale;
+};
 
 /// QP struct
-struct QpParam
+class QpParam
 {
-  int Qp;
-  int per;
-  int rem;
+public:
+  int Qps[2];
+  int pers[2];
+  int rems[2];
 
 private:
 
   QpParam(const int           qpy,
-          const ChannelType   chType,
+          const ComponentID   compID,
           const int           qpBdOffset,
+          const int           minQpPrimeTsMinus4,
           const int           chromaQPOffset,
           const ChromaFormat  chFmt,
-          const int           dqp );
+          const int           dqp
+        , const SPS           *sps
+  );
 
 public:
 
   QpParam(const TransformUnit& tu, const ComponentID &compID, const int QP = -MAX_INT);
 
+  int Qp ( const bool ts ) const { return Qps [ts?1:0]; }
+  int per( const bool ts ) const { return pers[ts?1:0]; }
+  int rem( const bool ts ) const { return rems[ts?1:0]; }
+
 }; // END STRUCT DEFINITION QpParam
 
 /// transform and quantization class
@@ -109,21 +122,23 @@ public:
 #endif
   void   setLambda               ( const double dLambda )                      { m_dLambda = dLambda; }
   double getLambda               () const                                      { return m_dLambda; }
+  void   lambdaAdjustColorTrans(bool forward);
+  void   resetStore() { m_resetStore = true; }
 
-#if HEVC_USE_SCALING_LISTS
   int* getQuantCoeff             ( uint32_t list, int qp, uint32_t sizeX, uint32_t sizeY ) { return m_quantCoef            [sizeX][sizeY][list][qp]; };  //!< get Quant Coefficent
   int* getDequantCoeff           ( uint32_t list, int qp, uint32_t sizeX, uint32_t sizeY ) { return m_dequantCoef          [sizeX][sizeY][list][qp]; };  //!< get DeQuant Coefficent
 
   void setUseScalingList         ( bool bUseScalingList){ m_scalingListEnabledFlag = bUseScalingList; };
-  bool getUseScalingList         ( const uint32_t width, const uint32_t height, const bool isTransformSkip){ return m_scalingListEnabledFlag && (!isTransformSkip || ((width == 4) && (height == 4))); };
-
+  bool getUseScalingList(const uint32_t width, const uint32_t height, const bool isTransformSkip, const bool lfnstApplied, const bool disableScalingMatrixForLFNSTBlks) 
+  { 
+    return (m_scalingListEnabledFlag && !isTransformSkip && (!lfnstApplied || !disableScalingMatrixForLFNSTBlks));
+  }
   void setScalingListDec         ( const ScalingList &scalingList);
-  void processScalingListEnc     ( int *coeff, int *quantcoeff, int quantScales, uint32_t height, uint32_t width, uint32_t ratio, int sizuNum, uint32_t dc);
-  void processScalingListDec     ( const int *coeff, int *dequantcoeff, int invQuantScales, uint32_t height, uint32_t width, uint32_t ratio, int sizuNum, uint32_t dc);
+  void processScalingListEnc     ( int *coeff, int *quantcoeff, int qpMod6, uint32_t height, uint32_t width, uint32_t ratio, int sizuNum, uint32_t dc);
+  void processScalingListDec     ( const int *coeff, int *dequantcoeff, int qpMod6, uint32_t height, uint32_t width, uint32_t ratio, int sizuNum, uint32_t dc);
 
   virtual void setFlatScalingList( const int maxLog2TrDynamicRange[MAX_NUM_CHANNEL_TYPE], const BitDepths &bitDepths);
   virtual void setScalingList    ( ScalingList *scalingList, const int maxLog2TrDynamicRange[MAX_NUM_CHANNEL_TYPE], const BitDepths &bitDepths);
-#endif
 
   // quantization
   virtual void quant             ( TransformUnit &tu, const ComponentID &compID, const CCoeffBuf &pSrc, TCoeff &uiAbsSum, const QpParam &cQP, const Ctx& ctx );
@@ -147,30 +162,30 @@ protected:
 #if T0196_SELECTIVE_RDOQ
   bool     m_useSelectiveRDOQ;
 #endif
-#if HEVC_USE_SCALING_LISTS
 private:
   void xInitScalingList   ( const Quant* other );
   void xDestroyScalingList();
   void xSetFlatScalingList( uint32_t list, uint32_t sizeX, uint32_t sizeY, int qp );
-  void xSetScalingListEnc ( ScalingList *scalingList, uint32_t list, uint32_t size, int qp );
-  void xSetScalingListDec ( const ScalingList &scalingList, uint32_t list, uint32_t size, int qp );
-#endif
-#if HEVC_USE_SIGN_HIDING
+  void xSetScalingListEnc(ScalingList *scalingList, uint32_t list, uint32_t size, int qp, uint32_t scalingListId);
+  void xSetScalingListDec(const ScalingList &scalingList, uint32_t list, uint32_t size, int qp, uint32_t scalingListId);
+  void xSetRecScalingListEnc(ScalingList *scalingList, uint32_t list, uint32_t sizew, uint32_t sizeh, int qp, uint32_t scalingListId);
+  void xSetRecScalingListDec(const ScalingList &scalingList, uint32_t list, uint32_t sizew, uint32_t sizeh, int qp, uint32_t scalingListId);
 private:
   void xSignBitHidingHDQ  (TCoeff* pQCoef, const TCoeff* pCoef, TCoeff* deltaU, const CoeffCodingContext& cctx, const int maxLog2TrDynamicRange);
-#endif
 
 private:
 #if RDOQ_CHROMA_LAMBDA
   double   m_lambdas[MAX_NUM_COMPONENT];
 #endif
-#if HEVC_USE_SCALING_LISTS
+  double   m_lambdasStore[2][MAX_NUM_COMPONENT];  // [0]: original lambdas; [1]: lambdas scaled for the colour transform (see lambdaAdjustColorTrans)
+  bool     m_resetStore;
   bool     m_scalingListEnabledFlag;
   bool     m_isScalingListOwner;
 
   int      *m_quantCoef            [SCALING_LIST_SIZE_NUM][SCALING_LIST_SIZE_NUM][SCALING_LIST_NUM][SCALING_LIST_REM_NUM]; ///< array of quantization matrix coefficient 4x4
   int      *m_dequantCoef          [SCALING_LIST_SIZE_NUM][SCALING_LIST_SIZE_NUM][SCALING_LIST_NUM][SCALING_LIST_REM_NUM]; ///< array of dequantization matrix coefficient 4x4
-#endif
+
+  int      m_pairCheck;
 };// END CLASS DEFINITION Quant
 
 
diff --git a/source/Lib/CommonLib/QuantRDOQ.cpp b/source/Lib/CommonLib/QuantRDOQ.cpp
index b844ae48d4320200b8c2457e54fe67991d4af0c7..7b3ffe927496a28c4bf07dabc9b8116da3e8ba60 100644
--- a/source/Lib/CommonLib/QuantRDOQ.cpp
+++ b/source/Lib/CommonLib/QuantRDOQ.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -57,6 +57,7 @@ struct coeffGroupRDStats
   double d64UncodedDist;    // all zero coded block distortion
   double d64SigCost;
   double d64SigCost_0;
+  int   iNumSbbCtxBins;
 };
 
 
@@ -82,16 +83,12 @@ QuantRDOQ::QuantRDOQ( const Quant* other ) : Quant( other )
 
   const QuantRDOQ *rdoq = dynamic_cast<const QuantRDOQ*>( other );
   CHECK( other && !rdoq, "The RDOQ cast must be successfull!" );
-#if HEVC_USE_SCALING_LISTS
   xInitScalingList( rdoq );
-#endif
 }
 
 QuantRDOQ::~QuantRDOQ()
 {
-#if HEVC_USE_SCALING_LISTS
   xDestroyScalingList();
-#endif
 }
 
 
@@ -112,7 +109,6 @@ inline uint32_t QuantRDOQ::xGetCodedLevel( double&            rd64CodedCost,
                                        const BinFracBits& fracBitsPar,
                                        const BinFracBits& fracBitsGt1,
                                        const BinFracBits& fracBitsGt2,
-                                       const int          remGt2Bins,
                                        const int          remRegBins,
                                        unsigned           goRiceZero,
                                        uint16_t             ui16AbsGoRice,
@@ -150,7 +146,7 @@ inline uint32_t QuantRDOQ::xGetCodedLevel( double&            rd64CodedCost,
   {
     double dErr         = double( lLevelDouble  - ( Intermediate_Int(uiAbsLevel) << iQBits ) );
 
-    double dCurrCost    = dErr * dErr * errorScale + xGetICost( xGetICRate( uiAbsLevel, fracBitsPar, fracBitsGt1, fracBitsGt2, remGt2Bins, remRegBins, goRiceZero, ui16AbsGoRice, true, maxLog2TrDynamicRange ) );
+    double dCurrCost    = dErr * dErr * errorScale + xGetICost( xGetICRate( uiAbsLevel, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, ui16AbsGoRice, true, maxLog2TrDynamicRange ) );
     dCurrCost          += dCurrCostSig;
 
     if( dCurrCost < rd64CodedCost )
@@ -179,7 +175,6 @@ inline int QuantRDOQ::xGetICRate( const uint32_t         uiAbsLevel,
                                   const BinFracBits& fracBitsPar,
                                   const BinFracBits& fracBitsGt1,
                                   const BinFracBits& fracBitsGt2,
-                                  const int          remGt2Bins,
                                   const int          remRegBins,
                                   unsigned           goRiceZero,
                                   const uint16_t       ui16AbsGoRice,
@@ -347,7 +342,6 @@ inline double QuantRDOQ::xGetIEPRate      (
 
 
 
-#if HEVC_USE_SCALING_LISTS
 /** set quantized matrix coefficient for encode
  * \param scalingList            quantized matrix address
  * \param format                 chroma format
@@ -376,32 +370,19 @@ void QuantRDOQ::setScalingList(ScalingList *scalingList, const int maxLog2TrDyna
 }
 
 
-
-#if HM_QTBT_AS_IN_JEM_QUANT
-#endif
-#else
-
-double QuantRDOQ::xGetErrScaleCoeff( const bool needsSqrt2, SizeType width, SizeType height, int qp, const int maxLog2TrDynamicRange, const int channelBitDepth )
+double QuantRDOQ::xGetErrScaleCoeff(const bool needsSqrt2, SizeType width, SizeType height, int qp, const int maxLog2TrDynamicRange, const int channelBitDepth, bool bTransformSkip=false)
 {
-  const int iTransformShift = getTransformShift(channelBitDepth, Size(width, height), maxLog2TrDynamicRange);
-#if HM_QTBT_AS_IN_JEM_QUANT
-  double    dErrScale       = (double)( 1 << SCALE_BITS );                                // Compensate for scaling of bitcount in Lagrange cost function
-  double    dTransShift     = (double)iTransformShift + ( needsSqrt2 ? -0.5 : 0.0 );
-  dErrScale                 = dErrScale*pow( 2.0, ( -2.0*dTransShift ) );                     // Compensate for scaling through forward transform
-  int       QStep           = ( needsSqrt2 ? ( ( g_quantScales[qp] * 181 ) >> 7 ) : g_quantScales[qp] );
+  const int iTransformShift = bTransformSkip ? 0 : getTransformShift(channelBitDepth, Size(width, height), maxLog2TrDynamicRange);
+  double    dErrScale = (double)(1 << SCALE_BITS);                                // Compensate for scaling of bitcount in Lagrange cost function
+  double    dTransShift = (double)iTransformShift + (needsSqrt2 ? -0.5 : 0.0);
+  dErrScale = dErrScale * pow(2.0, (-2.0*dTransShift));                     // Compensate for scaling through forward transform
+  const int  QStep = g_quantScales[needsSqrt2 ? 1 : 0][qp];
   double    finalErrScale = dErrScale / QStep / QStep / (1 << (DISTORTION_PRECISION_ADJUSTMENT(channelBitDepth) << 1));
-#else
-  int errShift = SCALE_BITS - ((iTransformShift + DISTORTION_PRECISION_ADJUSTMENT(channelBitDepth)) << 1);
-  double    dErrScale       = exp2( double( errShift ) );
-  double    finalErrScale   = dErrScale / double( g_quantScales[qp] * g_quantScales[qp] );
-#endif
   return    finalErrScale;
 }
-#endif
 
 
 
-#if HEVC_USE_SCALING_LISTS
 /** set error scale coefficients
  * \param list                   list ID
  * \param size
@@ -421,12 +402,11 @@ void QuantRDOQ::xSetErrScaleCoeff( uint32_t list, uint32_t sizeX, uint32_t sizeY
   int *piQuantcoeff;
   double *pdErrScale;
   piQuantcoeff = getQuantCoeff( list, qp, sizeX, sizeY );
-  pdErrScale   = xGetErrScaleCoeff( list, sizeX, sizeY, qp );
+  pdErrScale   = xGetErrScaleCoeffSL( list, sizeX, sizeY, qp);
 
-#if HM_QTBT_AS_IN_JEM_QUANT
   double dErrScale = (double)( 1 << SCALE_BITS );                                // Compensate for scaling of bitcount in Lagrange cost function
 
-  bool   needsSqrt2 = TU::needsBlockSizeTrafoScale( Size( g_scalingListSizeX[sizeX], g_scalingListSizeX[sizeY] ) );// ( ( (sizeX+sizeY) & 1 ) !=0 );
+  const bool needsSqrt2 = ((floorLog2(width) + floorLog2(height)) & 1) == 1;
   double dTransShift = (double)iTransformShift + ( needsSqrt2 ? -0.5 : 0.0 );
   dErrScale = dErrScale*pow( 2.0, ( -2.0*dTransShift ) );                     // Compensate for scaling through forward transform
 
@@ -436,19 +416,10 @@ void QuantRDOQ::xSetErrScaleCoeff( uint32_t list, uint32_t sizeX, uint32_t sizeY
                     / (1 << (DISTORTION_PRECISION_ADJUSTMENT(bitDepths.recon[channelType]) << 1));
   }
 
-  int QStep = ( needsSqrt2 ? ( ( g_quantScales[qp] * 181 ) >> 7 ) : g_quantScales[qp] );
+  int QStep = g_quantScales[needsSqrt2][qp];
 
   xGetErrScaleCoeffNoScalingList(list, sizeX, sizeY, qp) =
     dErrScale / QStep / QStep / (1 << (DISTORTION_PRECISION_ADJUSTMENT(bitDepths.recon[channelType]) << 1));
-#else
-  int errShift = SCALE_BITS - ((iTransformShift + DISTORTION_PRECISION_ADJUSTMENT(bitDepths.recon[channelType])) << 1);
-  double dErrScale = exp2( double( errShift ) );
-  for( i = 0; i < uiMaxNumCoeff; i++ )
-  {
-    pdErrScale[i] = dErrScale / double( piQuantcoeff[i] * piQuantcoeff[i] );
-  }
-  xGetErrScaleCoeffNoScalingList( list, sizeX, sizeY, qp ) = dErrScale / double( g_quantScales[qp] * g_quantScales[qp] );
-#endif
 }
 
 /** set flat matrix value to quantized coefficient
@@ -481,6 +452,29 @@ void QuantRDOQ::xInitScalingList( const QuantRDOQ* other )
 {
   m_isErrScaleListOwner = other == nullptr;
 
+  size_t numElements = 0;
+
+  for (uint32_t sizeIdX = 0; sizeIdX < SCALING_LIST_SIZE_NUM; sizeIdX++)
+  {
+    for (uint32_t sizeIdY = 0; sizeIdY < SCALING_LIST_SIZE_NUM; sizeIdY++)
+    {
+      for (uint32_t qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
+      {
+        for (uint32_t listId = 0; listId < SCALING_LIST_NUM; listId++)
+        {
+          numElements += g_scalingListSizeX[sizeIdX] * g_scalingListSizeX[sizeIdY];
+        }
+      }
+    }
+  }
+
+  if (m_isErrScaleListOwner)
+  {
+    m_errScale[0][0][0][0] = new double[numElements];
+  }
+
+  size_t offset = 0;
+
   for(uint32_t sizeIdX = 0; sizeIdX < SCALING_LIST_SIZE_NUM; sizeIdX++)
   {
     for(uint32_t sizeIdY = 0; sizeIdY < SCALING_LIST_SIZE_NUM; sizeIdY++)
@@ -491,7 +485,8 @@ void QuantRDOQ::xInitScalingList( const QuantRDOQ* other )
         {
           if( m_isErrScaleListOwner )
           {
-            m_errScale[sizeIdX][sizeIdY][listId][qp] = new double[g_scalingListSizeX[sizeIdX] * g_scalingListSizeX[sizeIdY]];
+            m_errScale[sizeIdX][sizeIdY][listId][qp] = m_errScale[0][0][0][0] + offset;
+            offset += g_scalingListSizeX[sizeIdX] * g_scalingListSizeX[sizeIdY];
           }
           else
           {
@@ -509,25 +504,8 @@ void QuantRDOQ::xDestroyScalingList()
 {
   if( !m_isErrScaleListOwner ) return;
 
-  for(uint32_t sizeIdX = 0; sizeIdX < SCALING_LIST_SIZE_NUM; sizeIdX++)
-  {
-    for(uint32_t sizeIdY = 0; sizeIdY < SCALING_LIST_SIZE_NUM; sizeIdY++)
-    {
-      for(uint32_t listId = 0; listId < SCALING_LIST_NUM; listId++)
-      {
-        for(uint32_t qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
-        {
-          if(m_errScale[sizeIdX][sizeIdY][listId][qp])
-          {
-            delete [] m_errScale[sizeIdX][sizeIdY][listId][qp];
-          }
-        }
-      }
-    }
-  }
-//   Quant::destroyScalingList();
+  delete[] m_errScale[0][0][0][0];
 }
-#endif
 
 
 void QuantRDOQ::quant(TransformUnit &tu, const ComponentID &compID, const CCoeffBuf &pSrc, TCoeff &uiAbsSum, const QpParam &cQP, const Ctx& ctx)
@@ -539,7 +517,7 @@ void QuantRDOQ::quant(TransformUnit &tu, const ComponentID &compID, const CCoeff
   const CCoeffBuf &piCoef   = pSrc;
         CoeffBuf   piQCoef  = tu.getCoeffs(compID);
 
-  const bool useTransformSkip      = tu.mtsIdx==1;
+  const bool useTransformSkip      = (tu.mtsIdx[compID] == MTS_SKIP);
 
   bool useRDOQ = useTransformSkip ? m_useRDOQTS : m_useRDOQ;
 
@@ -555,7 +533,21 @@ void QuantRDOQ::quant(TransformUnit &tu, const ComponentID &compID, const CCoeff
     if (!m_useSelectiveRDOQ || xNeedRDOQ(tu, compID, piCoef, cQP))
     {
 #endif
-      xRateDistOptQuant( tu, compID, pSrc, uiAbsSum, cQP, ctx );
+      if( useTransformSkip )
+      {
+        if( (tu.cu->bdpcmMode && isLuma(compID)) || (isChroma(compID) && tu.cu->bdpcmModeChroma ) )
+        {
+          forwardRDPCM( tu, compID, pSrc, uiAbsSum, cQP, ctx );
+        }
+        else
+        {
+          xRateDistOptQuantTS( tu, compID, pSrc, uiAbsSum, cQP, ctx );
+        }
+      }
+      else
+      {
+        xRateDistOptQuant( tu, compID, pSrc, uiAbsSum, cQP, ctx );
+      }
 #if T0196_SELECTIVE_RDOQ
     }
     else
@@ -597,25 +589,21 @@ void QuantRDOQ::xRateDistOptQuant(TransformUnit &tu, const ComponentID &compID,
   // Represents scaling through forward transform
   int iTransformShift = getTransformShift(channelBitDepth, rect.size(), maxLog2TrDynamicRange);
 
-  if (tu.mtsIdx==1 && extendedPrecision)
+  if (tu.mtsIdx[compID] == MTS_SKIP && extendedPrecision)
   {
     iTransformShift = std::max<int>(0, iTransformShift);
   }
 
   double     d64BlockUncodedCost               = 0;
-  const uint32_t uiLog2BlockWidth                  = g_aucLog2[uiWidth];
-#if HEVC_USE_SCALING_LISTS
-  const uint32_t uiLog2BlockHeight                 = g_aucLog2[uiHeight];
-#endif
+  const uint32_t uiLog2BlockWidth                  = floorLog2(uiWidth);
+  const uint32_t uiLog2BlockHeight                 = floorLog2(uiHeight);
   const uint32_t uiMaxNumCoeff                     = rect.area();
 
   CHECK(compID >= MAX_NUM_TBLOCKS, "Invalid component ID");
 
-#if HEVC_USE_SCALING_LISTS
   int scalingListType = getScalingListType(tu.cu->predMode, compID);
 
   CHECK(scalingListType >= SCALING_LIST_NUM, "Invalid scaling list");
-#endif
 
   const TCoeff *plSrcCoeff = pSrc.buf;
         TCoeff *piDstCoeff = tu.getCoeffs(compID).buf;
@@ -623,75 +611,47 @@ void QuantRDOQ::xRateDistOptQuant(TransformUnit &tu, const ComponentID &compID,
   double *pdCostCoeff  = m_pdCostCoeff;
   double *pdCostSig    = m_pdCostSig;
   double *pdCostCoeff0 = m_pdCostCoeff0;
-#if HEVC_USE_SIGN_HIDING
   int    *rateIncUp    = m_rateIncUp;
   int    *rateIncDown  = m_rateIncDown;
   int    *sigRateDelta = m_sigRateDelta;
   TCoeff *deltaU       = m_deltaU;
-#endif
 
   memset(piDstCoeff, 0, sizeof(*piDstCoeff) * uiMaxNumCoeff);
   memset( m_pdCostCoeff,  0, sizeof( double ) *  uiMaxNumCoeff );
   memset( m_pdCostSig,    0, sizeof( double ) *  uiMaxNumCoeff );
-#if HEVC_USE_SIGN_HIDING
   memset( m_rateIncUp,    0, sizeof( int    ) *  uiMaxNumCoeff );
   memset( m_rateIncDown,  0, sizeof( int    ) *  uiMaxNumCoeff );
   memset( m_sigRateDelta, 0, sizeof( int    ) *  uiMaxNumCoeff );
   memset( m_deltaU,       0, sizeof( TCoeff ) *  uiMaxNumCoeff );
-#endif
-
-
-  const int iQBits = QUANT_SHIFT + cQP.per + iTransformShift;                   // Right shift of non-RDOQ quantizer;  level = (coeff*uiQ + offset)>>q_bits
 
-#if HEVC_USE_SCALING_LISTS
-  const double *const pdErrScale = xGetErrScaleCoeff(scalingListType, (uiLog2BlockWidth-1), (uiLog2BlockHeight-1), cQP.rem);
-  const int    *const piQCoef    = getQuantCoeff(scalingListType, cQP.rem, (uiLog2BlockWidth-1), (uiLog2BlockHeight-1));
 
-  const bool   enableScalingLists             = getUseScalingList(uiWidth, uiHeight, tu.transformSkip[compID]);
-#if HM_QTBT_AS_IN_JEM_QUANT
-  const int    defaultQuantisationCoefficient = ( TU::needsSqrt2Scale( rect, tu.transformSkip[compID] ) ? ( g_quantScales[cQP.rem] * 181 ) >> 7 : g_quantScales[cQP.rem] );
-  const double defaultErrorScale              = xGetErrScaleCoeffNoScalingList(scalingListType, (uiLog2BlockWidth-1), (uiLog2BlockHeight-1), cQP.rem);
-#else
-  const double blkErrScale                    = ( TU::needsQP3Offset( tu, compID ) ? 2.0 : 1.0 );
-  const int    defaultQuantisationCoefficient = g_quantScales[cQP.rem];
-  const double defaultErrorScale              = blkErrScale * xGetErrScaleCoeffNoScalingList( scalingListType, ( uiLog2BlockWidth - 1 ), ( uiLog2BlockHeight - 1 ), cQP.rem );
-#endif
-#else //HEVC_USE_SCALING_LISTS
-#if HM_QTBT_AS_IN_JEM_QUANT
-  const int    quantisationCoefficient = ( TU::needsSqrt2Scale( tu, compID ) ? ( g_quantScales[cQP.rem] * 181 ) >> 7 : g_quantScales[cQP.rem] );
-  const double errorScale              = xGetErrScaleCoeff( TU::needsSqrt2Scale( tu, compID ), uiWidth, uiHeight, cQP.rem, maxLog2TrDynamicRange, channelBitDepth );
-#else
-  const double blkErrScale             = ( TU::needsQP3Offset( tu, compID ) ? 2.0 : 1.0 );
-  const int    quantisationCoefficient = g_quantScales[cQP.rem];
-  const double errorScale              = blkErrScale * xGetErrScaleCoeff( uiWidth, uiHeight, cQP.rem, maxLog2TrDynamicRange, channelBitDepth );
-#endif
-#endif//HEVC_USE_SCALING_LISTS
+  const bool needSqrtAdjustment= TU::needsBlockSizeTrafoScale( tu, compID );
+  const bool   isTransformSkip = (tu.mtsIdx[compID] == MTS_SKIP);
+  const double *const pdErrScale = xGetErrScaleCoeffSL(scalingListType, uiLog2BlockWidth, uiLog2BlockHeight, cQP.rem(isTransformSkip));
+  const int    *const piQCoef    = getQuantCoeff(scalingListType, cQP.rem(isTransformSkip), uiLog2BlockWidth, uiLog2BlockHeight);
+  const bool   disableSMForLFNST = tu.cs->picHeader->getScalingListPresentFlag() ? tu.cs->picHeader->getScalingListAPS()->getScalingList().getDisableScalingMatrixForLfnstBlks() : false;
+  const bool   enableScalingLists = getUseScalingList(uiWidth, uiHeight, isTransformSkip, tu.cu->lfnstIdx > 0, disableSMForLFNST);
+  const int    defaultQuantisationCoefficient = g_quantScales[ needSqrtAdjustment ?1:0][cQP.rem(isTransformSkip)];
+  const double defaultErrorScale              = xGetErrScaleCoeffNoScalingList(scalingListType, uiLog2BlockWidth, uiLog2BlockHeight, cQP.rem(isTransformSkip));
+  const int iQBits = QUANT_SHIFT + cQP.per(isTransformSkip) + iTransformShift + (needSqrtAdjustment?-1:0);                   // Right shift of non-RDOQ quantizer;  level = (coeff*uiQ + offset)>>q_bits
 
 
-#if HEVC_USE_SIGN_HIDING
   const TCoeff entropyCodingMinimum = -(1 << maxLog2TrDynamicRange);
-#endif
   const TCoeff entropyCodingMaximum =  (1 << maxLog2TrDynamicRange) - 1;
 
-#if HEVC_USE_SIGN_HIDING
-  CoeffCodingContext cctx(tu, compID, tu.cs->slice->getSignDataHidingEnabledFlag());
-#else
-  CoeffCodingContext cctx(tu, compID);
-#endif
+  CoeffCodingContext cctx(tu, compID, tu.cs->picHeader->getSignDataHidingEnabledFlag());
   const int    iCGSizeM1      = (1 << cctx.log2CGSize()) - 1;
 
   int     iCGLastScanPos      = -1;
   double  d64BaseCost         = 0;
   int     iLastScanPos        = -1;
 
-  bool      is2x2subblock = ( iCGSizeM1 == 3 );
-  int       remGt2Bins    = ( is2x2subblock ? MAX_NUM_GT2_BINS_2x2SUBBLOCK : MAX_NUM_GT2_BINS_4x4SUBBLOCK );
-  int       remRegBins    = ( is2x2subblock ? MAX_NUM_REG_BINS_2x2SUBBLOCK : MAX_NUM_REG_BINS_4x4SUBBLOCK );
+  int ctxBinSampleRatio = (compID == COMPONENT_Y) ? MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT_LUMA : MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT_CHROMA;
+  int remRegBins = (uiWidth * uiHeight * ctxBinSampleRatio) >> 4;
   uint32_t  goRiceParam   = 0;
 
   double *pdCostCoeffGroupSig = m_pdCostCoeffGroupSig;
   memset( pdCostCoeffGroupSig, 0, ( uiMaxNumCoeff >> cctx.log2CGSize() ) * sizeof( double ) );
-  const int iCGNum = std::min<int>(JVET_C0024_ZERO_OUT_TH, uiWidth) * std::min<int>(JVET_C0024_ZERO_OUT_TH, uiHeight) >> cctx.log2CGSize();
   int iScanPos;
   coeffGroupRDStats rdStats;
 
@@ -699,29 +659,37 @@ void QuantRDOQ::xRateDistOptQuant(TransformUnit &tu, const ComponentID &compID,
   DTRACE( g_trace_ctx, D_RDOQ, "%d: %3d, %3d, %dx%d, comp=%d\n", DTRACE_GET_COUNTER( g_trace_ctx, D_RDOQ ), rect.x, rect.y, rect.width, rect.height, compID );
 #endif
 
+  const uint32_t lfnstIdx = tu.cu->lfnstIdx;
 
+  const int iCGNum = lfnstIdx > 0 ? 1 : std::min<int>(JVET_C0024_ZERO_OUT_TH, uiWidth) * std::min<int>(JVET_C0024_ZERO_OUT_TH, uiHeight) >> cctx.log2CGSize();
 
   for (int subSetId = iCGNum - 1; subSetId >= 0; subSetId--)
   {
     cctx.initSubblock( subSetId );
 
+    uint32_t maxNonZeroPosInCG = iCGSizeM1;
+    if( lfnstIdx > 0 && ( ( uiWidth == 4 && uiHeight == 4 ) || ( uiWidth == 8 && uiHeight == 8 && cctx.cgPosX() == 0 && cctx.cgPosY() == 0 ) ) )
+    {
+      maxNonZeroPosInCG = 7;
+    }
+
     memset( &rdStats, 0, sizeof (coeffGroupRDStats));
 
-    for (int iScanPosinCG = iCGSizeM1; iScanPosinCG >= 0; iScanPosinCG--)
+    for( int iScanPosinCG = iCGSizeM1; iScanPosinCG > maxNonZeroPosInCG; iScanPosinCG-- )
+    {
+      iScanPos = cctx.minSubPos() + iScanPosinCG;
+      uint32_t    blkPos = cctx.blockPos( iScanPos );
+      piDstCoeff[ blkPos ] = 0;
+    }
+    for( int iScanPosinCG = maxNonZeroPosInCG; iScanPosinCG >= 0; iScanPosinCG-- )
     {
       iScanPos = cctx.minSubPos() + iScanPosinCG;
       //===== quantization =====
       uint32_t    uiBlkPos          = cctx.blockPos(iScanPos);
 
       // set coeff
-#if HEVC_USE_SCALING_LISTS
       const int    quantisationCoefficient = (enableScalingLists) ? piQCoef   [uiBlkPos]               : defaultQuantisationCoefficient;
-#if HM_QTBT_AS_IN_JEM_QUANT
       const double errorScale              = (enableScalingLists) ? pdErrScale[uiBlkPos]               : defaultErrorScale;
-#else
-      const double errorScale              = (enableScalingLists) ? pdErrScale[uiBlkPos] * blkErrScale : defaultErrorScale;
-#endif
-#endif
       const int64_t  tmpLevel                = int64_t(abs(plSrcCoeff[ uiBlkPos ])) * quantisationCoefficient;
 
       const Intermediate_Int lLevelDouble  = (Intermediate_Int)std::min<int64_t>(tmpLevel, std::numeric_limits<Intermediate_Int>::max() - (Intermediate_Int(1) << (iQBits - 1)));
@@ -763,9 +731,9 @@ void QuantRDOQ::xRateDistOptQuant(TransformUnit &tu, const ComponentID &compID,
         uint32_t    goRiceZero    = 0;
         if( remRegBins < 4 )
         {
-          unsigned  sumAbs        = cctx.templateAbsSum( iScanPos, piDstCoeff );
+          unsigned  sumAbs = cctx.templateAbsSum( iScanPos, piDstCoeff, 0 );
           goRiceParam             = g_auiGoRiceParsCoeff   [ sumAbs ];
-          goRiceZero              = g_auiGoRicePosCoeff0[0][ sumAbs ];
+          goRiceZero              = g_auiGoRicePosCoeff0(0, goRiceParam);
         }
 
         const BinFracBits fracBitsPar = fracBits.getFracBitsArray( uiParCtx );
@@ -775,7 +743,7 @@ void QuantRDOQ::xRateDistOptQuant(TransformUnit &tu, const ComponentID &compID,
         if( iScanPos == iLastScanPos )
         {
           uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
-                                    lLevelDouble, uiMaxAbsLevel, nullptr, fracBitsPar, fracBitsGt1, fracBitsGt2, remGt2Bins, remRegBins, goRiceZero, goRiceParam, iQBits, errorScale, 1, extendedPrecision, maxLog2TrDynamicRange );
+                                    lLevelDouble, uiMaxAbsLevel, nullptr, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, iQBits, errorScale, 1, extendedPrecision, maxLog2TrDynamicRange );
         }
         else
         {
@@ -783,54 +751,45 @@ void QuantRDOQ::xRateDistOptQuant(TransformUnit &tu, const ComponentID &compID,
 
           const BinFracBits fracBitsSig = fracBits.getFracBitsArray( ctxIdSig );
           uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
-                                    lLevelDouble, uiMaxAbsLevel, &fracBitsSig, fracBitsPar, fracBitsGt1, fracBitsGt2, remGt2Bins, remRegBins, goRiceZero, goRiceParam, iQBits, errorScale, 0, extendedPrecision, maxLog2TrDynamicRange );
-#if HEVC_USE_SIGN_HIDING
+                                    lLevelDouble, uiMaxAbsLevel, &fracBitsSig, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, iQBits, errorScale, 0, extendedPrecision, maxLog2TrDynamicRange );
           sigRateDelta[ uiBlkPos ] = ( remRegBins < 4 ? 0 : fracBitsSig.intBits[1] - fracBitsSig.intBits[0] );
-#endif
         }
 
         DTRACE( g_trace_ctx, D_RDOQ, " Lev=%d \n", uiLevel );
         DTRACE_COND( ( uiMaxAbsLevel != 0 ), g_trace_ctx, D_RDOQ, " CostC0=%d\n", (int64_t)( pdCostCoeff0[iScanPos] ) );
         DTRACE_COND( ( uiMaxAbsLevel != 0 ), g_trace_ctx, D_RDOQ, " CostC =%d\n", (int64_t)( pdCostCoeff[iScanPos] ) );
 
-#if HEVC_USE_SIGN_HIDING
         deltaU[ uiBlkPos ]        = TCoeff((lLevelDouble - (Intermediate_Int(uiLevel) << iQBits)) >> (iQBits-8));
 
         if( uiLevel > 0 )
         {
-          int rateNow              = xGetICRate( uiLevel,   fracBitsPar, fracBitsGt1, fracBitsGt2, remGt2Bins, remRegBins, goRiceZero, goRiceParam, extendedPrecision, maxLog2TrDynamicRange );
-          rateIncUp   [ uiBlkPos ] = xGetICRate( uiLevel+1, fracBitsPar, fracBitsGt1, fracBitsGt2, remGt2Bins, remRegBins, goRiceZero, goRiceParam, extendedPrecision, maxLog2TrDynamicRange ) - rateNow;
-          rateIncDown [ uiBlkPos ] = xGetICRate( uiLevel-1, fracBitsPar, fracBitsGt1, fracBitsGt2, remGt2Bins, remRegBins, goRiceZero, goRiceParam, extendedPrecision, maxLog2TrDynamicRange ) - rateNow;
+          int rateNow              = xGetICRate( uiLevel,   fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, extendedPrecision, maxLog2TrDynamicRange );
+          rateIncUp   [ uiBlkPos ] = xGetICRate( uiLevel+1, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, extendedPrecision, maxLog2TrDynamicRange ) - rateNow;
+          rateIncDown [ uiBlkPos ] = xGetICRate( uiLevel-1, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, extendedPrecision, maxLog2TrDynamicRange ) - rateNow;
         }
         else // uiLevel == 0
         {
           if( remRegBins < 4 )
           {
-            int rateNow            = xGetICRate( uiLevel,   fracBitsPar, fracBitsGt1, fracBitsGt2, remGt2Bins, remRegBins, goRiceZero, goRiceParam, extendedPrecision, maxLog2TrDynamicRange );
-            rateIncUp [ uiBlkPos ] = xGetICRate( uiLevel+1, fracBitsPar, fracBitsGt1, fracBitsGt2, remGt2Bins, remRegBins, goRiceZero, goRiceParam, extendedPrecision, maxLog2TrDynamicRange ) - rateNow;
+            int rateNow            = xGetICRate( uiLevel,   fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, extendedPrecision, maxLog2TrDynamicRange );
+            rateIncUp [ uiBlkPos ] = xGetICRate( uiLevel+1, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, extendedPrecision, maxLog2TrDynamicRange ) - rateNow;
           }
           else
           {
             rateIncUp [ uiBlkPos ] = fracBitsGt1.intBits[ 0 ];
           }
         }
-#endif
         piDstCoeff[ uiBlkPos ] = uiLevel;
         d64BaseCost           += pdCostCoeff [ iScanPos ];
 
         if( ( (iScanPos & iCGSizeM1) == 0 ) && ( iScanPos > 0 ) )
         {
-          remGt2Bins    = ( is2x2subblock ? MAX_NUM_GT2_BINS_2x2SUBBLOCK : MAX_NUM_GT2_BINS_4x4SUBBLOCK );
-          remRegBins    = ( is2x2subblock ? MAX_NUM_REG_BINS_2x2SUBBLOCK : MAX_NUM_REG_BINS_4x4SUBBLOCK ) - remGt2Bins;
           goRiceParam   = 0;
         }
         else if( remRegBins >= 4 )
         {
-          const uint32_t baseLevel = 4;
-          if( goRiceParam < 3 && ((uiLevel-baseLevel)>>1) > (3<<goRiceParam)-1 )
-          {
-            goRiceParam++;
-          }
+          int  sumAll = cctx.templateAbsSum(iScanPos, piDstCoeff, 4);
+          goRiceParam = g_auiGoRiceParsCoeff[sumAll];
           remRegBins -= (uiLevel < 2 ? uiLevel : 3) + (iScanPos != iLastScanPos);
         }
       }
@@ -901,7 +860,7 @@ void QuantRDOQ::xRateDistOptQuant(TransformUnit &tu, const ComponentID &compID,
                 pdCostCoeffGroupSig[ cctx.subSetId() ] = xGetRateSigCoeffGroup(fracBitsSigGroup,0);
               }
               // reset coeffs to 0 in this block
-              for (int iScanPosinCG = iCGSizeM1; iScanPosinCG >= 0; iScanPosinCG--)
+              for( int iScanPosinCG = maxNonZeroPosInCG; iScanPosinCG >= 0; iScanPosinCG-- )
               {
                 iScanPos      = cctx.minSubPos() + iScanPosinCG;
                 uint32_t uiBlkPos = cctx.blockPos( iScanPos );
@@ -949,7 +908,7 @@ void QuantRDOQ::xRateDistOptQuant(TransformUnit &tu, const ComponentID &compID,
     {
       bool rootCbfSoFar       = false;
       bool isLastSubPartition = CU::isISPLast(*tu.cu, tu.Y(), compID);
-      uint32_t nTus = tu.cu->ispMode == HOR_INTRA_SUBPARTITIONS ? tu.cu->lheight() >> g_aucLog2[tu.lheight()] : tu.cu->lwidth() >> g_aucLog2[tu.lwidth()];
+      uint32_t nTus = tu.cu->ispMode == HOR_INTRA_SUBPARTITIONS ? tu.cu->lheight() >> floorLog2(tu.lheight()) : tu.cu->lwidth() >> floorLog2(tu.lwidth());
       if( isLastSubPartition )
       {
         TransformUnit* tuPointer = tu.cu->firstTU;
@@ -968,7 +927,7 @@ void QuantRDOQ::xRateDistOptQuant(TransformUnit &tu, const ComponentID &compID,
         previousCbf = TU::getPrevTuCbfAtDepth(tu, compID, tu.depth);
       }
     }
-    BinFracBits fracBitsQtCbf = fracBits.getFracBitsArray( Ctx::QtCbf[compID]( DeriveCtx::CtxQtCbf( rect.compID, tu.depth, previousCbf, useIntraSubPartitions ) ) );
+    BinFracBits fracBitsQtCbf = fracBits.getFracBitsArray( Ctx::QtCbf[compID]( DeriveCtx::CtxQtCbf( rect.compID, previousCbf, useIntraSubPartitions ) ) );
 
     if( !lastCbfIsInferred )
     {
@@ -984,13 +943,8 @@ void QuantRDOQ::xRateDistOptQuant(TransformUnit &tu, const ComponentID &compID,
   int lastBitsX[LAST_SIGNIFICANT_GROUPS] = { 0 };
   int lastBitsY[LAST_SIGNIFICANT_GROUPS] = { 0 };
   {
-#if HEVC_USE_MDCS
-    int dim1  = ( cctx.scanType() == SCAN_VER ? uiHeight : uiWidth  );
-    int dim2  = ( cctx.scanType() == SCAN_VER ? uiWidth  : uiHeight );
-#else
     int dim1 = std::min<int>(JVET_C0024_ZERO_OUT_TH, uiWidth);
     int dim2 = std::min<int>(JVET_C0024_ZERO_OUT_TH, uiHeight);
-#endif
     int bitsX = 0;
     int bitsY = 0;
     int ctxId;
@@ -1019,7 +973,12 @@ void QuantRDOQ::xRateDistOptQuant(TransformUnit &tu, const ComponentID &compID,
     d64BaseCost -= pdCostCoeffGroupSig [ iCGScanPos ];
     if (cctx.isSigGroup( iCGScanPos ) )
     {
-      for (int iScanPosinCG = iCGSizeM1; iScanPosinCG >= 0; iScanPosinCG--)
+      uint32_t maxNonZeroPosInCG = iCGSizeM1;
+      if( lfnstIdx > 0 && ( ( uiWidth == 4 && uiHeight == 4 ) || ( uiWidth == 8 && uiHeight == 8 && cctx.cgPosX() == 0 && cctx.cgPosY() == 0 ) ) )
+      {
+        maxNonZeroPosInCG = 7;
+      }
+      for( int iScanPosinCG = maxNonZeroPosInCG; iScanPosinCG >= 0; iScanPosinCG-- )
       {
         iScanPos = iCGScanPos * (iCGSizeM1 + 1) + iScanPosinCG;
 
@@ -1033,11 +992,7 @@ void QuantRDOQ::xRateDistOptQuant(TransformUnit &tu, const ComponentID &compID,
         {
           uint32_t   uiPosY = uiBlkPos >> uiLog2BlockWidth;
           uint32_t   uiPosX = uiBlkPos - ( uiPosY << uiLog2BlockWidth );
-#if HEVC_USE_MDCS
-          double d64CostLast  = ( cctx.scanType() == SCAN_VER ? xGetRateLast( lastBitsX, lastBitsY, uiPosY, uiPosX ) : xGetRateLast( lastBitsX, lastBitsY, uiPosX, uiPosY ) );
-#else
           double d64CostLast  = xGetRateLast( lastBitsX, lastBitsY, uiPosX, uiPosY );
-#endif
 
           double totalCost = d64BaseCost + d64CostLast - pdCostSig[ iScanPos ];
 
@@ -1085,16 +1040,11 @@ void QuantRDOQ::xRateDistOptQuant(TransformUnit &tu, const ComponentID &compID,
     piDstCoeff[ cctx.blockPos( scanPos ) ] = 0;
   }
 
-#if HEVC_USE_SIGN_HIDING
   if( cctx.signHiding() && uiAbsSum>=2)
   {
-    const double inverseQuantScale = double(g_invQuantScales[cQP.rem]);
-    int64_t rdFactor = (int64_t)(inverseQuantScale * inverseQuantScale * (1 << (2 * cQP.per)) / m_dLambda / 16
-                               / (1 << (2 * DISTORTION_PRECISION_ADJUSTMENT(channelBitDepth)))
-#if HM_QTBT_AS_IN_JEM_QUANT
-#else
-                              * blkErrScale
-#endif
+    const double inverseQuantScale = double(g_invQuantScales[0][cQP.rem(isTransformSkip)]);
+    int64_t rdFactor = (int64_t)(inverseQuantScale * inverseQuantScale * (1 << (2 * cQP.per(isTransformSkip))) / m_dLambda / 16
+                                  / (1 << (2 * DISTORTION_PRECISION_ADJUSTMENT(channelBitDepth)))
                              + 0.5);
 
     int lastCG = -1;
@@ -1220,7 +1170,757 @@ void QuantRDOQ::xRateDistOptQuant(TransformUnit &tu, const ComponentID &compID,
       }
     }
   }
-#endif
+}
+
+void QuantRDOQ::xRateDistOptQuantTS( TransformUnit &tu, const ComponentID &compID, const CCoeffBuf &coeffs, TCoeff &absSum, const QpParam &qp, const Ctx &ctx ) // RDOQ driver: chooses a level per coefficient via xGetCodedLevelTSPred, stores the signed levels in tu.getCoeffs( compID ) and adds their absolute sum to absSum
+{
+  const FracBitsAccess& fracBits = ctx.getFracBitsAcess();
+
+  const SPS &sps            = *tu.cs->sps;
+  const CompArea &rect      = tu.blocks[compID];
+  const uint32_t width      = rect.width;
+  const uint32_t height     = rect.height;
+  const ChannelType chType  = toChannelType(compID);
+  const int channelBitDepth = sps.getBitDepth( chType );
+
+  const bool extendedPrecision     = sps.getSpsRangeExtension().getExtendedPrecisionProcessingFlag();
+  const int  maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange(chType);
+
+  int transformShift = getTransformShift( channelBitDepth, rect.size(), maxLog2TrDynamicRange );
+
+  if( extendedPrecision )
+  {
+    transformShift = std::max<int>( 0, transformShift );
+  }
+
+        double   blockUncodedCost                   = 0; // accumulated in the loop below but never read back in this function
+  const uint32_t maxNumCoeff                        = rect.area();
+
+  CHECK( compID >= MAX_NUM_TBLOCKS, "Invalid component ID" );
+
+  int scalingListType = getScalingListType( tu.cu->predMode, compID ); // only consumed by the CHECK on the next line
+  CHECK( scalingListType >= SCALING_LIST_NUM, "Invalid scaling list" );
+
+  const TCoeff *srcCoeff = coeffs.buf;
+        TCoeff *dstCoeff = tu.getCoeffs( compID ).buf;
+
+  double *costCoeff  = m_pdCostCoeff;
+  double *costSig    = m_pdCostSig;
+  double *costCoeff0 = m_pdCostCoeff0;
+
+  memset( m_pdCostCoeff,  0, sizeof( double ) *  maxNumCoeff );
+  memset( m_pdCostSig,    0, sizeof( double ) *  maxNumCoeff );
+
+  m_bdpcm = 0; // xGetCodedLevelTSPred forwards m_bdpcm to deriveModCoeff
+
+  const bool   needsSqrt2Scale = TU::needsSqrt2Scale( tu, compID );  // should always be false - transform-skipped blocks don't require sqrt(2) compensation.
+  const bool   isTransformSkip = (tu.mtsIdx[compID] == MTS_SKIP);
+  const int    qBits = QUANT_SHIFT + qp.per(isTransformSkip) + (isTransformSkip ? 0 : transformShift) + (needsSqrt2Scale ? -1 : 0);  // Right shift of non-RDOQ quantizer;  level = (coeff*uiQ + offset)>>q_bits
+  const int    quantisationCoefficient = g_quantScales[needsSqrt2Scale?1:0][qp.rem(isTransformSkip)];
+  const double errorScale              = xGetErrScaleCoeff( TU::needsSqrt2Scale(tu, compID), width, height, qp.rem(isTransformSkip), maxLog2TrDynamicRange, channelBitDepth, isTransformSkip);
+
+  const TCoeff entropyCodingMaximum = ( 1 << maxLog2TrDynamicRange ) - 1;
+
+  uint32_t coeffLevels[3]; // candidate absolute levels for the current coefficient; at most three are stored
+  double   coeffLevelError[4]; // [0]: distortion of level 0; [i]: written by xGetCodedLevelTSPred for candidate i-1
+
+  CoeffCodingContext cctx( tu, compID, tu.cs->picHeader->getSignDataHidingEnabledFlag() );
+  const int sbSizeM1    = ( 1 << cctx.log2CGSize() ) - 1;
+  double    baseCost    = 0;
+  uint32_t  goRiceParam = 0;
+
+  double *costSigSubBlock = m_pdCostCoeffGroupSig;
+  memset( costSigSubBlock, 0, ( maxNumCoeff >> cctx.log2CGSize() ) * sizeof( double ) );
+
+  const int sbNum = width * height >> cctx.log2CGSize();
+  int scanPos;
+  coeffGroupRDStats rdStats;
+
+  bool anySigCG = false;
+
+  int maxCtxBins = (cctx.maxNumCoeff() * 7) >> 2; // bin budget: floor( 1.75 * cctx.maxNumCoeff() )
+  cctx.setNumCtxBins(maxCtxBins);
+
+  for( int sbId = 0; sbId < sbNum; sbId++ )
+  {
+    cctx.initSubblock( sbId );
+
+    int noCoeffCoded = 0; // count of non-zero levels chosen so far in this sub-block
+    baseCost = 0.0;
+    memset( &rdStats, 0, sizeof (coeffGroupRDStats));
+
+    rdStats.iNumSbbCtxBins = 0;
+
+    for( int scanPosInSB = 0; scanPosInSB <= sbSizeM1; scanPosInSB++ )
+    {
+      int lastPosCoded = sbSizeM1;
+      scanPos = cctx.minSubPos() + scanPosInSB;
+      //===== quantization =====
+      uint32_t blkPos = cctx.blockPos( scanPos );
+
+      // set coeff
+      const int64_t          tmpLevel    = int64_t( abs( srcCoeff[blkPos] ) ) * quantisationCoefficient;
+      const Intermediate_Int levelDouble = (Intermediate_Int)std::min<int64_t>( tmpLevel, std::numeric_limits<Intermediate_Int>::max() - ( Intermediate_Int( 1 ) << ( qBits - 1 ) ) );
+
+      uint32_t roundAbsLevel = std::min<uint32_t>(uint32_t(entropyCodingMaximum), uint32_t((levelDouble + (Intermediate_Int(1) << (qBits - 1))) >> qBits));
+      uint32_t minAbsLevel = (roundAbsLevel > 1 ? roundAbsLevel - 1 : 1); // one below the rounded level, clamped to at least 1
+
+      uint32_t downAbsLevel = std::min<uint32_t>(uint32_t(entropyCodingMaximum), uint32_t(levelDouble >> qBits));
+      uint32_t upAbsLevel = std::min<uint32_t>(uint32_t(entropyCodingMaximum), downAbsLevel + 1);
+
+      m_testedLevels = 0;
+      coeffLevels[m_testedLevels++] = roundAbsLevel;
+
+      if (minAbsLevel != roundAbsLevel)
+        coeffLevels[m_testedLevels++] = minAbsLevel;
+
+      int rightPixel, belowPixel, predPixel;
+
+      cctx.neighTS(rightPixel, belowPixel, scanPos, dstCoeff);
+      predPixel = cctx.deriveModCoeff(rightPixel, belowPixel, upAbsLevel, 0);
+
+      if (upAbsLevel != roundAbsLevel && upAbsLevel != minAbsLevel && predPixel == 1) // third candidate only when distinct and deriveModCoeff maps it to 1
+        coeffLevels[m_testedLevels++] = upAbsLevel;
+
+      double dErr = double(levelDouble);
+      coeffLevelError[0] = dErr * dErr * errorScale;
+
+      costCoeff0[scanPos] = coeffLevelError[0];
+      blockUncodedCost   += costCoeff0[ scanPos ];
+      dstCoeff[blkPos]    = coeffLevels[0]; // provisional; replaced by the selected signed level further down
+
+      //===== coefficient level estimation =====
+            unsigned    ctxIdSig = cctx.sigCtxIdAbsTS( scanPos, dstCoeff );
+            uint32_t    cLevel;
+      const BinFracBits fracBitsPar = fracBits.getFracBitsArray( cctx.parityCtxIdAbsTS() );
+
+      goRiceParam = cctx.templateAbsSumTS( scanPos, dstCoeff );
+      unsigned ctxIdSign = cctx.signCtxIdAbsTS(scanPos, dstCoeff, 0);
+      const BinFracBits fracBitsSign = fracBits.getFracBitsArray(ctxIdSign);
+      const uint8_t     sign         = srcCoeff[ blkPos ] < 0 ? 1 : 0;
+
+      DTRACE_COND( ( coeffLevels[0] != 0 ), g_trace_ctx, D_RDOQ_MORE, " uiCtxSig=%d", ctxIdSig );
+
+      unsigned gt1CtxId = cctx.lrg1CtxIdAbsTS(scanPos, dstCoeff, 0);
+      const BinFracBits fracBitsGr1 = fracBits.getFracBitsArray(gt1CtxId);
+
+      const BinFracBits fracBitsSig = fracBits.getFracBitsArray( ctxIdSig );
+      bool lastCoeff = false; // set only at the final scan position of a sub-block with no non-zero level so far
+      if (scanPosInSB == lastPosCoded && noCoeffCoded == 0)
+      {
+        lastCoeff = true;
+      }
+      int numUsedCtxBins = 0;
+      cLevel = xGetCodedLevelTSPred(costCoeff[scanPos], costCoeff0[scanPos], costSig[scanPos], levelDouble, qBits, errorScale, coeffLevels, coeffLevelError,
+                                    &fracBitsSig, fracBitsPar, cctx, fracBits, fracBitsSign, fracBitsGr1, sign, rightPixel, belowPixel, goRiceParam, lastCoeff, extendedPrecision, maxLog2TrDynamicRange, numUsedCtxBins);
+
+      cctx.decimateNumCtxBins(numUsedCtxBins);
+      rdStats.iNumSbbCtxBins += numUsedCtxBins;
+
+
+      if (cLevel > 0)
+      {
+        noCoeffCoded++;
+      }
+
+      TCoeff level = cLevel;
+      dstCoeff[blkPos] = (level != 0 && srcCoeff[blkPos] < 0) ? -level : level; // re-apply the sign of the source coefficient
+      baseCost           += costCoeff[ scanPos ];
+      rdStats.d64SigCost += costSig[ scanPos ];
+
+      if( dstCoeff[ blkPos ] )
+      {
+        cctx.setSigGroup();
+        rdStats.d64CodedLevelandDist += costCoeff [ scanPos ] - costSig[ scanPos ];
+        rdStats.d64UncodedDist       += costCoeff0[ scanPos ];
+      }
+    } //end for (scanPosInSB)
+
+    if( !cctx.isSigGroup() )
+    {
+      const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray( cctx.sigGroupCtxId( true ) );
+      baseCost += xGetRateSigCoeffGroup( fracBitsSigGroup, 0 ) - rdStats.d64SigCost;
+      costSigSubBlock[cctx.subSetId()] = xGetRateSigCoeffGroup( fracBitsSigGroup, 0 );
+      cctx.increaseNumCtxBins(rdStats.iNumSbbCtxBins); // skip sub-block
+    }
+    else if( sbId != sbNum - 1 || anySigCG ) // the final sub-block is exempt from the zero-out test unless an earlier sub-block stayed significant
+    {
+      // rd-cost if SigCoeffGroupFlag = 0, initialization
+      double costZeroSB = baseCost;
+
+      const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray( cctx.sigGroupCtxId( true ) );
+
+      baseCost   += xGetRateSigCoeffGroup( fracBitsSigGroup, 1 );
+      costZeroSB += xGetRateSigCoeffGroup( fracBitsSigGroup, 0 );
+      costSigSubBlock[ cctx.subSetId() ] = xGetRateSigCoeffGroup( fracBitsSigGroup, 1 );
+
+      costZeroSB += rdStats.d64UncodedDist;         // distortion for resetting non-zero levels to zero levels
+      costZeroSB -= rdStats.d64CodedLevelandDist;   // distortion and level cost for keeping all non-zero levels
+      costZeroSB -= rdStats.d64SigCost;             // sig cost for all coeffs, including zero levels and non-zero levels
+
+      if( costZeroSB < baseCost )
+      {
+        cctx.resetSigGroup();
+        baseCost = costZeroSB;
+        costSigSubBlock[ cctx.subSetId() ] = xGetRateSigCoeffGroup( fracBitsSigGroup, 0 );
+        cctx.increaseNumCtxBins(rdStats.iNumSbbCtxBins); // skip sub-block
+        for( int scanPosInSB = 0; scanPosInSB <= sbSizeM1; scanPosInSB++ )
+        {
+          scanPos = cctx.minSubPos() + scanPosInSB;
+          uint32_t blkPos = cctx.blockPos( scanPos );
+
+          if( dstCoeff[ blkPos ] )
+          {
+            dstCoeff[ blkPos ] = 0;
+            costCoeff[ scanPos ] = costCoeff0[ scanPos ];
+            costSig[ scanPos] = 0;
+          }
+        }
+      }
+      else
+      {
+        anySigCG = true;
+      }
+    }
+  }
+
+  //===== accumulate the sum of absolute levels =====
+  for( int scanPos = 0; scanPos < maxNumCoeff; scanPos++ )
+  {
+    int blkPos = cctx.blockPos( scanPos );
+    TCoeff level = dstCoeff[ blkPos ];
+    absSum += abs(level);
+  }
+}
+
+void QuantRDOQ::forwardRDPCM( TransformUnit &tu, const ComponentID &compID, const CCoeffBuf &coeffs, TCoeff &absSum, const QpParam &qp, const Ctx &ctx ) // RDOQ on the difference between each source coefficient and its reconstructed left/above neighbour (direction from the CU's bdpcmMode); writes levels to tu.getCoeffs(compID) and adds their absolute sum to absSum
+{
+  const FracBitsAccess& fracBits = ctx.getFracBitsAcess();
+
+  const SPS &sps = *tu.cs->sps;
+  const CompArea &rect = tu.blocks[compID];
+  const uint32_t width = rect.width;
+  const uint32_t height = rect.height;
+  const ChannelType chType = toChannelType(compID);
+  const int channelBitDepth = sps.getBitDepth(chType);
+
+  const bool extendedPrecision = sps.getSpsRangeExtension().getExtendedPrecisionProcessingFlag();
+  const int  maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange(chType);
+  const int  dirMode = isLuma(compID) ? tu.cu->bdpcmMode : tu.cu->bdpcmModeChroma; // 1: predict from the left neighbour; otherwise from the neighbour above (see posNb below)
+  int transformShift = getTransformShift(channelBitDepth, rect.size(), maxLog2TrDynamicRange);
+
+  if (extendedPrecision)
+  {
+    transformShift = std::max<int>(0, transformShift);
+  }
+
+  double   blockUncodedCost = 0; // accumulated in the loop below but never read back in this function
+  const uint32_t maxNumCoeff = rect.area();
+
+  CHECK(compID >= MAX_NUM_TBLOCKS, "Invalid component ID");
+
+  int scalingListType = getScalingListType(tu.cu->predMode, compID); // only consumed by the CHECK on the next line
+  CHECK(scalingListType >= SCALING_LIST_NUM, "Invalid scaling list");
+
+  const TCoeff *srcCoeff = coeffs.buf;
+  TCoeff *dstCoeff = tu.getCoeffs(compID).buf;
+
+  double *costCoeff = m_pdCostCoeff;
+  double *costSig = m_pdCostSig;
+  double *costCoeff0 = m_pdCostCoeff0;
+
+  memset(m_pdCostCoeff, 0, sizeof(double) *  maxNumCoeff);
+  memset(m_pdCostSig, 0, sizeof(double) *  maxNumCoeff);
+  memset(m_fullCoeff, 0, sizeof(TCoeff) * maxNumCoeff); // m_fullCoeff: reconstructed (dequantised + predicted) coefficients, indexed by block position
+
+  m_bdpcm = dirMode; // xGetCodedLevelTSPred forwards m_bdpcm to deriveModCoeff
+
+  const bool   needsSqrt2Scale = TU::needsSqrt2Scale(tu, compID);  // should always be false - transform-skipped blocks don't require sqrt(2) compensation.
+  const bool   isTransformSkip = (tu.mtsIdx[compID] == MTS_SKIP);
+  const int    qBits = QUANT_SHIFT + qp.per(isTransformSkip) + (isTransformSkip? 0 : transformShift) + ( needsSqrt2Scale ? -1 : 0);  // Right shift of non-RDOQ quantizer;  level = (coeff*uiQ + offset)>>q_bits
+  const int    quantisationCoefficient = g_quantScales[needsSqrt2Scale ? 1 : 0][qp.rem(isTransformSkip)];
+  const double errorScale = xGetErrScaleCoeff(TU::needsSqrt2Scale(tu, compID), width, height, qp.rem(isTransformSkip), maxLog2TrDynamicRange, channelBitDepth, isTransformSkip);
+  TrQuantParams trQuantParams; // inverse-quantisation parameters handed to xDequantSample for the running reconstruction
+  trQuantParams.rightShift = (IQUANT_SHIFT - ((isTransformSkip ? 0 : transformShift) + qp.per(isTransformSkip)));
+  trQuantParams.qScale = g_invQuantScales[needsSqrt2Scale ? 1 : 0][qp.rem(isTransformSkip)];
+
+  const TCoeff entropyCodingMaximum = (1 << maxLog2TrDynamicRange) - 1;
+
+  uint32_t coeffLevels[3]; // candidate absolute levels for the current coefficient; at most two are stored here
+  double   coeffLevelError[4]; // [0]: distortion of level 0; [i]: written by xGetCodedLevelTSPred for candidate i-1
+
+  CoeffCodingContext cctx(tu, compID, tu.cs->picHeader->getSignDataHidingEnabledFlag());
+  const int sbSizeM1 = (1 << cctx.log2CGSize()) - 1;
+  double    baseCost = 0;
+  uint32_t  goRiceParam = 0;
+
+  double *costSigSubBlock = m_pdCostCoeffGroupSig;
+  memset(costSigSubBlock, 0, (maxNumCoeff >> cctx.log2CGSize()) * sizeof(double));
+
+  const int sbNum = width * height >> cctx.log2CGSize();
+  int scanPos;
+  coeffGroupRDStats rdStats;
+
+  bool anySigCG = false;
+
+  int maxCtxBins = (cctx.maxNumCoeff() * 7) >> 2; // bin budget: floor(1.75 * cctx.maxNumCoeff())
+  cctx.setNumCtxBins(maxCtxBins);
+
+  for (int sbId = 0; sbId < sbNum; sbId++)
+  {
+    cctx.initSubblock(sbId);
+
+    int noCoeffCoded = 0; // count of non-zero levels chosen so far in this sub-block
+    baseCost = 0.0;
+    memset(&rdStats, 0, sizeof(coeffGroupRDStats));
+    rdStats.iNumSbbCtxBins = 0;
+
+    for (int scanPosInSB = 0; scanPosInSB <= sbSizeM1; scanPosInSB++)
+    {
+      int lastPosCoded = sbSizeM1;
+      scanPos = cctx.minSubPos() + scanPosInSB;
+      //===== quantization =====
+      uint32_t blkPos = cctx.blockPos(scanPos);
+
+      const int posX = cctx.posX(scanPos);
+      const int posY = cctx.posY(scanPos);
+      const int posS = (1 == dirMode) ? posX : posY; // coordinate along the prediction direction; 0 means there is no neighbour to predict from
+      const int posNb = (1 == dirMode) ? (posX - 1) + posY * coeffs.stride : posX + (posY - 1) * coeffs.stride; // only used as an index when posS != 0
+      TCoeff predCoeff = (0 != posS) ? m_fullCoeff[posNb] : 0;
+
+      // set coeff
+      const int64_t          tmpLevel = int64_t(abs(srcCoeff[blkPos] - predCoeff)) * quantisationCoefficient;
+      const Intermediate_Int levelDouble = (Intermediate_Int)std::min<int64_t>(tmpLevel, std::numeric_limits<Intermediate_Int>::max() - (Intermediate_Int(1) << (qBits - 1)));
+      uint32_t roundAbsLevel = std::min<uint32_t>(uint32_t(entropyCodingMaximum), uint32_t((levelDouble + (Intermediate_Int(1) << (qBits - 1))) >> qBits));
+      uint32_t minAbsLevel = (roundAbsLevel > 1 ? roundAbsLevel - 1 : 1); // one below the rounded level, clamped to at least 1
+
+      m_testedLevels = 0;
+      coeffLevels[m_testedLevels++] = roundAbsLevel;
+
+      if (minAbsLevel != roundAbsLevel)
+        coeffLevels[m_testedLevels++] = minAbsLevel;
+
+      double dErr = double(levelDouble);
+      coeffLevelError[0]  = dErr * dErr * errorScale;
+
+      costCoeff0[scanPos] = coeffLevelError[0];
+      blockUncodedCost   += costCoeff0[scanPos];
+      dstCoeff[blkPos]    = coeffLevels[0]; // provisional; replaced by the selected signed level further down
+
+      //===== coefficient level estimation =====
+      unsigned    ctxIdSig = cctx.sigCtxIdAbsTS(scanPos, dstCoeff);
+      uint32_t    cLevel;
+      const BinFracBits fracBitsPar = fracBits.getFracBitsArray(cctx.parityCtxIdAbsTS());
+
+      goRiceParam = cctx.templateAbsSumTS(scanPos, dstCoeff);
+      unsigned ctxIdSign = cctx.signCtxIdAbsTS(scanPos, dstCoeff, dirMode);
+      const BinFracBits fracBitsSign = fracBits.getFracBitsArray(ctxIdSign);
+      const uint8_t     sign = srcCoeff[blkPos] - predCoeff < 0 ? 1 : 0; // sign of the prediction residual, not of the source coefficient
+      unsigned gt1CtxId = cctx.lrg1CtxIdAbsTS(scanPos, dstCoeff, dirMode);
+      const BinFracBits fracBitsGr1 = fracBits.getFracBitsArray(gt1CtxId);
+
+      DTRACE_COND((dstCoeff[blkPos] != 0), g_trace_ctx, D_RDOQ_MORE, " uiCtxSig=%d", ctxIdSig);
+
+      const BinFracBits fracBitsSig = fracBits.getFracBitsArray(ctxIdSig);
+      bool lastCoeff = false; // set only at the final scan position of a sub-block with no non-zero level so far
+      if (scanPosInSB == lastPosCoded && noCoeffCoded == 0)
+      {
+        lastCoeff = true;
+      }
+      int rightPixel, belowPixel;
+      cctx.neighTS(rightPixel, belowPixel, scanPos, dstCoeff);
+      int numUsedCtxBins = 0;
+      cLevel = xGetCodedLevelTSPred(costCoeff[scanPos], costCoeff0[scanPos], costSig[scanPos], levelDouble, qBits, errorScale, coeffLevels, coeffLevelError,
+        &fracBitsSig, fracBitsPar, cctx, fracBits, fracBitsSign, fracBitsGr1, sign, rightPixel, belowPixel, goRiceParam, lastCoeff, extendedPrecision, maxLog2TrDynamicRange, numUsedCtxBins);
+      cctx.decimateNumCtxBins(numUsedCtxBins);
+      rdStats.iNumSbbCtxBins += numUsedCtxBins;
+      if (cLevel > 0)
+      {
+        noCoeffCoded++;
+      }
+      dstCoeff[blkPos] = cLevel;
+
+      if (sign)
+      {
+        dstCoeff[blkPos] = -dstCoeff[blkPos];
+      }
+      xDequantSample( m_fullCoeff[blkPos], dstCoeff[blkPos], trQuantParams ); // reconstruct the residual from the chosen level ...
+      m_fullCoeff[blkPos] += predCoeff; // ... and add the prediction so later positions predict from the reconstruction
+
+      baseCost += costCoeff[scanPos];
+      rdStats.d64SigCost += costSig[scanPos];
+
+      if (dstCoeff[blkPos])
+      {
+        cctx.setSigGroup();
+        rdStats.d64CodedLevelandDist += costCoeff[scanPos] - costSig[scanPos];
+        rdStats.d64UncodedDist += costCoeff0[scanPos];
+      }
+    } //end for (scanPosInSB)
+
+    if (!cctx.isSigGroup())
+    {
+      const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray(cctx.sigGroupCtxId(true));
+      baseCost += xGetRateSigCoeffGroup(fracBitsSigGroup, 0) - rdStats.d64SigCost;
+      costSigSubBlock[cctx.subSetId()] = xGetRateSigCoeffGroup(fracBitsSigGroup, 0);
+      cctx.increaseNumCtxBins(rdStats.iNumSbbCtxBins); // skip sub-block
+    }
+    else if (sbId != sbNum - 1 || anySigCG) // the final sub-block is exempt from the zero-out test unless an earlier sub-block stayed significant
+    {
+      // rd-cost if SigCoeffGroupFlag = 0, initialization
+      double costZeroSB = baseCost;
+
+      const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray(cctx.sigGroupCtxId(true));
+
+      baseCost += xGetRateSigCoeffGroup(fracBitsSigGroup, 1);
+      costZeroSB += xGetRateSigCoeffGroup(fracBitsSigGroup, 0);
+      costSigSubBlock[cctx.subSetId()] = xGetRateSigCoeffGroup(fracBitsSigGroup, 1);
+
+      costZeroSB += rdStats.d64UncodedDist;         // distortion for resetting non-zero levels to zero levels
+      costZeroSB -= rdStats.d64CodedLevelandDist;   // distortion and level cost for keeping all non-zero levels
+      costZeroSB -= rdStats.d64SigCost;             // sig cost for all coeffs, including zero levels and non-zero levels
+
+      if (costZeroSB < baseCost)
+      {
+        cctx.resetSigGroup();
+        baseCost = costZeroSB;
+        costSigSubBlock[cctx.subSetId()] = xGetRateSigCoeffGroup(fracBitsSigGroup, 0);
+        cctx.increaseNumCtxBins(rdStats.iNumSbbCtxBins); // skip sub-block
+
+        for (int scanPosInSB = 0; scanPosInSB <= sbSizeM1; scanPosInSB++)
+        {
+          scanPos = cctx.minSubPos() + scanPosInSB;
+          uint32_t blkPos = cctx.blockPos(scanPos);
+
+          const int posX = cctx.posX(scanPos);
+          const int posY = cctx.posY(scanPos);
+          const int posS = (1 == dirMode) ? posX : posY;
+          const int posNb = (1 == dirMode) ? (posX - 1) + posY * coeffs.stride : posX + (posY - 1) * coeffs.stride;
+          m_fullCoeff[blkPos] = (0 != posS) ? m_fullCoeff[posNb] : 0; // level is now 0, so the reconstruction equals the prediction; index by block position like every other m_fullCoeff access (was scanPos)
+
+          if (dstCoeff[blkPos])
+          {
+            dstCoeff[blkPos] = 0;
+            costCoeff[scanPos] = costCoeff0[scanPos];
+            costSig[scanPos] = 0;
+          }
+        }
+      }
+      else
+      {
+        anySigCG = true;
+      }
+    }
+  }
+
+  //===== accumulate the sum of absolute levels =====
+  for (int scanPos = 0; scanPos < maxNumCoeff; scanPos++)
+  {
+    int blkPos = cctx.blockPos(scanPos);
+    TCoeff level = dstCoeff[blkPos];
+    absSum += abs(level);
+  }
+}
+
+void QuantRDOQ::xDequantSample(TCoeff& pRes, TCoeff& coeff, const TrQuantParams& trQuantParams)
+{
+  // Dequantise one level: multiply by qScale, then undo rightShift (with rounding when it is a real right shift).
+  const auto shift  = trQuantParams.rightShift;
+  const auto scaled = Intermediate_Int(coeff) * trQuantParams.qScale;
+  if (shift <= 0)
+  {
+    pRes = TCoeff(scaled << -shift);
+    return;
+  }
+  const Intermediate_Int rounding = Intermediate_Int(1) << (shift - 1);
+  pRes = TCoeff((scaled + rounding) >> shift);
+}
+
+inline uint32_t QuantRDOQ::xGetCodedLevelTSPred(double&            rd64CodedCost, // out: RD cost of the level that is returned
+  double&            rd64CodedCost0, // read only here: cost of the zero level before the sig-flag rate is added
+  double&            rd64CodedCostSig, // out: sig-flag cost that belongs to the returned level
+  Intermediate_Int    levelDouble,
+  int                 qBits,
+  double              errorScale,
+  uint32_t coeffLevels[], // in: candidate absolute levels; the first m_testedLevels entries are evaluated
+  double coeffLevelError[], // out: entry i receives the distortion of candidate i-1; entry 0 is not written here
+  const BinFracBits* fracBitsSig, // dereferenced whenever isLast is false and at least 4 bins remain
+  const BinFracBits& fracBitsPar,
+  CoeffCodingContext& cctx,
+  const FracBitsAccess& fracBitsAccess,
+  const BinFracBits& fracBitsSign,
+  const BinFracBits& fracBitsGt1,
+  const uint8_t      sign,
+  int                rightPixel,
+  int                belowPixel,
+  uint16_t           ricePar,
+  bool               isLast,
+  bool               useLimitedPrefixLength,
+  const int          maxLog2TrDynamicRange
+  , int&               numUsedCtxBins // out: reset to 0, then set to the bins counted for the sig flag plus those of the best candidate
+) const
+{
+  double currCostSig = 0;
+  uint32_t   bestAbsLevel = 0;
+  numUsedCtxBins = 0;
+  int numBestCtxBin = 0;
+  if (!isLast && coeffLevels[0] < 3) // zero is a candidate only when isLast is false and the first candidate level is below 3
+  {
+    if (cctx.numCtxBins() >= 4)
+      rd64CodedCostSig = xGetRateSigCoef(*fracBitsSig, 0);
+    else
+      rd64CodedCostSig = xGetICost(1 << SCALE_BITS); // fewer than 4 bins left: fixed rate of 1 << SCALE_BITS instead of the context-based sig rate
+    rd64CodedCost = rd64CodedCost0 + rd64CodedCostSig;
+    if (cctx.numCtxBins() >= 4)
+      numUsedCtxBins++;
+    if (coeffLevels[0] == 0)
+    {
+      return bestAbsLevel;
+    }
+  }
+  else
+  {
+    rd64CodedCost = MAX_DOUBLE; // zero not allowed: the first finite candidate cost below replaces this
+  }
+
+  if (!isLast)
+  {
+    if (cctx.numCtxBins() >= 4)
+      currCostSig = xGetRateSigCoef(*fracBitsSig, 1);
+    else
+      currCostSig = xGetICost(1 << SCALE_BITS);
+    if (coeffLevels[0] >= 3 && cctx.numCtxBins() >= 4) // the sig bin was not counted above in this case
+      numUsedCtxBins++;
+  }
+
+  for (int errorInd = 1; errorInd <= m_testedLevels; errorInd++) // 1-based so that candidate i-1 maps to coeffLevelError[i]
+  {
+    int absLevel = coeffLevels[errorInd - 1];
+    double dErr = 0.0;
+    dErr = double(levelDouble - (Intermediate_Int(absLevel) << qBits));
+    coeffLevelError[errorInd] = dErr * dErr * errorScale;
+    int modAbsLevel = absLevel;
+    if (cctx.numCtxBins() >= 4) // otherwise the rate is computed on the unmodified absLevel
+    {
+      modAbsLevel = cctx.deriveModCoeff(rightPixel, belowPixel, absLevel, m_bdpcm);
+    }
+    int numCtxBins = 0;
+    double dCurrCost = coeffLevelError[errorInd] + xGetICost(xGetICRateTS(modAbsLevel, fracBitsPar, cctx, fracBitsAccess, fracBitsSign, fracBitsGt1, numCtxBins, sign, ricePar, useLimitedPrefixLength, maxLog2TrDynamicRange));
+
+    if (cctx.numCtxBins() >= 4)
+      dCurrCost += currCostSig; // if cctx.numCtxBins < 4, xGetICRateTS returns a rate that already includes the sign cost; nothing more needs to be added
+
+    if (dCurrCost < rd64CodedCost) // strict comparison: on a tie the earlier candidate (or zero) is kept
+    {
+      bestAbsLevel = absLevel;
+      rd64CodedCost = dCurrCost;
+      rd64CodedCostSig = currCostSig;
+      numBestCtxBin = numCtxBins;
+    }
+  }
+  numUsedCtxBins += numBestCtxBin;
+  return bestAbsLevel;
+}
+
+inline int QuantRDOQ::xGetICRateTS( const uint32_t            absLevel,
+                                    const BinFracBits&        fracBitsPar,
+                                    const CoeffCodingContext& cctx,
+                                    const FracBitsAccess&     fracBitsAccess,
+                                    const BinFracBits&        fracBitsSign,
+                                    const BinFracBits&        fracBitsGt1,
+                                    int&                      numCtxBins,
+                                    const uint8_t             sign,
+                                    const uint16_t            ricePar,
+                                    const bool                useLimitedPrefixLength,
+                                    const int                 maxLog2TrDynamicRange  ) const
+{
+ // Returns the rate of coding absLevel: BinFracBits table entries plus whole-bin counts shifted left by SCALE_BITS.
+  // One of three models is chosen from cctx.numCtxBins(); numCtxBins is only ever incremented here, never reset.
+  if (cctx.numCtxBins() < 4) // Full by-pass coding 
+  {
+    int rate = absLevel ? (1 << SCALE_BITS) : 0; // 1 bit to signal sign of non-zero 
+
+    uint32_t symbol = absLevel;
+
+    uint32_t length;
+    const int threshold = COEF_REMAIN_BIN_REDUCTION;
+    if (symbol < (threshold << ricePar)) // small symbol: (symbol >> ricePar) + 1 + ricePar bins
+    {
+      length = symbol >> ricePar;
+      rate += (length + 1 + ricePar) << SCALE_BITS;
+    }
+    else if (useLimitedPrefixLength) // large symbol, prefix growth capped at maximumPrefixLength
+    {
+      const uint32_t maximumPrefixLength = (32 - (COEF_REMAIN_BIN_REDUCTION + maxLog2TrDynamicRange));
+
+      uint32_t prefixLength = 0;
+      uint32_t suffix = (symbol >> ricePar) - COEF_REMAIN_BIN_REDUCTION;
+
+      while ((prefixLength < maximumPrefixLength) && (suffix > ((2 << prefixLength) - 2)))
+      {
+        prefixLength++;
+      }
+
+      const uint32_t suffixLength = (prefixLength == maximumPrefixLength) ? (maxLog2TrDynamicRange - ricePar) : (prefixLength + 1/*separator*/);
+
+      rate += (COEF_REMAIN_BIN_REDUCTION + prefixLength + suffixLength + ricePar) << SCALE_BITS;
+    }
+    else // large symbol, prefix length not capped
+    {
+      length = ricePar;
+      symbol = symbol - (threshold << ricePar);
+      while (symbol >= (1 << length))
+      {
+        symbol -= (1 << (length++));
+      }
+      rate += (threshold + length + 1 - ricePar + length) << SCALE_BITS;
+    }
+
+    return rate; // numCtxBins is left untouched on this path
+  }
+
+  else if (cctx.numCtxBins() >= 4 && cctx.numCtxBins() < 8) // First pass context coding and all by-pass coding ( Sign flag is not counted here)
+  {
+    int rate = fracBitsSign.intBits[sign]; // sign bits
+    if (absLevel)
+      numCtxBins++;
+
+    if (absLevel > 1)
+    {
+      rate += fracBitsGt1.intBits[1];
+      rate += fracBitsPar.intBits[(absLevel - 2) & 1];
+
+      numCtxBins += 2;
+
+      int cutoffVal = 2;
+
+      if (absLevel >= cutoffVal) // always true here: this branch requires absLevel > 1
+      {
+        uint32_t symbol = (absLevel - cutoffVal) >> 1; // remainder, costed with the same three-way model as the by-pass path
+        uint32_t length;
+        const int threshold = COEF_REMAIN_BIN_REDUCTION;
+        if (symbol < (threshold << ricePar))
+        {
+          length = symbol >> ricePar;
+          rate += (length + 1 + ricePar) << SCALE_BITS;
+        }
+        else if (useLimitedPrefixLength)
+        {
+          const uint32_t maximumPrefixLength = (32 - (COEF_REMAIN_BIN_REDUCTION + maxLog2TrDynamicRange));
+
+          uint32_t prefixLength = 0;
+          uint32_t suffix = (symbol >> ricePar) - COEF_REMAIN_BIN_REDUCTION;
+
+          while ((prefixLength < maximumPrefixLength) && (suffix > ((2 << prefixLength) - 2)))
+          {
+            prefixLength++;
+          }
+
+          const uint32_t suffixLength = (prefixLength == maximumPrefixLength) ? (maxLog2TrDynamicRange - ricePar) : (prefixLength + 1/*separator*/);
+
+          rate += (COEF_REMAIN_BIN_REDUCTION + prefixLength + suffixLength + ricePar) << SCALE_BITS;
+        }
+        else
+        {
+          length = ricePar;
+          symbol = symbol - (threshold << ricePar);
+          while (symbol >= (1 << length))
+          {
+            symbol -= (1 << (length++));
+          }
+          rate += (threshold + length + 1 - ricePar + length) << SCALE_BITS;
+        }
+      }
+    }
+    else if (absLevel == 1)
+    {
+      rate += fracBitsGt1.intBits[0];
+      numCtxBins++;
+    }
+    else
+    {
+      rate = 0; // absLevel == 0: drop the sign cost that was used to initialise rate
+    }
+    return rate;
+
+  }
+
+  // Remaining case (cctx.numCtxBins() >= 8): same start as the previous branch, followed by up to four
+  // context-coded greater-than flags (cutoffVal = 2, 4, 6, 8) before the remainder is costed.
+  int rate = fracBitsSign.intBits[sign];
+
+  if (absLevel)
+    numCtxBins++;
+
+  if( absLevel > 1 )
+  {
+    rate += fracBitsGt1.intBits[1];
+    rate += fracBitsPar.intBits[( absLevel - 2 ) & 1];
+    numCtxBins += 2;
+
+          int cutoffVal = 2;
+    const int numGtBins = 4;
+    for( int i = 0; i < numGtBins; i++ )
+    {
+      if( absLevel >= cutoffVal ) // a flag is only costed (and counted) while the level reaches the current cutoff
+      {
+        const uint16_t ctxGtX = cctx.greaterXCtxIdAbsTS( cutoffVal>>1 );
+        const BinFracBits &fracBitsGtX = fracBitsAccess.getFracBitsArray( ctxGtX );
+        unsigned gtX = ( absLevel >= ( cutoffVal + 2 ) );
+        rate += fracBitsGtX.intBits[gtX];
+        numCtxBins++;
+      }
+      cutoffVal += 2;
+    }
+
+    if( absLevel >= cutoffVal ) // cutoffVal is 10 after the loop
+    {
+      uint32_t symbol = ( absLevel - cutoffVal ) >> 1;
+      uint32_t length;
+      const int threshold = COEF_REMAIN_BIN_REDUCTION;
+      if( symbol < ( threshold << ricePar ) )
+      {
+        length = symbol >> ricePar;
+        rate  += ( length + 1 + ricePar ) << SCALE_BITS;
+      }
+      else if( useLimitedPrefixLength )
+      {
+        const uint32_t maximumPrefixLength = ( 32 - ( COEF_REMAIN_BIN_REDUCTION + maxLog2TrDynamicRange ) );
+
+        uint32_t prefixLength = 0;
+        uint32_t suffix = ( symbol >> ricePar ) - COEF_REMAIN_BIN_REDUCTION;
+
+        while( ( prefixLength < maximumPrefixLength ) && ( suffix > ( ( 2 << prefixLength ) - 2 ) ) )
+        {
+          prefixLength++;
+        }
+
+        const uint32_t suffixLength = ( prefixLength == maximumPrefixLength ) ? ( maxLog2TrDynamicRange - ricePar ) : ( prefixLength + 1/*separator*/ );
+
+        rate += ( COEF_REMAIN_BIN_REDUCTION + prefixLength + suffixLength + ricePar ) << SCALE_BITS;
+      }
+      else
+      {
+        length = ricePar;
+        symbol = symbol - ( threshold << ricePar );
+        while( symbol >= ( 1 << length ) )
+        {
+          symbol -= ( 1 << ( length++ ) );
+        }
+        rate += ( threshold + length + 1 - ricePar + length ) << SCALE_BITS;
+      }
+    }
+  }
+  else if( absLevel == 1 )
+  {
+    rate += fracBitsGt1.intBits[0];
+    numCtxBins++;
+  }
+  else
+  {
+    rate = 0; // absLevel == 0: drop the sign cost that was used to initialise rate
+  }
+  return rate;
 }
 
 //! \}
diff --git a/source/Lib/CommonLib/QuantRDOQ.h b/source/Lib/CommonLib/QuantRDOQ.h
index a51bdca698c3c03bada83ab39ef21562d4a4609c..ec3ca1c6db1b570841e8fa3e779e886118365a76 100644
--- a/source/Lib/CommonLib/QuantRDOQ.h
+++ b/source/Lib/CommonLib/QuantRDOQ.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -61,24 +61,20 @@ public:
   ~QuantRDOQ();
 
 public:
-#if HEVC_USE_SCALING_LISTS
   void setFlatScalingList   ( const int maxLog2TrDynamicRange[MAX_NUM_CHANNEL_TYPE], const BitDepths &bitDepths );
   void setScalingList       ( ScalingList *scalingList, const int maxLog2TrDynamicRange[MAX_NUM_CHANNEL_TYPE], const BitDepths &bitDepths);
-#endif
   // quantization
   void quant                ( TransformUnit &tu, const ComponentID &compID, const CCoeffBuf &pSrc, TCoeff &uiAbsSum, const QpParam &cQP, const Ctx& ctx );
+  void forwardRDPCM         ( TransformUnit &tu, const ComponentID &compID, const CCoeffBuf &pSrc, TCoeff &uiAbsSum, const QpParam &cQP, const Ctx &ctx );
 
 private:
-#if HEVC_USE_SCALING_LISTS
-  double* xGetErrScaleCoeff              ( uint32_t list, uint32_t sizeX, uint32_t sizeY, int qp ) { return m_errScale             [sizeX][sizeY][list][qp]; };  //!< get Error Scale Coefficent
+  double* xGetErrScaleCoeffSL            ( uint32_t list, uint32_t sizeX, uint32_t sizeY, int qp ) { return m_errScale[sizeX][sizeY][list][qp]; };  //!< get Error Scale Coefficient pointer stored in m_errScale for this size, list and qp
+  double  xGetErrScaleCoeff              ( const bool needsSqrt2, SizeType width, SizeType height, int qp, const int maxLog2TrDynamicRange, const int channelBitDepth, bool bTransformSkip);
   double& xGetErrScaleCoeffNoScalingList ( uint32_t list, uint32_t sizeX, uint32_t sizeY, int qp ) { return m_errScaleNoScalingList[sizeX][sizeY][list][qp]; };  //!< get Error Scale Coefficent
   void    xInitScalingList               ( const QuantRDOQ* other );
   void    xDestroyScalingList            ();
   void    xSetErrScaleCoeff              ( uint32_t list, uint32_t sizeX, uint32_t sizeY, int qp, const int maxLog2TrDynamicRange[MAX_NUM_CHANNEL_TYPE], const BitDepths &bitDepths );
-#else
-  double  xGetErrScaleCoeff              ( const bool needsSqrt2, SizeType width, SizeType height, int qp, const int maxLog2TrDynamicRange, const int channelBitDepth);
-#endif
-
+  void    xDequantSample                 ( TCoeff& pRes, TCoeff& coeff, const TrQuantParams& trQuantParams );
   // RDOQ functions
   void xRateDistOptQuant(TransformUnit &tu, const ComponentID &compID, const CCoeffBuf &pSrc, TCoeff &uiAbsSum, const QpParam &cQP, const Ctx &ctx);
 
@@ -91,7 +87,6 @@ private:
                               const BinFracBits& fracBitsPar,
                               const BinFracBits& fracBitsGt1,
                               const BinFracBits& fracBitsGt2,
-                              const int          remGt2Bins,
                               const int          remRegBins,
                               unsigned           goRiceZero,
                               uint16_t             ui16AbsGoRice,
@@ -104,7 +99,6 @@ private:
                               const BinFracBits& fracBitsPar,
                               const BinFracBits& fracBitsGt1,
                               const BinFracBits& fracBitsGt2,
-                              const int          remGt2Bins,
                               const int          remRegBins,
                               unsigned           goRiceZero,
                               const uint16_t       ui16AbsGoRice,
@@ -120,24 +114,60 @@ private:
   inline double xGetICost            ( double dRate                                                      ) const;
   inline double xGetIEPRate          (                                                                   ) const;
 
+  void xRateDistOptQuantTS( TransformUnit &tu, const ComponentID &compID, const CCoeffBuf &coeffs, TCoeff &absSum, const QpParam &qp, const Ctx &ctx );
+
+  inline uint32_t xGetCodedLevelTSPred(double&            rd64CodedCost,     //!< out: cost of the level that is returned
+    double&            rd64CodedCost0,     //!< in: added to the sig-flag cost to form the cost of the level-0 candidate
+    double&            rd64CodedCostSig,   //!< out: significance-flag cost paired with the chosen level
+    Intermediate_Int    levelDouble,
+    int                 qBits,
+    double              errorScale,
+    uint32_t coeffLevels[],                //!< in: candidate absolute levels; m_testedLevels entries are read
+    double coeffLevelError[],              //!< out: the error of candidate i is written to index i + 1
+    const BinFracBits* fracBitsSig,
+    const BinFracBits& fracBitsPar,
+    CoeffCodingContext& cctx,
+    const FracBitsAccess& fracBitsAccess,
+    const BinFracBits& fracBitsSign,
+    const BinFracBits& fracBitsGt1,
+    const uint8_t      sign,
+    int                rightPixel,
+    int                belowPixel,
+    uint16_t           ricePar,
+    bool               isLast,             //!< true: no significance-flag cost is added
+    bool               useLimitedPrefixLength,
+    const int          maxLog2TrDynamicRange
+    , int&               numUsedCtxBins    //!< out: reset to 0, then the context-coded bins counted for the chosen level
+  ) const;
+
+  inline int xGetICRateTS   ( const uint32_t            absLevel,      //!< absolute level whose rate is estimated
+                              const BinFracBits&        fracBitsPar,
+                              const CoeffCodingContext& cctx,          //!< cctx.numCtxBins() selects which of the three rate models is used
+                              const FracBitsAccess&     fracBitsAccess,
+                              const BinFracBits&        fracBitsSign,
+                              const BinFracBits&        fracBitsGt1,
+                              int&                      numCtxBins,    //!< in/out: incremented for each context-coded bin that is costed
+                              const uint8_t             sign,
+                              const uint16_t            ricePar,
+                              const bool                useLimitedPrefixLength,
+                              const int                 maxLog2TrDynamicRange  ) const;
 private:
-#if HEVC_USE_SCALING_LISTS
   bool    m_isErrScaleListOwner;
 
   double *m_errScale             [SCALING_LIST_SIZE_NUM][SCALING_LIST_SIZE_NUM][SCALING_LIST_NUM][SCALING_LIST_REM_NUM]; ///< array of quantization matrix coefficient 4x4
   double  m_errScaleNoScalingList[SCALING_LIST_SIZE_NUM][SCALING_LIST_SIZE_NUM][SCALING_LIST_NUM][SCALING_LIST_REM_NUM]; ///< array of quantization matrix coefficient 4x4
-#endif
   // temporary buffers for RDOQ
   double m_pdCostCoeff        [MAX_TB_SIZEY * MAX_TB_SIZEY];
   double m_pdCostSig          [MAX_TB_SIZEY * MAX_TB_SIZEY];
   double m_pdCostCoeff0       [MAX_TB_SIZEY * MAX_TB_SIZEY];
   double m_pdCostCoeffGroupSig[(MAX_TB_SIZEY * MAX_TB_SIZEY) >> MLS_CG_SIZE]; // even if CG size is 2 (if one of the sides is 2) instead of 4, there should be enough space
-#if HEVC_USE_SIGN_HIDING
   int    m_rateIncUp          [MAX_TB_SIZEY * MAX_TB_SIZEY];
   int    m_rateIncDown        [MAX_TB_SIZEY * MAX_TB_SIZEY];
   int    m_sigRateDelta       [MAX_TB_SIZEY * MAX_TB_SIZEY];
   TCoeff m_deltaU             [MAX_TB_SIZEY * MAX_TB_SIZEY];
-#endif
+  TCoeff m_fullCoeff          [MAX_TB_SIZEY * MAX_TB_SIZEY];
+  int   m_bdpcm;          //!< forwarded to CoeffCodingContext::deriveModCoeff() by xGetCodedLevelTSPred
+  int   m_testedLevels;   //!< number of candidate levels xGetCodedLevelTSPred evaluates per coefficient
 };// END CLASS DEFINITION QuantRDOQ
 
 //! \}
diff --git a/source/Lib/CommonLib/RdCost.cpp b/source/Lib/CommonLib/RdCost.cpp
index 5c753e9537b20670e0eaa897f6c0c4e785ada977..7f48f5d5ea9fca804b4e8de7f230cb99dec5add3 100644
--- a/source/Lib/CommonLib/RdCost.cpp
+++ b/source/Lib/CommonLib/RdCost.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -65,7 +65,12 @@ double RdCost::calcRdCost( uint64_t fracBits, Distortion distortion, bool useUna
 double RdCost::calcRdCost( uint64_t fracBits, Distortion distortion )
 #endif
 {
-
+#if JVET_AHG14_LOSSLESS
+  if( m_costMode == COST_LOSSLESS_CODING && 0 != distortion )
+  {
+    return MAX_DOUBLE;
+  }
+#endif
 #if WCG_EXT
   return ( useUnadjustedLambda ? m_DistScaleUnadjusted : m_DistScale ) * double( distortion ) + double( fracBits );
 #else
@@ -77,16 +82,46 @@ void RdCost::setLambda( double dLambda, const BitDepths &bitDepths )
 {
   m_dLambda             = dLambda;
   m_DistScale           = double(1<<SCALE_BITS) / m_dLambda;
-  m_dLambdaMotionSAD[0] = sqrt(m_dLambda);
-  dLambda = 0.57
-            * pow(2.0, ((LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP_PRIME - 12
-                         + 6
-                             * ((bitDepths.recon[CHANNEL_TYPE_LUMA] - 8)
-                                - DISTORTION_PRECISION_ADJUSTMENT(bitDepths.recon[CHANNEL_TYPE_LUMA])))
-                        / 3.0));
-  m_dLambdaMotionSAD[1] = sqrt(dLambda);
+  m_dLambdaMotionSAD    = sqrt(m_dLambda);
 }
 
+void RdCost::lambdaAdjustColorTrans(bool forward, ComponentID componentID)
+{ // forward == true switches m_dLambda / m_DistScale to the adjusted values for componentID; false restores the stored originals
+  if (m_resetStore) // (re)build both stores from the current m_dLambda / m_DistScale
+  {
+    for (uint8_t component = 0; component < MAX_NUM_COMPONENT; component++)
+    {
+      ComponentID compID = (ComponentID)component;
+      int       delta_QP = (compID == COMPONENT_Cr ? DELTA_QP_FOR_Co : DELTA_QP_FOR_Y_Cg);
+      double lamdbaAdjustRate = pow(2.0, delta_QP / 3.0); // lambda scale factor 2^(delta_QP / 3)
+
+      m_lambdaStore[0][component] = m_dLambda;
+      m_DistScaleStore[0][component] = m_DistScale;
+
+      m_lambdaStore[1][component] = m_dLambda * lamdbaAdjustRate;
+      m_DistScaleStore[1][component] = double(1 << SCALE_BITS) / m_lambdaStore[1][component];
+    }
+    m_resetStore = false;
+  }
+  // Forward and backward calls must alternate; m_pairCheck records which state is currently active.
+  if (forward)
+  {
+    CHECK(m_pairCheck == 1, "lambda has been already adjusted");
+    m_pairCheck = 1;
+  }
+  else
+  {
+    CHECK(m_pairCheck == 0, "lambda has not been adjusted");
+    m_pairCheck = 0;
+  }
+
+  m_dLambda = m_lambdaStore[m_pairCheck][componentID]; // row 0 = stored original, row 1 = adjusted
+  m_DistScale = m_DistScaleStore[m_pairCheck][componentID];
+  if (m_pairCheck == 0) // NOTE(review): copyState only touches m_DistScaleUnadjusted under #if WCG_EXT -- confirm this builds with WCG_EXT off
+  {
+    CHECK(m_DistScale != m_DistScaleUnadjusted, "lambda should be adjusted to the original value");
+  }
+}
 
 // Initialize Function Pointer by [eDFunc]
 void RdCost::init()
@@ -176,6 +211,8 @@ void RdCost::init()
 
   m_motionLambda               = 0;
   m_iCostScale                 = 0;
+  m_resetStore = true;
+  m_pairCheck    = 0;
 }
 
 
@@ -190,7 +227,7 @@ void RdCost::copyState( const RdCost& other )
   m_mvPredictor   = other.m_mvPredictor;
   m_motionLambda  = other.m_motionLambda;
   m_iCostScale    = other.m_iCostScale;
-  memcpy( m_dLambdaMotionSAD, other.m_dLambdaMotionSAD, sizeof( m_dLambdaMotionSAD ) );
+  m_dLambdaMotionSAD = other.m_dLambdaMotionSAD;
 #if WCG_EXT
   m_dLambda_unadjusted  = other.m_dLambda_unadjusted ;
   m_DistScaleUnadjusted = other.m_DistScaleUnadjusted;
@@ -232,7 +269,7 @@ void RdCost::setDistParam( DistParam &rcDP, const CPelBuf &org, const Pel* piRef
     }
     else if( isPowerOf2( org.width ) )
     {
-      rcDP.distFunc = m_afpDistortFunc[ DF_SAD + DFOffset + g_aucLog2[ org.width ] ];
+      rcDP.distFunc = m_afpDistortFunc[ DF_SAD + DFOffset + floorLog2( org.width ) ];
     }
     else
     {
@@ -241,7 +278,7 @@ void RdCost::setDistParam( DistParam &rcDP, const CPelBuf &org, const Pel* piRef
   }
   else if( isPowerOf2( org.width ) )
   {
-    rcDP.distFunc = m_afpDistortFunc[ DF_HAD + DFOffset + g_aucLog2[ org.width ] ];
+    rcDP.distFunc = m_afpDistortFunc[ DF_HAD + DFOffset + floorLog2( org.width ) ];
   }
   else
   {
@@ -306,7 +343,7 @@ void RdCost::setDistParam( DistParam &rcDP, const CPelBuf &org, const CPelBuf &c
     }
     else if( isPowerOf2( org.width) )
     {
-      rcDP.distFunc = m_afpDistortFunc[ DF_SAD + DFOffset + g_aucLog2[ org.width ] ];
+      rcDP.distFunc = m_afpDistortFunc[ DF_SAD + DFOffset + floorLog2( org.width ) ];
     }
     else
     {
@@ -315,7 +352,7 @@ void RdCost::setDistParam( DistParam &rcDP, const CPelBuf &org, const CPelBuf &c
   }
   else
   {
-    rcDP.distFunc = m_afpDistortFunc[ DF_HAD + DFOffset + g_aucLog2[ org.width ] ];
+    rcDP.distFunc = m_afpDistortFunc[ DF_HAD + DFOffset + floorLog2( org.width ) ];
   }
 
   rcDP.maximumDistortionForEarlyExit = std::numeric_limits<Distortion>::max();
@@ -359,7 +396,7 @@ void RdCost::setDistParam( DistParam &rcDP, const Pel* pOrg, const Pel* piRefY,
   }
   else
   {
-    rcDP.distFunc = m_afpDistortFunc[ DF_SAD + g_aucLog2[ width ] ];
+    rcDP.distFunc = m_afpDistortFunc[ DF_SAD + floorLog2( width ) ];
   }
 }
 
@@ -380,6 +417,8 @@ Distortion RdCost::getDistPart( const CPelBuf &org, const CPelBuf &cur, int bitD
 #if WCG_EXT
   if( orgLuma )
   {
+    cDtParam.cShiftX = getComponentScaleX(compID,  m_cf);
+    cDtParam.cShiftY = getComponentScaleY(compID,  m_cf);
     if( isChroma(compID) )
     {
       cDtParam.orgLuma  = *orgLuma;
@@ -393,7 +432,7 @@ Distortion RdCost::getDistPart( const CPelBuf &org, const CPelBuf &cur, int bitD
 
   if( isPowerOf2( org.width ) )
   {
-    cDtParam.distFunc = m_afpDistortFunc[eDFunc + g_aucLog2[org.width]];
+    cDtParam.distFunc = m_afpDistortFunc[eDFunc + floorLog2(org.width)];
   }
   else
   {
@@ -2936,7 +2975,7 @@ void RdCost::restoreReshapeLumaLevelToWeightTable()
 
 void RdCost::updateReshapeLumaLevelToWeightTable(SliceReshapeInfo &sliceReshape, Pel *wtTable, double cwt)
 {
-  if (m_signalType == RESHAPE_SIGNAL_SDR)
+  if (m_signalType == RESHAPE_SIGNAL_SDR || m_signalType == RESHAPE_SIGNAL_HLG)
   {
     if (sliceReshape.getSliceReshapeModelPresentFlag())
     {
@@ -2989,7 +3028,7 @@ Distortion RdCost::getWeightedMSE(int compIdx, const Pel org, const Pel cur, con
   }
   // use luma to get weight
   double weight = 1.0;
-  if (m_signalType == RESHAPE_SIGNAL_SDR)
+  if (m_signalType == RESHAPE_SIGNAL_SDR || m_signalType == RESHAPE_SIGNAL_HLG)
   {
     if (compIdx == COMPONENT_Y)
     {
@@ -3024,7 +3063,8 @@ Distortion RdCost::xGetSSE_WTD( const DistParam &rcDtParam )
   const int  iStrideOrg = rcDtParam.org.stride;
   const Pel* piOrgLuma        = rcDtParam.orgLuma.buf;
   const int  iStrideOrgLuma   = rcDtParam.orgLuma.stride;
-  const int  cShift           = (rcDtParam.compID==COMPONENT_Y) ? 0 : 1; // assume 420, could use getComponentScaleX, getComponentScaleY
+  const size_t  cShift  = rcDtParam.cShiftX;
+  const size_t  cShiftY = rcDtParam.cShiftY;
 
   Distortion uiSum   = 0;
   uint32_t uiShift = DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth) << 1;
@@ -3036,7 +3076,8 @@ Distortion RdCost::xGetSSE_WTD( const DistParam &rcDtParam )
     }
     piOrg += iStrideOrg;
     piCur += iStrideCur;
-    piOrgLuma += iStrideOrgLuma<<cShift;
+
+    piOrgLuma += iStrideOrgLuma<<cShiftY;
   }
   return ( uiSum );
 }
@@ -3056,7 +3097,9 @@ Distortion RdCost::xGetSSE2_WTD( const DistParam &rcDtParam )
   const int  iStrideOrg = rcDtParam.org.stride;
   const Pel* piOrgLuma           = rcDtParam.orgLuma.buf;
   const size_t  iStrideOrgLuma   = rcDtParam.orgLuma.stride;
-  const size_t  cShift           = (rcDtParam.compID==COMPONENT_Y) ? 0 : 1; // assume 420, could use getComponentScaleX, getComponentScaleY
+  const size_t  cShift  = rcDtParam.cShiftX;
+  const size_t  cShiftY = rcDtParam.cShiftY;
+
   Distortion uiSum   = 0;
   uint32_t uiShift = DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth) << 1;
   for( ; iRows != 0; iRows-- )
@@ -3065,7 +3108,7 @@ Distortion RdCost::xGetSSE2_WTD( const DistParam &rcDtParam )
     uiSum += getWeightedMSE(rcDtParam.compID, piOrg[1  ], piCur[1  ], uiShift, piOrgLuma[size_t(1)<<cShift]);   // piOrg[1] - piCur[1]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
     piOrg += iStrideOrg;
     piCur += iStrideCur;
-    piOrgLuma += iStrideOrgLuma<<cShift;
+    piOrgLuma += iStrideOrgLuma<<cShiftY;
   }
   return ( uiSum );
 }
@@ -3085,7 +3128,9 @@ Distortion RdCost::xGetSSE4_WTD( const DistParam &rcDtParam )
   const int  iStrideOrg = rcDtParam.org.stride;
   const Pel* piOrgLuma        = rcDtParam.orgLuma.buf;
   const size_t  iStrideOrgLuma   = rcDtParam.orgLuma.stride;
-  const size_t  cShift           = (rcDtParam.compID==COMPONENT_Y) ? 0 : 1; // assume 420, could use getComponentScaleX, getComponentScaleY
+  const size_t  cShift  = rcDtParam.cShiftX;
+  const size_t  cShiftY = rcDtParam.cShiftY;
+
   Distortion uiSum   = 0;
   uint32_t uiShift = DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth) << 1;
   for( ; iRows != 0; iRows-- )
@@ -3096,7 +3141,7 @@ Distortion RdCost::xGetSSE4_WTD( const DistParam &rcDtParam )
     uiSum += getWeightedMSE(rcDtParam.compID, piOrg[3  ], piCur[3  ], uiShift, piOrgLuma[size_t(3)<<cShift] );   // piOrg[3] - piCur[3]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
     piOrg += iStrideOrg;
     piCur += iStrideCur;
-    piOrgLuma += iStrideOrgLuma<<cShift;
+    piOrgLuma += iStrideOrgLuma<<cShiftY;
   }
   return ( uiSum );
 }
@@ -3116,7 +3161,8 @@ Distortion RdCost::xGetSSE8_WTD( const DistParam &rcDtParam )
   const int  iStrideOrg = rcDtParam.org.stride;
   const Pel* piOrgLuma        = rcDtParam.orgLuma.buf;
   const size_t  iStrideOrgLuma   = rcDtParam.orgLuma.stride;
-  const size_t  cShift           = (rcDtParam.compID==COMPONENT_Y) ? 0 : 1; // assume 420, could use getComponentScaleX, getComponentScaleY
+  const size_t  cShift  = rcDtParam.cShiftX;
+  const size_t  cShiftY = rcDtParam.cShiftY;
 
   Distortion uiSum   = 0;
   uint32_t uiShift = DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth) << 1;
@@ -3132,7 +3178,7 @@ Distortion RdCost::xGetSSE8_WTD( const DistParam &rcDtParam )
     uiSum += getWeightedMSE(rcDtParam.compID, piOrg[7  ], piCur[7  ], uiShift, piOrgLuma[size_t(7)<<cShift  ]);  // piOrg[7] - piCur[7]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
     piOrg += iStrideOrg;
     piCur += iStrideCur;
-    piOrgLuma += iStrideOrgLuma<<cShift;
+    piOrgLuma += iStrideOrgLuma<<cShiftY;
   }
   return ( uiSum );
 }
@@ -3151,8 +3197,8 @@ Distortion RdCost::xGetSSE16_WTD( const DistParam &rcDtParam )
   const int  iStrideOrg = rcDtParam.org.stride;
   const Pel* piOrgLuma        = rcDtParam.orgLuma.buf;
   const size_t  iStrideOrgLuma   = rcDtParam.orgLuma.stride;
-  const size_t  cShift           = (rcDtParam.compID==COMPONENT_Y) ? 0 : 1; // assume 420, could use getComponentScaleX, getComponentScaleY
-
+  const size_t  cShift  = rcDtParam.cShiftX;
+  const size_t  cShiftY = rcDtParam.cShiftY;
   Distortion uiSum   = 0;
   uint32_t uiShift = DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth) << 1;
   for( ; iRows != 0; iRows-- )
@@ -3175,7 +3221,8 @@ Distortion RdCost::xGetSSE16_WTD( const DistParam &rcDtParam )
     uiSum += getWeightedMSE(rcDtParam.compID, piOrg[15 ], piCur[15 ], uiShift, piOrgLuma[size_t(15)<<cShift  ]);  //piOrg[15] - piCur[15]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
     piOrg += iStrideOrg;
     piCur += iStrideCur;
-    piOrgLuma += iStrideOrgLuma<<cShift;
+
+    piOrgLuma += iStrideOrgLuma<<cShiftY;
   }
   return ( uiSum );
 }
@@ -3194,7 +3241,8 @@ Distortion RdCost::xGetSSE16N_WTD( const DistParam &rcDtParam )
   const int  iStrideOrg = rcDtParam.org.stride;
   const Pel* piOrgLuma        = rcDtParam.orgLuma.buf;
   const size_t  iStrideOrgLuma   = rcDtParam.orgLuma.stride;
-  const size_t  cShift           = (rcDtParam.compID==COMPONENT_Y) ? 0 : 1; // assume 420, could use getComponentScaleX, getComponentScaleY
+  const size_t  cShift  = rcDtParam.cShiftX;
+  const size_t  cShiftY = rcDtParam.cShiftY;
   Distortion uiSum   = 0;
   uint32_t uiShift = DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth) << 1;
   for( ; iRows != 0; iRows-- )
@@ -3220,7 +3268,7 @@ Distortion RdCost::xGetSSE16N_WTD( const DistParam &rcDtParam )
     }
     piOrg += iStrideOrg;
     piCur += iStrideCur;
-    piOrgLuma += iStrideOrgLuma<<cShift;
+    piOrgLuma += iStrideOrgLuma<<cShiftY;
   }
   return ( uiSum );
 }
@@ -3239,7 +3287,8 @@ Distortion RdCost::xGetSSE32_WTD( const DistParam &rcDtParam )
   const int  iStrideOrg = rcDtParam.org.stride;
   const Pel* piOrgLuma        = rcDtParam.orgLuma.buf;
   const size_t  iStrideOrgLuma   = rcDtParam.orgLuma.stride;
-  const size_t  cShift           = (rcDtParam.compID==COMPONENT_Y) ? 0 : 1; // assume 420, could use getComponentScaleX, getComponentScaleY
+  const size_t  cShift  = rcDtParam.cShiftX;
+  const size_t  cShiftY = rcDtParam.cShiftY;
 
   Distortion uiSum   = 0;
   uint32_t uiShift = DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth) << 1;
@@ -3279,7 +3328,7 @@ Distortion RdCost::xGetSSE32_WTD( const DistParam &rcDtParam )
     uiSum += getWeightedMSE(rcDtParam.compID, piOrg[31], piCur[31], uiShift, piOrgLuma[size_t(31)<<cShift ]);  //  iTemp = piOrg[31] - piCur[31]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
     piOrg += iStrideOrg;
     piCur += iStrideCur;
-    piOrgLuma += iStrideOrgLuma<<cShift;
+    piOrgLuma += iStrideOrgLuma<<cShiftY;
   }
   return ( uiSum );
 }
@@ -3298,7 +3347,8 @@ Distortion RdCost::xGetSSE64_WTD( const DistParam &rcDtParam )
   const int  iStrideOrg = rcDtParam.org.stride;
   const Pel* piOrgLuma        = rcDtParam.orgLuma.buf;
   const size_t iStrideOrgLuma   = rcDtParam.orgLuma.stride;
-  const size_t cShift           = (rcDtParam.compID==COMPONENT_Y) ? 0 : 1; // assume 420, could use getComponentScaleX, getComponentScaleY
+  const size_t  cShift  = rcDtParam.cShiftX;
+  const size_t  cShiftY = rcDtParam.cShiftY;
 
   Distortion uiSum   = 0;
   uint32_t uiShift = DISTORTION_PRECISION_ADJUSTMENT((rcDtParam.bitDepth)) << 1;
@@ -3370,7 +3420,8 @@ Distortion RdCost::xGetSSE64_WTD( const DistParam &rcDtParam )
     uiSum += getWeightedMSE(rcDtParam.compID, piOrg[63], piCur[63], uiShift, piOrgLuma[size_t(63)<<cShift]);  // iTemp = piOrg[63] - piCur[63]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
     piOrg += iStrideOrg;
     piCur += iStrideCur;
-    piOrgLuma += iStrideOrgLuma<<cShift;
+
+    piOrgLuma += iStrideOrgLuma<<cShiftY;
   }
   return ( uiSum );
 }
diff --git a/source/Lib/CommonLib/RdCost.h b/source/Lib/CommonLib/RdCost.h
index 64d2e64595e9ec84e66bf711c55c558a121a5498..a7aef6fa7700fbe9cac8236d0317687c167ff53a 100644
--- a/source/Lib/CommonLib/RdCost.h
+++ b/source/Lib/CommonLib/RdCost.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -87,10 +87,11 @@ public:
   // (vertical) subsampling shift (for reducing complexity)
   // - 0 = no subsampling, 1 = even rows, 2 = every 4th, etc.
   int                   subShift;
-
+  int                   cShiftX;   // horizontal shift applied to luma sample indices in the xGetSSE*_WTD functions; -1 until getDistPart() sets it
+  int                   cShiftY;   // vertical shift applied to the luma stride in the xGetSSE*_WTD functions; -1 until getDistPart() sets it
   DistParam() :
   org(), cur(), step( 1 ), bitDepth( 0 ), useMR( false ), applyWeight( false ), isBiPred( false ), wpCur( nullptr ), compID( MAX_NUM_COMPONENT ), maximumDistortionForEarlyExit( std::numeric_limits<Distortion>::max() ), subShift( 0 )
-
+  , cShiftX(-1), cShiftY(-1)
   { }
 };
 
@@ -112,9 +113,14 @@ private:
   static uint32_t         m_signalType;
   static double           m_chromaWeight;
   static int              m_lumaBD;
+  ChromaFormat            m_cf;
 #endif
   double                  m_DistScale;
-  double                  m_dLambdaMotionSAD[2 /* 0=standard, 1=for transquant bypass when mixed-lossless cost evaluation enabled*/];
+  double                  m_dLambdaMotionSAD;    // sqrt(m_dLambda), assigned in setLambda()
+  double                  m_lambdaStore[2][3];   // first index: 0-org; 1-act (scaled in lambdaAdjustColorTrans); second index: component
+  double                  m_DistScaleStore[2][3]; // first index: 0-org; 1-act; second index: component
+  bool                    m_resetStore;          // true => lambdaAdjustColorTrans() refills both stores on its next call
+  int                     m_pairCheck;           // 0 = original lambda active, 1 = adjusted lambda active
 
   // for motion cost
   Mv                      m_mvPredictor;
@@ -128,6 +134,7 @@ public:
   virtual ~RdCost();
 
 #if WCG_EXT
+  void          setChromaFormat       ( const ChromaFormat & _cf) { m_cf = _cf; } // m_cf is read by getDistPart() to derive DistParam::cShiftX / cShiftY
   double        calcRdCost            ( uint64_t fracBits, Distortion distortion, bool useUnadjustedLambda = true );
 #else
   double        calcRdCost            ( uint64_t fracBits, Distortion distortion );
@@ -143,6 +150,9 @@ public:
   double        getLambda()           { return m_dLambda; }
 #endif
   double        getChromaWeight()     { return ((m_distortionWeight[COMPONENT_Cb] + m_distortionWeight[COMPONENT_Cr]) / 2.0); }
+#if RDOQ_CHROMA_LAMBDA
+  double        getDistortionWeight   ( const ComponentID compID ) const { return m_distortionWeight[compID % MAX_NUM_COMPONENT]; } // index wraps modulo MAX_NUM_COMPONENT
+#endif
 
   void          setCostMode(CostMode m) { m_costMode = m; }
 
@@ -158,8 +168,8 @@ public:
   void           setDistParam( DistParam &rcDP, const CPelBuf &org, const CPelBuf &cur, int bitDepth, ComponentID compID, bool useHadamard = false );
   void           setDistParam( DistParam &rcDP, const Pel* pOrg, const Pel* piRefY, int iOrgStride, int iRefStride, int bitDepth, ComponentID compID, int width, int height, int subShiftMode = 0, int step = 1, bool useHadamard = false, bool bioApplied = false );
 
-  double         getMotionLambda          ( bool bIsTransquantBypass ) { return m_dLambdaMotionSAD[(bIsTransquantBypass && m_costMode==COST_MIXED_LOSSLESS_LOSSY_CODING)?1:0]; }
-  void           selectMotionLambda       ( bool bIsTransquantBypass ) { m_motionLambda = getMotionLambda( bIsTransquantBypass ); }
+  double         getMotionLambda          ( )  { return m_dLambdaMotionSAD; } // value stored by setLambda(): sqrt(m_dLambda)
+  void           selectMotionLambda       ( )  { m_motionLambda = getMotionLambda( ); } // copies the motion lambda into m_motionLambda, which getCost() multiplies by the bit count
   void           setPredictor             ( const Mv& rcMv )
   {
     m_mvPredictor = rcMv;
@@ -167,7 +177,7 @@ public:
   void           setCostScale             ( int iCostScale )           { m_iCostScale = iCostScale; }
   Distortion     getCost                  ( uint32_t b )                   { return Distortion( m_motionLambda * b ); }
   // for ibc
-  void           getMotionCost(int add, bool isTransquantBypass) { m_dCost = m_dLambdaMotionSAD[(isTransquantBypass && m_costMode == COST_MIXED_LOSSLESS_LOSSY_CODING) ? 1 : 0] + add; }
+  void           getMotionCost(int add) { m_dCost = m_dLambdaMotionSAD + add; }
 
   void    setPredictors(Mv* pcMv)
   {
@@ -179,7 +189,7 @@ public:
 
   inline Distortion getBvCostMultiplePreds(int x, int y, bool useIMV)
   {
-    return Distortion((m_dCost * getBitsMultiplePreds(x, y, useIMV)) / 65536.0);
+    return Distortion(m_dCost * getBitsMultiplePreds(x, y, useIMV));
   }
 
   unsigned int    getBitsMultiplePreds(int x, int y, bool useIMV)
@@ -286,7 +296,7 @@ public:
       uiTemp2  >>=   MAX_CU_DEPTH;
     }
 
-    return uiLength2 + ( g_aucPrevLog2[uiTemp2] << 1 );
+    return uiLength2 + ( floorLog2(uiTemp2) << 1 );
   }
   Distortion     getCostOfVectorWithPredictor( const int x, const int y, const unsigned imvShift )  { return Distortion( m_motionLambda * getBitsOfVectorWithPredictor(x, y, imvShift )); }
   uint32_t           getBitsOfVectorWithPredictor( const int x, const int y, const unsigned imvShift )  { return xGetExpGolombNumberOfBits(((x << m_iCostScale) - m_mvPredictor.getHor())>>imvShift) + xGetExpGolombNumberOfBits(((y << m_iCostScale) - m_mvPredictor.getVer())>>imvShift); }
@@ -303,6 +313,9 @@ public:
   inline std::vector<double>& getLumaLevelWeightTable        ()                   { return m_lumaLevelToWeightPLUT; }
 #endif
 
+  void           lambdaAdjustColorTrans(bool forward, ComponentID compID);
+  void           resetStore() { m_resetStore = true; }
+
 private:
 
   static Distortion xGetSSE           ( const DistParam& pcDtParam );
@@ -362,19 +375,19 @@ private:
   static Distortion xCalcHADs8x4      ( const Pel *piOrg, const Pel *piCur, int iStrideOrg, int iStrideCur );
 
 #ifdef TARGET_SIMD_X86
-  template< typename Torg, typename Tcur, X86_VEXT vext >
+  template<X86_VEXT vext>
   static Distortion xGetSSE_SIMD    ( const DistParam& pcDtParam );
-  template< typename Torg, typename Tcur, int iWidth, X86_VEXT vext >
+  template<int iWidth, X86_VEXT vext>
   static Distortion xGetSSE_NxN_SIMD( const DistParam& pcDtParam );
 
-  template< X86_VEXT vext >
+  template<X86_VEXT vext>
   static Distortion xGetSAD_SIMD    ( const DistParam& pcDtParam );
-  template< int iWidth, X86_VEXT vext >
+  template<int iWidth, X86_VEXT vext>
   static Distortion xGetSAD_NxN_SIMD( const DistParam& pcDtParam );
-  template< X86_VEXT vext >
-  static Distortion xGetSAD_IBD_SIMD(const DistParam& pcDtParam);
+  template<X86_VEXT vext>
+  static Distortion xGetSAD_IBD_SIMD( const DistParam& pcDtParam );
 
-  template< typename Torg, typename Tcur, X86_VEXT vext >
+  template<X86_VEXT vext>
   static Distortion xGetHADs_SIMD   ( const DistParam& pcDtParam );
 #endif
 
diff --git a/source/Lib/CommonLib/RdCostWeightPrediction.cpp b/source/Lib/CommonLib/RdCostWeightPrediction.cpp
index 3a4c4f2652ab1750026322f9564119e9471977a7..f88f665c0cf3074255305c6931c0032d8c4e2d95 100644
--- a/source/Lib/CommonLib/RdCostWeightPrediction.cpp
+++ b/source/Lib/CommonLib/RdCostWeightPrediction.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/RdCostWeightPrediction.h b/source/Lib/CommonLib/RdCostWeightPrediction.h
index 9687c3cef1b0d74fdf504410f5dd171db58f20fb..cf7d55e6f2fb26f2df631611d1e0b3417be50268 100644
--- a/source/Lib/CommonLib/RdCostWeightPrediction.h
+++ b/source/Lib/CommonLib/RdCostWeightPrediction.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/Reshape.cpp b/source/Lib/CommonLib/Reshape.cpp
index 85f06103e04aba3cdd177062214aed9fc4e2b613..4d6ac24644cca6d38c6a471c10d8c092ebb6a3fb 100644
--- a/source/Lib/CommonLib/Reshape.cpp
+++ b/source/Lib/CommonLib/Reshape.cpp
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2019, ITU/ISO/IEC
+* Copyright (c) 2010-2020, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -38,6 +38,7 @@
 #include <stdio.h>
 #include <string.h>
 #include <math.h>
+#include <UnitTools.h>
  //! \ingroup CommonLib
  //! \{
 
@@ -46,10 +47,13 @@
  // ====================================================================================================================
 
 Reshape::Reshape()
+: m_CTUFlag (false)
+, m_recReshaped (false)
+, m_reshape (true)
+, m_chromaScale (1 << CSCALE_FP_PREC)
+, m_vpduX (-1)
+, m_vpduY (-1)
 {
-  m_CTUFlag = false;
-  m_recReshaped = false;
-  m_reshape = true;
 }
 
 Reshape::~Reshape()
@@ -67,6 +71,12 @@ void  Reshape::createDec(int bitDepth)
     m_invLUT.resize(m_reshapeLUTSize, 0);
   if (m_binCW.empty())
     m_binCW.resize(PIC_CODE_CW_BINS, 0);
+  if (m_inputPivot.empty())
+    m_inputPivot.resize(PIC_CODE_CW_BINS + 1, 0);
+  if (m_fwdScaleCoef.empty())
+    m_fwdScaleCoef.resize(PIC_CODE_CW_BINS, 1 << FP_PREC);
+  if (m_invScaleCoef.empty())
+    m_invScaleCoef.resize(PIC_CODE_CW_BINS, 1 << FP_PREC);
   if (m_reshapePivot.empty())
     m_reshapePivot.resize(PIC_CODE_CW_BINS + 1, 0);
   if (m_chromaAdjHelpLUT.empty())
@@ -77,46 +87,6 @@ void  Reshape::destroy()
 {
 }
 
-/**
--Perform inverse of a one dimension LUT
-\param   InputLUT  describing the input LUT
-\retval  OutputLUT describing the inversed LUT of InputLUT
-\param   lut_size  size of LUT in number of samples
-*/
-void Reshape::reverseLUT(std::vector<Pel>& inputLUT, std::vector<Pel>& outputLUT, uint16_t lutSize)
-{
-  int i, j;
-  outputLUT[m_reshapePivot[m_sliceReshapeInfo.reshaperModelMinBinIdx]] = m_sliceReshapeInfo.reshaperModelMinBinIdx*m_initCW;
-  for (i = m_sliceReshapeInfo.reshaperModelMinBinIdx; i <= m_sliceReshapeInfo.reshaperModelMaxBinIdx; i++)
-  {
-    int16_t X1 = m_reshapePivot[i];
-    int16_t X2 = m_reshapePivot[i + 1];
-    outputLUT[X2] = (i + 1)*m_initCW;
-    int16_t Y1 = outputLUT[X1];
-    int16_t Y2 = outputLUT[X2];
-
-    if (X2 !=X1)
-    {
-      int32_t scale = (int32_t)(Y2 - Y1) * (1 << FP_PREC) / (int32_t)(X2 - X1);
-      for (j = X1 + 1; j < X2; j++)
-      {
-        outputLUT[j] = (Pel)((scale*(int32_t)(j - X1) + (1 << (FP_PREC - 1))) >> FP_PREC) + Y1;
-      }
-    }
-  }
-
-  for (i = 0; i < m_reshapePivot[m_sliceReshapeInfo.reshaperModelMinBinIdx]; i++)
-    outputLUT[i] = outputLUT[m_reshapePivot[m_sliceReshapeInfo.reshaperModelMinBinIdx]];
-  for (i = m_reshapePivot[m_sliceReshapeInfo.reshaperModelMaxBinIdx + 1]; i < m_reshapeLUTSize; i++)
-    outputLUT[i] = outputLUT[m_reshapePivot[m_sliceReshapeInfo.reshaperModelMaxBinIdx + 1]];
-
-  bool clipRange = ((m_sliceReshapeInfo.reshaperModelMinBinIdx > 0) && (m_sliceReshapeInfo.reshaperModelMaxBinIdx < (PIC_CODE_CW_BINS - 1)));
-  for (i = 0; i < lutSize; i++)
-  {
-    if (clipRange) outputLUT[i] = Clip3((Pel)(16<<(m_lumaBD-8)), (Pel)(235<<(m_lumaBD-8)), outputLUT[i]);
-    else           outputLUT[i] = Clip3((Pel)0, (Pel)((1<<m_lumaBD)-1), outputLUT[i]);
-  }
-}
 
 
 /** compute chroma residuce scale for TU
@@ -130,7 +100,103 @@ int  Reshape::calculateChromaAdj(Pel avgLuma)
   return(iAdj);
 }
 
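+/** compute chroma residue scale for a TU from reconstructed luma samples left of / above the CU at its VPDU origin
+* \param tu    transform unit (gives access to the coding structure, picture and slice)
+* \param areaY luma area of the TU; its position is aligned down to the 64x64 (or CTU-size) grid
+* \return chroma residue scale */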
+int  Reshape::calculateChromaAdjVpduNei(TransformUnit &tu, const CompArea &areaY)
+{
+  CodingStructure &cs = *tu.cs;
+  int xPos = areaY.lumaPos().x;
+  int yPos = areaY.lumaPos().y;
+  int ctuSize = cs.sps->getCTUSize();
+  int numNeighbor = std::min(64, ctuSize);
+  int numNeighborLog = floorLog2(numNeighbor);
+  if (ctuSize == 128)
+  {
+    xPos = xPos / 64 * 64;
+    yPos = yPos / 64 * 64;
+  }
+  else
+  {
+    xPos = xPos / ctuSize * ctuSize;
+    yPos = yPos / ctuSize * ctuSize;
+  }
+
+  if (isVPDUprocessed(xPos, yPos) && !cs.pcv->isEncoder)
+  {
+    return getChromaScale();
+  }
+  else
+  {
+    setVPDULoc(xPos, yPos);
+    Position topLeft(xPos, yPos);
+    CodingUnit *topLeftLuma;
+    const CodingUnit *cuAbove, *cuLeft;
+    if (CS::isDualITree(cs) && cs.slice->getSliceType() == I_SLICE)
+    {
+      topLeftLuma = tu.cs->picture->cs->getCU(topLeft, CHANNEL_TYPE_LUMA);
+      cuAbove = cs.picture->cs->getCURestricted(topLeftLuma->lumaPos().offset(0, -1), *topLeftLuma, CHANNEL_TYPE_LUMA);
+      cuLeft  = cs.picture->cs->getCURestricted(topLeftLuma->lumaPos().offset(-1, 0), *topLeftLuma, CHANNEL_TYPE_LUMA);
+    }
+    else
+    {
+      topLeftLuma = cs.getCU(topLeft, CHANNEL_TYPE_LUMA);
+      cuAbove = cs.getCURestricted(topLeftLuma->lumaPos().offset(0, -1), *topLeftLuma, CHANNEL_TYPE_LUMA);
+      cuLeft  = cs.getCURestricted(topLeftLuma->lumaPos().offset(-1, 0), *topLeftLuma, CHANNEL_TYPE_LUMA);
+    }
+
+    xPos = topLeftLuma->lumaPos().x;
+    yPos = topLeftLuma->lumaPos().y;
+
+    CompArea lumaArea = CompArea(COMPONENT_Y, tu.chromaFormat, topLeftLuma->lumaPos(), topLeftLuma->lumaSize(), true);
+    PelBuf piRecoY = cs.picture->getRecoBuf(lumaArea);
+    int strideY = piRecoY.stride;
+    int chromaScale = (1 << CSCALE_FP_PREC);
+    int lumaValue = -1;
 
+    Pel* recSrc0 = piRecoY.bufAt(0, 0);
+    const uint32_t picH = tu.cs->picture->lheight();
+    const uint32_t picW = tu.cs->picture->lwidth();
+    const Pel   valueDC = 1 << (tu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA) - 1);
+    int32_t recLuma = 0;
+    int pelnum = 0;
+    if (cuLeft != nullptr)
+    {
+      for (int i = 0; i < numNeighbor; i++)
+      {
+        int k = (yPos + i) >= picH ? (picH - yPos - 1) : i;
+        recLuma += recSrc0[-1 + k * strideY];
+        pelnum++;
+      }
+    }
+    if (cuAbove != nullptr)
+    {
+      for (int i = 0; i < numNeighbor; i++)
+      {
+        int k = (xPos + i) >= picW ? (picW - xPos - 1) : i;
+        recLuma += recSrc0[-strideY + k];
+        pelnum++;
+      }
+    }
+    if (pelnum == numNeighbor)
+    {
+      lumaValue = ClipPel((recLuma + (1 << (numNeighborLog - 1))) >> numNeighborLog, tu.cs->slice->clpRng(COMPONENT_Y));
+    }
+    else if (pelnum == (numNeighbor << 1))
+    {
+      lumaValue = ClipPel((recLuma + (1 << numNeighborLog)) >> (numNeighborLog + 1), tu.cs->slice->clpRng(COMPONENT_Y));
+    }
+    else
+    {
+      CHECK(pelnum != 0, "");
+      lumaValue = ClipPel(valueDC, tu.cs->slice->clpRng(COMPONENT_Y));
+    }
+    chromaScale = calculateChromaAdj(lumaValue);
+    setChromaScale(chromaScale);
+    return(chromaScale);
+  }
+}
 /** find inx of PWL for inverse mapping
 * \param average luma pred of TU
 * \return idx of PWL for inverse mapping
@@ -138,18 +204,11 @@ int  Reshape::calculateChromaAdj(Pel avgLuma)
 int Reshape::getPWLIdxInv(int lumaVal)
 {
   int idxS = 0;
-  if (lumaVal < m_reshapePivot[m_sliceReshapeInfo.reshaperModelMinBinIdx + 1])
-    return m_sliceReshapeInfo.reshaperModelMinBinIdx;
-  else if (lumaVal >= m_reshapePivot[m_sliceReshapeInfo.reshaperModelMaxBinIdx])
-    return m_sliceReshapeInfo.reshaperModelMaxBinIdx;
-  else
+  for (idxS = m_sliceReshapeInfo.reshaperModelMinBinIdx; (idxS <= m_sliceReshapeInfo.reshaperModelMaxBinIdx); idxS++)
   {
-    for (idxS = m_sliceReshapeInfo.reshaperModelMinBinIdx; (idxS < m_sliceReshapeInfo.reshaperModelMaxBinIdx); idxS++)
-    {
-      if (lumaVal < m_reshapePivot[idxS + 1])     break;
-    }
-    return idxS;
+    if (lumaVal < m_reshapePivot[idxS + 1])     break;
   }
+  return std::min(idxS, PIC_CODE_CW_BINS-1);
 }
 
 /**
@@ -166,6 +225,7 @@ void Reshape::copySliceReshaperInfo(SliceReshapeInfo& tInfo, SliceReshapeInfo& s
     tInfo.reshaperModelMinBinIdx = sInfo.reshaperModelMinBinIdx;
     memcpy(tInfo.reshaperModelBinCWDelta, sInfo.reshaperModelBinCWDelta, sizeof(int)*(PIC_CODE_CW_BINS));
     tInfo.maxNbitsNeededDeltaCW = sInfo.maxNbitsNeededDeltaCW;
+    tInfo.chrResScalingOffset = sInfo.chrResScalingOffset;
   }
   tInfo.sliceReshaperEnableFlag = sInfo.sliceReshaperEnableFlag;
   if (sInfo.sliceReshaperEnableFlag)
@@ -193,41 +253,32 @@ void Reshape::constructReshaper()
   for (int i = 0; i < pwlFwdLUTsize; i++)
   {
     m_reshapePivot[i + 1] = m_reshapePivot[i] + m_binCW[i];
-    int16_t Y1 = m_reshapePivot[i];
-    int16_t Y2 = m_reshapePivot[i + 1];
-
-    m_fwdLUT[i*pwlFwdBinLen] = Clip3((Pel)0, (Pel)((1 << m_lumaBD) - 1), (Pel)Y1);
-
-    int log2PwlFwdBinLen = floorLog2(pwlFwdBinLen);
-
-    int32_t scale = ((int32_t)(Y2 - Y1) * (1 << FP_PREC) + (1 << (log2PwlFwdBinLen - 1))) >> (log2PwlFwdBinLen);
-    for (int j = 1; j < pwlFwdBinLen; j++)
+    m_inputPivot[i + 1] = m_inputPivot[i] + m_initCW;
+    m_fwdScaleCoef[i] = ((int32_t)m_binCW[i] * (1 << FP_PREC) + (1 << (floorLog2(pwlFwdBinLen) - 1))) >> floorLog2(pwlFwdBinLen);
+    if (m_binCW[i] == 0)
     {
-      int tempVal = Y1 + (((int32_t)scale * (int32_t)j + (1 << (FP_PREC - 1))) >> FP_PREC);
-      m_fwdLUT[i*pwlFwdBinLen + j] = Clip3((Pel)0, (Pel)((1 << m_lumaBD) - 1), (Pel)tempVal);
+      m_invScaleCoef[i] = 0;
+      m_chromaAdjHelpLUT[i] = 1 << CSCALE_FP_PREC;
+    }
+    else
+    {
+      m_invScaleCoef[i] = (int32_t)(m_initCW * (1 << FP_PREC) / m_binCW[i]);
+      m_chromaAdjHelpLUT[i] = (int32_t)(m_initCW * (1 << FP_PREC) / ( m_binCW[i] + m_sliceReshapeInfo.chrResScalingOffset ) );
     }
   }
-  reverseLUT(m_fwdLUT, m_invLUT, m_reshapeLUTSize);
-  updateChromaScaleLUT();
-}
-
-/** generate chroma residue scaling LUT
-* \param void
-* \return void
-*/
-void Reshape::updateChromaScaleLUT()
-{
-  const int16_t  CW_bin_SC_LUT[2 * PIC_ANALYZE_CW_BINS] = { 16384, 16384, 16384, 16384, 16384, 16384, 16384, 8192, 8192, 8192, 8192, 5461, 5461, 5461, 5461, 4096, 4096, 4096, 4096, 3277, 3277, 3277, 3277, 2731, 2731, 2731, 2731, 2341, 2341, 2341, 2048, 2048, 2048, 1820, 1820, 1820, 1638, 1638, 1638, 1638, 1489, 1489, 1489, 1489, 1365, 1365, 1365, 1365, 1260, 1260, 1260, 1260, 1170, 1170, 1170, 1170, 1092, 1092, 1092, 1092, 1024, 1024, 1024, 1024 }; //p=11
-  for (int i = 0; i < PIC_CODE_CW_BINS; i++)
+  for (int lumaSample = 0; lumaSample < m_reshapeLUTSize; lumaSample++)
   {
-    uint16_t binCW = m_lumaBD > 10 ? (m_binCW[i] >> (m_lumaBD - 10)) : m_lumaBD < 10 ? (m_binCW[i] << (10 -m_lumaBD)): m_binCW[i];
-    if ((i < m_sliceReshapeInfo.reshaperModelMinBinIdx) || (i > m_sliceReshapeInfo.reshaperModelMaxBinIdx))
-      m_chromaAdjHelpLUT[i] = 1 << CSCALE_FP_PREC;
-    else
-      m_chromaAdjHelpLUT[i] = CW_bin_SC_LUT[Clip3((uint16_t)1, (uint16_t)64, (uint16_t)(binCW >> 1)) - 1];
+    int idxY = lumaSample / m_initCW;
+    int tempVal = m_reshapePivot[idxY] + ((m_fwdScaleCoef[idxY] * (lumaSample - m_inputPivot[idxY]) + (1 << (FP_PREC - 1))) >> FP_PREC);
+    m_fwdLUT[lumaSample] = Clip3((Pel)0, (Pel)((1 << m_lumaBD) - 1), (Pel)(tempVal));
+
+    int idxYInv = getPWLIdxInv(lumaSample);
+    int invSample = m_inputPivot[idxYInv] + ((m_invScaleCoef[idxYInv] * (lumaSample - m_reshapePivot[idxYInv]) + (1 << (FP_PREC - 1))) >> FP_PREC);
+    m_invLUT[lumaSample] = Clip3((Pel)0, (Pel)((1 << m_lumaBD) - 1), (Pel)(invSample));
   }
 }
 
 
+
 //
 //! \}
diff --git a/source/Lib/CommonLib/Reshape.h b/source/Lib/CommonLib/Reshape.h
index 5b41c402df91324a1e594f2af792c2c9770321ac..6b6e9d58e17adcafa6551c32a41e15f896cfc0ac 100644
--- a/source/Lib/CommonLib/Reshape.h
+++ b/source/Lib/CommonLib/Reshape.h
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2019, ITU/ISO/IEC
+* Copyright (c) 2010-2020, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -64,8 +64,14 @@ protected:
   uint16_t                m_initCW;
   bool                    m_reshape;
   std::vector<Pel>        m_reshapePivot;
+  std::vector<Pel>        m_inputPivot;
+  std::vector<int32_t>    m_fwdScaleCoef;
+  std::vector<int32_t>    m_invScaleCoef;
   int                     m_lumaBD;
   int                     m_reshapeLUTSize;
+  int                     m_chromaScale;
+  int                     m_vpduX;
+  int                     m_vpduY;
 public:
   Reshape();
 #if ENABLE_SPLIT_PARALLELISM
@@ -77,7 +83,6 @@ public:
   void createDec(int bitDepth);
   void destroy();
 
-  void reverseLUT(std::vector<Pel>& inputLUT, std::vector<Pel>& outputLUT, uint16_t lutSize);
   std::vector<Pel>&  getFwdLUT() { return m_fwdLUT; }
   std::vector<Pel>&  getInvLUT() { return m_invLUT; }
   std::vector<int>&  getChromaAdjHelpLUT() { return m_chromaAdjHelpLUT; }
@@ -93,9 +98,13 @@ public:
   void copySliceReshaperInfo(SliceReshapeInfo& tInfo, SliceReshapeInfo& sInfo);
 
   void constructReshaper();
-  void updateChromaScaleLUT();
   bool getReshapeFlag() { return m_reshape; }
   void setReshapeFlag(bool b) { m_reshape = b; }
+  int  calculateChromaAdjVpduNei(TransformUnit &tu, const CompArea &areaY);
+  void setVPDULoc(int x, int y) { m_vpduX = x, m_vpduY = y; }
+  bool isVPDUprocessed(int x, int y) { return ((x == m_vpduX) && (y == m_vpduY)); }
+  void setChromaScale (int chromaScale) { m_chromaScale = chromaScale; }
+  int  getChromaScale() { return m_chromaScale; }
 };// END CLASS DEFINITION Reshape
 
 //! \}
diff --git a/source/Lib/CommonLib/Rom.cpp b/source/Lib/CommonLib/Rom.cpp
index 4e2a4f03376b5894b135f7ae816f5695597635e4..5bc2f227a26e924598f0185c9169e8450619e685 100644
--- a/source/Lib/CommonLib/Rom.cpp
+++ b/source/Lib/CommonLib/Rom.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -53,7 +53,6 @@ CDTrace *g_trace_ctx = NULL;
 #endif
 bool g_mctsDecCheckEnabled = false;
 
-
 //! \ingroup CommonLib
 //! \{
 
@@ -63,58 +62,29 @@ const char* nalUnitTypeToString(NalUnitType type)
 {
   switch (type)
   {
-#if JVET_M0101_HLS
   case NAL_UNIT_CODED_SLICE_TRAIL:      return "TRAIL";
   case NAL_UNIT_CODED_SLICE_STSA:       return "STSA";
-  case NAL_UNIT_CODED_SLICE_IDR_W_RADL: return "IDR_W_RADL";
-  case NAL_UNIT_CODED_SLICE_IDR_N_LP:   return "IDR_N_LP";
-  case NAL_UNIT_CODED_SLICE_CRA:        return "CRA";
   case NAL_UNIT_CODED_SLICE_RADL:       return "RADL";
   case NAL_UNIT_CODED_SLICE_RASL:       return "RASL";
-#if HEVC_VPS
-  case NAL_UNIT_VPS:                    return "VPS";
-#endif
-  case NAL_UNIT_SPS:                    return "SPS";
-  case NAL_UNIT_PPS:                    return "PPS";
-  case NAL_UNIT_APS:                    return "APS";
-  case NAL_UNIT_ACCESS_UNIT_DELIMITER:  return "AUD";
-  case NAL_UNIT_EOS:                    return "EOS";
-  case NAL_UNIT_EOB:                    return "EOB";
-  case NAL_UNIT_FILLER_DATA:            return "FILLER";
-  case NAL_UNIT_PREFIX_SEI:             return "Prefix SEI";
-  case NAL_UNIT_SUFFIX_SEI:             return "Suffix SEI";
-  default:                              return "UNK";
-#else
-  case NAL_UNIT_CODED_SLICE_TRAIL_R:    return "TRAIL_R";
-  case NAL_UNIT_CODED_SLICE_TRAIL_N:    return "TRAIL_N";
-  case NAL_UNIT_CODED_SLICE_TSA_R:      return "TSA_R";
-  case NAL_UNIT_CODED_SLICE_TSA_N:      return "TSA_N";
-  case NAL_UNIT_CODED_SLICE_STSA_R:     return "STSA_R";
-  case NAL_UNIT_CODED_SLICE_STSA_N:     return "STSA_N";
-  case NAL_UNIT_CODED_SLICE_BLA_W_LP:   return "BLA_W_LP";
-  case NAL_UNIT_CODED_SLICE_BLA_W_RADL: return "BLA_W_RADL";
-  case NAL_UNIT_CODED_SLICE_BLA_N_LP:   return "BLA_N_LP";
   case NAL_UNIT_CODED_SLICE_IDR_W_RADL: return "IDR_W_RADL";
   case NAL_UNIT_CODED_SLICE_IDR_N_LP:   return "IDR_N_LP";
   case NAL_UNIT_CODED_SLICE_CRA:        return "CRA";
-  case NAL_UNIT_CODED_SLICE_RADL_R:     return "RADL_R";
-  case NAL_UNIT_CODED_SLICE_RADL_N:     return "RADL_N";
-  case NAL_UNIT_CODED_SLICE_RASL_R:     return "RASL_R";
-  case NAL_UNIT_CODED_SLICE_RASL_N:     return "RASL_N";
-#if HEVC_VPS
+  case NAL_UNIT_CODED_SLICE_GDR:        return "GDR";
+  case NAL_UNIT_DPS:                    return "DPS";
   case NAL_UNIT_VPS:                    return "VPS";
-#endif
   case NAL_UNIT_SPS:                    return "SPS";
   case NAL_UNIT_PPS:                    return "PPS";
-  case NAL_UNIT_APS:                    return "APS";
+  case NAL_UNIT_PREFIX_APS:             return "Prefix APS";
+  case NAL_UNIT_SUFFIX_APS:             return "Suffix APS";
+  case NAL_UNIT_PH:                     return "PH";
   case NAL_UNIT_ACCESS_UNIT_DELIMITER:  return "AUD";
   case NAL_UNIT_EOS:                    return "EOS";
   case NAL_UNIT_EOB:                    return "EOB";
-  case NAL_UNIT_FILLER_DATA:            return "FILLER";
   case NAL_UNIT_PREFIX_SEI:             return "Prefix SEI";
   case NAL_UNIT_SUFFIX_SEI:             return "Suffix SEI";
+  case NAL_UNIT_FD:                     return "FD";
   default:                              return "UNK";
-#endif
+
   }
 }
 
@@ -163,37 +133,47 @@ public:
         }
         break;
 
-#if HEVC_USE_MDCS
-      //------------------------------------------------
-      case SCAN_HOR:
-
-        if (m_column == m_blockWidth - 1)
+      case SCAN_TRAV_HOR:
+        if (m_line % 2 == 0)
         {
-          m_line++;
-          m_column = 0;
+          if (m_column == (m_blockWidth - 1))
+          {
+            m_line++;
+            m_column = m_blockWidth - 1;
+          }
+          else m_column++;
         }
         else
         {
-          m_column++;
+          if (m_column == 0)
+          {
+            m_line++;
+            m_column = 0;
+          }
+          else m_column--;
         }
         break;
 
-      //------------------------------------------------
-
-      case SCAN_VER:
-
-        if (m_line == m_blockHeight - 1)
+      case SCAN_TRAV_VER:
+        if (m_column % 2 == 0)
         {
-          m_column++;
-          m_line = 0;
+          if (m_line == (m_blockHeight - 1))
+          {
+            m_column++;
+            m_line = m_blockHeight - 1;
+          }
+          else m_line++;
         }
         else
         {
-          m_line++;
+          if (m_line == 0)
+          {
+            m_column++;
+            m_line = 0;
+          }
+          else m_line--;
         }
         break;
-
-#endif
       //------------------------------------------------
 
       default:
@@ -205,50 +185,50 @@ public:
     return rtn;
   }
 };
-const int8_t g_GbiLog2WeightBase = 3;
-const int8_t g_GbiWeightBase = (1 << g_GbiLog2WeightBase);
-const int8_t g_GbiWeights[GBI_NUM] = { -2, 3, 4, 5, 10 };
-const int8_t g_GbiSearchOrder[GBI_NUM] = { GBI_DEFAULT, GBI_DEFAULT - 2, GBI_DEFAULT + 2, GBI_DEFAULT - 1, GBI_DEFAULT + 1 };
-int8_t g_GbiCodingOrder[GBI_NUM];
-int8_t g_GbiParsingOrder[GBI_NUM];
-
-int8_t getGbiWeight(uint8_t gbiIdx, uint8_t uhRefFrmList)
+const int8_t g_BcwLog2WeightBase = 3;
+const int8_t g_BcwWeightBase = (1 << g_BcwLog2WeightBase);
+const int8_t g_BcwWeights[BCW_NUM] = { -2, 3, 4, 5, 10 };
+const int8_t g_BcwSearchOrder[BCW_NUM] = { BCW_DEFAULT, BCW_DEFAULT - 2, BCW_DEFAULT + 2, BCW_DEFAULT - 1, BCW_DEFAULT + 1 };
+int8_t g_BcwCodingOrder[BCW_NUM];
+int8_t g_BcwParsingOrder[BCW_NUM];
+
+int8_t getBcwWeight(uint8_t bcwIdx, uint8_t uhRefFrmList)
 {
   // Weghts for the model: P0 + w * (P1 - P0) = (1-w) * P0 + w * P1
   // Retuning  1-w for P0 or w for P1
-  return (uhRefFrmList == REF_PIC_LIST_0 ? g_GbiWeightBase - g_GbiWeights[gbiIdx] : g_GbiWeights[gbiIdx]);
+  return (uhRefFrmList == REF_PIC_LIST_0 ? g_BcwWeightBase - g_BcwWeights[bcwIdx] : g_BcwWeights[bcwIdx]);
 }
 
-void resetGbiCodingOrder(bool bRunDecoding, const CodingStructure &cs)
+void resetBcwCodingOrder(bool bRunDecoding, const CodingStructure &cs)
 {
-  // Form parsing order: { GBI_DEFAULT, GBI_DEFAULT+1, GBI_DEFAULT-1, GBI_DEFAULT+2, GBI_DEFAULT-2, ... }
-  g_GbiParsingOrder[0] = GBI_DEFAULT;
-  for (int i = 1; i <= (GBI_NUM >> 1); ++i)
+  // Form parsing order: { BCW_DEFAULT, BCW_DEFAULT+1, BCW_DEFAULT-1, BCW_DEFAULT+2, BCW_DEFAULT-2, ... }
+  g_BcwParsingOrder[0] = BCW_DEFAULT;
+  for (int i = 1; i <= (BCW_NUM >> 1); ++i)
   {
-    g_GbiParsingOrder[2 * i - 1] = GBI_DEFAULT + (int8_t)i;
-    g_GbiParsingOrder[2 * i] = GBI_DEFAULT - (int8_t)i;
+    g_BcwParsingOrder[2 * i - 1] = BCW_DEFAULT + (int8_t)i;
+    g_BcwParsingOrder[2 * i] = BCW_DEFAULT - (int8_t)i;
   }
 
   // Form encoding order
   if (!bRunDecoding)
   {
-    for (int i = 0; i < GBI_NUM; ++i)
+    for (int i = 0; i < BCW_NUM; ++i)
     {
-      g_GbiCodingOrder[(uint32_t)g_GbiParsingOrder[i]] = i;
+      g_BcwCodingOrder[(uint32_t)g_BcwParsingOrder[i]] = i;
     }
   }
 }
 
-uint32_t deriveWeightIdxBits(uint8_t gbiIdx) // Note: align this with TEncSbac::codeGbiIdx and TDecSbac::parseGbiIdx
+uint32_t deriveWeightIdxBits(uint8_t bcwIdx) // Note: align this with TEncSbac::codeBcwIdx and TDecSbac::parseBcwIdx
 {
   uint32_t numBits = 1;
-  uint8_t  gbiCodingIdx = (uint8_t)g_GbiCodingOrder[gbiIdx];
+  uint8_t  bcwCodingIdx = (uint8_t)g_BcwCodingOrder[bcwIdx];
 
-  if (GBI_NUM > 2 && gbiCodingIdx != 0)
+  if (BCW_NUM > 2 && bcwCodingIdx != 0)
   {
-    uint32_t prefixNumBits = GBI_NUM - 2;
+    uint32_t prefixNumBits = BCW_NUM - 2;
     uint32_t step = 1;
-    uint8_t  prefixSymbol = gbiCodingIdx;
+    uint8_t  prefixSymbol = bcwCodingIdx;
 
     // Truncated unary code
     uint8_t idx = 1;
@@ -269,55 +249,21 @@ uint32_t deriveWeightIdxBits(uint8_t gbiIdx) // Note: align this with TEncSbac::
   return numBits;
 }
 
-uint32_t g_log2SbbSize[2][MAX_CU_DEPTH+1][MAX_CU_DEPTH+1][2] =
+uint32_t g_log2SbbSize[MAX_CU_DEPTH + 1][MAX_CU_DEPTH + 1][2] =
+//===== luma/chroma =====
 {
-  //===== luma =====
-  {
-    { {0,0}, {0,1}, {0,2}, {0,3}, {0,4}, {0,4}, {0,4}, {0,4} },
-    { {1,0}, {1,1}, {1,2}, {1,3}, {1,3}, {1,3}, {1,3}, {1,3} },
-    { {2,0}, {2,1}, {2,2}, {2,2}, {2,2}, {2,2}, {2,2}, {2,2} },
-    { {3,0}, {3,1}, {2,2}, {2,2}, {2,2}, {2,2}, {2,2}, {2,2} },
-    { {4,0}, {3,1}, {2,2}, {2,2}, {2,2}, {2,2}, {2,2}, {2,2} },
-    { {4,0}, {3,1}, {2,2}, {2,2}, {2,2}, {2,2}, {2,2}, {2,2} },
-    { {4,0}, {3,1}, {2,2}, {2,2}, {2,2}, {2,2}, {2,2}, {2,2} },
-    { {4,0}, {3,1}, {2,2}, {2,2}, {2,2}, {2,2}, {2,2}, {2,2} }
-  },
-  //===== chroma =====
-  {
-    { {0,0}, {0,0}, {0,0}, {0,0}, {0,0}, {0,0}, {0,0}, {0,0} },
-    { {0,0}, {1,1}, {1,1}, {1,1}, {1,1}, {1,1}, {1,1}, {1,1} },
-    { {0,0}, {1,1}, {2,2}, {2,2}, {2,2}, {2,2}, {2,2}, {2,2} },
-    { {0,0}, {1,1}, {2,2}, {2,2}, {2,2}, {2,2}, {2,2}, {2,2} },
-    { {0,0}, {1,1}, {2,2}, {2,2}, {2,2}, {2,2}, {2,2}, {2,2} },
-    { {0,0}, {1,1}, {2,2}, {2,2}, {2,2}, {2,2}, {2,2}, {2,2} },
-    { {0,0}, {1,1}, {2,2}, {2,2}, {2,2}, {2,2}, {2,2}, {2,2} },
-    { {0,0}, {1,1}, {2,2}, {2,2}, {2,2}, {2,2}, {2,2}, {2,2} }
-  },
+  { { 0,0 },{ 0,1 },{ 0,2 },{ 0,3 },{ 0,4 },{ 0,4 },{ 0,4 },{ 0,4 } },
+  { { 1,0 },{ 1,1 },{ 1,1 },{ 1,3 },{ 1,3 },{ 1,3 },{ 1,3 },{ 1,3 } },
+  { { 2,0 },{ 1,1 },{ 2,2 },{ 2,2 },{ 2,2 },{ 2,2 },{ 2,2 },{ 2,2 } },
+  { { 3,0 },{ 3,1 },{ 2,2 },{ 2,2 },{ 2,2 },{ 2,2 },{ 2,2 },{ 2,2 } },
+  { { 4,0 },{ 3,1 },{ 2,2 },{ 2,2 },{ 2,2 },{ 2,2 },{ 2,2 },{ 2,2 } },
+  { { 4,0 },{ 3,1 },{ 2,2 },{ 2,2 },{ 2,2 },{ 2,2 },{ 2,2 },{ 2,2 } },
+  { { 4,0 },{ 3,1 },{ 2,2 },{ 2,2 },{ 2,2 },{ 2,2 },{ 2,2 },{ 2,2 } },
+  { { 4,0 },{ 3,1 },{ 2,2 },{ 2,2 },{ 2,2 },{ 2,2 },{ 2,2 },{ 2,2 } }
 };
 // initialize ROM variables
 void initROM()
 {
-  int c;
-
-  // g_aucConvertToBit[ x ]: log2(x/4), if x=4 -> 0, x=8 -> 1, x=16 -> 2, ...
-  // g_aucLog2[ x ]: log2(x), if x=1 -> 0, x=2 -> 1, x=4 -> 2, x=8 -> 3, x=16 -> 4, ...
-  ::memset(g_aucLog2, 0, sizeof(g_aucLog2));
-  c = 0;
-  for( int i = 0, n = 0; i <= MAX_CU_SIZE; i++ )
-  {
-    g_aucNextLog2[i] = i <= 1 ? 0 : c + 1;
-
-    if( i == ( 1 << n ) )
-    {
-      c = n;
-      n++;
-    }
-
-    g_aucPrevLog2[i] = c;
-    g_aucLog2    [i] = c;
-  }
-
-
   gp_sizeIdxInfo = new SizeIndexInfoLog2();
   gp_sizeIdxInfo->init(MAX_CU_SIZE);
 
@@ -325,8 +271,6 @@ void initROM()
   SizeIndexInfoLog2 sizeInfo;
   sizeInfo.init(MAX_CU_SIZE);
 
-  for( int ch = 0; ch < MAX_NUM_CHANNEL_TYPE; ch++ )
-  {
   // initialize scan orders
   for (uint32_t blockHeightIdx = 0; blockHeightIdx < sizeInfo.numAllHeights(); blockHeightIdx++)
   {
@@ -350,7 +294,7 @@ void initROM()
           scan = new ScanElement[totalValues];
         }
 
-        g_scanOrder[ch][SCAN_UNGROUPED][scanType][blockWidthIdx][blockHeightIdx] = scan;
+        g_scanOrder[SCAN_UNGROUPED][scanType][blockWidthIdx][blockHeightIdx] = scan;
 
         if (scan == nullptr)
         {
@@ -374,7 +318,7 @@ void initROM()
       //--------------------------------------------------------------------------------------------------
 
       //grouped scan orders
-      const uint32_t* log2Sbb        = g_log2SbbSize[ch][ g_aucLog2[blockWidth] ][ g_aucLog2[blockHeight] ];
+      const uint32_t* log2Sbb        = g_log2SbbSize[floorLog2(blockWidth)][floorLog2(blockHeight)];
       const uint32_t  log2CGWidth    = log2Sbb[0];
       const uint32_t  log2CGHeight   = log2Sbb[1];
 
@@ -392,7 +336,7 @@ void initROM()
 
         ScanElement *scan = new ScanElement[totalValues];
 
-        g_scanOrder[ch][SCAN_GROUPED_4x4][scanType][blockWidthIdx][blockHeightIdx] = scan;
+        g_scanOrder[SCAN_GROUPED_4x4][scanType][blockWidthIdx][blockHeightIdx] = scan;
 
         if ( blockWidth > JVET_C0024_ZERO_OUT_TH || blockHeight > JVET_C0024_ZERO_OUT_TH )
         {
@@ -434,6 +378,29 @@ void initROM()
       //--------------------------------------------------------------------------------------------------
     }
   }
+
+  // initialize CoefTopLeftDiagScan8x8 for LFNST
+  for( uint32_t blockWidthIdx = 0; blockWidthIdx < sizeInfo.numAllWidths(); blockWidthIdx++ )
+  {
+    const uint32_t blockWidth = sizeInfo.sizeFrom( blockWidthIdx );
+
+    const static uint8_t g_auiXYDiagScan8x8[ 64 ][ 2 ] =
+    {
+      { 0, 0 }, { 0, 1 }, { 1, 0 }, { 0, 2 }, { 1, 1 }, { 2, 0 }, { 0, 3 }, { 1, 2 },
+      { 2, 1 }, { 3, 0 }, { 1, 3 }, { 2, 2 }, { 3, 1 }, { 2, 3 }, { 3, 2 }, { 3, 3 },
+      { 0, 4 }, { 0, 5 }, { 1, 4 }, { 0, 6 }, { 1, 5 }, { 2, 4 }, { 0, 7 }, { 1, 6 },
+      { 2, 5 }, { 3, 4 }, { 1, 7 }, { 2, 6 }, { 3, 5 }, { 2, 7 }, { 3, 6 }, { 3, 7 },
+      { 4, 0 }, { 4, 1 }, { 5, 0 }, { 4, 2 }, { 5, 1 }, { 6, 0 }, { 4, 3 }, { 5, 2 },
+      { 6, 1 }, { 7, 0 }, { 5, 3 }, { 6, 2 }, { 7, 1 }, { 6, 3 }, { 7, 2 }, { 7, 3 },
+      { 4, 4 }, { 4, 5 }, { 5, 4 }, { 4, 6 }, { 5, 5 }, { 6, 4 }, { 4, 7 }, { 5, 6 },
+      { 6, 5 }, { 7, 4 }, { 5, 7 }, { 6, 6 }, { 7, 5 }, { 6, 7 }, { 7, 6 }, { 7, 7 }
+    };
+    for( int i = 0; i < 64; i++ )
+    {
+      g_coefTopLeftDiagScan8x8[ blockWidthIdx ][ i ].idx = g_auiXYDiagScan8x8[ i ][ 0 ] + g_auiXYDiagScan8x8[ i ][ 1 ] * blockWidth;
+      g_coefTopLeftDiagScan8x8[ blockWidthIdx ][ i ].x   = g_auiXYDiagScan8x8[ i ][ 0 ];
+      g_coefTopLeftDiagScan8x8[ blockWidthIdx ][ i ].y   = g_auiXYDiagScan8x8[ i ][ 1 ];
+    }
   }
 
   for( int idxH = MAX_CU_DEPTH - MIN_CU_LOG2; idxH >= 0; --idxH )
@@ -457,6 +424,30 @@ void initROM()
       }
     }
   }
+
+  for (int idxH = 0; idxH < MAX_CU_DEPTH - MIN_CU_LOG2 + 2; ++idxH)
+  {
+    for (int idxW = 0; idxW < MAX_CU_DEPTH - MIN_CU_LOG2 + 2; ++idxW)
+    {
+      const int nCbH = 1 << (idxH + 1);
+      const int nCbW = 1 << (idxW + 1);
+      const int nCbR = (nCbW > nCbH) ? nCbW / nCbH : nCbH / nCbW;
+
+      // over-allocate by 4 int16_t entries (64 bits) so that SIMD code can read at least 64 bits at the last row
+      g_triangleWeights[0][idxH][idxW] = new int16_t[nCbH * nCbW + 4];
+      g_triangleWeights[1][idxH][idxW] = new int16_t[nCbH * nCbW + 4];
+      for (int y = 0; y < nCbH; y++)
+      {
+        for (int x = 0; x < nCbW; x++)
+        {
+          g_triangleWeights[0][idxH][idxW][y*nCbW + x] = (nCbW > nCbH) ? Clip3(0, 8, x / nCbR - y + 4) : Clip3(0, 8, x - y / nCbR + 4);
+          g_triangleWeights[1][idxH][idxW][y*nCbW + x] = (nCbW > nCbH) ? Clip3(0, 8, nCbH - 1 - x / nCbR - y + 4) : Clip3(0, 8, nCbW - 1 - x - y / nCbR + 4);
+        }
+      }
+    }
+  }
+
+  ::memset(g_isReusedUniMVsFilled, 0, sizeof(g_isReusedUniMVsFilled));
 }
 
 void destroyROM()
@@ -464,19 +455,16 @@ void destroyROM()
   unsigned numWidths = gp_sizeIdxInfo->numAllWidths();
   unsigned numHeights = gp_sizeIdxInfo->numAllHeights();
 
-  for( uint32_t ch = 0; ch < MAX_NUM_CHANNEL_TYPE; ch++ )
+  for (uint32_t groupTypeIndex = 0; groupTypeIndex < SCAN_NUMBER_OF_GROUP_TYPES; groupTypeIndex++)
   {
-    for( uint32_t groupTypeIndex = 0; groupTypeIndex < SCAN_NUMBER_OF_GROUP_TYPES; groupTypeIndex++ )
+    for (uint32_t scanOrderIndex = 0; scanOrderIndex < SCAN_NUMBER_OF_TYPES; scanOrderIndex++)
     {
-      for( uint32_t scanOrderIndex = 0; scanOrderIndex < SCAN_NUMBER_OF_TYPES; scanOrderIndex++ )
+      for (uint32_t blockWidthIdx = 0; blockWidthIdx <= numWidths; blockWidthIdx++)
       {
-        for( uint32_t blockWidthIdx = 0; blockWidthIdx <= numWidths; blockWidthIdx++ )
+        for (uint32_t blockHeightIdx = 0; blockHeightIdx <= numHeights; blockHeightIdx++)
         {
-          for( uint32_t blockHeightIdx = 0; blockHeightIdx <= numHeights; blockHeightIdx++ )
-          {
-            delete[] g_scanOrder[ch][groupTypeIndex][scanOrderIndex][blockWidthIdx][blockHeightIdx];
-            g_scanOrder[ch][groupTypeIndex][scanOrderIndex][blockWidthIdx][blockHeightIdx] = nullptr;
-          }
+          delete[] g_scanOrder[groupTypeIndex][scanOrderIndex][blockWidthIdx][blockHeightIdx];
+          g_scanOrder[groupTypeIndex][scanOrderIndex][blockWidthIdx][blockHeightIdx] = nullptr;
         }
       }
     }
@@ -484,20 +472,33 @@ void destroyROM()
 
   delete gp_sizeIdxInfo;
   gp_sizeIdxInfo = nullptr;
+
+  for (int idxH = 0; idxH < MAX_CU_DEPTH - MIN_CU_LOG2 + 2; ++idxH)
+  {
+    for (int idxW = 0; idxW < MAX_CU_DEPTH - MIN_CU_LOG2 + 2; ++idxW)
+    {
+      delete[] g_triangleWeights[0][idxH][idxW];
+      delete[] g_triangleWeights[1][idxH][idxW];
+      g_triangleWeights[0][idxH][idxW] = nullptr;
+      g_triangleWeights[1][idxH][idxW] = nullptr;
+    }
+  }
 }
 
 // ====================================================================================================================
 // Data structure related table & variable
 // ====================================================================================================================
 
-const int g_quantScales[SCALING_LIST_REM_NUM] =
+const int g_quantScales[2][SCALING_LIST_REM_NUM] = // can be represented as a 9 element table
 {
-  26214,23302,20560,18396,16384,14564
+    { 26214,23302,20560,18396,16384,14564 },
+    { 18396,16384,14564,13107,11651,10280 } // Note: last 3 values of second row == half of the first 3 values of the first row
 };
 
-const int g_invQuantScales[SCALING_LIST_REM_NUM] =
+const int g_invQuantScales[2][SCALING_LIST_REM_NUM] = // can be represented as a 9 element table
 {
-  40,45,51,57,64,72
+  { 40,45,51,57,64,72 },
+  { 57,64,72,80,90,102 } // Note: last 3 values of second row == double of the first 3 values of the first row
 };
 
 //--------------------------------------------------------------------------------------------------
@@ -505,16 +506,6 @@ const int g_invQuantScales[SCALING_LIST_REM_NUM] =
 //--------------------------------------------------------------------------------------------------
 //coefficients
 //--------------------------------------------------------------------------------------------------
-
-const uint8_t g_aucChromaScale[NUM_CHROMA_FORMAT][chromaQPMappingTableSize] =
-{
-  //0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69
-  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
-  { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,29,30,31,32,33,33,34,34,35,35,36,36,37,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63 },
-  { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,63,63,63,63,63,63 },
-  { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,63,63,63,63,63,63 }
-};
-
 // ====================================================================================================================
 // Intra prediction
 // ====================================================================================================================
@@ -551,10 +542,9 @@ const uint8_t g_aucIntraModeNumFast_NotUseMPM[MAX_CU_DEPTH] =
 };
 
 const uint8_t g_chroma422IntraAngleMappingTable[NUM_INTRA_MODE] =
-//                                                               H                                                               D                                                               V
-//0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, DM
-{ 0, 1, 2, 2, 2, 2, 2, 2, 2, 3,  4,  6,  8, 10, 12, 13, 14, 16, 18, 20, 22, 23, 24, 26, 28, 30, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 44, 44, 45, 46, 46, 46, 47, 48, 48, 48, 49, 50, 51, 52, 52, 52, 53, 54, 54, 54, 55, 56, 56, 56, 57, 58, 59, 60, DM_CHROMA_IDX };
-
+//                                    *                                H                              *                                D      *   *   *   *       *   *   *                   *        V       *                   *   *   *      *   *   *   *
+//0, 1,  2,  3,  4,  5,  6,  7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, DM
+{ 0, 1, 61, 62, 63, 64, 65, 66, 2, 3,  5,  6,  8, 10, 12, 13, 14, 16, 18, 20, 22, 23, 24, 26, 28, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 41, 42, 43, 43, 44, 44, 45, 45, 46, 47, 48, 48, 49, 49, 50, 51, 51, 52, 52, 53, 54, 55, 55, 56, 56, 57, 57, 58, 59, 59, 60, DM_CHROMA_IDX };
 
 
 
@@ -562,9 +552,8 @@ const uint8_t g_chroma422IntraAngleMappingTable[NUM_INTRA_MODE] =
 // Misc.
 // ====================================================================================================================
 SizeIndexInfo*           gp_sizeIdxInfo = NULL;
-int8_t                    g_aucLog2    [MAX_CU_SIZE + 1];
-int8_t                    g_aucNextLog2[MAX_CU_SIZE + 1];
-int8_t                    g_aucPrevLog2[MAX_CU_SIZE + 1];
+
+const int                 g_ictModes[2][4] = { { 0, 3, 1, 2 }, { 0, -3, -1, -2 } };
 
 UnitScale g_miScaling( MIN_CU_LOG2, MIN_CU_LOG2 );
 
@@ -574,7 +563,8 @@ UnitScale g_miScaling( MIN_CU_LOG2, MIN_CU_LOG2 );
 // ====================================================================================================================
 
 // scanning order table
-ScanElement *g_scanOrder[2][SCAN_NUMBER_OF_GROUP_TYPES][SCAN_NUMBER_OF_TYPES][MAX_CU_SIZE / 2 + 1][MAX_CU_SIZE / 2 + 1];
+ScanElement *g_scanOrder[SCAN_NUMBER_OF_GROUP_TYPES][SCAN_NUMBER_OF_TYPES][MAX_CU_SIZE / 2 + 1][MAX_CU_SIZE / 2 + 1];
+ScanElement  g_coefTopLeftDiagScan8x8[ MAX_CU_SIZE / 2 + 1 ][ 64 ];
 
 const uint32_t g_uiMinInGroup[LAST_SIGNIFICANT_GROUPS] = { 0,1,2,3,4,6,8,12,16,24,32,48,64,96 };
 const uint32_t g_uiGroupIdx[MAX_TB_SIZEY] = { 0,1,2,3,4,4,5,5,6,6,6,6,7,7,7,7,8,8,8,8,8,8,8,8,9,9,9,9,9,9,9,9, 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11 };
@@ -582,16 +572,16 @@ const uint32_t g_auiGoRiceParsCoeff[32] =
 {
   0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3
 };
-const uint32_t g_auiGoRicePosCoeff0[3][32] =
-{
-  {0, 0, 0, 0, 0, 1, 2,    2, 2, 2, 2, 2, 4, 4,    4, 4, 4, 4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,     8,  8,  8,  8},
-  {1, 1, 1, 1, 2, 3, 4,    4, 4, 6, 6, 6, 8, 8,    8, 8, 8, 8, 12, 12, 12, 12, 12, 12, 12, 12, 16, 16,    16, 16, 16, 16},
-  {1, 1, 2, 2, 2, 3, 4,    4, 4, 6, 6, 6, 8, 8,    8, 8, 8, 8, 12, 12, 12, 12, 12, 12, 12, 16, 16, 16,    16, 16, 16, 16}
-};
-
-#if HEVC_USE_SCALING_LISTS
 const char *MatrixType[SCALING_LIST_SIZE_NUM][SCALING_LIST_NUM] =
 {
+  {
+    "INTRA1X1_LUMA",
+    "INTRA1X1_CHROMAU",
+    "INTRA1X1_CHROMAV",
+    "INTER1X1_LUMA",
+    "INTER1X1_CHROMAU",
+    "INTER1X1_CHROMAV"
+  },
   {
     "INTRA2X2_LUMA",
     "INTRA2X2_CHROMAU",
@@ -625,17 +615,29 @@ const char *MatrixType[SCALING_LIST_SIZE_NUM][SCALING_LIST_NUM] =
     "INTER16X16_CHROMAV"
   },
   {
-   "INTRA32X32_LUMA",
-   "INTRA32X32_CHROMAU_FROM16x16_CHROMAU",
-   "INTRA32X32_CHROMAV_FROM16x16_CHROMAV",
-   "INTER32X32_LUMA",
-   "INTER32X32_CHROMAU_FROM16x16_CHROMAU",
-   "INTER32X32_CHROMAV_FROM16x16_CHROMAV"
+    "INTRA32X32_LUMA",
+    "INTRA32X32_CHROMAU",
+    "INTRA32X32_CHROMAV",
+    "INTER32X32_LUMA",
+    "INTER32X32_CHROMAU",
+    "INTER32X32_CHROMAV"
+  },
+  {
+    "INTRA64X64_LUMA",
+    "INTRA64X64_CHROMAU",
+    "INTRA64X64_CHROMAV",
+    "INTER64X64_LUMA",
+    "INTER64X64_CHROMAU",
+    "INTER64X64_CHROMAV"
+  },
+  {
   },
 };
 
 const char *MatrixType_DC[SCALING_LIST_SIZE_NUM][SCALING_LIST_NUM] =
 {
+  {  //1x1
+  },
   {
   },
   {
@@ -652,11 +654,21 @@ const char *MatrixType_DC[SCALING_LIST_SIZE_NUM][SCALING_LIST_NUM] =
   },
   {
     "INTRA32X32_LUMA_DC",
-    "INTRA32X32_CHROMAU_DC_FROM16x16_CHROMAU",
-    "INTRA32X32_CHROMAV_DC_FROM16x16_CHROMAV",
+    "INTRA32X32_CHROMAU_DC",
+    "INTRA32X32_CHROMAV_DC",
     "INTER32X32_LUMA_DC",
-    "INTER32X32_CHROMAU_DC_FROM16x16_CHROMAU",
-    "INTER32X32_CHROMAV_DC_FROM16x16_CHROMAV"
+    "INTER32X32_CHROMAU_DC",
+    "INTER32X32_CHROMAV_DC"
+  },
+  {
+    "INTRA64X64_LUMA_DC",
+    "INTRA64X64_CHROMAU_DC",
+    "INTRA64X64_CHROMAV_DC",
+    "INTER64X64_LUMA_DC",
+    "INTER64X64_CHROMAU_DC",
+    "INTER64X64_CHROMAV_DC"
+  },
+  {
   },
 };
 
@@ -670,33 +682,38 @@ const int g_quantTSDefault4x4[4 * 4] =
 
 const int g_quantIntraDefault8x8[8 * 8] =
 {
-  16,16,16,16,17,18,21,24,
-  16,16,16,16,17,19,22,25,
-  16,16,17,18,20,22,25,29,
-  16,16,18,21,24,27,31,36,
-  17,17,20,24,30,35,41,47,
-  18,19,22,27,35,44,54,65,
-  21,22,25,31,41,54,70,88,
-  24,25,29,36,47,65,88,115
+  16,16,16,16,16,16,16,16,
+  16,16,16,16,16,16,16,16,
+  16,16,16,16,16,16,16,16,
+  16,16,16,16,16,16,16,16,
+  16,16,16,16,16,16,16,16,
+  16,16,16,16,16,16,16,16,
+  16,16,16,16,16,16,16,16,
+  16,16,16,16,16,16,16,16
 };
 
 const int g_quantInterDefault8x8[8 * 8] =
 {
-  16,16,16,16,17,18,20,24,
-  16,16,16,17,18,20,24,25,
-  16,16,17,18,20,24,25,28,
-  16,17,18,20,24,25,28,33,
-  17,18,20,24,25,28,33,41,
-  18,20,24,25,28,33,41,54,
-  20,24,25,28,33,41,54,71,
-  24,25,28,33,41,54,71,91
+  16,16,16,16,16,16,16,16,
+  16,16,16,16,16,16,16,16,
+  16,16,16,16,16,16,16,16,
+  16,16,16,16,16,16,16,16,
+  16,16,16,16,16,16,16,16,
+  16,16,16,16,16,16,16,16,
+  16,16,16,16,16,16,16,16,
+  16,16,16,16,16,16,16,16
 };
 
-const uint32_t g_scalingListSize [SCALING_LIST_SIZE_NUM] = { 4, 16, 64, 256, 1024, 4096, 16384 };
-const uint32_t g_scalingListSizeX[SCALING_LIST_SIZE_NUM] = { 2,  4,  8,  16,   32,   64,   128 };
-#endif
+const uint32_t g_scalingListSize [SCALING_LIST_SIZE_NUM] = { 1, 4, 16, 64, 256, 1024, 4096, 16384 };
+const uint32_t g_scalingListSizeX[SCALING_LIST_SIZE_NUM] = { 1, 2,  4,  8,  16,   32,   64,   128 };
 
 
 uint8_t g_triangleMvStorage[TRIANGLE_DIR_NUM][MAX_CU_DEPTH - MIN_CU_LOG2 + 1][MAX_CU_DEPTH - MIN_CU_LOG2 + 1][MAX_CU_SIZE >> MIN_CU_LOG2][MAX_CU_SIZE >> MIN_CU_LOG2];
+int16_t *g_triangleWeights[TRIANGLE_DIR_NUM][MAX_CU_DEPTH - MIN_CU_LOG2 + 2][MAX_CU_DEPTH - MIN_CU_LOG2 + 2];
+Mv   g_reusedUniMVs[32][32][8][8][2][33];
+bool g_isReusedUniMVsFilled[32][32][8][8];
 
+const uint8_t g_paletteQuant[52] = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 8, 9, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19, 21, 22, 24, 23, 25, 26, 28, 29, 31, 32, 34, 36, 37, 39, 41, 42, 45 };
+uint8_t g_paletteRunTopLut [5] = { 0, 1, 1, 2, 2 };
+uint8_t g_paletteRunLeftLut[5] = { 0, 1, 2, 3, 4 };
 //! \}
diff --git a/source/Lib/CommonLib/Rom.h b/source/Lib/CommonLib/Rom.h
index d896d304dda40320bfb3a39b13f4fa3991d5c397..929a75547ef2cdafdc21e965dea0b93e8d3288e9 100644
--- a/source/Lib/CommonLib/Rom.h
+++ b/source/Lib/CommonLib/Rom.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -59,6 +59,7 @@ void         destroyROM();
 // Data structure related table & variable
 // ====================================================================================================================
 
+
 // flexible conversion from relative to absolute index
 struct ScanElement
 {
@@ -67,25 +68,18 @@ struct ScanElement
   uint16_t y;
 };
 
-extern       uint32_t   g_log2SbbSize   [2][MAX_CU_DEPTH+1][MAX_CU_DEPTH+1][2];
+extern       uint32_t   g_log2SbbSize[MAX_CU_DEPTH + 1][MAX_CU_DEPTH + 1][2];
 extern ScanElement
-  *g_scanOrder[2][SCAN_NUMBER_OF_GROUP_TYPES][SCAN_NUMBER_OF_TYPES][MAX_CU_SIZE / 2 + 1][MAX_CU_SIZE / 2 + 1];
+  *g_scanOrder[SCAN_NUMBER_OF_GROUP_TYPES][SCAN_NUMBER_OF_TYPES][MAX_CU_SIZE / 2 + 1][MAX_CU_SIZE / 2 + 1];
+extern       ScanElement   g_coefTopLeftDiagScan8x8[ MAX_CU_SIZE / 2 + 1 ][ 64 ];
 
-extern const int g_quantScales   [SCALING_LIST_REM_NUM];          // Q(QP%6)
-extern const int g_invQuantScales[SCALING_LIST_REM_NUM];          // IQ(QP%6)
+extern const int g_quantScales   [2/*0=4^n blocks, 1=2*4^n blocks*/][SCALING_LIST_REM_NUM];          // Q(QP%6)
+extern const int g_invQuantScales[2/*0=4^n blocks, 1=2*4^n blocks*/][SCALING_LIST_REM_NUM];          // IQ(QP%6)
 
 static const int g_numTransformMatrixSizes = 6;
 static const int g_transformMatrixShift[TRANSFORM_NUMBER_OF_DIRECTIONS] = {  6, 6 };
 
 
-// ====================================================================================================================
-// Luma QP to Chroma QP mapping
-// ====================================================================================================================
-static const int chromaQPMappingTableSize = (MAX_QP + 7);
-
-extern const uint8_t  g_aucChromaScale[NUM_CHROMA_FORMAT][chromaQPMappingTableSize];
-
-
 // ====================================================================================================================
 // Scanning order & context mapping table
 // ====================================================================================================================
@@ -93,8 +87,10 @@ extern const uint8_t  g_aucChromaScale[NUM_CHROMA_FORMAT][chromaQPMappingTableSi
 extern const uint32_t   g_uiGroupIdx[ MAX_TB_SIZEY ];
 extern const uint32_t   g_uiMinInGroup[ LAST_SIGNIFICANT_GROUPS ];
 extern const uint32_t   g_auiGoRiceParsCoeff     [ 32 ];
-extern const uint32_t   g_auiGoRicePosCoeff0[ 3 ][ 32 ];
-extern const uint32_t   g_auiGoRiceRange[ MAX_GR_ORDER_RESIDUAL ];                  //!< maximum value coded with Rice codes
+inline uint32_t g_auiGoRicePosCoeff0(int st, uint32_t ricePar) // computed from st and ricePar instead of being read from a table
+{
+  return (st < 2 ? 1 : 2) << ricePar; // base value is 1 when st < 2, otherwise 2; the base is then shifted left by ricePar
+}
 
 // ====================================================================================================================
 // Intra prediction table
@@ -129,33 +125,36 @@ extern const TMatrixCoeff g_trCoreDST7P8  [TRANSFORM_NUMBER_OF_DIRECTIONS][  8][
 extern const TMatrixCoeff g_trCoreDST7P16 [TRANSFORM_NUMBER_OF_DIRECTIONS][ 16][ 16];
 extern const TMatrixCoeff g_trCoreDST7P32 [TRANSFORM_NUMBER_OF_DIRECTIONS][ 32][ 32];
 
+extern const     int8_t   g_lfnst8x8[ 4 ][ 2 ][ 16 ][ 48 ];
+extern const     int8_t   g_lfnst4x4[ 4 ][ 2 ][ 16 ][ 16 ];
+
+extern const     uint8_t  g_lfnstLut[ NUM_INTRA_MODE + NUM_EXT_LUMA_MODE - 1 ];
 
 // ====================================================================================================================
 // Misc.
 // ====================================================================================================================
 extern SizeIndexInfo* gp_sizeIdxInfo;
-extern int8_t          g_aucLog2                       [MAX_CU_SIZE + 1];
-extern int8_t          g_aucNextLog2        [MAX_CU_SIZE + 1];
-extern int8_t          g_aucPrevLog2        [MAX_CU_SIZE + 1];
+
+extern const int       g_ictModes[2][4];
 
 inline bool is34( const SizeType& size )
 {
-  return ( size & ( ( int64_t ) 1 << ( g_aucLog2[size] - 1 ) ) );
+  return ( size & ( ( int64_t ) 1 << ( floorLog2(size) - 1 ) ) );
 }
 
 inline bool is58( const SizeType& size )
 {
-  return ( size & ( ( int64_t ) 1 << ( g_aucLog2[size] - 2 ) ) );
+  return ( size & ( ( int64_t ) 1 << ( floorLog2(size) - 2 ) ) );
 }
 
 inline bool isNonLog2BlockSize( const Size& size )
 {
-  return ( ( 1 << g_aucLog2[size.width] ) != size.width ) || ( ( 1 << g_aucLog2[size.height] ) != size.height );
+  return ( ( 1 << floorLog2(size.width) ) != size.width ) || ( ( 1 << floorLog2(size.height) ) != size.height );
 }
 
 inline bool isNonLog2Size( const SizeType& size )
 {
-  return ( ( 1 << g_aucLog2[size] ) != size );
+  return ( ( 1 << floorLog2(size) ) != size );
 }
 
 extern UnitScale     g_miScaling; // scaling object for motion scaling
@@ -168,7 +167,6 @@ extern CDTrace* g_trace_ctx;
 
 const char* nalUnitTypeToString(NalUnitType type);
 
-#if HEVC_USE_SCALING_LISTS
 extern const char *MatrixType   [SCALING_LIST_SIZE_NUM][SCALING_LIST_NUM];
 extern const char *MatrixType_DC[SCALING_LIST_SIZE_NUM][SCALING_LIST_NUM];
 
@@ -178,22 +176,21 @@ extern const int g_quantInterDefault8x8[8*8];
 
 extern const uint32_t g_scalingListSize [SCALING_LIST_SIZE_NUM];
 extern const uint32_t g_scalingListSizeX[SCALING_LIST_SIZE_NUM];
-#endif
 
 extern MsgLevel g_verbosity;
 
 
-extern const int8_t g_GbiLog2WeightBase;
-extern const int8_t g_GbiWeightBase;
-extern const int8_t g_GbiWeights[GBI_NUM];
-extern const int8_t g_GbiSearchOrder[GBI_NUM];
-extern       int8_t g_GbiCodingOrder[GBI_NUM];
-extern       int8_t g_GbiParsingOrder[GBI_NUM];
+extern const int8_t g_BcwLog2WeightBase;
+extern const int8_t g_BcwWeightBase;
+extern const int8_t g_BcwWeights[BCW_NUM];
+extern const int8_t g_BcwSearchOrder[BCW_NUM];
+extern       int8_t g_BcwCodingOrder[BCW_NUM];
+extern       int8_t g_BcwParsingOrder[BCW_NUM];
 
 class CodingStructure;
-int8_t getGbiWeight(uint8_t gbiIdx, uint8_t uhRefFrmList);
-void resetGbiCodingOrder(bool bRunDecoding, const CodingStructure &cs);
-uint32_t deriveWeightIdxBits(uint8_t gbiIdx);
+int8_t getBcwWeight(uint8_t bcwIdx, uint8_t uhRefFrmList);
+void resetBcwCodingOrder(bool bRunDecoding, const CodingStructure &cs);
+uint32_t deriveWeightIdxBits(uint8_t bcwIdx);
 
 constexpr uint8_t g_tbMax[257] = { 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
@@ -209,8 +206,19 @@ constexpr uint8_t g_tbMax[257] = { 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3,
 //! \}
 
 extern       uint8_t g_triangleMvStorage[TRIANGLE_DIR_NUM][MAX_CU_DEPTH - MIN_CU_LOG2 + 1][MAX_CU_DEPTH - MIN_CU_LOG2 + 1][MAX_CU_SIZE >> MIN_CU_LOG2][MAX_CU_SIZE >> MIN_CU_LOG2];
-
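+// indexed as [triangle direction][idxH][idxW]; initROM() allocates each entry for a block of (1 << (idxH + 1)) rows by (1 << (idxW + 1)) columns, i.e. sizes 2/4/8/16/32/64/128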
+extern int16_t *g_triangleWeights[TRIANGLE_DIR_NUM][MAX_CU_DEPTH - MIN_CU_LOG2 + 2][MAX_CU_DEPTH - MIN_CU_LOG2 + 2];
 extern bool g_mctsDecCheckEnabled;
 
+class  Mv;
+extern Mv   g_reusedUniMVs[32][32][8][8][2][33];
+extern bool g_isReusedUniMVsFilled[32][32][8][8];
+
+extern const uint8_t g_paletteQuant[52];
+extern uint8_t g_paletteRunTopLut[5];
+extern uint8_t g_paletteRunLeftLut[5];
+
+const int g_IBCBufferSize = 256 * 128;
+
 #endif  //__TCOMROM__
 
diff --git a/source/Lib/CommonLib/RomLFNST.cpp b/source/Lib/CommonLib/RomLFNST.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..d09e56cde128697853f9772d2f5b5bf06dd89fb3
--- /dev/null
+++ b/source/Lib/CommonLib/RomLFNST.cpp
@@ -0,0 +1,366 @@
+/* The copyright in this software is being made available under the BSD
+ * License, included below. This software may be subject to other third party
+ * and contributor rights, including patent rights, and no such rights are
+ * granted under this license.
+ *
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *  * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
+ *    be used to endorse or promote products derived from this software without
+ *    specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file     RomLFNST.cpp
+    \brief    LFNST Tables
+*/
+
+#include "Rom.h"
+
+
+#include <memory.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <iomanip>
+
+// ====================================================================================================================
+// LFNST Tables
+// ====================================================================================================================
+
+const uint8_t g_lfnstLut[ NUM_INTRA_MODE + NUM_EXT_LUMA_MODE - 1 ] =  // LFNST lookup table; every listed value is in 0..3. NOTE(review): presumably maps an (extended) intra mode index to the first index of g_lfnst8x8 - confirm against callers
+{//0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94
+   0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+};  // the index ruler above runs 0..94; if the declared size is larger than the initializer list, the remaining entries are zero-initialized
+
+const int8_t g_lfnst8x8[ 4 ][ 2 ][ 16 ][ 48 ] = {
+ {  //0
+  {
+  { -117,   28,   18,    2,    4,    1,    2,    1,   32,  -18,   -2,    0,   -1,    0,    0,    0,   14,   -1,   -3,    0,   -1,    0,    0,    0,    2,    0,    0,    0,    0,    0,    0,    0,    3,    0,   -1,    0,    1,    0,    0,    0,    1,    0,    0,    0,    1,    0,    0,    0  },
+  {  -29,  -91,   47,    1,    9,    0,    3,    0,  -54,   26,   -8,    3,    0,    1,    0,    0,   33,    5,   -9,   -1,   -2,    0,   -1,    0,   -3,    3,    0,    0,    0,    0,    0,    0,    7,    2,   -2,    0,   -1,    1,    0,    0,    2,    1,   -1,    0,    0,    0,    0,    0  },
+  {  -10,   62,  -11,   -8,   -2,   -2,   -1,   -1,  -95,    3,   32,    0,    4,    0,    2,    0,   32,  -30,   -4,    4,   -1,    1,    0,    0,    6,    2,   -5,    0,    0,    0,    0,    0,    6,   -3,    0,    0,    2,    0,   -1,    0,    2,   -1,    0,    0,    1,    0,    0,    0  },
+  {  -15,   15,  -10,   -2,    1,    0,    1,    0,   10,  112,  -20,  -17,   -4,   -4,   -1,   -2,  -20,  -26,   31,    1,    0,    0,    0,    0,    2,  -16,   -1,    6,    0,    1,    0,    0,    1,   -4,    0,    0,    0,   -3,    0,    1,    0,   -1,    0,    0,    0,   -2,    0,    0  },
+  {   32,   39,   92,  -44,    4,  -10,    1,   -4,   26,   12,  -15,   13,   -5,    2,   -2,    0,   29,  -16,  -22,    8,    0,    1,    0,    1,  -20,    6,    4,   -3,    1,    0,    0,    0,    1,   -4,   -3,    2,   -4,    1,    0,    0,    1,   -1,   -2,    1,   -2,    0,    0,    0  },
+  {  -10,    1,   50,  -15,    2,   -3,    1,   -1,  -28,  -15,   14,    6,    1,    1,    1,    0,  -99,   -4,    9,    5,    5,    2,    2,    1,   44,  -10,  -11,    1,   -2,    0,   -1,    0,   -5,    4,   -3,    0,    8,   -1,   -2,    0,   -2,    1,   -1,    0,    4,    0,   -1,    0  },
+  {    1,  -33,  -11,  -14,    7,   -2,    2,    0,   29,  -12,   37,   -7,   -4,    0,   -1,    0,    6,  -99,    3,   26,   -1,    5,    0,    2,   14,   30,  -27,   -2,    1,   -1,    0,   -1,   -6,    6,    6,   -3,    1,    3,   -3,    0,   -1,    1,    1,    0,    0,    1,   -1,    0  },
+  {    0,    6,   -6,   21,   -4,    2,    0,    0,  -20,  -24, -104,   30,    5,    5,    1,    2,   -7,  -46,   10,  -14,    7,    0,    1,    0,    9,   21,    7,   -6,   -2,   -1,    0,   -1,    2,    2,    5,   -2,    0,    3,    4,   -1,    0,    0,    1,    0,    0,    1,    2,   -1  },
+  {  -13,  -13,  -37, -101,   29,  -11,    8,   -3,  -12,  -15,  -20,    2,  -11,    5,   -2,    1,  -12,   10,   26,   12,   -6,    0,   -1,    0,  -32,   -2,   11,    3,    3,   -1,    1,    0,   11,   -5,   -1,    6,   -4,    2,    1,    0,    3,   -1,    1,    2,   -1,    0,    0,    0  },
+  {    6,    1,  -14,  -36,    9,   -3,    2,    0,   10,    9,  -18,   -1,   -3,    1,    0,    0,   38,   26,  -13,   -1,   -5,   -1,   -1,    0,  102,    3,  -14,   -1,   -5,   -1,   -2,    0,  -29,   10,   10,    0,   10,   -4,   -1,    1,   -7,    1,    2,    1,    2,   -1,    0,    0  },
+  {  -12,   -2,  -26,  -12,   -9,    2,   -1,    1,   -3,   30,    4,   34,   -4,    0,   -1,    0,  -30,    3,  -92,   14,   19,    0,    3,    0,  -11,   34,   21,  -33,    1,   -2,    0,   -1,   -9,   -4,   18,    3,    2,    0,    0,   -2,   -1,   -1,    3,    0,    0,    0,    0,   -1  },
+  {    0,   -3,    0,   -4,  -15,    6,   -3,    1,   -7,  -15,  -28,  -86,   19,   -5,    4,   -1,   -5,  -17,  -41,   42,   -6,    2,   -1,    1,   -1,  -40,   37,   13,   -4,    2,   -1,    1,  -10,   13,   -1,   -4,    4,   -4,    3,    4,   -2,    2,   -1,   -1,    1,   -1,    1,    2  },
+  {   -1,    9,   13,    5,   14,   -2,    2,   -1,   -8,    3,   -4,  -62,    4,    1,    1,    0,  -12,   23,   16,  -11,  -17,    0,   -1,    0,  -11,   97,   -3,   -3,    0,   -6,    0,   -2,  -21,   -5,   23,    0,    2,   -2,   -1,    6,   -3,   -3,    1,    0,    0,    0,    0,    2  },
+  {    6,    2,   -3,    2,   10,   -1,    2,    0,    8,    3,   -1,  -20,    0,    1,    0,    0,   -4,    4,  -16,    0,   -2,    0,    1,    0,   34,   23,    6,   -7,   -4,   -2,   -1,    0,  108,   -5,  -30,    6,  -27,   10,    7,   -2,   11,   -3,   -1,    1,   -4,    1,    0,    1  },
+  {    6,    9,   -2,   35,  110,  -22,   11,   -4,   -2,    0,   -3,    1,  -18,   12,   -3,    2,   -5,   -4,  -22,    8,  -25,    3,    0,    0,   -3,  -21,    2,   -3,    9,   -2,    1,    0,   -7,    1,    3,   -5,    3,    0,   -1,    0,    0,    1,    0,   -1,    1,    0,    0,    0  },
+  {   -1,    7,   -2,    9,  -11,    5,   -1,    1,   -7,    2,  -22,    4,  -13,    0,   -1,    0,    0,   28,    0,   76,    4,   -6,    0,   -2,  -13,    5,  -76,   -4,   33,   -1,    3,    0,    9,   18,   -3,  -35,   -4,   -1,    6,    1,    1,    2,    0,   -3,   -1,    0,    2,    0  },
+  },
+  {
+  { -108,   48,    9,    1,    1,    1,    0,    0,   44,   -6,   -9,   -1,   -1,    0,   -1,    0,    9,   -9,   -1,    1,    0,    0,    0,    0,    3,   -1,    1,    0,    0,    0,    0,    0,    1,   -1,    0,    0,    1,    0,    0,    0,    0,   -1,    0,    0,    0,    0,    0,    0  },
+  {   55,   66,  -37,   -5,   -6,   -1,   -2,    0,   67,  -30,  -20,    4,   -2,    0,   -1,    0,  -31,  -19,   14,    4,    1,    1,    1,    0,   -6,    3,    5,   -2,    0,    0,    0,    0,   -7,   -1,    1,    0,   -1,    1,    1,    0,   -2,   -1,    1,    0,    0,    0,    0,    0  },
+  {    2,   86,  -21,  -13,   -4,   -2,   -1,   -1,  -88,    5,    6,    4,    5,    1,    1,    0,   14,   -5,    0,    3,    0,    0,    0,    0,   10,   -5,   -2,    0,   -1,    0,    0,    0,    6,   -5,    0,    1,    2,   -1,    0,    0,    1,   -1,    0,    0,    1,    0,    0,    0  },
+  {  -24,  -21,  -38,   19,    0,    4,   -1,    2,  -23,  -89,   31,   20,    2,    3,    1,    1,  -30,   26,   36,   -8,   -2,   -2,    0,   -1,   14,   18,   -7,   -9,   -1,   -1,    0,    0,    1,    3,   -2,   -1,    3,    2,   -2,   -1,    0,    1,    0,    0,    1,    1,   -1,    0  },
+  {    9,   20,   98,  -26,   -3,   -5,    0,   -2,   -9,  -26,   15,  -16,    2,    0,    1,    0,  -61,   -3,   -2,    3,    7,    1,    1,    0,   12,   16,   -6,   -1,    0,   -1,    0,    0,    2,    0,   -8,    1,    3,    1,   -1,    1,    0,   -1,   -2,    0,    1,    0,   -1,    0  },
+  {  -21,   -7,  -37,   10,    2,    2,   -1,    1,  -10,   69,   -5,   -7,   -2,   -2,    0,   -1,  -93,    2,   19,    0,    3,    0,    2,    0,   17,    4,    0,    0,   -1,    0,    0,    0,    5,   -4,   -2,    0,    4,   -2,    0,    1,    0,    0,    0,    0,    2,   -1,    0,    0  },
+  {  -10,  -25,    4,  -17,    8,   -2,    2,   -1,  -27,  -17,  -71,   25,    8,    2,    1,    1,   -4,  -66,   28,   36,   -5,    3,    0,    1,  -10,   20,   33,  -13,   -8,    0,    0,   -1,    3,    6,   -3,   -7,   -1,    3,    3,   -1,    1,    0,   -1,    0,    0,    1,    1,   -1  },
+  {    2,    5,   10,   64,   -9,    4,   -3,    1,   -4,    8,   62,    3,  -17,    1,   -2,    0,   -3,  -75,    5,  -14,    1,    4,    0,    1,  -36,    3,   18,   -4,    4,    0,    1,    0,    1,   14,   -2,   -8,   -2,    1,   -3,    0,    2,    2,   -1,   -2,    0,    1,   -1,    0  },
+  {  -11,  -15,  -28,  -97,    6,   -1,    4,   -1,    7,    3,   57,  -15,   10,   -2,    0,   -1,   -1,  -27,   13,    6,    1,   -1,    0,    0,  -34,   -6,    0,    3,    4,    1,    2,    0,   -2,    8,    1,    5,   -2,    0,   -3,    1,    1,    1,    0,    2,   -1,    0,   -1,    0  },
+  {    9,   13,   24,   -6,    7,   -2,    1,   -1,   16,   39,   20,   47,   -2,   -2,   -2,    0,   28,   23,   76,   -5,  -25,   -3,   -3,   -1,    6,   36,   -7,  -39,   -4,   -1,    0,   -1,    2,   -4,  -18,   -3,   -1,   -1,   -2,   -2,    1,   -2,   -2,    0,    0,    0,   -1,   -1  },
+  {   -7,   11,   12,    7,    2,   -1,    0,   -1,  -14,   -1,  -24,   11,    2,    0,    0,    0,  -20,   48,   11,  -13,   -5,   -2,    0,   -1, -105,  -19,   17,    0,    6,    2,    3,    0,  -14,    8,    8,    2,    1,    2,   -1,   -2,    3,    0,   -1,    0,    0,    0,    0,    0  },
+  {    0,    0,    7,   -6,   23,   -3,    3,   -1,    5,    1,   18,   96,   13,   -9,   -1,   -1,  -21,   -7,  -42,   14,  -24,   -3,    0,    0,   11,  -47,   -7,    3,   -5,    9,    1,    2,    0,   -1,   19,   -1,    1,    0,   -1,   -6,   -1,    1,    2,    0,    1,    0,    0,   -2  },
+  {   -2,   -6,   -1,  -10,    0,    1,    1,    0,   -7,   -2,  -28,   20,  -15,    4,   -3,    1,   -2,  -32,   -2,  -66,    3,    7,    1,    2,  -11,   13,  -70,    5,   43,   -2,    3,    0,    8,  -14,   -3,   43,   -1,    2,    7,   -1,    1,   -2,    1,    3,   -1,    1,    1,    0  },
+  {   -1,    6,  -16,    0,   24,   -3,    1,   -1,    2,    6,    6,   16,   18,   -7,    1,   -1,   -3,   11,  -63,    9,    4,   -5,    2,   -1,  -22,   94,   -4,   -6,   -4,   -4,    1,   -2,   10,   23,  -19,   -5,    0,   -6,   -4,    6,    3,   -2,    1,    1,    0,   -1,    0,    0  },
+  {   -5,   -6,   -3,  -19, -104,   18,   -4,    3,    0,    6,    0,   35,  -41,   20,   -2,    2,   -2,   10,  -18,   16,   21,    3,   -2,    0,   -2,   11,    6,  -10,    6,   -3,   -1,    0,   -1,    5,   -1,   -6,   -1,   -1,   -1,   -1,   -1,    0,    0,    0,    0,    0,    0,   -1  },
+  {   -1,   -2,    0,   23,   -9,    0,   -2,    0,    1,    1,    8,   -1,   29,    1,    1,    0,    3,   -6,   13,   76,   30,  -11,   -1,   -2,  -26,   -8,  -69,    7,   -9,   -7,    3,   -1,  -10,  -34,  -25,   13,   -1,    0,   11,    5,    1,   -1,    1,   -2,    0,    0,    2,    0  },
+  }
+ },
+ {  //1
+  {
+  {  110,  -49,   -3,   -4,   -1,   -1,    0,   -1,  -38,   -1,   10,    0,    2,    0,    1,    0,   -9,   13,    1,   -2,    0,    0,    0,    0,   -4,    2,   -3,    0,    0,    0,    0,    0,   -2,    2,    0,    1,   -1,    1,    0,    0,   -1,    1,    0,    0,   -1,    0,    0,    0  },
+  {  -43,  -19,   17,   -1,    3,    0,    1,    0,  -98,   46,   14,   -1,    2,    0,    1,    0,   26,   26,  -15,   -3,   -2,   -1,   -1,    0,   11,   -7,   -9,    2,    0,    0,    0,    0,    9,   -3,   -1,    2,    3,   -3,    0,    0,    4,   -1,    0,    0,    2,   -1,    0,    0  },
+  {  -19,   17,   -7,    3,   -2,    1,   -1,    0,  -32,  -59,   29,    3,    4,    0,    2,    0,  -72,   43,   34,   -9,    3,   -2,    1,   -1,   13,   36,  -18,  -10,    0,   -2,    0,   -1,    3,    0,  -12,    3,    6,    1,   -3,    2,    1,   -1,   -2,    0,    3,    1,   -1,    1  },
+  {  -35, -103,   39,    1,    7,    0,    2,    0,   38,  -13,   25,   -6,    1,   -1,    0,    0,   -1,    7,    6,   -7,    1,   -1,    0,    0,  -13,   14,    2,   -4,    2,   -1,    0,    0,   -2,   11,   -6,   -2,   -2,    4,   -3,    0,    0,    3,   -2,    0,   -1,    1,   -1,    0  },
+  {    9,    5,   -6,   -1,   -1,    0,   -1,    0,   42,    4,   21,  -11,    1,   -3,    1,   -1,   21,   70,  -32,  -21,    0,   -4,   -1,   -1,   34,  -26,  -57,   11,    4,    2,    0,    1,   -4,  -32,    5,   24,    1,   -6,   12,    4,   -3,   -2,    4,   -2,    0,   -1,    0,    0  },
+  {   -5,   -5,  -28,    9,   -3,    2,   -1,    1,  -20,  -78,   22,   16,    1,    3,    0,    1,   80,   -6,   25,   -5,   -4,   -1,   -1,    0,    6,  -24,    7,   -9,    0,    0,    0,    0,   -7,    3,   13,   -4,   -3,    5,    1,   -5,   -2,    3,    1,   -2,   -1,    2,   -1,   -2  },
+  {   14,   17,   27,  -12,    1,   -3,    1,   -1,    8,   19,  -13,    4,   -2,    1,   -1,    0,   48,   -1,   48,  -15,   -4,   -2,   -1,   -1,    1,   60,  -28,  -42,    5,   -6,    1,   -2,   11,  -11,  -51,   11,   -2,  -10,   -2,   13,    2,   -6,   -4,    4,   -2,   -3,    2,    2  },
+  {    7,   35,   17,   -4,   -1,    0,    0,    0,    3,    8,   54,  -17,    1,   -2,    1,   -1,   10,   14,  -11,  -34,    4,   -4,    1,   -1,  -80,   -7,   -6,    2,   15,    0,    3,    0,  -16,   46,    1,    3,    2,    7,  -24,    0,    2,   -2,   -5,    8,    1,   -1,   -2,    2  },
+  {  -13,  -27, -101,   24,   -8,    6,   -3,    2,   11,   43,    6,   28,   -6,    3,   -1,    1,   -3,   14,   21,  -12,   -7,   -2,   -1,   -1,  -23,   10,   -4,  -12,    3,    0,    1,    0,    2,    9,  -10,    0,    1,   -5,   -4,    4,    2,   -2,    2,    2,    0,   -2,    1,    0  },
+  {  -11,  -13,   -3,  -10,    3,   -1,    1,    0,  -19,  -19,  -37,    8,    4,    2,    0,    1,  -12,  -30,    3,   -9,    5,    0,    1,    0,  -56,   -9,  -47,    8,   21,    1,    4,    1,  -11,  -30,   10,   59,   -2,    8,   41,    8,    2,    5,    6,   -7,   -1,    3,    5,   -2  },
+  {   -4,  -10,  -24,  -11,    3,   -2,    0,   -1,   -6,  -37,  -45,  -17,    8,   -2,    2,   -1,   17,   14,  -58,   14,   15,    0,    2,    0,  -10,   34,   -7,   28,    4,   -1,    1,    0,   23,   34,  -31,    4,   10,  -22,  -30,   22,    4,  -15,    9,   20,    2,   -5,    9,    4  },
+  {   -2,    1,   13,  -17,    3,   -5,    1,   -2,    3,    0,  -55,   22,    6,    1,    1,    0,    8,   74,   21,   40,  -14,    0,   -2,    0,  -36,   -8,   11,  -13,  -23,    1,   -3,    0,  -36,    6,   16,  -14,    2,   19,   -4,  -12,   -1,    0,   -7,   -3,    0,    2,   -2,   -1  },
+  {    3,    1,    5,  -15,    1,   -2,    1,   -1,    7,    4,   -7,   29,   -1,    2,   -1,    1,    8,    3,   12,  -14,   -9,   -1,   -1,    0,    4,   29,  -15,   31,   10,    4,    1,    1,   61,   22,   55,   14,   13,    3,   -9,  -65,    1,  -11,  -21,   -7,    0,    0,   -1,    3  },
+  {   -4,   -8,   -1,  -50,    6,   -4,    2,   -2,   -1,    5,  -22,   20,    6,    1,    0,    0,  -16,  -15,   18,  -29,  -11,    2,   -2,    1,   40,  -45,  -19,  -22,   31,    2,    4,    1,  -25,   41,    0,   12,    9,    7,  -42,   12,   -3,  -14,    2,   28,    5,    1,    6,    2  },
+  {    5,   -1,   26,  102,  -13,   12,   -4,    4,   -4,   -2,  -40,   -7,  -23,    3,   -5,    1,   -1,    5,    8,  -23,    7,    2,    1,    1,   10,  -11,  -13,   -3,   12,   -3,    2,    0,   -9,   23,    4,    9,   14,    9,  -14,   -4,    0,  -12,   -7,    6,    3,    0,    6,    3  },
+  {   -5,   -6,  -27,  -22,  -12,    0,   -3,    0,   -5,    8,  -20,  -83,    0,    0,    0,    0,    9,    7,   24,  -20,   41,    3,    6,    1,   15,   20,   12,   11,   17,   -9,    1,   -2,  -26,   -1,   18,   -1,  -12,   32,    3,  -18,   -5,   10,  -25,   -5,   -2,    1,   -8,   10  },
+  },
+  {
+  {   80,  -49,    6,   -4,    1,   -1,    1,   -1,  -72,   36,    4,    0,    1,    0,    0,    0,   26,    0,  -12,    2,   -2,    1,   -1,    0,   -7,   -9,    6,    1,    0,    0,    0,    0,    3,    5,   -1,   -2,   -2,   -2,   -1,    1,    1,    1,    0,    0,   -1,   -1,    0,    0  },
+  {  -72,   -6,   17,    0,    3,    0,    1,    0,  -23,   58,  -21,    2,   -3,    1,   -1,    0,   55,  -46,   -1,    6,   -2,    1,   -1,    0,  -22,    7,   17,   -7,    2,   -1,    1,    0,    9,    5,  -12,    1,   -3,   -4,    4,    2,    4,    1,   -2,   -1,   -1,   -1,    1,    0  },
+  {  -50,   19,  -15,    4,   -1,    1,   -1,    1,  -58,   -2,   30,   -3,    4,   -1,    2,    0,    6,   57,  -34,    0,   -2,    0,   -1,    0,   34,  -48,   -2,   14,   -4,    3,   -1,    1,  -10,    7,   21,  -10,    6,    1,  -11,    0,   -1,   -1,    4,    2,    3,    0,   -2,   -1  },
+  {  -33,  -43,   28,   -7,    4,   -2,    2,   -1,  -38,   11,   -8,    4,    1,    1,    0,    0,  -55,   24,   26,   -5,    2,   -1,    1,    0,   15,   46,  -40,   -1,   -1,    0,   -1,    0,   17,  -38,    1,   17,   -3,   11,   15,  -11,    3,   -1,  -10,    1,    0,    1,    3,    2  },
+  {   10,   66,  -21,   -3,   -3,    0,   -1,    0,  -53,  -41,   -2,   16,   -1,    4,   -1,    1,   36,   -5,   41,  -20,    3,   -3,    1,   -1,  -30,   26,  -32,   -3,    7,   -2,    2,   -1,   15,   -8,    1,   17,   -1,   -2,    4,   -8,    2,    0,   -1,    3,    0,    0,    0,   -1  },
+  {   18,   14,   13,   -9,    2,   -2,    1,   -1,   34,   32,  -31,   12,   -5,    2,   -2,    1,   40,    4,   -4,   -9,   -3,   -2,   -1,   -1,   27,  -31,  -43,   19,   -2,    3,   -1,    1,    7,  -49,   52,   10,  -11,   22,    7,  -26,   -1,   -6,   -9,    6,   -2,    2,    4,   -2  },
+  {   21,   66,   -1,    9,   -4,    2,   -1,    1,  -21,   41,  -30,  -10,    0,   -2,    0,   -1,  -35,  -17,   -3,   26,   -6,    5,   -2,    2,   56,    3,   18,  -25,   -1,   -2,   -1,   -1,  -15,  -13,  -27,    9,    9,   -6,   20,    5,   -3,    2,   -6,   -9,    3,   -3,    1,    5  },
+  {    1,   -6,  -24,   17,   -5,    3,   -2,    1,   24,   10,   39,  -21,    5,   -4,    2,   -1,   33,   32,  -30,    4,   -3,   -1,   -1,    0,   -4,   13,  -16,  -10,    0,   -1,    0,    0,   24,  -26,  -37,   33,    5,  -32,   55,   -5,   -7,   22,  -14,  -22,    1,   -9,   -3,   13  },
+  {    9,   33,  -24,    1,    4,    0,    1,    0,    6,   50,   26,    1,  -10,    0,   -2,    0,  -27,    1,  -28,  -21,   16,   -5,    3,   -2,  -23,   36,   -2,   40,  -17,    4,   -3,    1,   43,  -13,    4,  -41,  -19,   -2,  -24,   17,   11,   -4,    8,    4,   -3,   -3,   -3,   -3  },
+  {   -7,   -9,  -32,   14,   -3,    3,   -1,    1,  -23,  -28,    0,   -5,   -1,    0,    0,    0,  -36,  -59,  -24,   14,    4,    2,    1,    1,  -23,  -26,   23,   26,   -3,    5,    0,    2,   10,  -26,   38,    7,  -12,   11,   42,  -22,   -5,   20,  -14,  -15,   -1,   -2,    1,    6  },
+  {    6,   30,   69,  -18,    5,   -4,    3,   -1,   -3,  -11,  -34,  -16,    9,   -4,    2,   -1,  -16,   35,  -35,   30,   -9,    3,   -2,    1,  -57,  -13,    6,    4,   -5,    5,   -1,    1,   28,   10,    4,    7,    0,  -15,    7,  -10,   -1,    7,   -2,    2,    1,   -3,    0,    0  },
+  {    1,   -8,   24,   -3,    7,   -2,    2,   -1,   -6,  -51,   -6,   -4,   -5,    0,   -1,    0,   38,   -1,    0,   25,    6,    2,    1,    1,   47,   20,   35,    1,  -27,    1,   -5,    0,   37,  -37,   -9,  -47,  -28,    5,    0,   18,    8,    6,    0,   -8,   -4,   -3,   -3,    1  },
+  {    4,   10,    4,   17,   -9,    4,   -2,    1,    5,   14,   32,  -15,    9,   -3,    2,   -1,    7,   13,   19,   15,   -8,    1,   -1,    0,    3,   25,   30,  -18,    1,   -2,    0,   -1,   11,   24,   22,  -11,   -3,   37,  -13,  -58,   -5,   12,  -63,   26,    9,  -15,   11,    8  },
+  {   -3,   -9,  -23,   10,  -10,    3,   -3,    1,   -5,  -14,  -16,  -27,   13,   -5,    2,   -1,   -1,  -13,  -30,   11,   -5,    2,   -1,    0,   -5,   -8,  -22,  -16,   10,    0,    1,    0,    0,  -29,  -27,    6,  -27,  -10,  -30,    9,   -3,  -10,   -7,   77,    9,  -13,   45,   -8  },
+  {    2,   11,   22,    2,    9,   -2,    2,    0,   -6,   -7,   20,  -32,   -3,   -4,    0,   -1,   13,   -5,  -28,    6,   18,   -4,    3,   -1,  -26,   27,  -14,    6,  -20,    0,   -2,    0,  -76,  -26,   -4,   -7,   12,   51,    5,   24,    7,  -17,  -16,  -12,   -5,    4,    2,   13  },
+  {    2,   -3,    8,   14,   -5,    3,   -1,    1,   -2,  -11,    5,  -18,    8,   -3,    2,   -1,   12,  -23,  -19,   22,    2,    0,    1,    0,   23,   41,   -7,   35,  -10,    4,   -1,    1,    5,    7,   23,    5,   69,  -38,   -8,  -32,  -15,  -31,   24,   11,    2,   18,   11,  -15  },
+  }
+ },
+ {  //2
+  {
+  { -121,   33,    4,    4,    1,    2,    0,    1,   -1,   -1,    1,    0,    0,    0,    0,    0,   24,   -5,   -1,   -1,    0,    0,    0,    0,    5,   -1,    0,    0,    0,    0,    0,    0,    3,   -1,    0,    0,    2,   -1,    0,    0,    2,   -1,    0,    0,    1,    0,    0,    0  },
+  {    0,   -2,    0,    0,    0,    0,    0,    0,  121,  -23,   -7,   -3,   -2,   -1,   -1,    0,   17,    1,   -2,    0,    0,    0,    0,    0,  -27,    4,    2,    0,    0,    0,    0,    0,  -12,    2,    1,    0,   -5,    1,    0,    0,   -1,    0,    0,    0,   -2,    0,    0,    0  },
+  {  -20,   19,   -5,    2,   -1,    1,    0,    0,   16,    3,   -2,    0,    0,    0,    0,    0, -120,   14,    8,    1,    3,    1,    1,    0,  -18,   -2,    3,    0,    1,    0,    0,    0,   17,   -3,   -1,    0,    6,   -1,   -1,    0,    2,    0,    0,    0,    2,    0,    0,    0  },
+  {   32,  108,  -43,   10,   -9,    3,   -3,    1,    4,   19,   -7,    1,   -1,    0,    0,    0,   11,  -30,    9,   -2,    1,   -1,    0,    0,    0,   -8,    2,    0,    0,    0,    0,    0,   -7,   -1,    2,    0,   -3,   -1,    1,    0,   -2,   -2,    1,    0,    0,    0,    0,    0  },
+  {   -3,    0,   -1,    0,    0,    0,    0,    0,  -29,   11,   -2,    1,    0,    0,    0,    0,   12,    7,   -1,    0,    0,    0,    0,    0, -117,   12,    9,    1,    3,    0,    1,    0,  -32,   -3,    3,    0,   12,   -2,   -1,    0,    7,    0,    0,    0,    1,    0,    0,    0  },
+  {   -4,  -12,   -3,    1,   -1,    0,    0,    0,   19,  105,  -31,    7,   -6,    1,   -2,    0,    9,   46,   -6,    0,    0,    0,    0,    0,    8,  -29,    9,   -3,    1,    0,    0,    0,   -3,  -19,    3,    0,   -4,   -6,    1,    0,    0,    0,    0,    0,    0,   -1,    0,    0  },
+  {    7,    1,    2,    0,    0,    0,    0,    0,    4,    3,   -2,    0,    0,    0,    0,    0,   22,   -8,    1,   -1,    0,    0,    0,    0,  -28,   -9,    4,    0,    1,    0,    0,    0,  117,  -10,   -8,    0,   32,    1,   -4,    0,    3,    1,   -1,    0,   -3,    1,    0,    0  },
+  {   -8,  -31,   14,   -4,    3,   -1,    1,    0,    9,   43,    0,    1,   -1,    0,    0,    0,  -13, -105,   17,   -2,    2,    0,    0,    0,   -8,  -25,   -3,    0,    0,    0,    0,    0,   -7,   32,   -5,    1,   -1,    4,    0,    0,    2,   -1,    0,    0,    1,    0,   -1,    0  },
+  {  -15,  -43, -100,   23,  -12,    6,   -4,    2,   -6,  -17,  -48,   10,   -5,    2,   -1,    1,    1,   -5,   19,   -6,    3,   -1,    1,    0,    2,    7,   15,   -3,    1,   -1,    0,    0,    4,   10,    5,   -1,    0,    3,    1,    0,   -2,    1,    2,    0,   -1,    1,    1,    0  },
+  {   -3,    1,    2,    0,    0,    0,    0,    0,   -6,    3,    1,    0,    0,    0,    0,    0,    0,    3,   -2,    0,    0,    0,    0,    0,  -20,    8,   -2,    0,    0,    0,    0,    0,   30,   13,   -3,    0, -116,    6,   10,    0,  -35,   -5,    4,    0,   -3,   -1,    0,    0  },
+  {   -1,   -6,   -3,    2,   -1,    0,    0,    0,   -6,  -35,    9,    0,    2,    0,    0,    0,    1,   -6,   11,   -2,    2,    0,    1,    0,   -9, -100,   17,   -1,    1,    0,    0,    0,  -10,  -63,    1,    2,  -17,    3,   -4,    0,   -1,    9,   -1,    0,    3,    4,   -1,    0  },
+  {   -5,  -14,  -48,    2,   -5,    1,   -2,    0,   10,   24,   99,  -17,   10,   -4,    3,   -1,    4,   14,   32,    0,    2,    0,    1,    0,   -4,    0,  -39,    6,   -4,    1,   -1,    0,    2,   -3,   -4,    0,    2,   -2,   -2,    0,    0,    0,   -1,    0,    0,   -1,   -1,    0  },
+  {   -2,    0,    2,    0,    0,    0,    0,    0,   -2,    0,    1,    0,    0,    0,    0,    0,   -1,   -1,    1,   -1,    0,    0,    0,    0,   -1,   -4,    2,    0,    0,    0,    0,    0,   -8,   -2,   -1,    1,   30,    4,   -4,    1, -102,    4,    8,   -1,  -69,   -2,    6,   -1  },
+  {   -2,  -10,   -4,    0,    0,    0,    0,    0,    3,   11,   -1,   -1,    0,    0,    0,    0,   -6,  -40,  -15,    6,   -2,    1,    0,    0,    5,   57,   -6,    2,    0,    0,    0,    0,    1,  -95,   18,   -6,  -10,  -34,   -2,    0,   -4,   17,   -2,    0,    0,    2,    1,    0  },
+  {   -2,   -3,  -25,   -2,   -3,    0,   -1,    0,   -1,   -3,   -1,    4,   -2,    2,    0,    1,   -7,   -8,  -97,   17,   -9,    3,   -3,    1,   -8,  -26,  -61,   -1,   -3,   -1,   -1,   -1,    2,   10,   24,   -7,    5,    9,   19,   -1,    0,    1,    4,    0,   -2,    0,    1,    0  },
+  {    4,   -4,   28,  103,  -42,   24,   -9,    7,    1,    2,    4,    0,    3,   -1,    0,    0,   -1,    0,   -9,  -42,   17,   -9,    3,   -2,   -1,    1,  -14,    6,   -4,    2,   -1,    0,   -1,   -2,   -4,    4,    0,    3,    1,   -1,    0,    2,    0,   -2,    2,    0,    0,    0  },
+  },
+  {
+  {   87,  -41,    3,   -4,    1,   -1,    0,   -1,  -73,   28,    2,    1,    1,    1,    0,    0,   30,   -5,   -6,    1,   -1,    0,    0,    0,   -8,   -3,    3,    0,    0,    0,    0,    0,    3,    2,   -1,    0,   -2,   -1,    0,    0,    1,    1,    0,    0,   -1,    0,    0,    0  },
+  {  -75,    4,    7,    0,    2,    0,    1,    0,  -41,   36,   -7,    3,   -1,    1,    0,    0,   72,  -29,   -2,    0,   -1,    0,   -1,    0,  -37,    6,    7,   -2,    1,    0,    0,    0,   12,    3,   -4,    0,   -3,   -2,    1,    0,    4,    0,    0,    0,   -1,    0,    0,    0  },
+  {   26,  -44,   22,   -6,    4,   -2,    1,   -1,   77,   24,  -22,    2,   -4,    0,   -1,    0,    7,  -38,   10,    0,    1,    0,    0,    0,  -51,   27,    4,   -3,    2,   -1,    1,    0,   31,   -5,   -8,    3,  -14,    0,    5,   -1,    6,    1,   -3,    0,   -4,   -1,    1,    0  },
+  {  -39,  -68,   37,   -7,    6,   -2,    2,    0,   -9,   56,  -21,    1,   -2,    0,   -1,    0,  -45,    4,   -3,    6,   -1,    2,    0,    1,   49,  -13,    3,   -3,   -1,    0,    0,    0,  -19,    2,    0,    0,    5,    1,    1,    0,   -2,    0,   -1,    0,    1,    0,    0,    0  },
+  {   10,  -20,    2,    0,    1,    0,    0,    0,   50,   -1,    8,   -5,    1,   -1,    0,    0,   66,   17,  -24,    4,   -3,    1,   -1,    0,   13,  -49,   15,    1,    0,    0,    0,    0,  -53,   34,    6,   -5,   30,   -7,  -11,    3,  -11,   -2,    5,    1,    4,    2,   -1,   -1  },
+  {  -21,  -45,    8,   -2,    3,   -1,    1,    0,   -7,  -30,   26,   -8,    3,   -1,    1,   -1,   -9,   69,  -33,    5,   -2,    0,   -1,    0,  -44,  -31,   10,    7,   -2,    2,    0,    1,   49,    7,    2,   -6,  -23,   -3,   -2,    2,    9,    4,    0,    0,   -2,   -1,   -1,    0  },
+  {   -4,   -2,  -55,   28,   -8,    5,   -3,    2,   -2,   37,   43,  -19,    1,   -2,    1,   -1,  -47,  -34,  -27,    5,    4,   -1,    1,    0,  -39,   -2,   27,    4,   -2,    1,    0,    0,  -11,   32,   -8,   -7,   27,  -12,   -6,    6,  -13,    0,    4,   -3,    3,   -1,   -2,    1  },
+  {    2,   19,   47,  -23,    6,   -4,    2,   -1,  -23,  -22,  -44,   17,   -2,    2,   -1,    0,  -33,    3,   22,   -2,   -4,    1,   -1,    0,  -58,  -17,    6,   -6,    7,   -1,    1,    0,  -23,   40,   -2,    5,   43,  -11,   -8,   -1,  -18,   -4,    5,    2,    4,    3,    0,   -1  },
+  {  -19,  -62,   -9,    3,    0,    0,    0,    0,  -12,  -56,   27,   -7,    3,   -1,    1,    0,    7,   -8,   16,   -6,    4,   -2,    1,   -1,  -15,   54,  -23,    2,   -1,    0,    0,    0,  -42,  -25,    4,    6,   34,    8,    2,   -2,  -15,   -1,    0,   -1,    3,    2,    0,    1  },
+  {    1,    9,   -5,    0,   -1,    0,    0,    0,    0,   22,   -1,    2,    0,    1,    0,    0,  -13,   17,    0,   -2,    0,   -1,    0,    0,  -46,  -10,  -10,    4,   -1,    1,    0,    0,  -80,  -27,   20,   -4,  -66,   23,   -2,   -2,   20,   -3,   -2,    3,  -14,    2,    3,   -1  },
+  {    5,   17,   -9,    0,   -2,    1,    0,    0,   13,   54,   -2,    7,   -1,    1,    0,    0,    4,   51,   -3,   -6,   -1,   -1,    0,    0,  -20,    6,  -34,    9,   -2,    2,   -1,    0,   16,  -52,   28,    1,   59,   15,   -8,   -5,  -28,   -7,    2,    2,   10,    3,    0,   -1  },
+  {    7,   27,   56,   -2,   10,   -3,    3,   -1,   -2,   -6,    8,  -28,    3,   -4,    1,   -1,   -1,   -4,  -68,   35,   -5,    5,   -2,    1,    0,   35,   43,   -4,   -6,    1,   -1,    0,  -14,  -38,  -12,  -10,    9,    5,    7,    6,   -9,    7,   -4,   -3,    4,   -4,    0,    3  },
+  {    0,    0,   19,   -4,    3,   -2,    2,   -1,   -3,  -13,   10,   -4,    1,    0,    0,    0,   -6,  -37,  -18,   -5,    2,   -2,    1,   -1,    6,   -6,   -7,   25,   -6,    4,   -1,    1,   16,   10,   55,  -24,   15,   46,  -52,    1,   35,  -43,   10,   12,  -23,   13,    5,   -8  },
+  {   -3,    0,  -27,  -80,   40,  -16,    6,   -4,    4,    3,   31,   61,  -22,    7,   -1,    1,   -4,   -7,  -26,   -6,  -10,    6,   -4,    1,    3,    8,   14,  -18,   15,   -5,    2,   -1,   -2,   -4,   -1,   13,    0,    2,   -4,   -3,    3,   -1,    2,    1,   -2,    0,   -2,   -1  },
+  {    1,    2,   -8,    6,   -1,    1,    0,    0,    2,    8,   -5,   -1,    0,    0,    0,    0,    1,   24,    3,    5,   -1,    1,    0,    0,   -3,   12,    6,  -10,    1,   -1,    0,    0,   -9,   -1,  -25,   10,   45,  -11,   18,    2,   86,    1,  -13,   -4,  -65,   -6,    7,    2  },
+  {   -4,  -18,  -57,    8,   -8,    1,   -3,    0,   -5,  -20,  -69,    7,   -6,    2,   -2,    1,    1,    4,    0,   33,   -7,    5,   -2,    1,    0,   -9,   53,  -22,    3,   -1,    0,    0,    4,  -27,   -2,   -9,    5,   36,  -13,    5,   -7,  -17,    1,    2,    4,    6,    4,   -1  },
+  }
+ },
+ {  //3
+  {
+  { -115,   37,    9,    2,    2,    1,    1,    0,   10,  -29,    8,    0,    1,    0,    1,    0,   23,   -8,   -8,    1,   -1,    0,    0,    0,    3,    3,   -2,   -1,    0,    0,    0,    0,    4,    0,    0,   -1,    1,    1,    0,    0,    2,    0,    0,    0,    0,    0,    0,    0  },
+  {   15,   51,  -18,    0,   -3,    0,   -1,    0,  -95,    7,   34,   -3,    5,   -1,    2,    0,   23,  -47,    1,    6,    0,    1,    0,    1,    8,    5,  -12,    0,   -1,    0,    0,    0,    3,   -3,    1,   -1,    2,    1,   -2,    0,    1,   -1,    0,    0,    1,    1,   -1,    0  },
+  {   29,  -22,   16,   -6,    3,   -2,    1,   -1,   -4,  -80,   12,   15,    0,    3,    0,    1,   45,    7,  -59,    7,   -2,    1,   -1,    0,  -15,   41,   -3,  -16,    2,   -3,    0,   -1,    1,    0,    7,   -2,   -3,    6,    1,   -2,    0,    0,    1,    0,   -1,    2,    0,   -1  },
+  {  -36,  -98,   25,    5,    4,    1,    2,    1,  -59,   11,  -17,    1,    1,    1,    0,    0,    6,  -13,    7,   -3,    0,    0,    0,    0,   14,   -4,  -14,    3,   -1,    0,    0,    0,    2,    8,   -3,   -5,    2,    0,    0,    0,    0,    3,    0,   -1,    1,    0,    0,    0  },
+  {   -6,   18,    3,   -3,   -1,    0,    0,    0,  -50,   -5,  -38,   12,    0,    2,    0,    1,    3,   67,   -7,  -40,    3,   -6,    1,   -3,  -12,  -13,   65,   -3,  -10,    0,   -1,    0,    9,  -20,   -5,   22,   -2,    0,    0,   -1,    2,   -3,   -2,    3,   -1,    0,    1,    0  },
+  {    4,   15,   52,  -13,    5,   -3,    2,   -1,  -17,  -45,   16,   24,   -2,    4,   -1,    2,  -87,   -8,  -14,    7,    8,    1,    2,    0,   23,  -35,   -6,   -3,    1,    1,    0,    0,    2,    5,  -17,    0,    3,   -1,   -1,   -5,    0,    1,   -4,    0,    1,    0,    0,   -2  },
+  {  -20,   -7,  -43,    4,    0,    1,   -1,    1,   -7,   35,    0,   12,   -4,    1,   -1,    0,  -51,   -2,  -57,    5,   15,    0,    4,    0,    7,   39,    5,  -55,    1,   -7,    1,   -3,    1,  -10,   41,    2,    4,   -3,   -2,    3,   -1,   -2,    7,    1,    1,   -1,   -1,    0  },
+  {    4,   29,    1,   26,   -5,    4,   -2,    1,  -17,   -7,  -73,    6,    6,    2,    1,    1,   -5,   21,   -3,    5,   -1,   -3,    0,   -1,  -11,    2,  -52,   -3,   27,   -2,    5,    0,    0,   27,    8,  -58,    2,   -5,   25,    3,    0,    3,    0,   -5,    0,   -2,    7,    0  },
+  {   12,   13,   10,    2,   -1,    3,   -1,    1,   17,   -2,  -46,   12,    7,    0,    2,    0,   16,  -45,   -9,  -53,    6,    1,    1,    0,   70,   16,    8,   -4,  -37,    1,   -7,    0,  -12,   29,    3,   21,    4,    0,    5,   -1,   -3,    4,    1,    4,    2,    0,    1,    0  },
+  {    5,   20,   90,  -17,    4,   -3,    2,   -1,    6,   66,    8,   28,   -7,    3,   -1,    1,   29,    5,  -19,   12,    9,   -1,    1,    0,  -10,   14,   -1,  -13,    7,    0,    1,    0,    0,   -6,   13,   -4,    0,   -4,    1,    5,    0,   -1,   -1,    1,    0,   -1,    0,    0  },
+  {   -3,   -4,  -34,  -12,    2,   -1,   -1,    0,    5,   25,   11,   43,  -10,    4,   -2,    1,   23,   20,  -40,   12,   21,   -3,    4,   -1,   25,  -28,  -10,    5,    8,    6,    0,    2,   -4,   21,  -64,   -8,   -5,   19,   10,  -48,    3,   -1,   10,   -3,    0,    4,    3,   -6  },
+  {   -1,   -3,    2,   19,   -2,    4,   -1,    2,    9,    3,  -35,   22,   11,    1,    2,    0,   -7,  -65,  -19,  -22,   11,    4,    2,    1,  -75,  -18,    3,   -1,  -10,    2,    0,    1,    2,  -35,  -27,    4,    1,    8,  -17,  -19,    3,    0,    3,   -6,    0,    2,   -1,   -2  },
+  {   10,   -4,   -6,   12,    5,    1,    1,    0,   11,   -9,  -12,   -2,   -7,    0,   -1,    0,   33,  -10,   -4,   18,   18,   -4,    4,   -1,   28,  -72,    1,  -49,   15,    2,    2,    1,   56,  -23,   22,   -1,    4,   -1,  -15,   26,    6,    4,  -10,    0,    0,    2,   -3,    2  },
+  {    4,    6,   14,   53,   -4,    4,    0,    2,    0,   -1,  -20,  -13,    3,    2,   -1,    1,   -3,    1,   -5,   35,  -16,   -6,   -1,   -2,   46,   29,   13,   21,   37,   -5,    4,   -1,  -10,  -53,  -18,    8,    9,   12,  -41,  -25,   -2,    2,   13,  -16,    4,    1,   -5,    1  },
+  {    2,    9,   13,   37,   19,    6,    2,    2,   -9,   -3,   -9,  -28,  -20,   -4,   -3,   -1,    1,   18,    9,   28,   24,    6,    2,    2,  -20,   -5,  -25,  -33,  -36,    9,   -2,    2,  -13,   42,    1,   57,  -22,   -2,  -25,  -28,    5,    6,   19,  -12,   -5,   -3,   -2,    4  },
+  {    3,   -3,   12,   84,  -12,    8,   -2,    3,    6,   13,   50,   -1,   45,    1,    7,    0,   -2,   18,  -22,  -37,  -13,   14,    0,    3,    1,  -12,   -3,    2,  -15,   -8,    1,   -1,   19,   14,   -4,  -12,   -4,    5,   17,    8,    2,   -4,   -4,    4,   -2,    2,    1,    0  },
+  },
+  {
+  {  109,  -26,   -8,   -3,   -2,   -1,   -1,    0,  -50,   28,    2,    1,    0,    0,    0,    0,  -18,   -8,    6,    0,    1,    0,    1,    0,    6,   -2,   -3,    0,    0,    0,    0,    0,   -3,    2,    1,   -1,    0,    0,    0,    0,   -2,    0,    0,    0,    0,    0,    0,    0  },
+  {  -39,   31,   -5,    2,   -1,    1,    0,    0,  -95,    6,   18,    0,    4,    0,    1,    0,   32,  -49,    5,    1,    1,    0,    0,    0,   27,   -1,  -14,    2,   -2,    1,   -1,    0,    3,    5,   -3,   -2,    4,    1,   -1,   -1,    2,    0,    0,    0,    2,    0,    0,    0  },
+  {   29,   -3,   -2,   -2,    0,    0,    0,    0,    0,  -41,    9,    0,    2,    0,    1,    0,   86,    4,  -33,    2,   -6,    1,   -2,    0,  -32,   58,    1,   -7,    0,   -2,    0,   -1,  -14,   -8,   20,    0,   -2,   -3,    0,    4,   -1,   -1,    0,    0,   -1,    1,    0,    0  },
+  {   18,   96,  -23,    2,   -5,    1,   -2,    0,  -10,    6,   10,   -2,    1,   -1,    1,    0,  -14,   26,    2,   -4,    1,   -1,    0,    0,  -43,   -9,   35,   -2,    4,   -1,    1,    0,   14,  -40,    1,   10,    2,    1,  -10,    1,    2,   -4,   -1,   -1,    0,    0,   -1,    0  },
+  {  -29,  -60,   16,   -2,    3,   -1,    1,    0,  -52,    9,  -17,    5,   -2,    1,   -1,    1,   13,   56,   -2,   -9,    0,   -2,    0,   -1,  -34,  -18,   41,    0,    3,    0,    1,    0,   19,  -36,  -10,   13,    3,    6,  -14,   -1,    3,    1,   -1,   -3,    1,    1,   -1,   -1  },
+  {  -23,   -5,  -15,    5,   -2,    1,   -1,    1,    2,   79,  -13,   -4,   -2,   -1,   -1,    0,   -9,    1,    5,   -1,    1,    0,    0,    0,   -4,   49,    2,  -14,    1,   -3,    0,   -1,  -31,  -14,   56,   -1,   13,  -37,   -4,   20,   -2,    2,  -10,    0,    2,   -4,    0,   -1  },
+  {   -7,   -3,   12,   -3,    3,   -1,    1,    0,  -31,  -62,    8,    7,    0,    2,    0,    1,  -75,    9,  -45,    5,   -1,    1,   -1,    0,   14,   35,    0,  -23,    2,   -5,    1,   -2,    1,   -8,   32,   -1,    7,  -12,   -4,   10,    0,    2,   -6,   -1,    2,    0,    0,   -2  },
+  {    1,  -26,    5,    0,    1,    0,    1,    0,   24,   -3,   43,   -6,    4,   -2,    1,   -1,   -7,  -64,    9,   14,    0,    3,    0,    1,  -12,   -4,    5,    3,   -1,    1,    0,    0,    8,  -59,   -3,   26,   14,    6,  -58,    6,   -5,   17,   -7,  -18,    3,    3,   -1,   -5  },
+  {   11,   14,    6,   -3,    1,   -1,    1,    0,   10,   -7,   -9,    3,   -2,    1,   -1,    0,   22,   21,    1,  -21,    2,   -4,    1,   -2,   92,    1,   53,    0,   -9,    1,   -2,    0,  -21,  -11,    1,   40,   -5,   -4,  -24,    5,   -4,    5,   -6,   -5,    0,    0,    0,   -3  },
+  {  -10,  -11,  -47,    3,   -4,    1,   -1,    0,    5,   28,   11,   -2,   -1,    0,    0,    0,  -12,   -2,  -38,    2,    0,    1,    0,    0,   16,   38,   11,  -16,   -1,   -3,    0,   -2,   12,   -9,  -22,    7,   -8,   60,    4,  -36,   -6,  -15,   54,    7,    3,   -7,   -8,   14  },
+  {   -8,  -24,  -99,   11,  -10,    3,   -4,    1,   -5,  -36,   19,  -26,    4,   -5,    1,   -2,    0,   25,   41,    5,   -3,    1,    0,    0,   10,   -5,   -7,   12,    2,    1,    0,    0,   -1,    1,    9,   -3,   -3,  -14,   -3,   12,    2,    4,  -13,   -2,   -1,    3,    2,   -4  },
+  {   -5,    1,   -1,    0,    1,    0,    0,    0,  -10,  -14,   -6,    8,    0,    1,    0,    0,  -17,   -2,    7,   -5,    3,   -1,    0,    0,  -16,   13,    3,   31,   -1,    6,    0,    2,  -93,  -15,  -46,   -3,   23,  -19,    0,  -47,    8,    4,    8,    3,    2,    3,    0,    0  },
+  {    1,   12,  -20,   21,   -4,    5,   -2,    2,   -5,   -2,  -75,    9,   -1,    2,   -1,    1,   -1,   -2,  -16,   -4,    0,   -1,    0,    0,   -7,    7,  -31,    0,    3,    0,    0,    0,    4,   11,  -12,    4,  -12,   14,  -50,   -1,   -8,   32,   -4,  -54,    2,    0,   30,  -15  },
+  {    2,   -9,  -18,    8,   -3,    3,   -1,    1,    3,  -25,  -62,   -6,    0,   -2,    0,   -1,   -6,  -61,   14,  -51,    2,   -6,    0,   -2,  -19,    0,   40,   -7,  -17,    0,   -3,    0,   13,   -4,   11,    9,   17,    0,   24,    5,    1,  -12,    4,   28,    0,    0,  -15,    8  },
+  {    4,    9,   39,   18,    0,    2,    0,    1,   -6,  -16,  -22,  -37,    5,   -5,    1,   -2,   -5,   15,   63,    9,  -16,    0,   -3,    0,   18,   42,  -18,   27,   15,    1,    3,    1,   12,  -34,    9,  -24,    4,   28,   -2,    4,  -11,   -4,   30,    2,    5,  -13,   -4,   18  },
+  {   -7,   -2,   15,   -6,    1,   -1,    1,   -1,  -11,   -3,   22,  -14,    0,   -2,    1,   -1,  -18,   -7,   30,   -9,   -4,    0,   -1,    0,  -35,   23,   23,   10,  -17,    1,   -3,    0,  -19,   53,    6,   48,  -65,   12,  -12,   11,   -8,  -16,   10,  -21,   -2,  -12,    6,    2  },
+  }
+ }
+};
+
+const int8_t g_lfnst4x4[ 4 ][ 2 ][ 16 ][ 16 ] = {  // 4 groups (tagged //0..//3 below) x 2 matrices per group x 16 rows x 16 int8 entries; presumably the 4x4 LFNST kernels - TODO confirm against the code that indexes this table
+ {  //0
+  {
+  {  108,  -44,  -15,    1,  -44,   19,    7,   -1,  -11,    6,    2,   -1,    0,   -1,   -1,    0  },
+  {  -40,  -97,   56,   12,  -11,   29,  -12,   -3,   18,   18,  -15,   -3,   -1,   -3,    2,    1  },
+  {   25,  -31,   -1,    7,  100,  -16,  -29,    1,  -54,   21,   14,   -4,   -7,    2,    4,    0  },
+  {  -32,  -39,  -92,   51,   -6,  -16,   36,   -8,    3,   22,   18,  -15,    4,    1,   -5,    2  },
+  {    8,   -9,   33,   -8,  -16, -102,   36,   23,   -4,   38,  -27,   -5,    5,   16,   -8,   -6  },
+  {  -25,    5,   16,   -3,  -38,   14,   11,   -3,  -97,    7,   26,    1,   55,  -10,  -19,    3  },
+  {    8,    9,   16,    1,   37,   36,   94,  -38,   -7,    3,  -47,   11,   -6,  -13,  -17,   10  },
+  {    2,   34,   -5,    1,   -7,   24,  -25,   -3,    8,   99,  -28,  -29,    6,  -43,   21,   11  },
+  {  -16,  -27,  -39, -109,    6,   10,   16,   24,    3,   19,   10,   24,   -4,   -7,   -2,   -3  },
+  {   -9,  -10,  -34,    4,   -9,   -5,  -29,    5,  -33,  -26,  -96,   33,   14,    4,   39,  -14  },
+  {  -13,    1,    4,   -9,  -30,  -17,   -3,  -64,  -35,   11,   17,   19,  -86,    6,   36,   14  },
+  {    8,   -7,   -5,  -15,    7,  -30,  -28,  -87,   31,    4,    4,   33,   61,   -5,  -17,   22  },
+  {   -2,   13,   -6,   -4,   -2,   28,  -13,  -14,   -3,   37,  -15,   -3,   -2,  107,  -36,  -24  },
+  {    4,    9,   11,   31,    4,    9,   16,   19,   12,   33,   32,   94,   12,    0,   34,  -45  },
+  {    2,   -2,    8,  -16,    8,    5,   28,  -17,    6,   -7,   18,  -45,   40,   36,   97,   -8  },
+  {    0,   -2,    0,  -10,   -1,   -7,   -3,  -35,   -1,   -7,   -2,  -32,   -6,  -33,  -16, -112  },
+  },
+  {
+  {  119,  -30,  -22,   -3,  -23,   -2,    3,    2,  -16,    3,    6,    0,   -3,    2,    1,    0  },
+  {  -27, -101,   31,   17,  -47,    2,   22,    3,   19,   30,   -7,   -9,    5,    3,   -5,   -1  },
+  {    0,   58,   22,  -15, -102,    2,   38,    2,   10,  -13,   -5,    4,   14,   -1,   -9,    0  },
+  {   23,    4,   66,  -11,   22,   89,   -2,  -26,   13,   -8,  -38,   -1,   -9,  -20,   -2,    8  },
+  {  -19,   -5,  -89,    2,  -26,   76,  -11,  -17,   20,   13,   18,   -4,    1,  -15,    3,    5  },
+  {  -10,   -1,   -1,    6,   23,   25,   87,   -7,  -74,    4,   39,   -5,    0,   -1,  -20,   -1  },
+  {  -17,  -28,   12,   -8,  -32,   14,  -53,   -6,  -68,  -67,   17,   29,    2,    6,   25,    4  },
+  {    1,  -24,  -23,    1,   17,   -7,   52,    9,   50,  -92,  -15,   27,  -15,  -10,   -6,    3  },
+  {   -6,  -17,   -2, -111,    7,  -17,    8,  -42,    9,   18,   16,   25,   -4,    2,   -1,   11  },
+  {    9,    5,   35,    0,    6,   21,   -9,   34,   44,   -3,  102,   11,   -7,   13,   11,  -20  },
+  {    4,   -5,   -5,  -10,   15,   19,   -2,    6,    6,  -12,  -13,    6,   95,   69,  -29,  -24  },
+  {   -6,   -4,   -9,  -39,    1,   22,    0,  102,  -19,   19,  -32,   30,  -16,  -14,   -8,  -23  },
+  {    4,   -4,    7,    8,    4,  -13,  -18,    5,    0,    0,   21,   22,   58,  -88,  -54,   28  },
+  {   -4,   -7,    0,  -24,   -7,    0,  -25,    3,   -3,  -30,    8,  -76,  -34,    4,  -80,  -26  },
+  {    0,    6,    0,   30,   -6,    1,  -13,  -23,    1,   20,   -2,   80,  -44,   37,  -68,    1  },
+  {    0,    0,   -1,    5,   -1,   -7,    1,  -34,   -2,    3,   -6,   19,    5,  -38,   11, -115  },
+  }
+ },
+ {  //1
+  {
+  { -111,   39,    4,    3,   44,   11,  -12,   -1,    7,  -16,   -5,    2,    3,   -1,    4,    2  },
+  {  -47,  -27,   15,   -1,  -92,   43,   20,   -2,   20,   39,  -16,   -5,   10,   -5,  -13,    2  },
+  {  -35,  -23,    4,    4,  -17,  -72,   32,    6,  -59,   18,   50,   -6,    0,   40,    0,  -13  },
+  {   13,   93,  -27,   -4,  -48,   13,  -34,    4,  -52,   11,    1,   10,    3,   16,   -3,    1  },
+  {  -11,  -27,    1,    2,  -47,   -4,  -36,   10,   -2,  -85,   14,   29,  -20,   -2,   57,    4  },
+  {    0,  -35,   32,   -2,   26,   60,   -3,  -17,  -82,    1,  -30,    0,  -37,   21,    3,   12  },
+  {  -17,  -46,  -92,   14,    7,  -10,  -39,   29,  -17,   27,  -28,   17,    1,  -15,  -13,   17  },
+  {    4,  -10,  -23,    4,   16,   58,  -17,   26,   30,   21,   67,    2,  -13,   59,   13,  -40  },
+  {    5,  -20,   32,   -5,    8,   -3,  -46,   -7,   -4,    2,  -15,   24,  100,   44,    0,    5  },
+  {   -4,   -1,   38,  -18,   -7,  -42,  -63,   -6,   33,   34,  -23,   15,  -65,   33,  -20,    2  },
+  {   -2,  -10,   35,  -19,    5,    8,  -44,   14,  -25,   25,   58,   17,    7,  -84,  -16,  -18  },
+  {    5,   13,   18,   34,   11,   -4,   18,   18,    5,   58,   -3,   42,   -2,  -10,   85,   38  },
+  {   -5,   -7,  -34,  -83,    2,   -1,   -4,  -73,    4,   20,   15,  -12,    4,   -3,   44,   12  },
+  {    0,    4,   -2,  -60,    5,    9,   42,   34,    5,  -14,    9,   80,   -5,   13,  -38,   37  },
+  {   -1,    2,    7,  -57,    3,   -7,    9,   68,   -9,    6,  -49,  -20,    6,   -4,   36,  -64  },
+  {   -1,    0,  -12,   23,    1,   -4,   17,  -53,   -3,    4,  -21,   72,   -4,   -8,   -3,  -83  },
+  },
+  {
+  {   88,  -55,    6,   -3,  -66,   27,    9,   -2,   11,   11,  -13,    1,   -2,   -7,    1,    2  },
+  {  -58,  -20,   27,   -2,  -27,   75,  -29,    0,   47,  -42,  -11,   11,   -9,   -3,   19,   -4  },
+  {  -51,   23,  -22,    5,  -63,    3,   37,   -5,    1,   64,  -35,   -4,   29,  -31,  -11,   13  },
+  {  -27,  -76,   49,   -2,   40,   14,    9,  -17,  -56,   36,  -25,    6,   14,    3,   -6,    8  },
+  {   19,   -4,  -36,   22,   52,    7,   36,  -23,   28,  -17,  -64,   15,   -5,  -44,   48,    9  },
+  {   29,   50,   13,  -10,    1,   34,  -59,    1,  -51,    4,  -16,   30,   52,  -33,   24,   -5  },
+  {  -12,  -21,  -74,   43,  -13,   39,   18,   -5,  -58,  -35,   27,   -5,   19,   26,    6,   -5  },
+  {   19,   38,  -10,   -5,   28,   66,    0,   -5,   -4,   19,  -30,  -26,  -40,   28,  -60,   37  },
+  {   -6,   27,   18,   -5,  -37,  -18,   12,  -25,  -44,  -10,  -38,   37,  -66,   45,   40,   -7  },
+  {  -13,  -28,  -45,  -39,    0,   -5,  -39,   69,  -23,   16,  -12,  -18,  -50,  -31,   24,   13  },
+  {   -1,    8,   24,  -51,  -15,   -9,   44,   10,  -28,  -70,  -12,  -39,   24,  -18,   -4,   51  },
+  {   -8,  -22,  -17,   33,  -18,  -45,  -57,  -27,    0,  -31,  -30,   29,   -2,  -13,  -53,   49  },
+  {    1,   12,   32,   51,   -8,    8,   -2,  -31,  -22,    4,   46,  -39,  -49,  -67,   14,   17  },
+  {    4,    5,   24,   60,   -5,  -14,  -23,   38,    9,    8,  -34,  -59,   24,   47,   42,   28  },
+  {   -1,   -5,  -20,  -34,    4,    4,  -15,  -46,   18,   31,   42,   10,   10,   27,   49,   78  },
+  {   -3,   -7,  -22,  -34,   -5,  -11,  -36,  -69,   -1,   -3,  -25,  -73,    5,    4,    4,  -49  },
+  }
+ },
+ {  //2
+  {
+  { -112,   47,   -2,    2,  -34,   13,    2,    0,   15,   -7,    1,    0,    8,   -3,   -1,    0  },
+  {   29,   -7,    1,   -1, -108,   40,    2,    0,  -45,   13,    4,   -1,    8,   -5,    1,    0  },
+  {  -36,  -87,   69,  -10,  -17,  -33,   26,   -2,    7,   14,  -11,    2,    6,    8,   -7,    0  },
+  {   28,   -5,    2,   -2,  -29,   13,   -2,    0,  103,  -36,   -4,    1,   48,  -16,   -4,    1  },
+  {  -12,  -24,   15,   -3,   26,   80,  -61,    9,   15,   54,  -36,    2,    0,   -4,    6,   -2  },
+  {   18,   53,   69,  -74,   14,   24,   28,  -30,   -6,   -7,  -11,   12,   -5,   -7,   -6,    8  },
+  {    5,   -1,    2,    0,  -26,    6,    0,    1,   45,   -9,   -1,    0, -113,   28,    8,   -1  },
+  {  -13,  -32,   18,   -2,   15,   34,  -27,    7,  -25,  -80,   47,   -1,  -16,  -50,   28,    2  },
+  {   -4,  -13,  -10,   19,   18,   46,   60,  -48,   16,   33,   60,  -48,    1,    0,    5,   -2  },
+  {   15,   33,   63,   89,    8,   15,   25,   40,   -4,   -8,  -15,   -8,   -2,   -6,   -9,   -7  },
+  {   -8,  -24,  -27,   15,   12,   41,   26,  -29,  -17,  -50,  -39,   27,    0,   35,  -67,   26  },
+  {   -2,   -6,  -24,   13,   -1,   -8,   37,  -22,    3,   18,  -51,   22,  -23,  -95,   17,   17  },
+  {   -3,   -7,  -16,  -21,   10,   24,   46,   75,    8,   20,   38,   72,    1,    2,    1,    7  },
+  {    2,    6,   10,   -3,   -5,  -16,  -31,   12,    7,   24,   41,  -16,  -16,  -41,  -89,   49  },
+  {    4,    8,   21,   40,   -4,  -11,  -28,  -57,    5,   14,   31,   70,    7,   18,   32,   52  },
+  {    0,    1,    4,   11,   -2,   -4,  -13,  -34,    3,    7,   20,   47,   -6,  -19,  -42, -101  },
+  },
+  {
+  {  -99,   39,   -1,    2,   65,  -20,   -5,    0,  -15,   -2,    5,   -1,    0,    3,   -1,    0  },
+  {   58,   42,  -33,    3,   33,  -63,   23,   -1,  -55,   32,    3,   -5,   21,   -2,   -8,    3  },
+  {  -15,   71,  -44,    5,  -58,  -29,   25,    3,   62,   -7,   -4,   -4,  -19,    4,    0,    1  },
+  {   46,    5,    4,   -6,   71,  -12,  -15,    5,   52,  -38,   13,   -2,  -63,   23,    3,   -3  },
+  {  -14,  -54,  -29,   29,   25,   -9,   61,  -29,   27,   44,  -48,    5,  -27,  -21,   12,    7  },
+  {   -3,    3,   69,  -42,  -11,  -50,  -26,   26,   24,   63,  -19,   -5,  -18,  -22,   12,    0  },
+  {   17,   16,   -2,    1,   38,   18,  -12,    0,   62,    1,  -14,    5,   89,  -42,    8,   -2  },
+  {   15,   54,   -8,    6,    6,   60,  -26,   -8,  -30,   17,  -38,   22,  -43,  -45,   42,   -7  },
+  {   -6,  -17,  -55,  -28,    9,   30,   -8,   58,    4,   34,   41,  -52,  -16,  -36,  -20,   16  },
+  {   -2,   -1,   -9,  -79,    7,   11,   48,   44,  -13,  -34,  -55,    6,   12,   23,   20,  -11  },
+  {    7,   29,   14,   -6,   12,   53,   10,  -11,   14,   59,  -15,   -3,    5,   71,  -54,   13  },
+  {   -5,  -24,  -53,   15,   -3,  -15,  -61,   26,    6,   30,  -16,   23,   13,   56,   44,  -35  },
+  {    4,    8,   21,   52,   -1,   -1,   -5,   29,   -7,  -17,  -44,  -84,    8,   20,   31,   39  },
+  {   -2,  -11,  -25,   -4,   -4,  -21,  -53,    2,   -5,  -26,  -64,   19,   -8,  -19,  -73,   39  },
+  {   -3,   -5,  -23,  -57,   -2,   -4,  -24,  -75,    1,    3,    9,  -25,    6,   15,   41,   61  },
+  {    1,    1,    7,   18,    1,    2,   16,   47,    2,    5,   24,   67,    3,    9,   25,   88  },
+  }
+ },
+ {  //3
+  {
+  { -114,   37,    3,    2,  -22,  -23,   14,    0,   21,  -17,   -5,    2,    5,    2,   -4,   -1  },
+  {  -19,  -41,   19,   -2,   85,  -60,  -11,    7,   17,   31,  -34,    2,  -11,   19,    2,   -8  },
+  {   36,  -25,   18,   -2,  -42,  -53,   35,    5,   46,  -60,  -25,   19,    8,   21,  -33,   -1  },
+  {  -27,  -80,   44,   -3,  -58,    1,  -29,   19,  -41,   18,  -12,   -7,   12,  -17,    7,   -6  },
+  {  -11,  -21,   37,  -10,   44,   -4,   47,  -12,  -37,  -41,   58,   18,   10,  -46,  -16,   31  },
+  {   15,   47,   10,   -6,  -16,  -44,   42,   10,  -80,   25,  -40,   21,  -23,   -2,    3,  -14  },
+  {   13,   25,   79,  -39,  -13,   10,   31,   -4,   49,   45,   12,   -8,    3,   -1,   43,    7  },
+  {   16,   11,  -26,   13,  -13,  -74,  -20,   -1,    5,   -6,   29,  -47,   26,  -49,   54,    2  },
+  {   -8,  -34,  -26,    7,  -26,  -19,   29,  -37,    1,   22,   46,   -9,  -81,   37,   14,   20  },
+  {   -6,  -30,  -42,  -12,   -3,    5,   57,  -52,   -2,   37,  -12,    6,   74,   10,    6,  -15  },
+  {    5,    9,   -6,   42,  -15,  -18,   -9,   26,   15,   58,   14,   43,   23,  -10,  -37,   75  },
+  {   -5,  -23,  -23,   36,    3,   22,   36,   40,   27,   -4,  -16,   56,  -25,  -46,   56,  -24  },
+  {    1,    3,   23,   73,    8,    5,   34,   46,  -12,    2,   35,  -38,   26,   52,    2,  -31  },
+  {   -3,   -2,  -21,  -52,    1,  -10,  -17,   44,  -19,  -20,   30,   45,   27,   61,   49,   21  },
+  {   -2,   -7,  -33,  -56,   -4,   -6,   21,   63,   15,   31,   32,  -22,  -10,  -26,  -52,  -38  },
+  {   -5,  -12,  -18,  -12,    8,   22,   38,   36,   -5,  -15,  -51,  -63,   -5,    0,   15,   73  },
+  },
+  {
+  { -102,   22,    7,    2,   66,  -25,   -6,   -1,  -15,   14,    1,   -1,    2,   -2,    1,    0  },
+  {   12,   93,  -27,   -6,  -27,  -64,   36,    6,   13,    5,  -23,    0,   -2,    6,    5,   -3  },
+  {  -59,  -24,   17,    1,  -62,   -2,   -3,    2,   83,  -12,  -17,   -2,  -24,   14,    7,   -2  },
+  {  -33,   23,  -36,   11,  -21,   50,   35,  -16,  -23,  -78,   16,   19,   22,   15,  -30,   -5  },
+  {    0,  -38,  -81,   30,   27,    5,   51,  -32,   24,   36,  -16,   12,  -24,   -8,    9,    1  },
+  {   28,   38,    8,   -9,   62,   32,  -13,    2,   51,  -32,   15,    5,  -66,   28,    0,   -1  },
+  {   11,  -35,   21,  -17,   30,  -18,   31,   18,  -11,  -36,  -80,   12,   16,   49,   13,  -32  },
+  {  -13,   23,   22,  -36,  -12,   64,   39,   25,  -19,   23,  -36,    9,  -30,  -58,   33,   -7  },
+  {   -9,  -20,  -55,  -83,    3,   -2,    1,   62,    8,    2,   27,  -28,    7,   15,  -11,    5  },
+  {   -6,   24,  -38,   23,   -8,   40,  -49,    0,   -7,    9,  -25,  -44,   23,   39,   70,   -3  },
+  {   12,   17,   17,    0,   32,   27,   21,    2,   67,   11,   -6,  -10,   89,  -22,  -12,   16  },
+  {    2,   -9,    8,   45,    7,   -8,   27,   35,   -9,  -31,  -17,  -87,  -23,  -22,  -19,   44  },
+  {   -1,   -9,   28,  -24,   -1,  -10,   49,  -30,   -8,   -7,   40,    1,    4,   33,   65,   67  },
+  {    5,  -12,  -24,  -17,   13,  -34,  -32,  -16,   14,  -67,   -7,    9,    7,  -74,   49,    1  },
+  {    2,   -6,   11,   45,    3,  -10,   33,   55,    8,   -5,   59,    4,    7,   -4,   44,  -66  },
+  {   -1,    1,  -14,   36,   -1,    2,  -20,   69,    0,    0,  -15,   72,    3,    4,    5,   65  },
+  }
+ }
+};
+
+//--------------------------------------------------------------------------------------------------
diff --git a/source/Lib/CommonLib/RomTr.cpp b/source/Lib/CommonLib/RomTr.cpp
index f7fcc1ffed168a922f71213e18aa076deb894d03..a60611a633be90961c6282a6fe04acfd24741ea6 100644
--- a/source/Lib/CommonLib/RomTr.cpp
+++ b/source/Lib/CommonLib/RomTr.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/SEI.cpp b/source/Lib/CommonLib/SEI.cpp
index 10fc50e50ac168e4c2a58d471bb201fd1eaa563c..d061c51fdb6759d12dffb6a53421b2097421718d 100644
--- a/source/Lib/CommonLib/SEI.cpp
+++ b/source/Lib/CommonLib/SEI.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -96,35 +96,51 @@ void deleteSEIs (SEIMessages &seiList)
   seiList.clear();
 }
 
-void SEIBufferingPeriod::copyTo (SEIBufferingPeriod& target)
+void SEIBufferingPeriod::copyTo (SEIBufferingPeriod& target) const  // Copies the buffering-period members listed below from this object into target; this object is only read.
 {
-  target.m_bpSeqParameterSetId = m_bpSeqParameterSetId;
-  target.m_rapCpbParamsPresentFlag = m_rapCpbParamsPresentFlag;
-  target.m_cpbDelayOffset = m_cpbDelayOffset;
-  target.m_dpbDelayOffset = m_dpbDelayOffset;
+  target.m_bpNalCpbParamsPresentFlag = m_bpNalCpbParamsPresentFlag;
+  target.m_bpVclCpbParamsPresentFlag = m_bpVclCpbParamsPresentFlag;
+  target.m_initialCpbRemovalDelayLength = m_initialCpbRemovalDelayLength;
+  target.m_cpbRemovalDelayLength = m_cpbRemovalDelayLength;
+  target.m_dpbOutputDelayLength = m_dpbOutputDelayLength;
+  target.m_duCpbRemovalDelayIncrementLength = m_duCpbRemovalDelayIncrementLength;
+  target.m_dpbOutputDelayDuLength = m_dpbOutputDelayDuLength;
   target.m_concatenationFlag = m_concatenationFlag;
   target.m_auCpbRemovalDelayDelta = m_auCpbRemovalDelayDelta;
+  target.m_cpbRemovalDelayDeltasPresentFlag =  m_cpbRemovalDelayDeltasPresentFlag;
+  target.m_numCpbRemovalDelayDeltas = m_numCpbRemovalDelayDeltas;
+  target.m_bpMaxSubLayers = m_bpMaxSubLayers;
   ::memcpy(target.m_initialCpbRemovalDelay, m_initialCpbRemovalDelay, sizeof(m_initialCpbRemovalDelay));
-  ::memcpy(target.m_initialCpbRemovalDelayOffset, m_initialCpbRemovalDelayOffset, sizeof(m_initialCpbRemovalDelayOffset));
-  ::memcpy(target.m_initialAltCpbRemovalDelay, m_initialAltCpbRemovalDelay, sizeof(m_initialAltCpbRemovalDelay));
-  ::memcpy(target.m_initialAltCpbRemovalDelayOffset, m_initialAltCpbRemovalDelayOffset, sizeof(m_initialAltCpbRemovalDelayOffset));
+  ::memcpy(target.m_initialCpbRemovalOffset, m_initialCpbRemovalOffset, sizeof(m_initialCpbRemovalOffset));  // copies sizeof(member) bytes; assumes a fixed-size array of trivially copyable elements - TODO confirm in SEI.h
+  ::memcpy(target.m_cpbRemovalDelayDelta, m_cpbRemovalDelayDelta, sizeof(m_cpbRemovalDelayDelta));  // same whole-member byte copy as the line above
+  target.m_bpCpbCnt = m_bpCpbCnt;
+  target.m_bpDecodingUnitHrdParamsPresentFlag = m_bpDecodingUnitHrdParamsPresentFlag;
+  target.m_decodingUnitCpbParamsInPicTimingSeiFlag = m_decodingUnitCpbParamsInPicTimingSeiFlag;
+  target.m_sublayerInitialCpbRemovalDelayPresentFlag = m_sublayerInitialCpbRemovalDelayPresentFlag;
+  target.m_concatenationFlag = m_concatenationFlag;  // NOTE(review): redundant - m_concatenationFlag is already assigned earlier in this function
+  target.m_maxInitialRemovalDelayForConcatenation = m_maxInitialRemovalDelayForConcatenation;
+  target.m_altCpbParamsPresentFlag = m_altCpbParamsPresentFlag;
 }
 
-void SEIPictureTiming::copyTo (SEIPictureTiming& target)
+void SEIPictureTiming::copyTo (SEIPictureTiming& target) const  // Copies the picture-timing members listed below from this object into target; this object is only read.
 {
-  target.m_picStruct = m_picStruct;
-  target.m_sourceScanType = m_sourceScanType;
-  target.m_duplicateFlag = m_duplicateFlag;
-
-  target.m_auCpbRemovalDelay = m_auCpbRemovalDelay;
+  ::memcpy(target.m_auCpbRemovalDelay, m_auCpbRemovalDelay, sizeof(m_auCpbRemovalDelay));  // this and the next four memcpy calls copy sizeof(member) bytes each; assumes fixed-size arrays of trivially copyable elements - TODO confirm in SEI.h
+  ::memcpy(target.m_ptSubLayerDelaysPresentFlag, m_ptSubLayerDelaysPresentFlag, sizeof(m_ptSubLayerDelaysPresentFlag));
+  ::memcpy(target.m_duCommonCpbRemovalDelayMinus1, m_duCommonCpbRemovalDelayMinus1, sizeof(m_duCommonCpbRemovalDelayMinus1));
+  ::memcpy(target.m_cpbRemovalDelayDeltaEnabledFlag, m_cpbRemovalDelayDeltaEnabledFlag, sizeof(m_cpbRemovalDelayDeltaEnabledFlag));
+  ::memcpy(target.m_cpbRemovalDelayDeltaIdx, m_cpbRemovalDelayDeltaIdx, sizeof(m_cpbRemovalDelayDeltaIdx));
   target.m_picDpbOutputDelay = m_picDpbOutputDelay;
   target.m_picDpbOutputDuDelay = m_picDpbOutputDuDelay;
   target.m_numDecodingUnitsMinus1 = m_numDecodingUnitsMinus1;
   target.m_duCommonCpbRemovalDelayFlag = m_duCommonCpbRemovalDelayFlag;
-  target.m_duCommonCpbRemovalDelayMinus1 = m_duCommonCpbRemovalDelayMinus1;
 
   target.m_numNalusInDuMinus1 = m_numNalusInDuMinus1;
   target.m_duCpbRemovalDelayMinus1 = m_duCpbRemovalDelayMinus1;
+  target.m_cpbAltTimingInfoPresentFlag = m_cpbAltTimingInfoPresentFlag;  // the remaining members are copied with plain assignment
+  target.m_cpbAltInitialCpbRemovalDelayDelta = m_cpbAltInitialCpbRemovalDelayDelta;
+  target.m_cpbAltInitialCpbRemovalOffsetDelta = m_cpbAltInitialCpbRemovalOffsetDelta;
+  target.m_cpbDelayOffset = m_cpbDelayOffset;
+  target.m_dpbDelayOffset = m_dpbDelayOffset;
 }
 
 // Static member
@@ -134,41 +150,34 @@ const char *SEI::getSEIMessageString(SEI::PayloadType payloadType)
   {
     case SEI::BUFFERING_PERIOD:                     return "Buffering period";
     case SEI::PICTURE_TIMING:                       return "Picture timing";
+#if HEVC_SEI
     case SEI::PAN_SCAN_RECT:                        return "Pan-scan rectangle";                   // not currently decoded
+#endif
     case SEI::FILLER_PAYLOAD:                       return "Filler payload";                       // not currently decoded
     case SEI::USER_DATA_REGISTERED_ITU_T_T35:       return "User data registered";                 // not currently decoded
     case SEI::USER_DATA_UNREGISTERED:               return "User data unregistered";
-    case SEI::RECOVERY_POINT:                       return "Recovery point";
-    case SEI::SCENE_INFO:                           return "Scene information";                    // not currently decoded
-    case SEI::FULL_FRAME_SNAPSHOT:                  return "Picture snapshot";                     // not currently decoded
-    case SEI::PROGRESSIVE_REFINEMENT_SEGMENT_START: return "Progressive refinement segment start"; // not currently decoded
-    case SEI::PROGRESSIVE_REFINEMENT_SEGMENT_END:   return "Progressive refinement segment end";   // not currently decoded
     case SEI::FILM_GRAIN_CHARACTERISTICS:           return "Film grain characteristics";           // not currently decoded
-    case SEI::POST_FILTER_HINT:                     return "Post filter hint";                     // not currently decoded
-    case SEI::TONE_MAPPING_INFO:                    return "Tone mapping information";
-    case SEI::KNEE_FUNCTION_INFO:                   return "Knee function information";
     case SEI::FRAME_PACKING:                        return "Frame packing arrangement";
-    case SEI::DISPLAY_ORIENTATION:                  return "Display orientation";
-    case SEI::GREEN_METADATA:                       return "Green metadata information";
-    case SEI::SOP_DESCRIPTION:                      return "Structure of pictures information";
-    case SEI::ACTIVE_PARAMETER_SETS:                return "Active parameter sets";
     case SEI::DECODING_UNIT_INFO:                   return "Decoding unit information";
+#if HEVC_SEI
     case SEI::TEMPORAL_LEVEL0_INDEX:                return "Temporal sub-layer zero index";
+#endif
     case SEI::DECODED_PICTURE_HASH:                 return "Decoded picture hash";
-    case SEI::SCALABLE_NESTING:                     return "Scalable nesting";
-    case SEI::REGION_REFRESH_INFO:                  return "Region refresh information";
-    case SEI::NO_DISPLAY:                           return "No display";
-    case SEI::TIME_CODE:                            return "Time code";
+    case SEI::DEPENDENT_RAP_INDICATION:             return "Dependent RAP indication";
     case SEI::MASTERING_DISPLAY_COLOUR_VOLUME:      return "Mastering display colour volume";
-    case SEI::SEGM_RECT_FRAME_PACKING:              return "Segmented rectangular frame packing arrangement";
-#if HEVC_TILES_WPP
-    case SEI::TEMP_MOTION_CONSTRAINED_TILE_SETS:    return "Temporal motion constrained tile sets";
-#endif
-    case SEI::CHROMA_RESAMPLING_FILTER_HINT:        return "Chroma sampling filter hint";
-    case SEI::COLOUR_REMAPPING_INFO:                return "Colour remapping info";
 #if U0033_ALTERNATIVE_TRANSFER_CHARACTERISTICS_SEI
     case SEI::ALTERNATIVE_TRANSFER_CHARACTERISTICS: return "Alternative transfer characteristics";
 #endif
+    case SEI::CONTENT_LIGHT_LEVEL_INFO:             return "Content light level information";
+    case SEI::AMBIENT_VIEWING_ENVIRONMENT:          return "Ambient viewing environment";
+    case SEI::CONTENT_COLOUR_VOLUME:                return "Content colour volume";
+    case SEI::EQUIRECTANGULAR_PROJECTION:           return "Equirectangular projection";
+    case SEI::SPHERE_ROTATION:                      return "Sphere rotation";
+    case SEI::REGION_WISE_PACKING:                  return "Region wise packing information";
+    case SEI::OMNI_VIEWPORT:                        return "Omni viewport";
+    case SEI::GENERALIZED_CUBEMAP_PROJECTION:       return "Generalized cubemap projection";
+    case SEI::SAMPLE_ASPECT_RATIO_INFO:             return "Sample aspect ratio information";
+    case SEI::SUBPICTURE_LEVEL_INFO:                return "Subpicture level information";
     default:                                        return "Unknown";
   }
 }
diff --git a/source/Lib/CommonLib/SEI.h b/source/Lib/CommonLib/SEI.h
index c6ea74f82eb4bc67b37642992002c3c2fb6cecf4..a214052564b2c582c38f8d2fd461bce7b134ee78 100644
--- a/source/Lib/CommonLib/SEI.h
+++ b/source/Lib/CommonLib/SEI.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -56,41 +56,33 @@ public:
   {
     BUFFERING_PERIOD                     = 0,
     PICTURE_TIMING                       = 1,
+#if HEVC_SEI
     PAN_SCAN_RECT                        = 2,
+#endif
     FILLER_PAYLOAD                       = 3,
     USER_DATA_REGISTERED_ITU_T_T35       = 4,
     USER_DATA_UNREGISTERED               = 5,
-    RECOVERY_POINT                       = 6,
-    SCENE_INFO                           = 9,
-    FULL_FRAME_SNAPSHOT                  = 15,
-    PROGRESSIVE_REFINEMENT_SEGMENT_START = 16,
-    PROGRESSIVE_REFINEMENT_SEGMENT_END   = 17,
     FILM_GRAIN_CHARACTERISTICS           = 19,
-    POST_FILTER_HINT                     = 22,
-    TONE_MAPPING_INFO                    = 23,
     FRAME_PACKING                        = 45,
-    DISPLAY_ORIENTATION                  = 47,
-    GREEN_METADATA                       = 56,
-    SOP_DESCRIPTION                      = 128,
-    ACTIVE_PARAMETER_SETS                = 129,
     DECODING_UNIT_INFO                   = 130,
+#if HEVC_SEI
     TEMPORAL_LEVEL0_INDEX                = 131,
+#endif
     DECODED_PICTURE_HASH                 = 132,
-    SCALABLE_NESTING                     = 133,
-    REGION_REFRESH_INFO                  = 134,
-    NO_DISPLAY                           = 135,
-    TIME_CODE                            = 136,
     MASTERING_DISPLAY_COLOUR_VOLUME      = 137,
-    SEGM_RECT_FRAME_PACKING              = 138,
-#if HEVC_TILES_WPP
-    TEMP_MOTION_CONSTRAINED_TILE_SETS    = 139,
-#endif
-    CHROMA_RESAMPLING_FILTER_HINT        = 140,
-    KNEE_FUNCTION_INFO                   = 141,
-    COLOUR_REMAPPING_INFO                = 142,
-#if U0033_ALTERNATIVE_TRANSFER_CHARACTERISTICS_SEI
-    ALTERNATIVE_TRANSFER_CHARACTERISTICS = 182,
-#endif
+    DEPENDENT_RAP_INDICATION             = 145,
+    EQUIRECTANGULAR_PROJECTION           = 150,
+    SPHERE_ROTATION                      = 154,
+    REGION_WISE_PACKING                  = 155,
+    OMNI_VIEWPORT                        = 156,
+    GENERALIZED_CUBEMAP_PROJECTION       = 153,
+    FRAME_FIELD_INFO                     = 168,
+    SUBPICTURE_LEVEL_INFO                = 203,
+    SAMPLE_ASPECT_RATIO_INFO             = 204,
+    CONTENT_LIGHT_LEVEL_INFO             = 144,
+    ALTERNATIVE_TRANSFER_CHARACTERISTICS = 147,
+    AMBIENT_VIEWING_ENVIRONMENT          = 148,
+    CONTENT_COLOUR_VOLUME                = 149,
   };
 
   SEI() {}
@@ -101,6 +93,130 @@ public:
   virtual PayloadType payloadType() const = 0;
 };
 
+
+class SEIEquirectangularProjection : public SEI  // SEI message whose payload type is EQUIRECTANGULAR_PROJECTION
+{
+public:
+  PayloadType payloadType() const { return EQUIRECTANGULAR_PROJECTION; }
+
+  SEIEquirectangularProjection()  {}
+  virtual ~SEIEquirectangularProjection() {}
+  // The constructor body is empty, so each field carries a default member initializer instead of an indeterminate value.
+  bool    m_erpCancelFlag          = false;
+  bool    m_erpPersistenceFlag     = false;
+  bool    m_erpGuardBandFlag       = false;
+  uint8_t m_erpGuardBandType       = 0;
+  uint8_t m_erpLeftGuardBandWidth  = 0;
+  uint8_t m_erpRightGuardBandWidth = 0;
+};
+
+class SEISphereRotation : public SEI  // SEI message whose payload type is SPHERE_ROTATION
+{
+public:
+  PayloadType payloadType() const { return SPHERE_ROTATION; }
+
+  SEISphereRotation()  {}
+  virtual ~SEISphereRotation() {}
+  // Defaults are supplied in-class because the constructor initializes nothing.
+  bool  m_sphereRotationCancelFlag      = false;
+  bool  m_sphereRotationPersistenceFlag = false;
+  int   m_sphereRotationYaw             = 0;
+  int   m_sphereRotationPitch           = 0;
+  int   m_sphereRotationRoll            = 0;
+};
+
+class SEIOmniViewport : public SEI  // SEI message whose payload type is OMNI_VIEWPORT
+{
+public:
+  PayloadType payloadType() const { return OMNI_VIEWPORT; }
+
+  SEIOmniViewport() {}
+  virtual ~SEIOmniViewport() {}
+
+  struct OmniViewport  // element type of m_omniViewportRegions; deliberately left a plain aggregate (no initializers)
+  {
+    int      azimuthCentre;
+    int      elevationCentre;
+    int      tiltCentre;
+    uint32_t horRange;
+    uint32_t verRange;
+  };
+  // Scalar members get in-class defaults because the constructor body is empty.
+  uint32_t m_omniViewportId              = 0;
+  bool     m_omniViewportCancelFlag      = false;
+  bool     m_omniViewportPersistenceFlag = false;
+  uint8_t  m_omniViewportCntMinus1       = 0;
+  std::vector<OmniViewport> m_omniViewportRegions;  // default-constructed, i.e. starts empty
+};
+
+class SEIRegionWisePacking : public SEI  // SEI message whose payload type is REGION_WISE_PACKING
+{
+public:
+  PayloadType payloadType() const { return REGION_WISE_PACKING; }
+  SEIRegionWisePacking() {}  // empty body: scalar defaults come from the member initializers below
+  virtual ~SEIRegionWisePacking() {}
+  bool                  m_rwpCancelFlag                  = false;
+  bool                  m_rwpPersistenceFlag             = false;
+  bool                  m_constituentPictureMatchingFlag = false;
+  int                   m_numPackedRegions               = 0;
+  int                   m_projPictureWidth               = 0;
+  int                   m_projPictureHeight              = 0;
+  int                   m_packedPictureWidth             = 0;
+  int                   m_packedPictureHeight            = 0;
+  std::vector<uint8_t>  m_rwpTransformType;  // this and all following vectors start empty
+  std::vector<bool>     m_rwpGuardBandFlag;  // presumably one entry per packed region in each vector - TODO confirm against the SEI reader/writer
+  std::vector<uint32_t> m_projRegionWidth;
+  std::vector<uint32_t> m_projRegionHeight;
+  std::vector<uint32_t> m_rwpProjRegionTop;
+  std::vector<uint32_t> m_projRegionLeft;
+  std::vector<uint16_t> m_packedRegionWidth;
+  std::vector<uint16_t> m_packedRegionHeight;
+  std::vector<uint16_t> m_packedRegionTop;
+  std::vector<uint16_t> m_packedRegionLeft;
+  std::vector<uint8_t>  m_rwpLeftGuardBandWidth;
+  std::vector<uint8_t>  m_rwpRightGuardBandWidth;
+  std::vector<uint8_t>  m_rwpTopGuardBandHeight;
+  std::vector<uint8_t>  m_rwpBottomGuardBandHeight;
+  std::vector<bool>     m_rwpGuardBandNotUsedForPredFlag;
+  std::vector<uint8_t>  m_rwpGuardBandType;
+};
+
+class SEIGeneralizedCubemapProjection : public SEI  // SEI message whose payload type is GENERALIZED_CUBEMAP_PROJECTION
+{
+public:
+  PayloadType payloadType() const { return GENERALIZED_CUBEMAP_PROJECTION; }
+
+  SEIGeneralizedCubemapProjection()  {}
+  virtual ~SEIGeneralizedCubemapProjection() {}
+  // Scalar members carry in-class defaults because the constructor initializes nothing; the vectors start empty.
+  bool                 m_gcmpCancelFlag             = false;
+  bool                 m_gcmpPersistenceFlag        = false;
+  uint8_t              m_gcmpPackingType            = 0;
+  uint8_t              m_gcmpMappingFunctionType    = 0;
+  std::vector<uint8_t> m_gcmpFaceIndex;
+  std::vector<uint8_t> m_gcmpFaceRotation;
+  std::vector<uint8_t> m_gcmpFunctionCoeffU;
+  std::vector<bool>    m_gcmpFunctionUAffectedByVFlag;
+  std::vector<uint8_t> m_gcmpFunctionCoeffV;
+  std::vector<bool>    m_gcmpFunctionVAffectedByUFlag;
+  bool                 m_gcmpGuardBandFlag          = false;
+  bool                 m_gcmpGuardBandBoundaryType  = false;
+  uint8_t              m_gcmpGuardBandSamplesMinus1 = 0;
+};
+
+class SEISampleAspectRatioInfo : public SEI  // SEI message whose payload type is SAMPLE_ASPECT_RATIO_INFO
+{
+public:
+  PayloadType payloadType() const { return SAMPLE_ASPECT_RATIO_INFO; }
+  SEISampleAspectRatioInfo() {}  // empty body: defaults come from the member initializers below
+  virtual ~SEISampleAspectRatioInfo() {}
+  bool                  m_sariCancelFlag      = false;
+  bool                  m_sariPersistenceFlag = false;
+  int                   m_sariAspectRatioIdc  = 0;
+  int                   m_sariSarWidth        = 0;
+  int                   m_sariSarHeight       = 0;
+};
+
 static const uint32_t ISO_IEC_11578_LEN=16;
 
 class SEIuserDataUnregistered : public SEI
@@ -135,90 +251,138 @@ public:
   PictureHash m_pictureHash;
 };
 
+class SEIDependentRAPIndication : public SEI  // SEI message whose payload type is DEPENDENT_RAP_INDICATION
+{
+public:
+  PayloadType payloadType() const { return DEPENDENT_RAP_INDICATION; }
+  SEIDependentRAPIndication() { }  // nothing to initialize: the class declares no data members
+
+  virtual ~SEIDependentRAPIndication() { }
+};
+
+#if HEVC_SEI
 class SEIActiveParameterSets : public SEI
 {
 public:
   PayloadType payloadType() const { return ACTIVE_PARAMETER_SETS; }
 
   SEIActiveParameterSets()
-#if HEVC_VPS
-    : activeVPSId            (0)
-    , m_selfContainedCvsFlag (false)
-#else
     : m_selfContainedCvsFlag(false)
-#endif
     , m_noParameterSetUpdateFlag (false)
     , numSpsIdsMinus1        (0)
   {}
   virtual ~SEIActiveParameterSets() {}
 
-#if HEVC_VPS
-  int activeVPSId;
-#endif
   bool m_selfContainedCvsFlag;
   bool m_noParameterSetUpdateFlag;
   int numSpsIdsMinus1;
   std::vector<int> activeSeqParameterSetId;
 };
+#endif
 
 class SEIBufferingPeriod : public SEI
 {
 public:
   PayloadType payloadType() const { return BUFFERING_PERIOD; }
-  void copyTo (SEIBufferingPeriod& target);
+  void copyTo (SEIBufferingPeriod& target) const;  // now const-qualified, so it can be called on a const source object
 
   SEIBufferingPeriod()
-  : m_bpSeqParameterSetId (0)
-  , m_rapCpbParamsPresentFlag (false)
-  , m_cpbDelayOffset      (0)
-  , m_dpbDelayOffset      (0)
+  : m_bpNalCpbParamsPresentFlag (false)  // initializers are listed in member declaration order
+  , m_bpVclCpbParamsPresentFlag (false)
+  , m_initialCpbRemovalDelayLength (0)
+  , m_cpbRemovalDelayLength (0)
+  , m_dpbOutputDelayLength (0)
+  , m_bpCpbCnt(0)
+  , m_duCpbRemovalDelayIncrementLength (0)
+  , m_dpbOutputDelayDuLength (0), m_concatenationFlag (false), m_auCpbRemovalDelayDelta (0)  // last two were previously left uninitialized
+  , m_cpbRemovalDelayDeltasPresentFlag (false)
+  , m_numCpbRemovalDelayDeltas (0)
+  , m_bpMaxSubLayers (0)
+  , m_bpDecodingUnitHrdParamsPresentFlag (false)
+  , m_decodingUnitCpbParamsInPicTimingSeiFlag (false)
+  , m_sublayerInitialCpbRemovalDelayPresentFlag(false)
+  , m_additionalConcatenationInfoPresentFlag (false)
+  , m_maxInitialRemovalDelayForConcatenation (0)
+  , m_altCpbParamsPresentFlag (false)
+  , m_useAltCpbParamsFlag (false)
   {
     ::memset(m_initialCpbRemovalDelay, 0, sizeof(m_initialCpbRemovalDelay));
-    ::memset(m_initialCpbRemovalDelayOffset, 0, sizeof(m_initialCpbRemovalDelayOffset));
-    ::memset(m_initialAltCpbRemovalDelay, 0, sizeof(m_initialAltCpbRemovalDelay));
-    ::memset(m_initialAltCpbRemovalDelayOffset, 0, sizeof(m_initialAltCpbRemovalDelayOffset));
+    ::memset(m_initialCpbRemovalOffset, 0, sizeof(m_initialCpbRemovalOffset));  // zero the whole 3-D offset table
+    ::memset(m_cpbRemovalDelayDelta, 0, sizeof(m_cpbRemovalDelayDelta));  // zero all 15 delta entries
   }
   virtual ~SEIBufferingPeriod() {}
 
-  uint32_t m_bpSeqParameterSetId;
-  bool m_rapCpbParamsPresentFlag;
-  uint32_t m_cpbDelayOffset;
-  uint32_t m_dpbDelayOffset;
-  uint32_t m_initialCpbRemovalDelay         [MAX_CPB_CNT][2];
-  uint32_t m_initialCpbRemovalDelayOffset   [MAX_CPB_CNT][2];
-  uint32_t m_initialAltCpbRemovalDelay      [MAX_CPB_CNT][2];
-  uint32_t m_initialAltCpbRemovalDelayOffset[MAX_CPB_CNT][2];
+  void      setDuCpbRemovalDelayIncrementLength( uint32_t value )        { m_duCpbRemovalDelayIncrementLength = value;        }
+  uint32_t  getDuCpbRemovalDelayIncrementLength( ) const                 { return m_duCpbRemovalDelayIncrementLength;         }
+  void      setDpbOutputDelayDuLength( uint32_t value )                  { m_dpbOutputDelayDuLength = value;                  }
+  uint32_t  getDpbOutputDelayDuLength( ) const                           { return m_dpbOutputDelayDuLength;                   }
+  bool m_bpNalCpbParamsPresentFlag;
+  bool m_bpVclCpbParamsPresentFlag;
+  uint32_t m_initialCpbRemovalDelayLength;
+  uint32_t m_cpbRemovalDelayLength;
+  uint32_t m_dpbOutputDelayLength;
+  int      m_bpCpbCnt;
+  uint32_t m_duCpbRemovalDelayIncrementLength;  // also reachable through the set/get accessors above
+  uint32_t m_dpbOutputDelayDuLength;  // also reachable through the set/get accessors above
+  uint32_t m_initialCpbRemovalDelay         [MAX_TLAYER][MAX_CPB_CNT][2];  // gained a leading MAX_TLAYER dimension in this change
+  uint32_t m_initialCpbRemovalOffset        [MAX_TLAYER][MAX_CPB_CNT][2];
   bool m_concatenationFlag;
   uint32_t m_auCpbRemovalDelayDelta;
+  bool m_cpbRemovalDelayDeltasPresentFlag;
+  int  m_numCpbRemovalDelayDeltas;
+  int  m_bpMaxSubLayers;
+  uint32_t m_cpbRemovalDelayDelta    [15];  // fixed capacity of 15 entries; NOTE(review): presumably bounds m_numCpbRemovalDelayDeltas - confirm
+  bool m_bpDecodingUnitHrdParamsPresentFlag;
+  bool m_decodingUnitCpbParamsInPicTimingSeiFlag;
+  bool m_sublayerInitialCpbRemovalDelayPresentFlag;
+  bool     m_additionalConcatenationInfoPresentFlag;
+  uint32_t m_maxInitialRemovalDelayForConcatenation;
+  bool     m_altCpbParamsPresentFlag;
+  bool     m_useAltCpbParamsFlag;
 };
+
 class SEIPictureTiming : public SEI
 {
 public:
   PayloadType payloadType() const { return PICTURE_TIMING; }
-  void copyTo (SEIPictureTiming& target);
+  void copyTo (SEIPictureTiming& target) const;  // now const-qualified, so it can be called on a const source object
 
   SEIPictureTiming()
-  : m_picStruct               (0)
-  , m_sourceScanType          (0)
-  , m_duplicateFlag           (false)
-  , m_picDpbOutputDuDelay     (0)
-  {}
+  : m_picDpbOutputDelay (0)  // scalar members are zeroed/cleared here, fixed-size arrays in the body below
+  , m_picDpbOutputDuDelay (0)
+  , m_numDecodingUnitsMinus1 (0)
+  , m_duCommonCpbRemovalDelayFlag (false)
+  , m_cpbAltTimingInfoPresentFlag (false)
+  , m_cpbDelayOffset (0)
+  , m_dpbDelayOffset (0)
+  {
+    ::memset(m_ptSubLayerDelaysPresentFlag, 0, sizeof(m_ptSubLayerDelaysPresentFlag));  // each memset clears one MAX_TLAYER-sized array
+    ::memset(m_duCommonCpbRemovalDelayMinus1, 0, sizeof(m_duCommonCpbRemovalDelayMinus1));
+    ::memset(m_cpbRemovalDelayDeltaEnabledFlag, 0, sizeof(m_cpbRemovalDelayDeltaEnabledFlag));
+    ::memset(m_cpbRemovalDelayDeltaIdx, 0, sizeof(m_cpbRemovalDelayDeltaIdx));
+    ::memset(m_auCpbRemovalDelay, 0, sizeof(m_auCpbRemovalDelay));
+  }
   virtual ~SEIPictureTiming()
   {
   }
 
-  uint32_t  m_picStruct;
-  uint32_t  m_sourceScanType;
-  bool  m_duplicateFlag;
 
-  uint32_t  m_auCpbRemovalDelay;
+  bool  m_ptSubLayerDelaysPresentFlag[MAX_TLAYER];  // the four arrays below hold MAX_TLAYER entries each
+  bool  m_cpbRemovalDelayDeltaEnabledFlag[MAX_TLAYER];
+  uint32_t  m_cpbRemovalDelayDeltaIdx[MAX_TLAYER];
+  uint32_t  m_auCpbRemovalDelay[MAX_TLAYER];  // was a single value before this change
   uint32_t  m_picDpbOutputDelay;
   uint32_t  m_picDpbOutputDuDelay;
   uint32_t  m_numDecodingUnitsMinus1;
   bool  m_duCommonCpbRemovalDelayFlag;
-  uint32_t  m_duCommonCpbRemovalDelayMinus1;
+  uint32_t  m_duCommonCpbRemovalDelayMinus1[MAX_TLAYER];  // was a single value before this change
   std::vector<uint32_t> m_numNalusInDuMinus1;
   std::vector<uint32_t> m_duCpbRemovalDelayMinus1;
+  bool     m_cpbAltTimingInfoPresentFlag;
+  std::vector<uint32_t> m_cpbAltInitialCpbRemovalDelayDelta;  // not touched by the constructor, so it starts empty
+  std::vector<uint32_t> m_cpbAltInitialCpbRemovalOffsetDelta;  // not touched by the constructor, so it starts empty
+  uint32_t m_cpbDelayOffset;
+  uint32_t m_dpbDelayOffset;
 };
 
 class SEIDecodingUnitInfo : public SEI
@@ -228,30 +392,51 @@ public:
 
   SEIDecodingUnitInfo()
     : m_decodingUnitIdx(0)
-    , m_duSptCpbRemovalDelay(0)
     , m_dpbOutputDuDelayPresentFlag(false)
     , m_picSptDpbOutputDuDelay(0)
-  {}
+  {
+    ::memset(m_duiSubLayerDelaysPresentFlag, 0, sizeof(m_duiSubLayerDelaysPresentFlag));
+    ::memset(m_duSptCpbRemovalDelayIncrement, 0, sizeof(m_duSptCpbRemovalDelayIncrement));
+  }
   virtual ~SEIDecodingUnitInfo() {}
   int m_decodingUnitIdx;
-  int m_duSptCpbRemovalDelay;
+  bool m_duiSubLayerDelaysPresentFlag[MAX_TLAYER];
+  int m_duSptCpbRemovalDelayIncrement[MAX_TLAYER];
   bool m_dpbOutputDuDelayPresentFlag;
   int m_picSptDpbOutputDuDelay;
 };
 
-class SEIRecoveryPoint : public SEI
+
+class SEIFrameFieldInfo : public SEI  // SEI message whose payload type is FRAME_FIELD_INFO; takes the place of the removed SEIRecoveryPoint in this hunk
 {
 public:
-  PayloadType payloadType() const { return RECOVERY_POINT; }
-
-  SEIRecoveryPoint() {}
-  virtual ~SEIRecoveryPoint() {}
-
-  int  m_recoveryPocCnt;
-  bool m_exactMatchingFlag;
-  bool m_brokenLinkFlag;
+  PayloadType payloadType() const { return FRAME_FIELD_INFO; }
+
+  SEIFrameFieldInfo()  // every data member receives an explicit default (false / 0) in the initializer list
+    : m_fieldPicFlag(false)
+    , m_bottomFieldFlag (false)
+    , m_pairingIndicatedFlag (false)
+    , m_pairedWithNextFieldFlag(false)
+    , m_displayFieldsFromFrameFlag(false)
+    , m_topFieldFirstFlag(false)
+    , m_displayElementalPeriodsMinus1(0)
+    , m_sourceScanType(0)
+    , m_duplicateFlag(false)
+  {}
+  virtual ~SEIFrameFieldInfo() {}
+
+  bool m_fieldPicFlag;
+  bool m_bottomFieldFlag;
+  bool m_pairingIndicatedFlag;
+  bool m_pairedWithNextFieldFlag;
+  bool m_displayFieldsFromFrameFlag;
+  bool m_topFieldFirstFlag;
+  int  m_displayElementalPeriodsMinus1;
+  int  m_sourceScanType;  // a member of this name is removed from SEIPictureTiming earlier in the same patch
+  bool m_duplicateFlag;  // a member of this name is removed from SEIPictureTiming earlier in the same patch
 };
 
+
 class SEIFramePacking : public SEI
 {
 public:
@@ -280,210 +465,6 @@ public:
   bool m_upsampledAspectRatio;
 };
 
-class SEISegmentedRectFramePacking : public SEI
-{
-public:
-  PayloadType payloadType() const { return SEGM_RECT_FRAME_PACKING; }
-
-  SEISegmentedRectFramePacking() {}
-  virtual ~SEISegmentedRectFramePacking() {}
-
-  bool m_arrangementCancelFlag;
-  int  m_contentInterpretationType;
-  bool m_arrangementPersistenceFlag;
-};
-
-class SEIDisplayOrientation : public SEI
-{
-public:
-  PayloadType payloadType() const { return DISPLAY_ORIENTATION; }
-
-  SEIDisplayOrientation()
-    : cancelFlag(true)
-    , persistenceFlag(0)
-    , extensionFlag(false)
-    {}
-  virtual ~SEIDisplayOrientation() {}
-
-  bool cancelFlag;
-  bool horFlip;
-  bool verFlip;
-
-  uint32_t anticlockwiseRotation;
-  bool persistenceFlag;
-  bool extensionFlag;
-};
-
-class SEITemporalLevel0Index : public SEI
-{
-public:
-  PayloadType payloadType() const { return TEMPORAL_LEVEL0_INDEX; }
-
-  SEITemporalLevel0Index()
-    : tl0Idx(0)
-    , rapIdx(0)
-    {}
-  virtual ~SEITemporalLevel0Index() {}
-
-  uint32_t tl0Idx;
-  uint32_t rapIdx;
-};
-
-class SEIGradualDecodingRefreshInfo : public SEI
-{
-public:
-  PayloadType payloadType() const { return REGION_REFRESH_INFO; }
-
-  SEIGradualDecodingRefreshInfo()
-    : m_gdrForegroundFlag(0)
-  {}
-  virtual ~SEIGradualDecodingRefreshInfo() {}
-
-  bool m_gdrForegroundFlag;
-};
-
-class SEINoDisplay : public SEI
-{
-public:
-  PayloadType payloadType() const { return NO_DISPLAY; }
-
-  SEINoDisplay()
-    : m_noDisplay(false)
-  {}
-  virtual ~SEINoDisplay() {}
-
-  bool m_noDisplay;
-};
-
-class SEISOPDescription : public SEI
-{
-public:
-  PayloadType payloadType() const { return SOP_DESCRIPTION; }
-
-  SEISOPDescription() {}
-  virtual ~SEISOPDescription() {}
-
-  uint32_t m_sopSeqParameterSetId;
-  uint32_t m_numPicsInSopMinus1;
-
-  uint32_t m_sopDescVclNaluType[MAX_NUM_PICS_IN_SOP];
-  uint32_t m_sopDescTemporalId[MAX_NUM_PICS_IN_SOP];
-  uint32_t m_sopDescStRpsIdx[MAX_NUM_PICS_IN_SOP];
-  int m_sopDescPocDelta[MAX_NUM_PICS_IN_SOP];
-};
-
-class SEIToneMappingInfo : public SEI
-{
-public:
-  PayloadType payloadType() const { return TONE_MAPPING_INFO; }
-  SEIToneMappingInfo() {}
-  virtual ~SEIToneMappingInfo() {}
-
-  int    m_toneMapId;
-  bool   m_toneMapCancelFlag;
-  bool   m_toneMapPersistenceFlag;
-  int    m_codedDataBitDepth;
-  int    m_targetBitDepth;
-  int    m_modelId;
-  int    m_minValue;
-  int    m_maxValue;
-  int    m_sigmoidMidpoint;
-  int    m_sigmoidWidth;
-  std::vector<int> m_startOfCodedInterval;
-  int    m_numPivots;
-  std::vector<int> m_codedPivotValue;
-  std::vector<int> m_targetPivotValue;
-  int    m_cameraIsoSpeedIdc;
-  int    m_cameraIsoSpeedValue;
-  int    m_exposureIndexIdc;
-  int    m_exposureIndexValue;
-  bool   m_exposureCompensationValueSignFlag;
-  int    m_exposureCompensationValueNumerator;
-  int    m_exposureCompensationValueDenomIdc;
-  int    m_refScreenLuminanceWhite;
-  int    m_extendedRangeWhiteLevel;
-  int    m_nominalBlackLevelLumaCodeValue;
-  int    m_nominalWhiteLevelLumaCodeValue;
-  int    m_extendedWhiteLevelLumaCodeValue;
-};
-
-class SEIKneeFunctionInfo : public SEI
-{
-public:
-  PayloadType payloadType() const { return KNEE_FUNCTION_INFO; }
-  SEIKneeFunctionInfo() {}
-  virtual ~SEIKneeFunctionInfo() {}
-
-  int   m_kneeId;
-  bool  m_kneeCancelFlag;
-  bool  m_kneePersistenceFlag;
-  int   m_kneeInputDrange;
-  int   m_kneeInputDispLuminance;
-  int   m_kneeOutputDrange;
-  int   m_kneeOutputDispLuminance;
-  int   m_kneeNumKneePointsMinus1;
-  std::vector<int> m_kneeInputKneePoint;
-  std::vector<int> m_kneeOutputKneePoint;
-};
-
-class SEIColourRemappingInfo : public SEI
-{
-public:
-
-  struct CRIlut
-  {
-    int codedValue;
-    int targetValue;
-    bool operator < (const CRIlut& a) const
-    {
-      return codedValue < a.codedValue;
-    }
-  };
-
-  PayloadType payloadType() const { return COLOUR_REMAPPING_INFO; }
-  SEIColourRemappingInfo() {}
-  ~SEIColourRemappingInfo() {}
-
-  void copyFrom( const SEIColourRemappingInfo &seiCriInput)
-  {
-    (*this) = seiCriInput;
-  }
-
-  uint32_t                m_colourRemapId;
-  bool                m_colourRemapCancelFlag;
-  bool                m_colourRemapPersistenceFlag;
-  bool                m_colourRemapVideoSignalInfoPresentFlag;
-  bool                m_colourRemapFullRangeFlag;
-  int                 m_colourRemapPrimaries;
-  int                 m_colourRemapTransferFunction;
-  int                 m_colourRemapMatrixCoefficients;
-  int                 m_colourRemapInputBitDepth;
-  int                 m_colourRemapBitDepth;
-  int                 m_preLutNumValMinus1[3];
-  std::vector<CRIlut> m_preLut[3];
-  bool                m_colourRemapMatrixPresentFlag;
-  int                 m_log2MatrixDenom;
-  int                 m_colourRemapCoeffs[3][3];
-  int                 m_postLutNumValMinus1[3];
-  std::vector<CRIlut> m_postLut[3];
-};
-
-class SEIChromaResamplingFilterHint : public SEI
-{
-public:
-  PayloadType payloadType() const {return CHROMA_RESAMPLING_FILTER_HINT;}
-  SEIChromaResamplingFilterHint() {}
-  virtual ~SEIChromaResamplingFilterHint() {}
-
-  int                            m_verChromaFilterIdc;
-  int                            m_horChromaFilterIdc;
-  bool                           m_verFilteringFieldProcessingFlag;
-  int                            m_targetFormatIdc;
-  bool                           m_perfectReconstructionFlag;
-  std::vector<std::vector<int> > m_verFilterCoeff;
-  std::vector<std::vector<int> > m_horFilterCoeff;
-};
-
 class SEIMasteringDisplayColourVolume : public SEI
 {
 public:
@@ -505,6 +486,7 @@ SEIMessages extractSeisByType(SEIMessages &seiList, SEI::PayloadType seiType);
 /// delete list of SEI messages (freeing the referenced objects)
 void deleteSEIs (SEIMessages &seiList);
 
+#if HEVC_SEI
 class SEIScalableNesting : public SEI
 {
 public:
@@ -543,7 +525,6 @@ public:
   SEITimeSet timeSetArray[MAX_TIMECODE_SEI_SETS];
 };
 
-#if HEVC_TILES_WPP
 //definition according to P1005_v1;
 class SEITempMotionConstrainedTileSets: public SEI
 {
@@ -607,7 +588,7 @@ void xTraceSEIHeader();
 void xTraceSEIMessageType( SEI::PayloadType payloadType );
 #endif
 
-#if U0033_ALTERNATIVE_TRANSFER_CHARACTERISTICS_SEI
+#if U0033_ALTERNATIVE_TRANSFER_CHARACTERISTICS_SEI 
 class SEIAlternativeTransferCharacteristics : public SEI
 {
 public:
@@ -621,20 +602,125 @@ public:
   uint32_t m_preferredTransferCharacteristics;
 };
 #endif
+class SEIUserDataRegistered : public SEI  // SEI message whose payload type is USER_DATA_REGISTERED_ITU_T_T35
+{
+public:
+  PayloadType payloadType() const { return USER_DATA_REGISTERED_ITU_T_T35; }
+
+  SEIUserDataRegistered() {}
+  virtual ~SEIUserDataRegistered() {}
+  // The constructor is empty, so the country code gets an in-class default; the byte vector starts empty on its own.
+  uint16_t m_ituCountryCode = 0;
+  std::vector<uint8_t> m_userData;
+};
+
+class SEIFilmGrainCharacteristics : public SEI  // SEI message whose payload type is FILM_GRAIN_CHARACTERISTICS
+{
+public:
+  PayloadType payloadType() const { return FILM_GRAIN_CHARACTERISTICS; }
+
+  SEIFilmGrainCharacteristics() {}
+  virtual ~SEIFilmGrainCharacteristics() {}
+  // The constructor is empty, so direct scalar members carry in-class defaults rather than indeterminate values.
+  bool        m_filmGrainCharacteristicsCancelFlag   = false;
+  uint8_t     m_filmGrainModelId                     = 0;
+  bool        m_separateColourDescriptionPresentFlag = false;
+  uint8_t     m_filmGrainBitDepthLumaMinus8          = 0;
+  uint8_t     m_filmGrainBitDepthChromaMinus8        = 0;
+  bool        m_filmGrainFullRangeFlag               = false;
+  uint8_t     m_filmGrainColourPrimaries             = 0;
+  uint8_t     m_filmGrainTransferCharacteristics     = 0;
+  uint8_t     m_filmGrainMatrixCoeffs                = 0;
+  uint8_t     m_blendingModeId                       = 0;
+  uint8_t     m_log2ScaleFactor                      = 0;
+
+  struct CompModelIntensityValues  // element type of CompModel::intensityValues; left a plain aggregate
+  {
+    uint8_t intensityIntervalLowerBound;
+    uint8_t intensityIntervalUpperBound;
+    std::vector<int> compModelValue;
+  };
+
+  struct CompModel  // element type of m_compModel; left a plain aggregate, so presentFlag/numModelValues are NOT defaulted here
+  {
+    bool  presentFlag;
+    uint8_t numModelValues;
+    std::vector<CompModelIntensityValues> intensityValues;
+  };
+
+  CompModel m_compModel[MAX_NUM_COMPONENT];  // one model per index up to MAX_NUM_COMPONENT
+  bool      m_filmGrainCharacteristicsPersistenceFlag = false;
+};
+
+class SEIContentLightLevelInfo : public SEI  // SEI message whose payload type is CONTENT_LIGHT_LEVEL_INFO
+{
+public:
+  PayloadType payloadType() const { return CONTENT_LIGHT_LEVEL_INFO; }
+  SEIContentLightLevelInfo() { }  // empty body: defaults come from the member initializers below
+
+  virtual ~SEIContentLightLevelInfo() { }
 
-class SEIGreenMetadataInfo : public SEI
+  uint32_t m_maxContentLightLevel    = 0;
+  uint32_t m_maxPicAverageLightLevel = 0;
+};
+
+class SEIAmbientViewingEnvironment : public SEI  // SEI message whose payload type is AMBIENT_VIEWING_ENVIRONMENT
 {
 public:
-    PayloadType payloadType() const { return GREEN_METADATA; }
-    SEIGreenMetadataInfo() {}
+  PayloadType payloadType() const { return AMBIENT_VIEWING_ENVIRONMENT; }
+  SEIAmbientViewingEnvironment() { }  // empty body: defaults come from the member initializers below
 
-    virtual ~SEIGreenMetadataInfo() {}
+  virtual ~SEIAmbientViewingEnvironment() { }
 
-    uint32_t m_greenMetadataType;
-    uint32_t m_xsdMetricType;
-    uint32_t m_xsdMetricValue;
+  uint32_t m_ambientIlluminance = 0;
+  uint16_t m_ambientLightX      = 0;
+  uint16_t m_ambientLightY      = 0;
+};
+
+class SEIContentColourVolume : public SEI  // SEI message whose payload type is CONTENT_COLOUR_VOLUME
+{
+public:
+  PayloadType payloadType() const { return CONTENT_COLOUR_VOLUME; }
+  SEIContentColourVolume() {}  // empty body: defaults come from the member initializers below
+  virtual ~SEIContentColourVolume() {}
+
+  bool      m_ccvCancelFlag                   = false;
+  bool      m_ccvPersistenceFlag              = false;
+  bool      m_ccvPrimariesPresentFlag         = false;
+  bool      m_ccvMinLuminanceValuePresentFlag = false;
+  bool      m_ccvMaxLuminanceValuePresentFlag = false;
+  bool      m_ccvAvgLuminanceValuePresentFlag = false;
+  int       m_ccvPrimariesX[MAX_NUM_COMPONENT] = {};  // all entries zero
+  int       m_ccvPrimariesY[MAX_NUM_COMPONENT] = {};  // all entries zero
+  uint32_t  m_ccvMinLuminanceValue            = 0;
+  uint32_t  m_ccvMaxLuminanceValue            = 0;
+  uint32_t  m_ccvAvgLuminanceValue            = 0;
 };
 
 #endif
 
+
+class SEISubpicureLevelInfo : public SEI  // SEI message whose payload type is SUBPICTURE_LEVEL_INFO; NOTE(review): class name is spelled "Subpicure"
+{  // NOTE(review): this class is added after the `#endif` that precedes it - confirm the header is still protected against double inclusion
+public:
+  PayloadType payloadType() const { return SUBPICTURE_LEVEL_INFO; }
+  SEISubpicureLevelInfo()  // scalar members are zeroed/cleared; the two vectors are default-constructed (empty)
+  : m_sliSeqParameterSetId(0)
+  , m_numRefLevels(0)
+  , m_explicitFractionPresentFlag (false)
+  {}
+  virtual ~SEISubpicureLevelInfo() {}
+
+  int       m_sliSeqParameterSetId;
+  int       m_numRefLevels;
+  bool      m_explicitFractionPresentFlag;
+  std::vector<Level::Name>      m_refLevelIdc;  // presumably m_numRefLevels entries - TODO confirm against the SEI reader/writer
+  std::vector<std::vector<int>> m_refLevelFraction;  // nested per-level list; inner dimension not determinable from this hunk
+};
+
+
+
+
 //! \}
+
+
diff --git a/source/Lib/CommonLib/SampleAdaptiveOffset.cpp b/source/Lib/CommonLib/SampleAdaptiveOffset.cpp
index 7180629c0ae50656bd64a49f2a1633295a61a223..0953b569a4650468f729db30dff7a300d8d78aad 100644
--- a/source/Lib/CommonLib/SampleAdaptiveOffset.cpp
+++ b/source/Lib/CommonLib/SampleAdaptiveOffset.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -112,6 +112,7 @@ const SAOBlkParam& SAOBlkParam::operator= (const SAOBlkParam& src)
 
 SampleAdaptiveOffset::SampleAdaptiveOffset()
 {
+  m_numberOfComponents = 0;  // give the component count a defined initial value of 0
 }
 
 
@@ -291,7 +292,9 @@ void SampleAdaptiveOffset::xReconstructBlkSAOParams(CodingStructure& cs, SAOBlkP
 
 void SampleAdaptiveOffset::offsetBlock(const int channelBitDepth, const ClpRng& clpRng, int typeIdx, int* offset
                                           , const Pel* srcBlk, Pel* resBlk, int srcStride, int resStride,  int width, int height
-                                          , bool isLeftAvail,  bool isRightAvail, bool isAboveAvail, bool isBelowAvail, bool isAboveLeftAvail, bool isAboveRightAvail, bool isBelowLeftAvail, bool isBelowRightAvail)
+                                          , bool isLeftAvail,  bool isRightAvail, bool isAboveAvail, bool isBelowAvail, bool isAboveLeftAvail, bool isAboveRightAvail, bool isBelowLeftAvail, bool isBelowRightAvail
+                                          , bool isCtuCrossedByVirtualBoundaries, int horVirBndryPos[], int verVirBndryPos[], int numHorVirBndry, int numVerVirBndry
+  )
 {
   int x,y, startX, startY, endX, endY, edgeType;
   int firstLineStartX, firstLineEndX, lastLineStartX, lastLineEndX;
@@ -313,6 +316,11 @@ void SampleAdaptiveOffset::offsetBlock(const int channelBitDepth, const ClpRng&
         for (x=startX; x< endX; x++)
         {
           signRight = (int8_t)sgn(srcLine[x] - srcLine[x+1]);
+          if (isCtuCrossedByVirtualBoundaries && isProcessDisabled(x, y, numVerVirBndry, 0, verVirBndryPos, horVirBndryPos))
+          {
+            signLeft = -signRight;
+            continue;
+          }
           edgeType =  signRight + signLeft;
           signLeft  = -signRight;
 
@@ -351,6 +359,11 @@ void SampleAdaptiveOffset::offsetBlock(const int channelBitDepth, const ClpRng&
         for (x=0; x< width; x++)
         {
           signDown  = (int8_t)sgn(srcLine[x] - srcLineBelow[x]);
+          if (isCtuCrossedByVirtualBoundaries && isProcessDisabled(x, y, 0, numHorVirBndry, verVirBndryPos, horVirBndryPos))
+          {
+            signUpLine[x] = -signDown;
+            continue;
+          }
           edgeType = signDown + signUpLine[x];
           signUpLine[x]= -signDown;
 
@@ -386,6 +399,10 @@ void SampleAdaptiveOffset::offsetBlock(const int channelBitDepth, const ClpRng&
       firstLineEndX   = isAboveAvail? endX: 1;
       for(x= firstLineStartX; x< firstLineEndX; x++)
       {
+        if (isCtuCrossedByVirtualBoundaries && isProcessDisabled(x, 0, numVerVirBndry, numHorVirBndry, verVirBndryPos, horVirBndryPos))
+        {
+          continue;
+        }
         edgeType  =  sgn(srcLine[x] - srcLineAbove[x- 1]) - signUpLine[x+1];
 
         resLine[x] = ClipPel<int>( srcLine[x] + offset[edgeType], clpRng);
@@ -402,6 +419,11 @@ void SampleAdaptiveOffset::offsetBlock(const int channelBitDepth, const ClpRng&
         for (x=startX; x<endX; x++)
         {
           signDown =  (int8_t)sgn(srcLine[x] - srcLineBelow[x+ 1]);
+          if (isCtuCrossedByVirtualBoundaries && isProcessDisabled(x, y, numVerVirBndry, numHorVirBndry, verVirBndryPos, horVirBndryPos))
+          {
+            signDownLine[x + 1] = -signDown;
+            continue;
+          }
           edgeType =  signDown + signUpLine[x];
           resLine[x] = ClipPel<int>( srcLine[x] + offset[edgeType], clpRng);
 
@@ -423,6 +445,10 @@ void SampleAdaptiveOffset::offsetBlock(const int channelBitDepth, const ClpRng&
       lastLineEndX   = isBelowRightAvail ? width : (width -1);
       for(x= lastLineStartX; x< lastLineEndX; x++)
       {
+        if (isCtuCrossedByVirtualBoundaries && isProcessDisabled(x, height - 1, numVerVirBndry, numHorVirBndry, verVirBndryPos, horVirBndryPos))
+        {
+          continue;
+        }
         edgeType =  sgn(srcLine[x] - srcLineBelow[x+ 1]) + signUpLine[x];
         resLine[x] = ClipPel<int>( srcLine[x] + offset[edgeType], clpRng);
 
@@ -451,6 +477,10 @@ void SampleAdaptiveOffset::offsetBlock(const int channelBitDepth, const ClpRng&
       firstLineEndX   = isAboveRightAvail ? width : (width-1);
       for(x= firstLineStartX; x< firstLineEndX; x++)
       {
+        if (isCtuCrossedByVirtualBoundaries && isProcessDisabled(x, 0, numVerVirBndry, numHorVirBndry, verVirBndryPos, horVirBndryPos))
+        {
+          continue;
+        }
         edgeType = sgn(srcLine[x] - srcLineAbove[x+1]) -signUpLine[x-1];
         resLine[x] = ClipPel<int>(srcLine[x] + offset[edgeType], clpRng);
       }
@@ -465,6 +495,11 @@ void SampleAdaptiveOffset::offsetBlock(const int channelBitDepth, const ClpRng&
         for(x= startX; x< endX; x++)
         {
           signDown =  (int8_t)sgn(srcLine[x] - srcLineBelow[x-1]);
+          if (isCtuCrossedByVirtualBoundaries && isProcessDisabled(x, y, numVerVirBndry, numHorVirBndry, verVirBndryPos, horVirBndryPos))
+          {
+            signUpLine[x - 1] = -signDown;
+            continue;
+          }
           edgeType =  signDown + signUpLine[x];
           resLine[x] = ClipPel<int>(srcLine[x] + offset[edgeType], clpRng);
           signUpLine[x-1] = -signDown;
@@ -480,6 +515,10 @@ void SampleAdaptiveOffset::offsetBlock(const int channelBitDepth, const ClpRng&
       lastLineEndX   = isBelowAvail ? endX : 1;
       for(x= lastLineStartX; x< lastLineEndX; x++)
       {
+        if (isCtuCrossedByVirtualBoundaries && isProcessDisabled(x, height - 1, numVerVirBndry, numHorVirBndry, verVirBndryPos, horVirBndryPos))
+        {
+          continue;
+        }
         edgeType = sgn(srcLine[x] - srcLineBelow[x-1]) + signUpLine[x];
         resLine[x] = ClipPel<int>(srcLine[x] + offset[edgeType], clpRng);
 
@@ -535,6 +574,12 @@ void SampleAdaptiveOffset::offsetCTU( const UnitArea& area, const CPelUnitBuf& s
     m_signLineBuf2.resize(lineBufferSize);
   }
 
+  int numHorVirBndry = 0, numVerVirBndry = 0;
+  int horVirBndryPos[] = { -1,-1,-1 };
+  int verVirBndryPos[] = { -1,-1,-1 };
+  int horVirBndryPosComp[] = { -1,-1,-1 };
+  int verVirBndryPosComp[] = { -1,-1,-1 };
+  bool isCtuCrossedByVirtualBoundaries = isCrossedByVirtualBoundaries(area.Y().x, area.Y().y, area.Y().width, area.Y().height, numHorVirBndry, numVerVirBndry, horVirBndryPos, verVirBndryPos, cs.picHeader );
   for(int compIdx = 0; compIdx < numberOfComponents; compIdx++)
   {
     const ComponentID compID = ComponentID(compIdx);
@@ -547,6 +592,14 @@ void SampleAdaptiveOffset::offsetCTU( const UnitArea& area, const CPelUnitBuf& s
       const Pel* srcBlk = src.get(compID).bufAt(compArea);
       int  resStride    = res.get(compID).stride;
       Pel* resBlk       = res.get(compID).bufAt(compArea);
+      for (int i = 0; i < numHorVirBndry; i++)
+      {
+        horVirBndryPosComp[i] = (horVirBndryPos[i] >> ::getComponentScaleY(compID, area.chromaFormat)) - compArea.y;
+      }
+      for (int i = 0; i < numVerVirBndry; i++)
+      {
+        verVirBndryPosComp[i] = (verVirBndryPos[i] >> ::getComponentScaleX(compID, area.chromaFormat)) - compArea.x;
+      }
 
       offsetBlock( cs.sps->getBitDepth(toChannelType(compID)),
                    cs.slice->clpRng(compID),
@@ -556,6 +609,7 @@ void SampleAdaptiveOffset::offsetCTU( const UnitArea& area, const CPelUnitBuf& s
                   , isAboveAvail, isBelowAvail
                   , isAboveLeftAvail, isAboveRightAvail
                   , isBelowLeftAvail, isBelowRightAvail
+                  , isCtuCrossedByVirtualBoundaries, horVirBndryPosComp, verVirBndryPosComp, numHorVirBndry, numVerVirBndry
                   );
     }
   } //compIdx
@@ -608,101 +662,8 @@ void SampleAdaptiveOffset::SAOProcess( CodingStructure& cs, SAOBlkParam* saoBlkP
   DTRACE    ( g_trace_ctx, D_CRC, "SAO" );
   DTRACE_CRC( g_trace_ctx, D_CRC, cs, cs.getRecoBuf() );
 
-  xPCMLFDisableProcess(cs);
-}
-
-void SampleAdaptiveOffset::xPCMLFDisableProcess(CodingStructure& cs)
-{
-  const PreCalcValues& pcv = *cs.pcv;
-  const bool bPCMFilter = (cs.sps->getPCMEnabledFlag() && cs.sps->getPCMFilterDisableFlag()) ? true : false;
-
-  if( bPCMFilter || cs.pps->getTransquantBypassEnabledFlag() )
-  {
-    for( uint32_t yPos = 0; yPos < pcv.lumaHeight; yPos += pcv.maxCUHeight )
-    {
-      for( uint32_t xPos = 0; xPos < pcv.lumaWidth; xPos += pcv.maxCUWidth )
-      {
-        UnitArea ctuArea( cs.area.chromaFormat, Area( xPos, yPos, pcv.maxCUWidth, pcv.maxCUHeight ) );
-
-        // CU-based deblocking
-        xPCMCURestoration(cs, ctuArea);
-      }
-    }
-  }
 }
 
-void SampleAdaptiveOffset::xPCMCURestoration(CodingStructure& cs, const UnitArea &ctuArea)
-{
-  const SPS& sps = *cs.sps;
-  uint32_t numComponents = CS::isDualITree(cs) ? 1 : m_numberOfComponents;
-  for( auto &cu : cs.traverseCUs( ctuArea, CH_L ) )
-  {
-    // restore PCM samples
-    if( ( cu.ipcm && sps.getPCMFilterDisableFlag() ) || CU::isLosslessCoded( cu ) )
-    {
-
-      for( uint32_t comp = 0; comp < numComponents; comp++ )
-      {
-        xPCMSampleRestoration( cu, ComponentID( comp ) );
-      }
-    }
-  }
-  numComponents = m_numberOfComponents;
-  if (CS::isDualITree(cs) && numComponents)
-  {
-    for (auto &cu : cs.traverseCUs(ctuArea, CH_C))
-    {
-      // restore PCM samples
-      if ((cu.ipcm && sps.getPCMFilterDisableFlag()) || CU::isLosslessCoded(cu))
-      {
-        for (uint32_t comp = 1; comp < numComponents; comp++)
-        {
-          xPCMSampleRestoration(cu, ComponentID(comp));
-        }
-      }
-    }
-  }
-}
-
-void SampleAdaptiveOffset::xPCMSampleRestoration(CodingUnit& cu, const ComponentID compID)
-{
-  const CompArea& ca = cu.block(compID);
-
-  if( CU::isLosslessCoded( cu ) && !cu.ipcm )
-  {
-    for( auto &currTU : CU::traverseTUs( cu ) )
-    {
-      const CPelBuf& pcmBuf = currTU.getPcmbuf( compID );
-             PelBuf dstBuf  = cu.cs->getRecoBuf( currTU.block(compID) );
-
-      dstBuf.copyFrom( pcmBuf );
-      if (cu.slice->getReshapeInfo().getUseSliceReshaper() && isLuma(compID))
-      {
-        dstBuf.rspSignal(m_pcReshape->getInvLUT());
-      }
-    }
-
-    return;
-  }
-
-  const TransformUnit& tu = *cu.firstTU; CHECK( cu.firstTU != cu.lastTU, "Multiple TUs present in a PCM CU" );
-  const CPelBuf& pcmBuf   = tu.getPcmbuf( compID );
-         PelBuf dstBuf    = cu.cs->getRecoBuf( ca );
-  const SPS &sps = *cu.cs->sps;
-  const uint32_t uiPcmLeftShiftBit = sps.getBitDepth(toChannelType(compID)) - sps.getPCMBitDepth(toChannelType(compID));
-
-  for (uint32_t y = 0; y < ca.height; y++)
-  {
-    for (uint32_t x = 0; x < ca.width; x++)
-    {
-      dstBuf.at(x,y) = (pcmBuf.at(x,y) << uiPcmLeftShiftBit);
-    }
-  }
-  if (cu.slice->getReshapeInfo().getUseSliceReshaper() && isLuma(compID))
-  {
-    dstBuf.rspSignal(m_pcReshape->getInvLUT());
-  }
-}
 
 void SampleAdaptiveOffset::deriveLoopFilterBoundaryAvailibility(CodingStructure& cs, const Position &pos,
   bool& isLeftAvail,
@@ -728,43 +689,30 @@ void SampleAdaptiveOffset::deriveLoopFilterBoundaryAvailibility(CodingStructure&
   const CodingUnit* cuBelowRight = cs.getCU(pos.offset(width, height), CH_L);
 
   // check cross slice flags
+  const bool isLoopFilterAcrossSlicePPS = cs.pps->getLoopFilterAcrossSlicesEnabledFlag();
+  if (!isLoopFilterAcrossSlicePPS)
   {
-    //left
-    isLeftAvail       = (cuLeft != NULL)       ? ( !CU::isSameSlice(*cuCurr, *cuLeft)       ? cuCurr->slice->getLFCrossSliceBoundaryFlag()       : true ) : false;
-
-    //above
-    isAboveAvail      = (cuAbove != NULL)      ? ( !CU::isSameSlice(*cuCurr, *cuAbove)      ? cuCurr->slice->getLFCrossSliceBoundaryFlag()       : true ) : false;
-
-    //right
-    isRightAvail      = (cuRight != NULL)      ? ( !CU::isSameSlice(*cuCurr, *cuRight)      ? cuRight->slice->getLFCrossSliceBoundaryFlag()      : true ) : false;
-
-    //below
-    isBelowAvail      = (cuBelow != NULL)      ? ( !CU::isSameSlice(*cuCurr, *cuBelow)      ? cuBelow->slice->getLFCrossSliceBoundaryFlag()      : true ) : false;
-
-    //above-left
-    isAboveLeftAvail  = (cuAboveLeft != NULL)  ? ( !CU::isSameSlice(*cuCurr, *cuAboveLeft)  ? cuCurr->slice->getLFCrossSliceBoundaryFlag()       : true ) : false;
-
-    //below-right
-    isBelowRightAvail = (cuBelowRight != NULL) ? ( !CU::isSameSlice(*cuCurr, *cuBelowRight) ? cuBelowRight->slice->getLFCrossSliceBoundaryFlag() : true ) : false;
-
-    //above-right
-    isAboveRightAvail = false;
-    if (cuAboveRight != NULL)
-    {
-      const bool bLFCrossSliceBoundaryFlag = (cuCurr->slice->getSliceCurStartCtuTsAddr() > cuAboveRight->slice->getSliceCurStartCtuTsAddr()) ? cuCurr->slice->getLFCrossSliceBoundaryFlag() : cuAboveRight->slice->getLFCrossSliceBoundaryFlag();
-      isAboveRightAvail = ( !CU::isSameSlice(*cuCurr, *cuAboveRight) ) ? bLFCrossSliceBoundaryFlag : true;
-    }
-
-    //below-left
-    isBelowLeftAvail = false;
-    if (cuBelowLeft != NULL)
-    {
-      const bool bLFCrossSliceBoundaryFlag = (cuCurr->slice->getSliceCurStartCtuTsAddr() > cuBelowLeft->slice->getSliceCurStartCtuTsAddr()) ? cuCurr->slice->getLFCrossSliceBoundaryFlag() : cuBelowLeft->slice->getLFCrossSliceBoundaryFlag();
-      isBelowLeftAvail = ( !CU::isSameSlice(*cuCurr, *cuBelowLeft) ) ? bLFCrossSliceBoundaryFlag : true;
-    }
+    isLeftAvail       = (cuLeft == NULL)       ? false : CU::isSameSlice(*cuCurr, *cuLeft);
+    isAboveAvail      = (cuAbove == NULL)      ? false : CU::isSameSlice(*cuCurr, *cuAbove);
+    isRightAvail      = (cuRight == NULL)      ? false : CU::isSameSlice(*cuCurr, *cuRight);
+    isBelowAvail      = (cuBelow == NULL)      ? false : CU::isSameSlice(*cuCurr, *cuBelow);
+    isAboveLeftAvail  = (cuAboveLeft == NULL)  ? false : CU::isSameSlice(*cuCurr, *cuAboveLeft);
+    isAboveRightAvail = (cuAboveRight == NULL) ? false : CU::isSameSlice(*cuCurr, *cuAboveRight);
+    isBelowLeftAvail  = (cuBelowLeft == NULL)  ? false : CU::isSameSlice(*cuCurr, *cuBelowLeft);
+    isBelowRightAvail = (cuBelowRight == NULL) ? false : CU::isSameSlice(*cuCurr, *cuBelowRight);
+  }
+  else
+  {
+    isLeftAvail       = (cuLeft != NULL);
+    isAboveAvail      = (cuAbove != NULL);
+    isRightAvail      = (cuRight != NULL);
+    isBelowAvail      = (cuBelow != NULL);
+    isAboveLeftAvail  = (cuAboveLeft != NULL);
+    isAboveRightAvail = (cuAboveRight != NULL);
+    isBelowLeftAvail  = (cuBelowLeft != NULL);
+    isBelowRightAvail = (cuBelowRight != NULL);
   }
 
-#if HEVC_TILES_WPP
   // check cross tile flags
   const bool isLoopFilterAcrossTilePPS = cs.pps->getLoopFilterAcrossTilesEnabledFlag();
   if (!isLoopFilterAcrossTilePPS)
@@ -778,7 +726,28 @@ void SampleAdaptiveOffset::deriveLoopFilterBoundaryAvailibility(CodingStructure&
     isBelowLeftAvail  = (!isBelowLeftAvail)  ? false : CU::isSameTile(*cuCurr, *cuBelowLeft);
     isBelowRightAvail = (!isBelowRightAvail) ? false : CU::isSameTile(*cuCurr, *cuBelowRight);
   }
-#endif
 }
 
+bool SampleAdaptiveOffset::isCrossedByVirtualBoundaries(const int xPos, const int yPos, const int width, const int height, int& numHorVirBndry, int& numVerVirBndry, int horVirBndryPos[], int verVirBndryPos[], const PicHeader* picHeader )
+{
+  numHorVirBndry = 0; numVerVirBndry = 0;  // both output counts restart from zero on every call
+  if (!picHeader->getLoopFilterAcrossVirtualBoundariesDisabledFlag()) { return false; }  // nothing is collected unless the flag is set
+  for (int hIdx = 0; hIdx < picHeader->getNumHorVirtualBoundaries(); hIdx++)
+  {
+    const auto bndryY = picHeader->getVirtualBoundariesPosY(hIdx);
+    if (yPos <= bndryY && bndryY <= yPos + height)  // range test is inclusive at both ends
+    {
+      horVirBndryPos[numHorVirBndry++] = bndryY;
+    }
+  }
+  for (int vIdx = 0; vIdx < picHeader->getNumVerVirtualBoundaries(); vIdx++)
+  {
+    const auto bndryX = picHeader->getVirtualBoundariesPosX(vIdx);
+    if (xPos <= bndryX && bndryX <= xPos + width)  // range test is inclusive at both ends
+    {
+      verVirBndryPos[numVerVirBndry++] = bndryX;
+    }
+  }
+  return numHorVirBndry != 0 || numVerVirBndry != 0;  // true when at least one boundary was stored
+}
 //! \}
diff --git a/source/Lib/CommonLib/SampleAdaptiveOffset.h b/source/Lib/CommonLib/SampleAdaptiveOffset.h
index d3141d56ed63f1141f1eb9d08b7883691ef2364e..b8b47d48f7bb27590bb75e2ed1a8b1a7a0d84cbd 100644
--- a/source/Lib/CommonLib/SampleAdaptiveOffset.h
+++ b/source/Lib/CommonLib/SampleAdaptiveOffset.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -84,15 +84,36 @@ protected:
     ) const;
 
   void offsetBlock(const int channelBitDepth, const ClpRng& clpRng, int typeIdx, int* offset, const Pel* srcBlk, Pel* resBlk, int srcStride, int resStride,  int width, int height
-                  , bool isLeftAvail, bool isRightAvail, bool isAboveAvail, bool isBelowAvail, bool isAboveLeftAvail, bool isAboveRightAvail, bool isBelowLeftAvail, bool isBelowRightAvail);
+                  , bool isLeftAvail, bool isRightAvail, bool isAboveAvail, bool isBelowAvail, bool isAboveLeftAvail, bool isAboveRightAvail, bool isBelowLeftAvail, bool isBelowRightAvail
+                  , bool isCtuCrossedByVirtualBoundaries, int horVirBndryPos[], int verVirBndryPos[], int numHorVirBndry, int numVerVirBndry
+    );
   void invertQuantOffsets(ComponentID compIdx, int typeIdc, int typeAuxInfo, int* dstOffsets, int* srcOffsets);
   void reconstructBlkSAOParam(SAOBlkParam& recParam, SAOBlkParam* mergeList[NUM_SAO_MERGE_TYPES]);
   int  getMergeList(CodingStructure& cs, int ctuRsAddr, SAOBlkParam* blkParams, SAOBlkParam* mergeList[NUM_SAO_MERGE_TYPES]);
   void offsetCTU(const UnitArea& area, const CPelUnitBuf& src, PelUnitBuf& res, SAOBlkParam& saoblkParam, CodingStructure& cs);
-  void xPCMLFDisableProcess(CodingStructure& cs);
-  void xPCMCURestoration(CodingStructure& cs, const UnitArea &ctuArea);
-  void xPCMSampleRestoration(CodingUnit& cu, const ComponentID compID);
   void xReconstructBlkSAOParams(CodingStructure& cs, SAOBlkParam* saoBlkParams);
+  bool isCrossedByVirtualBoundaries(const int xPos, const int yPos, const int width, const int height, int& numHorVirBndry, int& numVerVirBndry, int horVirBndryPos[], int verVirBndryPos[], const PicHeader* picHeader);
+  inline bool isProcessDisabled(int xPos, int yPos, int numVerVirBndry, int numHorVirBndry, int verVirBndryPos[], int horVirBndryPos[])
+  {
+    // True when (xPos, yPos) lies on a listed boundary column/row or on the column/row directly before it.
+    for (int v = 0; v < numVerVirBndry; v++)
+    {
+      const int bndryX = verVirBndryPos[v];
+      if (xPos == bndryX || xPos == bndryX - 1)
+      {
+        return true;
+      }
+    }
+    for (int h = 0; h < numHorVirBndry; h++)
+    {
+      const int bndryY = horVirBndryPos[h];
+      if (yPos == bndryY || yPos == bndryY - 1)
+      {
+        return true;
+      }
+    }
+    return false;
+  }
   Reshape* m_pcReshape;
 protected:
   uint32_t m_offsetStepLog2[MAX_NUM_COMPONENT]; //offset step
diff --git a/source/Lib/CommonLib/Slice.cpp b/source/Lib/CommonLib/Slice.cpp
index f6c45dfc631bffffa8465aadf707495e003dc731..4281fecbeb0209d48fb0ccf6fd84590ff62ef433 100644
--- a/source/Lib/CommonLib/Slice.cpp
+++ b/source/Lib/CommonLib/Slice.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -47,97 +47,49 @@
 //! \{
 
 Slice::Slice()
-: m_iPPSId                        ( -1 )
-, m_PicOutputFlag                 ( true )
-, m_iPOC                          ( 0 )
+: m_iPOC                          ( 0 )
 , m_iLastIDR                      ( 0 )
 , m_iAssociatedIRAP               ( 0 )
 , m_iAssociatedIRAPType           ( NAL_UNIT_INVALID )
-, m_pRPS                          ( 0 )
-, m_localRPS                      ( )
-, m_rpsIdx                        ( 0 )
-, m_RefPicListModification        ( )
+, m_rpl0Idx                       ( -1 )
+, m_rpl1Idx                       ( -1 )
 , m_eNalUnitType                  ( NAL_UNIT_CODED_SLICE_IDR_W_RADL )
 , m_eSliceType                    ( I_SLICE )
 , m_iSliceQp                      ( 0 )
-#if HEVC_DEPENDENT_SLICES
-, m_dependentSliceSegmentFlag     ( false )
-#endif
 , m_ChromaQpAdjEnabled            ( false )
 , m_deblockingFilterDisable       ( false )
 , m_deblockingFilterOverrideFlag  ( false )
 , m_deblockingFilterBetaOffsetDiv2( 0 )
 , m_deblockingFilterTcOffsetDiv2  ( 0 )
 , m_pendingRasInit                ( false )
-, m_depQuantEnabledFlag           ( false )
-#if HEVC_USE_SIGN_HIDING
-, m_signDataHidingEnabledFlag     ( false )
-#endif
 , m_bCheckLDC                     ( false )
 , m_biDirPred                    ( false )
 , m_iSliceQpDelta                 ( 0 )
 , m_iDepth                        ( 0 )
-#if HEVC_VPS
-, m_pcVPS                         ( NULL )
-#endif
+, m_dps                           ( nullptr )
 , m_pcSPS                         ( NULL )
 , m_pcPPS                         ( NULL )
 , m_pcPic                         ( NULL )
+, m_pcPicHeader                   ( NULL )
 , m_colFromL0Flag                 ( true )
-, m_noOutputPriorPicsFlag         ( false )
-, m_noRaslOutputFlag              ( false )
+, m_noIncorrectPicOutputFlag      ( false )
 , m_handleCraAsCvsStartFlag            ( false )
 , m_colRefIdx                     ( 0 )
-, m_maxNumMergeCand               ( 0 )
-, m_maxNumAffineMergeCand         ( 0 )
-, m_disFracMMVD                   ( false )
 , m_uiTLayer                      ( 0 )
 , m_bTLayerSwitchingFlag          ( false )
-, m_sliceMode                     ( NO_SLICES )
-, m_sliceArgument                 ( 0 )
-, m_sliceCurStartCtuTsAddr        ( 0 )
-, m_sliceCurEndCtuTsAddr          ( 0 )
 , m_independentSliceIdx           ( 0 )
-#if HEVC_DEPENDENT_SLICES
-, m_sliceSegmentIdx               ( 0 )
-, m_sliceSegmentMode              ( NO_SLICES )
-, m_sliceSegmentArgument          ( 0 )
-, m_sliceSegmentCurStartCtuTsAddr ( 0 )
-, m_sliceSegmentCurEndCtuTsAddr   ( 0 )
-#endif
 , m_nextSlice                     ( false )
-#if HEVC_DEPENDENT_SLICES
-, m_nextSliceSegment              ( false )
-#endif
 , m_sliceBits                     ( 0 )
-#if HEVC_DEPENDENT_SLICES
-, m_sliceSegmentBits              ( 0 )
-#endif
 , m_bFinalized                    ( false )
 , m_bTestWeightPred               ( false )
 , m_bTestWeightBiPred             ( false )
 , m_substreamSizes                ( )
+, m_numEntryPoints                ( 0 )
 , m_cabacInitFlag                 ( false )
-, m_bLMvdL1Zero                   ( false )
-#if !JVET_M0101_HLS
-, m_temporalLayerNonReferenceFlag ( false )
-#endif
-, m_LFCrossSliceBoundaryFlag      ( false )
-, m_enableTMVPFlag                ( true )
+ , m_sliceSubPicId               ( 0 )
 , m_encCABACTableIdx              (I_SLICE)
 , m_iProcessingStartTime          ( 0 )
 , m_dProcessingTime               ( 0 )
-, m_splitConsOverrideFlag         ( false )
-, m_uiMinQTSize                   ( 0 )
-, m_uiMaxBTDepth                  ( 0 )
-, m_uiMaxTTSize                   ( 0 )
-, m_uiMinQTSizeIChroma            ( 0 )
-, m_uiMaxBTDepthIChroma           ( 0 )
-, m_uiMaxBTSizeIChroma            ( 0 )
-, m_uiMaxTTSizeIChroma            ( 0 )
-, m_uiMaxBTSize                   ( 0 )
-, m_apsId                        ( -1 )
-, m_aps                          (NULL)
 {
   for(uint32_t i=0; i<NUM_REF_PIC_LIST_01; i++)
   {
@@ -149,6 +101,7 @@ Slice::Slice()
     m_lambdas            [component] = 0.0;
     m_iSliceChromaQpDelta[component] = 0;
   }
+  m_iSliceChromaQpDelta[JOINT_CbCr] = 0;
 
   initEqualRef();
 
@@ -174,16 +127,14 @@ Slice::Slice()
     m_saoEnabledFlag[ch] = false;
   }
 
-  m_sliceReshapeInfo.setUseSliceReshaper(false);
-  m_sliceReshapeInfo.setSliceReshapeModelPresentFlag(false);
-  m_sliceReshapeInfo.setSliceReshapeChromaAdj(0);
-  m_sliceReshapeInfo.reshaperModelMinBinIdx = 0;
-  m_sliceReshapeInfo.reshaperModelMaxBinIdx = PIC_CODE_CW_BINS - 1;
-  memset(m_sliceReshapeInfo.reshaperModelBinCWDelta, 0, PIC_CODE_CW_BINS * sizeof(int));
+  memset(m_alfApss, 0, sizeof(m_alfApss));
+
+  m_sliceMap.initSliceMap();
 }
 
 Slice::~Slice()
 {
+  m_sliceMap.initSliceMap();
 }
 
 
@@ -208,16 +159,79 @@ void Slice::initSlice()
   {
     m_iSliceChromaQpDelta[component] = 0;
   }
+  m_iSliceChromaQpDelta[JOINT_CbCr] = 0;
 
-  m_maxNumMergeCand = MRG_MAX_NUM_CANDS;
-  m_maxNumAffineMergeCand = AFFINE_MRG_MAX_NUM_CANDS;
 
   m_bFinalized=false;
 
-  m_disFracMMVD          = false;
   m_substreamSizes.clear();
   m_cabacInitFlag        = false;
-  m_enableTMVPFlag       = true;
+  m_enableDRAPSEI        = false;
+  m_useLTforDRAP         = false;
+  m_isDRAP               = false;
+  m_latestDRAPPOC        = MAX_INT;
+  resetTileGroupAlfEnabledFlag();
+}
+
+void Slice::inheritFromPicHeader( PicHeader *picHeader, const PPS *pps, const SPS *sps )
+{ // Copies picture-header RPL, deblocking, SAO and ALF settings into this slice; 'pps' is not referenced in this body.
+  if(picHeader->getPicRplPresentFlag()) // reference picture lists are taken over only when the picture header reports them present
+  {
+    setRPL0idx( picHeader->getRPL0idx() );
+    *getLocalRPL0() = *picHeader->getLocalRPL0(); // assigns the header's local list 0 object to the slice's local list 0
+    if(getRPL0idx() != -1) // any index other than -1 selects an entry of the SPS list 0 table
+    {
+      setRPL0(sps->getRPLList0()->getReferencePictureList(getRPL0idx()));
+    }
+    else // index -1: point at the local list assigned above
+    {
+      setRPL0(getLocalRPL0());
+    }
+    
+    setRPL1idx( picHeader->getRPL1idx() );
+    *getLocalRPL1() = *picHeader->getLocalRPL1(); // assigns the header's local list 1 object to the slice's local list 1
+    if(getRPL1idx() != -1) // any index other than -1 selects an entry of the SPS list 1 table
+    {
+      setRPL1(sps->getRPLList1()->getReferencePictureList(getRPL1idx()));
+    }
+    else // index -1: point at the local list assigned above
+    {
+      setRPL1(getLocalRPL1());
+    }
+  }
+
+  setDeblockingFilterDisable( picHeader->getDeblockingFilterDisable() ); // deblocking values are copied unconditionally
+  setDeblockingFilterBetaOffsetDiv2( picHeader->getDeblockingFilterBetaOffsetDiv2() );
+  setDeblockingFilterTcOffsetDiv2( picHeader->getDeblockingFilterTcOffsetDiv2() );
+
+  setSaoEnabledFlag(CHANNEL_TYPE_LUMA,     picHeader->getSaoEnabledFlag(CHANNEL_TYPE_LUMA)); // SAO flags are copied per channel type
+  setSaoEnabledFlag(CHANNEL_TYPE_CHROMA,   picHeader->getSaoEnabledFlag(CHANNEL_TYPE_CHROMA));
+
+  setTileGroupAlfEnabledFlag(COMPONENT_Y,  picHeader->getAlfEnabledFlag(COMPONENT_Y)); // ALF enable flags are copied per component
+  setTileGroupAlfEnabledFlag(COMPONENT_Cb, picHeader->getAlfEnabledFlag(COMPONENT_Cb));
+  setTileGroupAlfEnabledFlag(COMPONENT_Cr, picHeader->getAlfEnabledFlag(COMPONENT_Cr));
+  setTileGroupNumAps(picHeader->getNumAlfAps());
+  setAlfAPSs(picHeader->getAlfAPSs());
+  setTileGroupApsIdChroma(picHeader->getAlfApsIdChroma());   
+}
+
+void  Slice::setNumEntryPoints( const PPS *pps )
+{
+  m_numEntryPoints = 0;
+
+  // Scan the slice's CTUs from index 1 on (the first CTU is never counted) and count each one that sits on a
+  // tile column boundary and, in addition, either sits on a tile row boundary or entropy coding sync is enabled.
+  for( uint32_t idxInSlice = 1; idxInSlice < m_sliceMap.getNumCtuInSlice(); idxInSlice++ )
+  {
+    const uint32_t rsAddr = m_sliceMap.getCtuAddrInSlice( idxInSlice );
+    const uint32_t ctuCol = ( rsAddr % pps->getPicWidthInCtu() );
+    const uint32_t ctuRow = ( rsAddr / pps->getPicWidthInCtu() );
+    const bool isEntryPoint = pps->ctuIsTileColBd( ctuCol ) && ( pps->ctuIsTileRowBd( ctuRow ) || pps->getEntropyCodingSyncEnabledFlag() );
+    if( isEntryPoint )
+    {
+      m_numEntryPoints++;
+    }
+  }
 }
 
 void Slice::setDefaultClpRng( const SPS& sps )
@@ -237,11 +251,6 @@ bool Slice::getRapPicFlag() const
 {
   return getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_W_RADL
       || getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_N_LP
-#if !JVET_M0101_HLS
-      || getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_N_LP
-      || getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_W_RADL
-      || getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_W_LP
-#endif
     || getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA;
 }
 
@@ -284,14 +293,14 @@ void  Slice::sortPicList        (PicList& rcListPic)
   }
 }
 
-Picture* Slice::xGetRefPic (PicList& rcListPic, int poc)
+Picture* Slice::xGetRefPic( PicList& rcListPic, int poc, const int layerId )
 {
   PicList::iterator  iterPic = rcListPic.begin();
   Picture*           pcPic   = *(iterPic);
 
   while ( iterPic != rcListPic.end() )
   {
-    if(pcPic->getPOC() == poc)
+    if( pcPic->getPOC() == poc && pcPic->layerId == layerId )
     {
       break;
     }
@@ -301,8 +310,7 @@ Picture* Slice::xGetRefPic (PicList& rcListPic, int poc)
   return  pcPic;
 }
 
-
-Picture* Slice::xGetLongTermRefPic( PicList& rcListPic, int poc, bool pocHasMsb)
+Picture* Slice::xGetLongTermRefPic( PicList& rcListPic, int poc, bool pocHasMsb, const int layerId )
 {
   PicList::iterator  iterPic = rcListPic.begin();
   Picture*           pcPic   = *(iterPic);
@@ -317,7 +325,7 @@ Picture* Slice::xGetLongTermRefPic( PicList& rcListPic, int poc, bool pocHasMsb)
   while ( iterPic != rcListPic.end() )
   {
     pcPic = *(iterPic);
-    if (pcPic && pcPic->getPOC()!=this->getPOC() && pcPic->referenced)
+    if( pcPic && pcPic->getPOC() != this->getPOC() && pcPic->referenced && pcPic->layerId == layerId )
     {
       int picPoc = pcPic->getPOC();
       if (!pocHasMsb)
@@ -374,181 +382,85 @@ void Slice::setList1IdxToList0Idx()
   }
 }
 
-void Slice::setRefPicList( PicList& rcListPic, bool checkNumPocTotalCurr, bool bCopyL0toL1ErrorCase )
+void Slice::constructRefPicList(PicList& rcListPic)
 {
-  if ( m_eSliceType == I_SLICE)
+  ::memset(m_bIsUsedAsLongTerm, 0, sizeof(m_bIsUsedAsLongTerm));
+  if (m_eSliceType == I_SLICE)
   {
-    ::memset( m_apcRefPicList, 0, sizeof (m_apcRefPicList));
-    ::memset( m_aiNumRefIdx,   0, sizeof ( m_aiNumRefIdx ));
-
-    if (!checkNumPocTotalCurr)
-    {
-      return;
-    }
+    ::memset(m_apcRefPicList, 0, sizeof(m_apcRefPicList));
+    ::memset(m_aiNumRefIdx, 0, sizeof(m_aiNumRefIdx));
+    return;
   }
 
-  Picture*  pcRefPic= NULL;
-  static const uint32_t MAX_NUM_NEGATIVE_PICTURES=16;
-  Picture*  RefPicSetStCurr0[MAX_NUM_NEGATIVE_PICTURES];
-  Picture*  RefPicSetStCurr1[MAX_NUM_NEGATIVE_PICTURES];
-  Picture*  RefPicSetLtCurr[MAX_NUM_NEGATIVE_PICTURES];
-  uint32_t NumPicStCurr0 = 0;
-  uint32_t NumPicStCurr1 = 0;
-  uint32_t NumPicLtCurr = 0;
-  int i;
+  Picture*  pcRefPic = NULL;
+  uint32_t numOfActiveRef = 0;
+  //construct L0
+  numOfActiveRef = getNumRefIdx(REF_PIC_LIST_0);
+  int layerIdx = m_pcPic->cs->vps == nullptr ? 0 : m_pcPic->cs->vps->getGeneralLayerIdx( m_pcPic->layerId );
 
-  for(i=0; i < m_pRPS->getNumberOfNegativePictures(); i++)
+  for (int ii = 0; ii < numOfActiveRef; ii++)
   {
-    if(m_pRPS->getUsed(i))
+    if( m_pRPL0->isInterLayerRefPic( ii ) )
     {
-      pcRefPic = xGetRefPic(rcListPic, getPOC()+m_pRPS->getDeltaPOC(i));
-      pcRefPic->longTerm = false;
-      pcRefPic->extendPicBorder();
-      RefPicSetStCurr0[NumPicStCurr0] = pcRefPic;
-      NumPicStCurr0++;
-    }
-  }
+      CHECK( m_pRPL0->getInterLayerRefPicIdx( ii ) == NOT_VALID, "Wrong ILRP index" );
 
-  for(; i < m_pRPS->getNumberOfNegativePictures()+m_pRPS->getNumberOfPositivePictures(); i++)
-  {
-    if(m_pRPS->getUsed(i))
-    {
-      pcRefPic = xGetRefPic(rcListPic, getPOC()+m_pRPS->getDeltaPOC(i));
-      pcRefPic->longTerm = false;
-      pcRefPic->extendPicBorder();
-      RefPicSetStCurr1[NumPicStCurr1] = pcRefPic;
-      NumPicStCurr1++;
-    }
-  }
+      int refLayerIdx = m_pcPic->cs->vps->getDirectRefLayerIdx( layerIdx, m_pRPL0->getInterLayerRefPicIdx( ii ) );
 
-  for(i = m_pRPS->getNumberOfNegativePictures()+m_pRPS->getNumberOfPositivePictures()+m_pRPS->getNumberOfLongtermPictures()-1; i > m_pRPS->getNumberOfNegativePictures()+m_pRPS->getNumberOfPositivePictures()-1 ; i--)
-  {
-    if(m_pRPS->getUsed(i))
-    {
-      pcRefPic = xGetLongTermRefPic(rcListPic, m_pRPS->getPOC(i), m_pRPS->getCheckLTMSBPresent(i));
+      pcRefPic = xGetRefPic( rcListPic, getPOC(), refLayerIdx );
       pcRefPic->longTerm = true;
-      pcRefPic->extendPicBorder();
-      RefPicSetLtCurr[NumPicLtCurr] = pcRefPic;
-      NumPicLtCurr++;
     }
-    if(pcRefPic==NULL)
-    {
-      pcRefPic = xGetLongTermRefPic(rcListPic, m_pRPS->getPOC(i), m_pRPS->getCheckLTMSBPresent(i));
-    }
-  }
-  // ref_pic_list_init
-  Picture*  rpsCurrList0[MAX_NUM_REF+1];
-  Picture*  rpsCurrList1[MAX_NUM_REF+1];
-  int numPicTotalCurr = NumPicStCurr0 + NumPicStCurr1 + NumPicLtCurr;
-
-  if (checkNumPocTotalCurr)
-  {
-    // The variable NumPocTotalCurr is derived as specified in subclause 7.4.7.2. It is a requirement of bitstream conformance that the following applies to the value of NumPocTotalCurr:
-    // - If the current picture is a BLA or CRA picture, the value of NumPocTotalCurr shall be equal to 0.
-    // - Otherwise, when the current picture contains a P or B slice, the value of NumPocTotalCurr shall not be equal to 0.
-    if (getRapPicFlag())
+    else
+    if (!m_pRPL0->isRefPicLongterm(ii))
     {
-        CHECK(numPicTotalCurr != 0, "Invalid state");
+      pcRefPic = xGetRefPic( rcListPic, getPOC() - m_pRPL0->getRefPicIdentifier( ii ), m_pcPic->layerId );
+      pcRefPic->longTerm = false;
     }
-
-    if (m_eSliceType == I_SLICE)
+    else
     {
-      return;
+      int pocBits = getSPS()->getBitsForPOC();
+      int pocMask = (1 << pocBits) - 1;
+      int ltrpPoc = m_pRPL0->getRefPicIdentifier(ii) & pocMask;
+      ltrpPoc += m_localRPL0.getDeltaPocMSBPresentFlag(ii) ? (pocMask + 1) * m_localRPL0.getDeltaPocMSBCycleLT(ii) : 0;
+      pcRefPic = xGetLongTermRefPic( rcListPic, ltrpPoc, m_localRPL0.getDeltaPocMSBPresentFlag( ii ), m_pcPic->layerId );
+      pcRefPic->longTerm = true;
     }
-
-    CHECK(numPicTotalCurr == 0, "Invalid state");
-    // general tier and level limit:
-    CHECK(numPicTotalCurr > 8, "Invalid state");
+    pcRefPic->extendPicBorder();
+    m_apcRefPicList[REF_PIC_LIST_0][ii] = pcRefPic;
+    m_bIsUsedAsLongTerm[REF_PIC_LIST_0][ii] = pcRefPic->longTerm;
   }
 
-  int cIdx = 0;
-  for ( i=0; i<NumPicStCurr0; i++, cIdx++)
+  //construct L1
+  numOfActiveRef = getNumRefIdx(REF_PIC_LIST_1);
+  for (int ii = 0; ii < numOfActiveRef; ii++)
   {
-    rpsCurrList0[cIdx] = RefPicSetStCurr0[i];
-  }
-  for ( i=0; i<NumPicStCurr1; i++, cIdx++)
-  {
-    rpsCurrList0[cIdx] = RefPicSetStCurr1[i];
-  }
-  for ( i=0; i<NumPicLtCurr;  i++, cIdx++)
-  {
-    rpsCurrList0[cIdx] = RefPicSetLtCurr[i];
-  }
-  CHECK(cIdx != numPicTotalCurr, "Invalid state");
-
-  if (m_eSliceType==B_SLICE)
-  {
-    cIdx = 0;
-    for ( i=0; i<NumPicStCurr1; i++, cIdx++)
-    {
-      rpsCurrList1[cIdx] = RefPicSetStCurr1[i];
-    }
-    for ( i=0; i<NumPicStCurr0; i++, cIdx++)
-    {
-      rpsCurrList1[cIdx] = RefPicSetStCurr0[i];
-    }
-    for ( i=0; i<NumPicLtCurr;  i++, cIdx++)
+    if( m_pRPL1->isInterLayerRefPic( ii ) )
     {
-      rpsCurrList1[cIdx] = RefPicSetLtCurr[i];
-    }
-    CHECK(cIdx != numPicTotalCurr, "Invalid state");
-  }
+      CHECK( m_pRPL1->getInterLayerRefPicIdx( ii ) == NOT_VALID, "Wrong ILRP index" );
 
-  ::memset(m_bIsUsedAsLongTerm, 0, sizeof(m_bIsUsedAsLongTerm));
+      int refLayerIdx = m_pcPic->cs->vps->getDirectRefLayerIdx( layerIdx, m_pRPL1->getInterLayerRefPicIdx( ii ) );
 
-  for (int rIdx = 0; rIdx < m_aiNumRefIdx[REF_PIC_LIST_0]; rIdx ++)
-  {
-    cIdx = m_RefPicListModification.getRefPicListModificationFlagL0() ? m_RefPicListModification.getRefPicSetIdxL0(rIdx) : rIdx % numPicTotalCurr;
-    CHECK(cIdx < 0 || cIdx >= numPicTotalCurr, "Invalid state");
-    m_apcRefPicList[REF_PIC_LIST_0][rIdx] = rpsCurrList0[ cIdx ];
-    m_bIsUsedAsLongTerm[REF_PIC_LIST_0][rIdx] = ( cIdx >= NumPicStCurr0 + NumPicStCurr1 );
-  }
-  if ( m_eSliceType != B_SLICE )
-  {
-    m_aiNumRefIdx[REF_PIC_LIST_1] = 0;
-    ::memset( m_apcRefPicList[REF_PIC_LIST_1], 0, sizeof(m_apcRefPicList[REF_PIC_LIST_1]));
-  }
-  else
-  {
-    for (int rIdx = 0; rIdx < m_aiNumRefIdx[REF_PIC_LIST_1]; rIdx ++)
-    {
-      cIdx = m_RefPicListModification.getRefPicListModificationFlagL1() ? m_RefPicListModification.getRefPicSetIdxL1(rIdx) : rIdx % numPicTotalCurr;
-      CHECK(cIdx < 0 || cIdx >= numPicTotalCurr, "Invalid state");
-      m_apcRefPicList[REF_PIC_LIST_1][rIdx] = rpsCurrList1[ cIdx ];
-      m_bIsUsedAsLongTerm[REF_PIC_LIST_1][rIdx] = ( cIdx >= NumPicStCurr0 + NumPicStCurr1 );
+      pcRefPic = xGetRefPic( rcListPic, getPOC(), refLayerIdx );
+      pcRefPic->longTerm = true;
     }
-  }
-    // For generalized B
-  // note: maybe not existed case (always L0 is copied to L1 if L1 is empty)
-  if( bCopyL0toL1ErrorCase && isInterB() && getNumRefIdx(REF_PIC_LIST_1) == 0)
-  {
-    int iNumRefIdx = getNumRefIdx(REF_PIC_LIST_0);
-    setNumRefIdx( REF_PIC_LIST_1, iNumRefIdx );
-
-    for (int iRefIdx = 0; iRefIdx < iNumRefIdx; iRefIdx++)
+    else
+    if (!m_pRPL1->isRefPicLongterm(ii))
     {
-      m_apcRefPicList[REF_PIC_LIST_1][iRefIdx] = m_apcRefPicList[REF_PIC_LIST_0] [iRefIdx];
+      pcRefPic = xGetRefPic( rcListPic, getPOC() - m_pRPL1->getRefPicIdentifier( ii ), m_pcPic->layerId );
+      pcRefPic->longTerm = false;
     }
-  }
-}
-
-
-int Slice::getNumRpsCurrTempList() const
-{
-  int numRpsCurrTempList = 0;
-
-  if (m_eSliceType == I_SLICE)
-  {
-    return 0;
-  }
-  for(uint32_t i=0; i < m_pRPS->getNumberOfNegativePictures()+ m_pRPS->getNumberOfPositivePictures() + m_pRPS->getNumberOfLongtermPictures(); i++)
-  {
-    if(m_pRPS->getUsed(i))
+    else
     {
-      numRpsCurrTempList++;
+      int pocBits = getSPS()->getBitsForPOC();
+      int pocMask = (1 << pocBits) - 1;
+      int ltrpPoc = m_pRPL1->getRefPicIdentifier(ii) & pocMask;
+      ltrpPoc += m_localRPL1.getDeltaPocMSBPresentFlag(ii) ? (pocMask + 1) * m_localRPL1.getDeltaPocMSBCycleLT(ii) : 0;
+      pcRefPic = xGetLongTermRefPic( rcListPic, ltrpPoc, m_localRPL1.getDeltaPocMSBPresentFlag( ii ), m_pcPic->layerId );
+      pcRefPic->longTerm = true;
     }
+    pcRefPic->extendPicBorder();
+    m_apcRefPicList[REF_PIC_LIST_1][ii] = pcRefPic;
+    m_bIsUsedAsLongTerm[REF_PIC_LIST_1][ii] = pcRefPic->longTerm;
   }
-    return numRpsCurrTempList;
 }
 
 void Slice::initEqualRef()
@@ -589,50 +501,108 @@ void Slice::checkColRefIdx(uint32_t curSliceSegmentIdx, const Picture* pic)
   }
 }
 
-void Slice::checkCRA(const ReferencePictureSet *pReferencePictureSet, int& pocCRA, NalUnitType& associatedIRAPType, PicList& rcListPic)
+void Slice::checkCRA(const ReferencePictureList *pRPL0, const ReferencePictureList *pRPL1, int& pocCRA, NalUnitType& associatedIRAPType, PicList& rcListPic)
 {
-  for(int i = 0; i < pReferencePictureSet->getNumberOfNegativePictures()+pReferencePictureSet->getNumberOfPositivePictures(); i++)
+  if (pocCRA < MAX_UINT && getPOC() > pocCRA)
   {
-    if(pocCRA < MAX_UINT && getPOC() > pocCRA)
+    uint32_t numRefPic = pRPL0->getNumberOfShorttermPictures() + pRPL0->getNumberOfLongtermPictures();
+    for (int i = 0; i < numRefPic; i++)
     {
-      CHECK(getPOC()+pReferencePictureSet->getDeltaPOC(i) < pocCRA, "Invalid state");
+      if (!pRPL0->isRefPicLongterm(i))
+      {
+        CHECK(getPOC() - pRPL0->getRefPicIdentifier(i) < pocCRA, "Invalid state");
+      }
+      else
+      {
+        CHECK( xGetLongTermRefPic( rcListPic, pRPL0->getRefPicIdentifier( i ), pRPL0->getDeltaPocMSBPresentFlag( i ), m_pcPic->layerId )->getPOC() < pocCRA, "Invalid state" );
+      }
     }
-  }
-  for(int i = pReferencePictureSet->getNumberOfNegativePictures()+pReferencePictureSet->getNumberOfPositivePictures(); i < pReferencePictureSet->getNumberOfPictures(); i++)
-  {
-    if(pocCRA < MAX_UINT && getPOC() > pocCRA)
+    numRefPic = pRPL1->getNumberOfShorttermPictures() + pRPL1->getNumberOfLongtermPictures();
+    for (int i = 0; i < numRefPic; i++)
     {
-      if (!pReferencePictureSet->getCheckLTMSBPresent(i))
+      if (!pRPL1->isRefPicLongterm(i))
       {
-        CHECK(xGetLongTermRefPic(rcListPic, pReferencePictureSet->getPOC(i), false)->getPOC() < pocCRA, "Invalid state");
+        CHECK(getPOC() - pRPL1->getRefPicIdentifier(i) < pocCRA, "Invalid state");
       }
-      else
+      else if( !pRPL1->isInterLayerRefPic( i ) )
       {
-        CHECK(pReferencePictureSet->getPOC(i) < pocCRA, "Invalid state");
+        CHECK( xGetLongTermRefPic( rcListPic, pRPL1->getRefPicIdentifier( i ), pRPL1->getDeltaPocMSBPresentFlag( i ), m_pcPic->layerId )->getPOC() < pocCRA, "Invalid state" );
       }
     }
   }
-  if ( getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_W_RADL || getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_N_LP ) // IDR picture found
+  if (getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_W_RADL || getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_N_LP) // IDR picture found
   {
     pocCRA = getPOC();
     associatedIRAPType = getNalUnitType();
   }
-  else if ( getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA ) // CRA picture found
+  else if (getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA) // CRA picture found
   {
     pocCRA = getPOC();
     associatedIRAPType = getNalUnitType();
   }
-#if !JVET_M0101_HLS
-  else if ( getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_W_LP
-         || getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_W_RADL
-         || getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_N_LP ) // BLA picture found
+}
+
+void Slice::checkSTSA(PicList& rcListPic)
+{
+  int ii;
+  Picture* pcRefPic = NULL;
+  int numOfActiveRef = getNumRefIdx(REF_PIC_LIST_0);
+
+  for (ii = 0; ii < numOfActiveRef; ii++)
   {
-    pocCRA = getPOC();
-    associatedIRAPType = getNalUnitType();
+    pcRefPic = m_apcRefPicList[REF_PIC_LIST_0][ii];
+
+    // Checking this: "When the current picture is an STSA picture, there shall be no active entry in RefPicList[ 0 ] or RefPicList[ 1 ] that has TemporalId equal to that of the current picture"
+    if (getNalUnitType() == NAL_UNIT_CODED_SLICE_STSA)
+    {
+      CHECK(pcRefPic->layer == m_uiTLayer, "When the current picture is an STSA picture, there shall be no active entry in the RPL that has TemporalId equal to that of the current picture");
+    }
+
+    // Checking this: "When the current picture is a picture that follows, in decoding order, an STSA picture that has TemporalId equal to that of the current picture, there shall be no
+    // picture that has TemporalId equal to that of the current picture included as an active entry in RefPicList[ 0 ] or RefPicList[ 1 ] that precedes the STSA picture in decoding order."
+    CHECK(pcRefPic->subLayerNonReferencePictureDueToSTSA, "The RPL of the current picture contains a picture that is not allowed in this temporal layer due to an earlier STSA picture");
+  }
+
+  numOfActiveRef = getNumRefIdx(REF_PIC_LIST_1);
+  for (ii = 0; ii < numOfActiveRef; ii++)
+  {
+    pcRefPic = m_apcRefPicList[REF_PIC_LIST_1][ii];
+
+    // Checking this: "When the current picture is an STSA picture, there shall be no active entry in RefPicList[ 0 ] or RefPicList[ 1 ] that has TemporalId equal to that of the current picture"
+    if (getNalUnitType() == NAL_UNIT_CODED_SLICE_STSA)
+    {
+      CHECK(pcRefPic->layer == m_uiTLayer, "When the current picture is an STSA picture, there shall be no active entry in the RPL that has TemporalId equal to that of the current picture");
+    }
+
+    // Checking this: "When the current picture is a picture that follows, in decoding order, an STSA picture that has TemporalId equal to that of the current picture, there shall be no
+    // picture that has TemporalId equal to that of the current picture included as an active entry in RefPicList[ 0 ] or RefPicList[ 1 ] that precedes the STSA picture in decoding order."
+    CHECK(pcRefPic->subLayerNonReferencePictureDueToSTSA, "The active RPL part of the current picture contains a picture that is not allowed in this temporal layer due to an earlier STSA picture");
+  }
+
+  // If the current picture is an STSA picture, make all reference pictures in the DPB with temporal
+  // id equal to the temporal id of the current picture sub-layer non-reference pictures. The flag
+  // subLayerNonReferencePictureDueToSTSA equal to true means that the picture may not be used for
+  // reference by a picture that follows the current STSA picture in decoding order
+  if (getNalUnitType() == NAL_UNIT_CODED_SLICE_STSA)
+  {
+    PicList::iterator iterPic = rcListPic.begin();
+    while (iterPic != rcListPic.end())
+    {
+      pcRefPic = *(iterPic++);
+      if (!pcRefPic->referenced || pcRefPic->getPOC() == m_iPOC)
+      {
+        continue;
+      }
+
+      if (pcRefPic->layer == m_uiTLayer)
+      {
+        pcRefPic->subLayerNonReferencePictureDueToSTSA = true;
+      }
+    }
   }
-#endif
 }
 
+
 /** Function for marking the reference pictures when an IDR/CRA/CRANT/BLA/BLANT is encountered.
  * \param pocCRA POC of the CRA/CRANT/BLA/BLANT picture
  * \param bRefreshPending flag indicating if a deferred decoding refresh is pending
@@ -656,16 +626,8 @@ void Slice::decodingRefreshMarking(int& pocCRA, bool& bRefreshPending, PicList&
   Picture* rpcPic;
   int      pocCurr = getPOC();
 
-#if !JVET_M0101_HLS
-  if ( getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_W_LP
-    || getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_W_RADL
-    || getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_N_LP
-    || getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_W_RADL
-    || getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_N_LP )  // IDR or BLA picture
-#else
   if ( getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_W_RADL
     || getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_N_LP)  // IDR picture
-#endif
   {
     // mark all pictures as not used for reference
     PicList::iterator        iterPic       = rcListPic.begin();
@@ -679,14 +641,6 @@ void Slice::decodingRefreshMarking(int& pocCRA, bool& bRefreshPending, PicList&
       }
       iterPic++;
     }
-#if !JVET_M0101_HLS
-    if ( getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_W_LP
-      || getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_W_RADL
-      || getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_N_LP )
-    {
-      pocCRA = pocCurr;
-    }
-#endif
     if (bEfficientFieldIRAPEnabled)
     {
       bRefreshPending = true;
@@ -776,6 +730,8 @@ void Slice::copySliceInfo(Slice *pSrc, bool cpyAlmostAll)
   {
     m_iSliceChromaQpDelta[component] = pSrc->m_iSliceChromaQpDelta[component];
   }
+  m_iSliceChromaQpDelta[JOINT_CbCr] = pSrc->m_iSliceChromaQpDelta[JOINT_CbCr];
+
   for (i = 0; i < NUM_REF_PIC_LIST_01; i++)
   {
     for (j = 0; j < MAX_NUM_REF; j++)
@@ -789,11 +745,13 @@ void Slice::copySliceInfo(Slice *pSrc, bool cpyAlmostAll)
   if( cpyAlmostAll ) m_iDepth = pSrc->m_iDepth;
 
   // access channel
-  if( cpyAlmostAll ) m_pRPS   = pSrc->m_pRPS;
+  if (cpyAlmostAll) m_pRPL0 = pSrc->m_pRPL0;
+  if (cpyAlmostAll) m_pRPL1 = pSrc->m_pRPL1;
   m_iLastIDR             = pSrc->m_iLastIDR;
 
   if( cpyAlmostAll ) m_pcPic  = pSrc->m_pcPic;
 
+  m_pcPicHeader          = pSrc->m_pcPicHeader;
   m_colFromL0Flag        = pSrc->m_colFromL0Flag;
   m_colRefIdx            = pSrc->m_colRefIdx;
 
@@ -813,22 +771,9 @@ void Slice::copySliceInfo(Slice *pSrc, bool cpyAlmostAll)
   m_uiTLayer                      = pSrc->m_uiTLayer;
   m_bTLayerSwitchingFlag          = pSrc->m_bTLayerSwitchingFlag;
 
-  m_sliceMode                     = pSrc->m_sliceMode;
-  m_sliceArgument                 = pSrc->m_sliceArgument;
-  m_sliceCurStartCtuTsAddr        = pSrc->m_sliceCurStartCtuTsAddr;
-  m_sliceCurEndCtuTsAddr          = pSrc->m_sliceCurEndCtuTsAddr;
+  m_sliceMap                      = pSrc->m_sliceMap;
   m_independentSliceIdx           = pSrc->m_independentSliceIdx;
-#if HEVC_DEPENDENT_SLICES
-  m_sliceSegmentIdx               = pSrc->m_sliceSegmentIdx;
-  m_sliceSegmentMode              = pSrc->m_sliceSegmentMode;
-  m_sliceSegmentArgument          = pSrc->m_sliceSegmentArgument;
-  m_sliceSegmentCurStartCtuTsAddr = pSrc->m_sliceSegmentCurStartCtuTsAddr;
-  m_sliceSegmentCurEndCtuTsAddr   = pSrc->m_sliceSegmentCurEndCtuTsAddr;
-#endif
   m_nextSlice                     = pSrc->m_nextSlice;
-#if HEVC_DEPENDENT_SLICES
-  m_nextSliceSegment              = pSrc->m_nextSliceSegment;
-#endif
   m_clpRngs                       = pSrc->m_clpRngs;
   m_pendingRasInit                = pSrc->m_pendingRasInit;
 
@@ -846,28 +791,21 @@ void Slice::copySliceInfo(Slice *pSrc, bool cpyAlmostAll)
   }
 
   m_cabacInitFlag                 = pSrc->m_cabacInitFlag;
+  memcpy(m_alfApss, pSrc->m_alfApss, sizeof(m_alfApss)); // this might be quite unsafe
+  memcpy( m_tileGroupAlfEnabledFlag, pSrc->m_tileGroupAlfEnabledFlag, sizeof(m_tileGroupAlfEnabledFlag));
+  m_tileGroupNumAps               = pSrc->m_tileGroupNumAps;
+  m_tileGroupLumaApsId            = pSrc->m_tileGroupLumaApsId;
+  m_tileGroupChromaApsId          = pSrc->m_tileGroupChromaApsId;
+  m_disableSATDForRd              = pSrc->m_disableSATDForRd;
 
-  m_bLMvdL1Zero                   = pSrc->m_bLMvdL1Zero;
-  m_LFCrossSliceBoundaryFlag      = pSrc->m_LFCrossSliceBoundaryFlag;
-  m_enableTMVPFlag                = pSrc->m_enableTMVPFlag;
-  m_maxNumMergeCand               = pSrc->m_maxNumMergeCand;
-  m_maxNumAffineMergeCand         = pSrc->m_maxNumAffineMergeCand;
-  m_disFracMMVD                   = pSrc->m_disFracMMVD;
   if( cpyAlmostAll ) m_encCABACTableIdx  = pSrc->m_encCABACTableIdx;
-  m_splitConsOverrideFlag         = pSrc->m_splitConsOverrideFlag;
-  m_uiMinQTSize                   = pSrc->m_uiMinQTSize;
-  m_uiMaxBTDepth                  = pSrc->m_uiMaxBTDepth;
-  m_uiMaxTTSize                   = pSrc->m_uiMaxTTSize;
-  m_uiMinQTSizeIChroma            = pSrc->m_uiMinQTSizeIChroma;
-  m_uiMaxBTDepthIChroma           = pSrc->m_uiMaxBTDepthIChroma;
-  m_uiMaxBTSizeIChroma            = pSrc->m_uiMaxBTSizeIChroma;
-  m_uiMaxTTSizeIChroma            = pSrc->m_uiMaxTTSizeIChroma;
-  m_uiMaxBTSize                   = pSrc->m_uiMaxBTSize;
-
-  m_depQuantEnabledFlag           = pSrc->m_depQuantEnabledFlag;
-  m_signDataHidingEnabledFlag     = pSrc->m_signDataHidingEnabledFlag;
-
-  m_sliceReshapeInfo              = pSrc->m_sliceReshapeInfo;
+  for( int i = 0; i < NUM_REF_PIC_LIST_01; i ++ )
+  {
+    for (int j = 0; j < MAX_NUM_REF_PICS; j ++ )
+    {
+      m_scalingRatio[i][j]          = pSrc->m_scalingRatio[i][j];
+    }
+  }
 }
 
 
@@ -899,7 +837,7 @@ bool Slice::isStepwiseTemporalLayerSwitchingPointCandidate(PicList& rcListPic) c
   while ( iterPic != rcListPic.end())
   {
     const Picture* pcPic = *(iterPic++);
-    if( pcPic->referenced &&  pcPic->usedByCurr && pcPic->poc != getPOC())
+    if( pcPic->referenced && pcPic->poc != getPOC())
     {
       if( pcPic->layer >= getTLayer())
       {
@@ -919,59 +857,25 @@ void Slice::checkLeadingPictureRestrictions(PicList& rcListPic) const
   if(this->getAssociatedIRAPPOC() > this->getPOC())
   {
     // Do not check IRAP pictures since they may get a POC lower than their associated IRAP
-#if !JVET_M0101_HLS
-    if(nalUnitType < NAL_UNIT_CODED_SLICE_BLA_W_LP ||
-       nalUnitType > NAL_UNIT_RESERVED_IRAP_VCL23)
-#else
     if (nalUnitType < NAL_UNIT_CODED_SLICE_IDR_W_RADL ||
-        nalUnitType > NAL_UNIT_RESERVED_IRAP_VCL13)
-#endif
+        nalUnitType > NAL_UNIT_CODED_SLICE_CRA)
     {
-#if !JVET_M0101_HLS
-      CHECK( nalUnitType != NAL_UNIT_CODED_SLICE_RASL_N &&
-             nalUnitType != NAL_UNIT_CODED_SLICE_RASL_R &&
-             nalUnitType != NAL_UNIT_CODED_SLICE_RADL_N &&
-             nalUnitType != NAL_UNIT_CODED_SLICE_RADL_R, "Invalid NAL unit type");
-#else
       CHECK(nalUnitType != NAL_UNIT_CODED_SLICE_RASL &&
             nalUnitType != NAL_UNIT_CODED_SLICE_RADL, "Invalid NAL unit type");
-#endif
     }
   }
 
   // When a picture is a trailing picture, it shall not be a RADL or RASL picture.
   if(this->getAssociatedIRAPPOC() < this->getPOC())
   {
-#if !JVET_M0101_HLS
-    CHECK( nalUnitType == NAL_UNIT_CODED_SLICE_RASL_N ||
-           nalUnitType == NAL_UNIT_CODED_SLICE_RASL_R ||
-           nalUnitType == NAL_UNIT_CODED_SLICE_RADL_N ||
-           nalUnitType == NAL_UNIT_CODED_SLICE_RADL_R, "Invalid NAL unit type" );
-#else
     CHECK(nalUnitType == NAL_UNIT_CODED_SLICE_RASL ||
           nalUnitType == NAL_UNIT_CODED_SLICE_RADL, "Invalid NAL unit type");
-#endif
   }
 
-#if !JVET_M0101_HLS
-  // No RASL pictures shall be present in the bitstream that are associated
-  // with a BLA picture having nal_unit_type equal to BLA_W_RADL or BLA_N_LP.
-  if(nalUnitType == NAL_UNIT_CODED_SLICE_RASL_N ||
-     nalUnitType == NAL_UNIT_CODED_SLICE_RASL_R)
-  {
-    CHECK (this->getAssociatedIRAPType() == NAL_UNIT_CODED_SLICE_BLA_W_RADL ||
-           this->getAssociatedIRAPType() == NAL_UNIT_CODED_SLICE_BLA_N_LP, "Invalid NAL unit type");
-  }
-#endif
 
   // No RASL pictures shall be present in the bitstream that are associated with
   // an IDR picture.
-#if !JVET_M0101_HLS
-  if(nalUnitType == NAL_UNIT_CODED_SLICE_RASL_N ||
-     nalUnitType == NAL_UNIT_CODED_SLICE_RASL_R)
-#else
   if (nalUnitType == NAL_UNIT_CODED_SLICE_RASL)
-#endif
   {
     CHECK( this->getAssociatedIRAPType() == NAL_UNIT_CODED_SLICE_IDR_N_LP   ||
            this->getAssociatedIRAPType() == NAL_UNIT_CODED_SLICE_IDR_W_RADL, "Invalid NAL unit type");
@@ -980,23 +884,14 @@ void Slice::checkLeadingPictureRestrictions(PicList& rcListPic) const
   // No RADL pictures shall be present in the bitstream that are associated with
   // a BLA picture having nal_unit_type equal to BLA_N_LP or that are associated
   // with an IDR picture having nal_unit_type equal to IDR_N_LP.
-#if !JVET_M0101_HLS
-  if(nalUnitType == NAL_UNIT_CODED_SLICE_RADL_N ||
-     nalUnitType == NAL_UNIT_CODED_SLICE_RADL_R)
-#else
   if (nalUnitType == NAL_UNIT_CODED_SLICE_RADL)
-#endif
   {
-#if !JVET_M0101_HLS
-    CHECK (this->getAssociatedIRAPType() == NAL_UNIT_CODED_SLICE_BLA_N_LP   ||
-           this->getAssociatedIRAPType() == NAL_UNIT_CODED_SLICE_IDR_N_LP, "Invalid NAL unit type");
-#else
     CHECK (this->getAssociatedIRAPType() == NAL_UNIT_CODED_SLICE_IDR_N_LP, "Invalid NAL unit type");
-#endif
   }
 
   // loop through all pictures in the reference picture buffer
   PicList::iterator iterPic = rcListPic.begin();
+  int numLeadingPicsFound = 0;
   while ( iterPic != rcListPic.end())
   {
     Picture* pcPic = *(iterPic++);
@@ -1013,20 +908,11 @@ void Slice::checkLeadingPictureRestrictions(PicList& rcListPic) const
     // Any picture that has PicOutputFlag equal to 1 that precedes an IRAP picture
     // in decoding order shall precede the IRAP picture in output order.
     // (Note that any picture following in output order would be present in the DPB)
-    if(pcSlice->getPicOutputFlag() == 1 && !this->getNoOutputPriorPicsFlag())
-    {
-#if !JVET_M0101_HLS
-      if(nalUnitType == NAL_UNIT_CODED_SLICE_BLA_N_LP    ||
-         nalUnitType == NAL_UNIT_CODED_SLICE_BLA_W_LP    ||
-         nalUnitType == NAL_UNIT_CODED_SLICE_BLA_W_RADL  ||
-         nalUnitType == NAL_UNIT_CODED_SLICE_CRA         ||
-         nalUnitType == NAL_UNIT_CODED_SLICE_IDR_N_LP    ||
-         nalUnitType == NAL_UNIT_CODED_SLICE_IDR_W_RADL)
-#else
+    if(pcSlice->getPicHeader()->getPicOutputFlag() == 1 && !this->getPicHeader()->getNoOutputOfPriorPicsFlag())
+    {
       if (nalUnitType == NAL_UNIT_CODED_SLICE_CRA ||
           nalUnitType == NAL_UNIT_CODED_SLICE_IDR_N_LP ||
           nalUnitType == NAL_UNIT_CODED_SLICE_IDR_W_RADL)
-#endif
       {
         CHECK(pcPic->poc >= this->getPOC(), "Invalid POC");
       }
@@ -1035,14 +921,9 @@ void Slice::checkLeadingPictureRestrictions(PicList& rcListPic) const
     // Any picture that has PicOutputFlag equal to 1 that precedes an IRAP picture
     // in decoding order shall precede any RADL picture associated with the IRAP
     // picture in output order.
-    if(pcSlice->getPicOutputFlag() == 1)
+    if(pcSlice->getPicHeader()->getPicOutputFlag() == 1)
     {
-#if !JVET_M0101_HLS
-      if((nalUnitType == NAL_UNIT_CODED_SLICE_RADL_N ||
-          nalUnitType == NAL_UNIT_CODED_SLICE_RADL_R))
-#else
       if (nalUnitType == NAL_UNIT_CODED_SLICE_RADL)
-#endif
       {
         // rpcPic precedes the IRAP in decoding order
         if(this->getAssociatedIRAPPOC() > pcSlice->getAssociatedIRAPPOC())
@@ -1058,49 +939,27 @@ void Slice::checkLeadingPictureRestrictions(PicList& rcListPic) const
 
     // When a picture is a leading picture, it shall precede, in decoding order,
     // all trailing pictures that are associated with the same IRAP picture.
-#if !JVET_M0101_HLS
-    if (nalUnitType == NAL_UNIT_CODED_SLICE_RASL_N ||
-         nalUnitType == NAL_UNIT_CODED_SLICE_RASL_R ||
-         nalUnitType == NAL_UNIT_CODED_SLICE_RADL_N ||
-         nalUnitType == NAL_UNIT_CODED_SLICE_RADL_R)
-#else
-    if (nalUnitType == NAL_UNIT_CODED_SLICE_RASL ||
-        nalUnitType == NAL_UNIT_CODED_SLICE_RADL )
-#endif
+    if ((nalUnitType == NAL_UNIT_CODED_SLICE_RASL || nalUnitType == NAL_UNIT_CODED_SLICE_RADL) &&
+        (pcSlice->getNalUnitType() != NAL_UNIT_CODED_SLICE_RASL && pcSlice->getNalUnitType() != NAL_UNIT_CODED_SLICE_RADL)  )
+    {
+      if (pcSlice->getAssociatedIRAPPOC() == this->getAssociatedIRAPPOC())
       {
-        if(pcSlice->getAssociatedIRAPPOC() == this->getAssociatedIRAPPOC())
-        {
-          // rpcPic is a picture that preceded the leading in decoding order since it exist in the DPB
-          // rpcPic would violate the constraint if it was a trailing picture
-          CHECK( pcPic->poc > this->getAssociatedIRAPPOC(), "Invalid POC");
-        }
+        numLeadingPicsFound++;
+        int limitNonLP = 0;
+        if (pcSlice->getSPS()->getVuiParameters() && pcSlice->getSPS()->getVuiParameters()->getFieldSeqFlag())
+          limitNonLP = 1;
+        CHECK(pcPic->poc > this->getAssociatedIRAPPOC() && numLeadingPicsFound > limitNonLP, "Invalid POC");
       }
+    }
 
     // Any RASL picture associated with a CRA or BLA picture shall precede any
     // RADL picture associated with the CRA or BLA picture in output order
-#if !JVET_M0101_HLS
-    if(nalUnitType == NAL_UNIT_CODED_SLICE_RASL_N ||
-       nalUnitType == NAL_UNIT_CODED_SLICE_RASL_R)
-#else
     if (nalUnitType == NAL_UNIT_CODED_SLICE_RASL)
-#endif
     {
-#if !JVET_M0101_HLS
-      if((this->getAssociatedIRAPType() == NAL_UNIT_CODED_SLICE_BLA_N_LP   ||
-          this->getAssociatedIRAPType() == NAL_UNIT_CODED_SLICE_BLA_W_LP   ||
-          this->getAssociatedIRAPType() == NAL_UNIT_CODED_SLICE_BLA_W_RADL ||
-          this->getAssociatedIRAPType() == NAL_UNIT_CODED_SLICE_CRA) &&
-#else
       if ((this->getAssociatedIRAPType() == NAL_UNIT_CODED_SLICE_CRA) &&
-#endif
           this->getAssociatedIRAPPOC() == pcSlice->getAssociatedIRAPPOC())
       {
-#if !JVET_M0101_HLS
-        if(pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_RADL_N ||
-           pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_RADL_R)
-#else
         if (pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_RADL)
-#endif
         {
           CHECK( pcPic->poc <= this->getPOC(), "Invalid POC");
         }
@@ -1109,22 +968,12 @@ void Slice::checkLeadingPictureRestrictions(PicList& rcListPic) const
 
     // Any RASL picture associated with a CRA picture shall follow, in output
     // order, any IRAP picture that precedes the CRA picture in decoding order.
-#if !JVET_M0101_HLS
-    if(nalUnitType == NAL_UNIT_CODED_SLICE_RASL_N ||
-       nalUnitType == NAL_UNIT_CODED_SLICE_RASL_R)
-#else
     if (nalUnitType == NAL_UNIT_CODED_SLICE_RASL)
-#endif
     {
       if(this->getAssociatedIRAPType() == NAL_UNIT_CODED_SLICE_CRA)
       {
-        if(pcSlice->getPOC() < this->getAssociatedIRAPPOC() && 
+        if(pcSlice->getPOC() < this->getAssociatedIRAPPOC() &&
           (
-#if !JVET_M0101_HLS
-            pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_N_LP   ||
-            pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_W_LP   ||
-            pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_W_RADL ||
-#endif
             pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_N_LP   ||
             pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_W_RADL ||
             pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA))
@@ -1138,455 +987,409 @@ void Slice::checkLeadingPictureRestrictions(PicList& rcListPic) const
 
 
 
-/** Function for applying picture marking based on the Reference Picture Set in pReferencePictureSet.
-*/
-void Slice::applyReferencePictureSet( PicList& rcListPic, const ReferencePictureSet *pReferencePictureSet) const
+
+//Function for applying picture marking based on the Reference Picture List
+void Slice::applyReferencePictureListBasedMarking( PicList& rcListPic, const ReferencePictureList *pRPL0, const ReferencePictureList *pRPL1, const int layerId ) const
 {
   int i, isReference;
-
   checkLeadingPictureRestrictions(rcListPic);
 
+  bool isNeedToCheck = (this->getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_N_LP || this->getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_W_RADL) ? false : true;
+
   // loop through all pictures in the reference picture buffer
   PicList::iterator iterPic = rcListPic.begin();
-  while ( iterPic != rcListPic.end())
+  while (iterPic != rcListPic.end())
   {
     Picture* pcPic = *(iterPic++);
 
-    if( ! pcPic->referenced)
-    {
+    if (!pcPic->referenced)
       continue;
-    }
 
     isReference = 0;
     // loop through all pictures in the Reference Picture Set
     // to see if the picture should be kept as reference picture
-    for(i=0;i<pReferencePictureSet->getNumberOfPositivePictures()+pReferencePictureSet->getNumberOfNegativePictures();i++)
+    for( i = 0; isNeedToCheck && !isReference && i < pRPL0->getNumberOfShorttermPictures() + pRPL0->getNumberOfLongtermPictures() + pRPL0->getNumberOfInterLayerPictures(); i++ )
     {
-      if( ! pcPic->longTerm && pcPic->poc == this->getPOC() + pReferencePictureSet->getDeltaPOC(i))
+      if( pRPL0->isInterLayerRefPic( i ) )
+      {
+        // Diagonal inter-layer prediction is not allowed
+        CHECK( pRPL0->getRefPicIdentifier( i ), "ILRP identifier should be 0" );
+
+        if( pcPic->poc == m_iPOC )
+        {
+          isReference = 1;
+          pcPic->longTerm = true;
+        }
+      }
+      else if (pcPic->layerId == layerId)
+      {
+      if (!(pRPL0->isRefPicLongterm(i)))
+      {
+        if (pcPic->poc == this->getPOC() - pRPL0->getRefPicIdentifier(i))
+        {
+          isReference = 1;
+          pcPic->longTerm = false;
+        }
+      }
+      else
       {
-        isReference = 1;
-        pcPic->usedByCurr = pReferencePictureSet->getUsed(i);
-        pcPic->longTerm = false;
+        int pocCycle = 1 << (pcPic->cs->sps->getBitsForPOC());
+        int curPoc = pcPic->poc & (pocCycle - 1);
+        if (pcPic->longTerm && curPoc == pRPL0->getRefPicIdentifier(i))
+        {
+          isReference = 1;
+          pcPic->longTerm = true;
+        }
+      }
       }
     }
-    for(;i<pReferencePictureSet->getNumberOfPictures();i++)
+
+    for( i = 0; isNeedToCheck && !isReference && i < pRPL1->getNumberOfShorttermPictures() + pRPL1->getNumberOfLongtermPictures() + pRPL1->getNumberOfInterLayerPictures(); i++ )
     {
-      if(pReferencePictureSet->getCheckLTMSBPresent(i)==true)
+      if( pRPL1->isInterLayerRefPic( i ) )
+      {
+        // Diagonal inter-layer prediction is not allowed
+        CHECK( pRPL1->getRefPicIdentifier( i ), "ILRP identifier should be 0" );
+
+        if( pcPic->poc == m_iPOC )
+        {
+          isReference = 1;
+          pcPic->longTerm = true;
+        }
+      }
+      else if( pcPic->layerId == layerId )
       {
-        if( pcPic->longTerm && pcPic->poc == pReferencePictureSet->getPOC(i))
+      if (!(pRPL1->isRefPicLongterm(i)))
+      {
+        if (pcPic->poc == this->getPOC() - pRPL1->getRefPicIdentifier(i))
         {
           isReference = 1;
-          pcPic->usedByCurr = pReferencePictureSet->getUsed(i);
+          pcPic->longTerm = false;
         }
       }
       else
       {
-        int pocCycle = 1 << pcPic->cs->sps->getBitsForPOC();
-        int curPoc = pcPic->poc & (pocCycle-1);
-        int refPoc = pReferencePictureSet->getPOC(i) & (pocCycle-1);
-        if( pcPic->longTerm && curPoc == refPoc)
+        int pocCycle = 1 << (pcPic->cs->sps->getBitsForPOC());
+        int curPoc = pcPic->poc & (pocCycle - 1);
+        if (pcPic->longTerm && curPoc == pRPL1->getRefPicIdentifier(i))
         {
           isReference = 1;
-          pcPic->usedByCurr = pReferencePictureSet->getUsed(i);
+          pcPic->longTerm = true;
         }
       }
+      }
     }
     // mark the picture as "unused for reference" if it is not in
-    // the Reference Picture Set
-    if( pcPic->poc != this->getPOC() && isReference == 0)
+    // the Reference Picture List
+    if( pcPic->layerId == layerId && pcPic->poc != m_iPOC && isReference == 0 )
     {
       pcPic->referenced = false;
-      pcPic->usedByCurr = false;
-      pcPic->longTerm   = false;
-      pcPic->getHashMap()->clearAll();
+      pcPic->longTerm = false;
     }
 
     // sanity checks
-    if( pcPic->referenced)
+    if (pcPic->referenced)
     {
       //check that pictures of higher temporal layers are not used
-      CHECK( pcPic->usedByCurr && !(pcPic->layer<=this->getTLayer()), "Invalid state");
-      //check that pictures of higher or equal temporal layer are not in the RPS if the current picture is a TSA picture
-#if !JVET_M0101_HLS
-      if( this->getNalUnitType() == NAL_UNIT_CODED_SLICE_TSA_R || this->getNalUnitType() == NAL_UNIT_CODED_SLICE_TSA_N)
-      {
-        CHECK( !(pcPic->layer<this->getTLayer()), "Invalid state");
-      }
-#endif
-#if !JVET_M0101_HLS
-      //check that pictures marked as temporal layer non-reference pictures are not used for reference
-      if( pcPic->poc != this->getPOC() && (pcPic->layer == this->getTLayer()))
-      {
-        CHECK( pcPic->usedByCurr && pcPic->slices[0]->getTemporalLayerNonReferenceFlag(), "Invalid state");
-      }
-#endif
+      CHECK(pcPic->usedByCurr && !(pcPic->layer <= this->getTLayer()), "Invalid state");
     }
   }
 }
 
-/** Function for applying picture marking based on the Reference Picture Set in pReferencePictureSet.
-*/
-int Slice::checkThatAllRefPicsAreAvailable( PicList& rcListPic, const ReferencePictureSet *pReferencePictureSet, bool printErrors, int pocRandomAccess, bool bUseRecoveryPoint) const
+int Slice::checkThatAllRefPicsAreAvailable(PicList& rcListPic, const ReferencePictureList *pRPL, int rplIdx, bool printErrors) const
 {
-  int atLeastOneUnabledByRecoveryPoint = 0;
-  int atLeastOneFlushedByPreviousIDR = 0;
   Picture* rpcPic;
-  int i, isAvailable;
-  int atLeastOneLost = 0;
-  int atLeastOneRemoved = 0;
-  int iPocLost = 0;
+  int isAvailable = 0;
+  int notPresentPoc = 0;
+
+  if (this->isIDRorBLA()) return 0; //Assume that all pic in the DPB will be flushed anyway so no need to check.
 
-  // loop through all long-term pictures in the Reference Picture Set
-  // to see if the picture should be kept as reference picture
-  for(i=pReferencePictureSet->getNumberOfNegativePictures()+pReferencePictureSet->getNumberOfPositivePictures();i<pReferencePictureSet->getNumberOfPictures();i++)
+  int numberOfPictures = pRPL->getNumberOfLongtermPictures() + pRPL->getNumberOfShorttermPictures() + pRPL->getNumberOfInterLayerPictures();
+  //Check long term ref pics
+  for (int ii = 0; pRPL->getNumberOfLongtermPictures() > 0 && ii < numberOfPictures; ii++)
   {
+    if( !pRPL->isRefPicLongterm( ii ) || pRPL->isInterLayerRefPic( ii ) )
+    {
+      continue;
+    }
+
+    notPresentPoc = pRPL->getRefPicIdentifier(ii);
     isAvailable = 0;
-    // loop through all pictures in the reference picture buffer
     PicList::iterator iterPic = rcListPic.begin();
-    while ( iterPic != rcListPic.end())
+    while (iterPic != rcListPic.end())
     {
       rpcPic = *(iterPic++);
-      if(pReferencePictureSet->getCheckLTMSBPresent(i)==true)
+      int pocCycle = 1 << (rpcPic->cs->sps->getBitsForPOC());
+      int curPoc = rpcPic->getPOC() & (pocCycle - 1);
+      int refPoc = pRPL->getRefPicIdentifier(ii) & (pocCycle - 1);
+      if (rpcPic->longTerm && curPoc == refPoc && rpcPic->referenced)
       {
-        if(rpcPic->longTerm && (rpcPic->getPOC()) == pReferencePictureSet->getPOC(i) && rpcPic->referenced)
-        {
-          if(bUseRecoveryPoint && this->getPOC() > pocRandomAccess && this->getPOC() + pReferencePictureSet->getDeltaPOC(i) < pocRandomAccess)
-          {
-            isAvailable = 0;
-          }
-          else
-          {
-            isAvailable = 1;
-          }
-        }
+        isAvailable = 1;
+        break;
       }
-      else
+    }
+    // if there was no such long-term check the short terms
+    if (!isAvailable)
+    {
+      iterPic = rcListPic.begin();
+      while (iterPic != rcListPic.end())
       {
-        int pocCycle = 1<<rpcPic->cs->sps->getBitsForPOC();
-        int curPoc = rpcPic->getPOC() & (pocCycle-1);
-        int refPoc = pReferencePictureSet->getPOC(i) & (pocCycle-1);
-        if(rpcPic->longTerm && curPoc == refPoc && rpcPic->referenced)
+        rpcPic = *(iterPic++);
+        int pocCycle = 1 << (rpcPic->cs->sps->getBitsForPOC());
+        int curPoc = rpcPic->getPOC() & (pocCycle - 1);
+        int refPoc = pRPL->getRefPicIdentifier(ii) & (pocCycle - 1);
+        if (!rpcPic->longTerm && curPoc == refPoc && rpcPic->referenced)
         {
-          if(bUseRecoveryPoint && this->getPOC() > pocRandomAccess && this->getPOC() + pReferencePictureSet->getDeltaPOC(i) < pocRandomAccess)
-          {
-            isAvailable = 0;
-          }
-          else
-          {
-            isAvailable = 1;
-          }
+          isAvailable = 1;
+          rpcPic->longTerm = true;
+          break;
         }
       }
     }
-    // if there was no such long-term check the short terms
-    if(!isAvailable)
+    if (!isAvailable)
     {
-      iterPic = rcListPic.begin();
-      while ( iterPic != rcListPic.end())
+      if (printErrors)
       {
-        rpcPic = *(iterPic++);
-
-        int pocCycle = 1 << rpcPic->cs->sps->getBitsForPOC();
-        int curPoc = rpcPic->getPOC();
-        int refPoc = pReferencePictureSet->getPOC(i);
-        if (!pReferencePictureSet->getCheckLTMSBPresent(i))
-        {
-          curPoc = curPoc & (pocCycle - 1);
-          refPoc = refPoc & (pocCycle - 1);
-        }
-
-        if (rpcPic->referenced && curPoc == refPoc)
-        {
-          if(bUseRecoveryPoint && this->getPOC() > pocRandomAccess && this->getPOC() + pReferencePictureSet->getDeltaPOC(i) < pocRandomAccess)
-          {
-            isAvailable = 0;
-          }
-          else
-          {
-            isAvailable = 1;
-            rpcPic->longTerm = true;
-            break;
-          }
-        }
+        msg(ERROR, "\nCurrent picture: %d Long-term reference picture with POC = %3d seems to have been removed or not correctly decoded.", this->getPOC(), notPresentPoc);
       }
+      return notPresentPoc;
     }
-    // report that a picture is lost if it is in the Reference Picture Set
-    // but not available as reference picture
-    if(isAvailable == 0)
+  }
+  //report that a picture is lost if it is in the Reference Picture List but not in the DPB
+
+  isAvailable = 0;
+  //Check short term ref pics
+  for (int ii = 0; ii < numberOfPictures; ii++)
+  {
+    if (pRPL->isRefPicLongterm(ii))
+      continue;
+
+    notPresentPoc = this->getPOC() - pRPL->getRefPicIdentifier(ii);
+    isAvailable = 0;
+    PicList::iterator iterPic = rcListPic.begin();
+    while (iterPic != rcListPic.end())
     {
-      if (this->getPOC() + pReferencePictureSet->getDeltaPOC(i) >= pocRandomAccess)
-      {
-        if(!pReferencePictureSet->getUsed(i) )
-        {
-          if(printErrors)
-          {
-            msg( ERROR, "\nLong-term reference picture with POC = %3d seems to have been removed or not correctly decoded.", this->getPOC() + pReferencePictureSet->getDeltaPOC(i));
-          }
-          atLeastOneRemoved = 1;
-        }
-        else
-        {
-          if(printErrors)
-          {
-            msg( ERROR, "\nLong-term reference picture with POC = %3d is lost or not correctly decoded!", this->getPOC() + pReferencePictureSet->getDeltaPOC(i));
-          }
-          atLeastOneLost = 1;
-          iPocLost=this->getPOC() + pReferencePictureSet->getDeltaPOC(i);
-        }
-      }
-      else if(bUseRecoveryPoint && this->getPOC() > pocRandomAccess)
+      rpcPic = *(iterPic++);
+      if (!rpcPic->longTerm && rpcPic->getPOC() == this->getPOC() - pRPL->getRefPicIdentifier(ii) && rpcPic->referenced)
       {
-        atLeastOneUnabledByRecoveryPoint = 1;
+        isAvailable = 1;
+        break;
       }
-      else if(bUseRecoveryPoint && (this->getAssociatedIRAPType()==NAL_UNIT_CODED_SLICE_IDR_N_LP || this->getAssociatedIRAPType()==NAL_UNIT_CODED_SLICE_IDR_W_RADL))
+    }
+    //report that a picture is lost if it is in the Reference Picture List but not in the DPB
+    if (isAvailable == 0 && pRPL->getNumberOfShorttermPictures() > 0)
+    {
+      if (printErrors)
       {
-        atLeastOneFlushedByPreviousIDR = 1;
+        msg(ERROR, "\nCurrent picture: %d Short-term reference picture with POC = %3d seems to have been removed or not correctly decoded.", this->getPOC(), notPresentPoc);
       }
+      return notPresentPoc;
     }
   }
-  // loop through all short-term pictures in the Reference Picture Set
-  // to see if the picture should be kept as reference picture
-  for(i=0;i<pReferencePictureSet->getNumberOfNegativePictures()+pReferencePictureSet->getNumberOfPositivePictures();i++)
+  return 0;
+}
+
+int Slice::checkThatAllRefPicsAreAvailable(PicList& rcListPic, const ReferencePictureList *pRPL, int rplIdx, bool printErrors, int *refPicIndex) const
+{
+  Picture* rpcPic;
+  int isAvailable = 0;
+  int notPresentPoc = 0;
+  *refPicIndex = 0;
+
+  if (this->isIDRorBLA()) return 0; //Assume that all pic in the DPB will be flushed anyway so no need to check.
+
+  int numberOfPictures = pRPL->getNumberOfLongtermPictures() + pRPL->getNumberOfShorttermPictures() + pRPL->getNumberOfInterLayerPictures();
+  //Check long term ref pics
+  for (int ii = 0; pRPL->getNumberOfLongtermPictures() > 0 && ii < numberOfPictures; ii++)
   {
+    if( !pRPL->isRefPicLongterm( ii ) || pRPL->isInterLayerRefPic( ii ) )
+    {
+      continue;
+    }
+
+    notPresentPoc = pRPL->getRefPicIdentifier(ii);
     isAvailable = 0;
-    // loop through all pictures in the reference picture buffer
     PicList::iterator iterPic = rcListPic.begin();
-    while ( iterPic != rcListPic.end())
+    while (iterPic != rcListPic.end())
     {
       rpcPic = *(iterPic++);
-
-      if( ! rpcPic->longTerm && rpcPic->getPOC() == this->getPOC() + pReferencePictureSet->getDeltaPOC(i) && rpcPic->referenced)
+      int pocCycle = 1 << (rpcPic->cs->sps->getBitsForPOC());
+      int curPoc = rpcPic->getPOC() & (pocCycle - 1);
+      int refPoc = pRPL->getRefPicIdentifier(ii) & (pocCycle - 1);
+      if (rpcPic->longTerm && curPoc == refPoc && rpcPic->referenced)
       {
-        if(bUseRecoveryPoint && this->getPOC() > pocRandomAccess && this->getPOC() + pReferencePictureSet->getDeltaPOC(i) < pocRandomAccess)
-        {
-          isAvailable = 0;
-        }
-        else
+        isAvailable = 1;
+        break;
+      }
+    }
+    // if there was no such long-term check the short terms
+    if (!isAvailable)
+    {
+      iterPic = rcListPic.begin();
+      while (iterPic != rcListPic.end())
+      {
+        rpcPic = *(iterPic++);
+        int pocCycle = 1 << (rpcPic->cs->sps->getBitsForPOC());
+        int curPoc = rpcPic->getPOC() & (pocCycle - 1);
+        int refPoc = pRPL->getRefPicIdentifier(ii) & (pocCycle - 1);
+        if (!rpcPic->longTerm && curPoc == refPoc && rpcPic->referenced)
         {
           isAvailable = 1;
+          rpcPic->longTerm = true;
+          break;
         }
       }
     }
-    // report that a picture is lost if it is in the Reference Picture Set
-    // but not available as reference picture
-    if(isAvailable == 0)
+    if (!isAvailable)
     {
-      if (this->getPOC() + pReferencePictureSet->getDeltaPOC(i) >= pocRandomAccess)
+      if (printErrors)
       {
-        if(!pReferencePictureSet->getUsed(i) )
-        {
-          if(printErrors)
-          {
-            msg( ERROR, "\nShort-term reference picture with POC = %3d seems to have been removed or not correctly decoded.", this->getPOC() + pReferencePictureSet->getDeltaPOC(i));
-          }
-          atLeastOneRemoved = 1;
-        }
-        else
-        {
-          if(printErrors)
-          {
-            msg( ERROR, "\nShort-term reference picture with POC = %3d is lost or not correctly decoded!", this->getPOC() + pReferencePictureSet->getDeltaPOC(i));
-          }
-          atLeastOneLost = 1;
-          iPocLost=this->getPOC() + pReferencePictureSet->getDeltaPOC(i);
-        }
+        msg(ERROR, "\nCurrent picture: %d Long-term reference picture with POC = %3d seems to have been removed or not correctly decoded.", this->getPOC(), notPresentPoc);
       }
-      else if(bUseRecoveryPoint && this->getPOC() > pocRandomAccess)
+      *refPicIndex = ii;
+      return notPresentPoc;
+    }
+  }
+  //report that a picture is lost if it is in the Reference Picture List but not in the DPB
+
+  isAvailable = 0;
+  //Check short term ref pics
+  for (int ii = 0; ii < numberOfPictures; ii++)
+  {
+    if (pRPL->isRefPicLongterm(ii))
+      continue;
+
+    notPresentPoc = this->getPOC() - pRPL->getRefPicIdentifier(ii);
+    isAvailable = 0;
+    PicList::iterator iterPic = rcListPic.begin();
+    while (iterPic != rcListPic.end())
+    {
+      rpcPic = *(iterPic++);
+      if (!rpcPic->longTerm && rpcPic->getPOC() == this->getPOC() - pRPL->getRefPicIdentifier(ii) && rpcPic->referenced)
       {
-        atLeastOneUnabledByRecoveryPoint = 1;
+        isAvailable = 1;
+        break;
       }
-      else if(bUseRecoveryPoint && (this->getAssociatedIRAPType()==NAL_UNIT_CODED_SLICE_IDR_N_LP || this->getAssociatedIRAPType()==NAL_UNIT_CODED_SLICE_IDR_W_RADL))
+    }
+    //report that a picture is lost if it is in the Reference Picture List but not in the DPB
+    if (isAvailable == 0 && pRPL->getNumberOfShorttermPictures() > 0)
+    {
+      if (printErrors)
       {
-        atLeastOneFlushedByPreviousIDR = 1;
+        msg(ERROR, "\nCurrent picture: %d Short-term reference picture with POC = %3d seems to have been removed or not correctly decoded.", this->getPOC(), notPresentPoc);
       }
+      *refPicIndex = ii;
+      return notPresentPoc;
     }
   }
+  return 0;
+}
 
-  if(atLeastOneUnabledByRecoveryPoint || atLeastOneFlushedByPreviousIDR)
-  {
-    return -1;
-  }
-  if(atLeastOneLost)
-  {
-    return iPocLost+1;
-  }
-  if(atLeastOneRemoved)
+bool Slice::isPOCInRefPicList(const ReferencePictureList *rpl, int poc )
+{
+  for( int i = 0; i < rpl->getNumberOfLongtermPictures() + rpl->getNumberOfShorttermPictures() + rpl->getNumberOfInterLayerPictures(); i++ )
   {
-    return -2;
+    if( rpl->isInterLayerRefPic( i ) ) // inter-layer entry: matches only when poc equals m_iPOC
+    {
+      // Diagonal inter-layer prediction is not allowed
+      CHECK( rpl->getRefPicIdentifier( i ), "ILRP identifier should be 0" );
+
+      if( poc == m_iPOC )
+      {
+        return true;
+      }
+    }
+    else
+    if (rpl->isRefPicLongterm(i)) // long-term entry: the stored identifier is compared to poc as-is
+    {
+      if (poc == rpl->getRefPicIdentifier(i))
+      {
+        return true;
+      }
+    }
+    else // any other entry: the stored identifier is subtracted from getPOC() before comparing
+    {
+      if (poc == getPOC() - rpl->getRefPicIdentifier(i))
+      {
+        return true;
+      }
+    }
   }
-  else
+  return false;
+}
+
+bool Slice::isPocRestrictedByDRAP( int poc, bool precedingDRAPInDecodingOrder )
+{
+  if (!getEnableDRAPSEI())
   {
-    return 0;
+    return false;
   }
+  return ( isDRAP() && poc != getAssociatedIRAPPOC() ) || // this slice is DRAP: every POC except the associated IRAP POC is restricted
+         ( cvsHasPreviousDRAP() && getPOC() > getLatestDRAPPOC() && (precedingDRAPInDecodingOrder || poc < getLatestDRAPPOC()) ); // slice POC is above the latest DRAP POC: restricted if the caller flags it as preceding the DRAP or poc is below the DRAP POC
 }
 
-/** Function for constructing an explicit Reference Picture Set out of the available pictures in a referenced Reference Picture Set
-*/
-void Slice::createExplicitReferencePictureSetFromReference(PicList& rcListPic, const ReferencePictureSet *pReferencePictureSet, bool isRAP, int pocRandomAccess, bool bUseRecoveryPoint, const bool bEfficientFieldIRAPEnabled
-                                                         , bool isEncodeLtRef, bool isCompositeRefEnable
-)
+void Slice::checkConformanceForDRAP( uint32_t temporalId )
 {
-  Picture* rpcPic;
-  int i, j;
-  int k = 0;
-  int nrOfNegativePictures = 0;
-  int nrOfPositivePictures = 0;
-  ReferencePictureSet* pLocalRPS = this->getLocalRPS();
-  (*pLocalRPS)=ReferencePictureSet();
-
-  bool irapIsInRPS = false; // Used when bEfficientFieldIRAPEnabled==true
+  if (!(isDRAP() || cvsHasPreviousDRAP()))
+  {
+    return;
+  }
 
-  // loop through all pictures in the Reference Picture Set
-  for(i=0;i<pReferencePictureSet->getNumberOfPictures();i++)
+  if (isDRAP())
   {
-    j = 0;
-    // loop through all pictures in the reference picture buffer
-    PicList::iterator iterPic = rcListPic.begin();
-    while ( iterPic != rcListPic.end())
+    if (!(getNalUnitType() == NalUnitType::NAL_UNIT_CODED_SLICE_TRAIL ||
+          getNalUnitType() == NalUnitType::NAL_UNIT_CODED_SLICE_STSA))
     {
-      j++;
-      rpcPic = *(iterPic++);
-
-      if(rpcPic->getPOC() == this->getPOC() + pReferencePictureSet->getDeltaPOC(i) && rpcPic->referenced)
+      msg( WARNING, "Warning, non-conforming bitstream. The DRAP picture should be a trailing picture.\n");
+    }
+    if ( temporalId != 0)
+    {
+      msg( WARNING, "Warning, non-conforming bitstream. The DRAP picture shall have a temporal sublayer identifier equal to 0.\n");
+    }
+    for (int i = 0; i < getNumRefIdx(REF_PIC_LIST_0); i++)
+    {
+      if (getRefPic(REF_PIC_LIST_0,i)->getPOC() != getAssociatedIRAPPOC())
       {
-        // This picture exists as a reference picture
-        // and should be added to the explicit Reference Picture Set
-        pLocalRPS->setDeltaPOC(k, pReferencePictureSet->getDeltaPOC(i));
-        pLocalRPS->setUsed(k, pReferencePictureSet->getUsed(i) && (!isRAP));
-        if (bEfficientFieldIRAPEnabled)
-        {
-          pLocalRPS->setUsed(k, pLocalRPS->getUsed(k) && !(bUseRecoveryPoint && this->getPOC() > pocRandomAccess && this->getPOC() + pReferencePictureSet->getDeltaPOC(i) < pocRandomAccess) );
-        }
-
-        if(pLocalRPS->getDeltaPOC(k) < 0)
-        {
-          nrOfNegativePictures++;
-        }
-        else
-        {
-          if (bEfficientFieldIRAPEnabled && rpcPic->getPOC() == this->getAssociatedIRAPPOC() && this->getAssociatedIRAPPOC() == this->getPOC() + (isCompositeRefEnable ? 2 : 1))
-          {
-            irapIsInRPS = true;
-          }
-          nrOfPositivePictures++;
-        }
-        k++;
+        msg( WARNING, "Warning, non-conforming bitstream. The DRAP picture shall not include any pictures in the active "
+                      "entries of its reference picture lists except the preceding IRAP picture in decoding order.\n");
       }
     }
-  }
-
-  bool useNewRPS = false;
-  // if current picture is complimentary field associated to IRAP, add the IRAP to its RPS.
-  if(bEfficientFieldIRAPEnabled && m_pcPic->fieldPic && !irapIsInRPS)
-  {
-    PicList::iterator iterPic = rcListPic.begin();
-    while ( iterPic != rcListPic.end())
+    for (int i = 0; i < getNumRefIdx(REF_PIC_LIST_1); i++)
     {
-      rpcPic = *(iterPic++);
-      if (rpcPic->getPOC() == this->getAssociatedIRAPPOC() && this->getAssociatedIRAPPOC() == this->getPOC() + (isCompositeRefEnable ? 2 : 1))
+      if (getRefPic(REF_PIC_LIST_1,i)->getPOC() != getAssociatedIRAPPOC())
       {
-        pLocalRPS->setDeltaPOC(k, 1);
-        pLocalRPS->setUsed(k, true);
-        nrOfPositivePictures++;
-        k ++;
-        useNewRPS = true;
+        msg( WARNING, "Warning, non-conforming bitstream. The DRAP picture shall not include any pictures in the active "
+                      "entries of its reference picture lists except the preceding IRAP picture in decoding order.\n");
       }
     }
   }
-  if (isCompositeRefEnable && isEncodeLtRef)
+
+  if (cvsHasPreviousDRAP() && getPOC() > getLatestDRAPPOC())
   {
-    useNewRPS = true;
-    nrOfNegativePictures = 0;
-    nrOfPositivePictures = 0;
-    for (i = 0; i<pReferencePictureSet->getNumberOfPictures(); i++)
+    for (int i = 0; i < getNumRefIdx(REF_PIC_LIST_0); i++)
     {
-      j = 0;
-      k = 0;
-
-      // loop through all pictures in the reference picture buffer
-      PicList::iterator iterPic = rcListPic.begin();
-      while (iterPic != rcListPic.end())
+      if (getRefPic(REF_PIC_LIST_0,i)->getPOC() < getLatestDRAPPOC() && getRefPic(REF_PIC_LIST_0,i)->getPOC() != getAssociatedIRAPPOC())
       {
-        j++;
-        rpcPic = *(iterPic++);
-
-        if (rpcPic->getPOC() == this->getPOC() + 1 + pReferencePictureSet->getDeltaPOC(i) && rpcPic->referenced)
-        {
-          // This picture exists as a reference picture
-          // and should be added to the explicit Reference Picture Set
-          pLocalRPS->setDeltaPOC(k, pReferencePictureSet->getDeltaPOC(i) + 1);
-          pLocalRPS->setUsed(k, pReferencePictureSet->getUsed(i) && (!isRAP));
-          if (bEfficientFieldIRAPEnabled)
-          {
-            pLocalRPS->setUsed(k, pLocalRPS->getUsed(k) && !(bUseRecoveryPoint && this->getPOC() > pocRandomAccess && this->getPOC() + pReferencePictureSet->getDeltaPOC(i) + 1 < pocRandomAccess));
-          }
-
-          if (pLocalRPS->getDeltaPOC(k) < 0)
-          {
-            nrOfNegativePictures++;
-          }
-          else
-          {
-            if (bEfficientFieldIRAPEnabled && rpcPic->getPOC() == this->getAssociatedIRAPPOC() && this->getAssociatedIRAPPOC() == this->getPOC() + 2)
-            {
-              irapIsInRPS = true;
-            }
-            nrOfPositivePictures++;
-          }
-          k++;
-        }
+        msg( WARNING, "Warning, non-conforming bitstream. Any picture that follows the DRAP picture in both decoding order "
+                    "and output order shall not include, in the active entries of its reference picture lists, any picture "
+                    "that precedes the DRAP picture in decoding order or output order, with the exception of the preceding "
+                    "IRAP picture in decoding order. Problem is POC %d in RPL0.\n", getRefPic(REF_PIC_LIST_0,i)->getPOC());
       }
     }
-  }
-  pLocalRPS->setNumberOfNegativePictures(nrOfNegativePictures);
-  pLocalRPS->setNumberOfPositivePictures(nrOfPositivePictures);
-  pLocalRPS->setNumberOfPictures(nrOfNegativePictures+nrOfPositivePictures);
-  // This is a simplistic inter rps example. A smarter encoder will look for a better reference RPS to do the
-  // inter RPS prediction with.  Here we just use the reference used by pReferencePictureSet.
-  // If pReferencePictureSet is not inter_RPS_predicted, then inter_RPS_prediction is for the current RPS also disabled.
-  if (!pReferencePictureSet->getInterRPSPrediction() || useNewRPS )
-  {
-    pLocalRPS->setInterRPSPrediction(false);
-    pLocalRPS->setNumRefIdc(0);
-  }
-  else
-  {
-    int rIdx =  this->getRPSidx() - pReferencePictureSet->getDeltaRIdxMinus1() - 1;
-    int deltaRPS = pReferencePictureSet->getDeltaRPS();
-    const ReferencePictureSet* pcRefRPS = this->getSPS()->getRPSList()->getReferencePictureSet(rIdx);
-    int iRefPics = pcRefRPS->getNumberOfPictures();
-    int iNewIdc=0;
-    for(i=0; i<= iRefPics; i++)
+    for (int i = 0; i < getNumRefIdx(REF_PIC_LIST_1); i++)
     {
-      int deltaPOC = ((i != iRefPics)? pcRefRPS->getDeltaPOC(i) : 0);  // check if the reference abs POC is >= 0
-      int iRefIdc = 0;
-      for (j=0; j < pLocalRPS->getNumberOfPictures(); j++) // loop through the  pictures in the new RPS
+      if (getRefPic(REF_PIC_LIST_1,i)->getPOC() < getLatestDRAPPOC() && getRefPic(REF_PIC_LIST_1,i)->getPOC() != getAssociatedIRAPPOC())
       {
-        if ( (deltaPOC + deltaRPS) == pLocalRPS->getDeltaPOC(j))
-        {
-          if (pLocalRPS->getUsed(j))
-          {
-            iRefIdc = 1;
-          }
-          else
-          {
-            iRefIdc = 2;
-          }
-        }
+        msg( WARNING, "Warning, non-conforming bitstream. Any picture that follows the DRAP picture in both decoding order "
+                    "and output order shall not include, in the active entries of its reference picture lists, any picture "
+                    "that precedes the DRAP picture in decoding order or output order, with the exception of the preceding "
+                    "IRAP picture in decoding order. Problem is POC %d in RPL1.\n", getRefPic(REF_PIC_LIST_1,i)->getPOC()); // ends with ".\n" like the RPL0 warning so later output starts on a new line
       }
-      pLocalRPS->setRefIdc(i, iRefIdc);
-      iNewIdc++;
     }
-    pLocalRPS->setInterRPSPrediction(true);
-    pLocalRPS->setNumRefIdc(iNewIdc);
-    pLocalRPS->setDeltaRPS(deltaRPS);
-    pLocalRPS->setDeltaRIdxMinus1(pReferencePictureSet->getDeltaRIdxMinus1() + this->getSPS()->getRPSList()->getNumberOfReferencePictureSets() - this->getRPSidx());
   }
-
-  this->setRPS(pLocalRPS);
-  this->setRPSidx(-1);
 }
 
+
 //! get AC and DC values for weighted pred
 void  Slice::getWpAcDcParam(const WPACDCParam *&wp) const
 {
@@ -1690,41 +1493,221 @@ unsigned Slice::getMinPictureDistance() const
     {
       for (int refIdx = 0; refIdx < getNumRefIdx(REF_PIC_LIST_1); refIdx++)
       {
-        minPicDist = std::min( minPicDist, std::abs(currPOC - getRefPic(REF_PIC_LIST_0, refIdx)->getPOC()));
+        minPicDist = std::min(minPicDist, std::abs(currPOC - getRefPic(REF_PIC_LIST_1, refIdx)->getPOC()));
       }
     }
   }
   return (unsigned) minPicDist;
 }
 
-#if HEVC_VPS
 // ------------------------------------------------------------------------------------------------
 // Video parameter set (VPS)
 // ------------------------------------------------------------------------------------------------
 VPS::VPS()
-: m_VPSId                     (  0)
-, m_uiMaxTLayers              (  1)
-, m_uiMaxLayers               (  1)
-, m_bTemporalIdNestingFlag    (false)
-, m_numHrdParameters          (  0)
-, m_maxNuhReservedZeroLayerId (  0)
-, m_hrdParameters             ()
-, m_hrdOpSetIdx               ()
-, m_cprmsPresentFlag          ()
+  : m_VPSId(0)
+  , m_uiMaxLayers(1)
+  , m_vpsMaxSubLayers(1)
+  , m_vpsAllLayersSameNumSubLayersFlag (true)
+  , m_vpsAllIndependentLayersFlag(true)
+  , m_vpsEachLayerIsAnOlsFlag (1)
+  , m_vpsOlsModeIdc (0)
+  , m_vpsNumOutputLayerSets (1)
+, m_vpsExtensionFlag()
 {
-
-  for( int i = 0; i < MAX_TLAYER; i++)
+  for (int i = 0; i < MAX_VPS_LAYERS; i++)
   {
-    m_numReorderPics[i] = 0;
-    m_uiMaxDecPicBuffering[i] = 1;
-    m_uiMaxLatencyIncrease[i] = 0;
+    m_vpsLayerId[i] = 0;
+    m_vpsIndependentLayerFlag[i] = 1;
+    for (int j = 0; j < MAX_VPS_LAYERS; j++)
+    {
+      m_vpsDirectRefLayerFlag[i][j] = 0;
+      m_directRefLayerIdx[i][j] = MAX_VPS_LAYERS;
+      m_interLayerRefIdx[i][j] = NOT_VALID; // index with j so every (i, j) entry is set; [i][i] only ever wrote the diagonal
+    }
+  }
+  for (int i = 0; i < MAX_NUM_OLSS; i++)
+  {
+    for (int j = 0; j < MAX_VPS_LAYERS; j++)
+    {
+      m_vpsOlsOutputLayerFlag[i][j] = 0;
+    }
   }
 }
 
 VPS::~VPS()
 {
 }
-#endif
+
+// ------------------------------------------------------------------------------------------------
+// Picture Header
+// ------------------------------------------------------------------------------------------------
+
+PicHeader::PicHeader() // member-initializers below use the same values that initPicHeader() assigns
+: m_valid                                         ( 0 )
+, m_nonReferencePictureFlag                       ( 0 )
+, m_gdrPicFlag                                    ( 0 )
+, m_noOutputOfPriorPicsFlag                       ( 0 )
+, m_recoveryPocCnt                                ( 0 )
+, m_spsId                                         ( -1 )
+, m_ppsId                                         ( -1 )
+, m_subPicIdSignallingPresentFlag                 ( 0 )
+, m_subPicIdLen                                   ( 0 )
+, m_loopFilterAcrossVirtualBoundariesDisabledFlag ( 0 )
+, m_numVerVirtualBoundaries                       ( 0 )
+, m_numHorVirtualBoundaries                       ( 0 )
+, m_colourPlaneId                                 ( 0 )
+, m_picOutputFlag                                 ( true )
+, m_picRplPresentFlag                             ( 0 )
+, m_pRPL0                                         ( 0 )
+, m_pRPL1                                         ( 0 )
+, m_rpl0Idx                                       ( 0 )
+, m_rpl1Idx                                       ( 0 )
+, m_splitConsOverrideFlag                         ( 0 )
+, m_cuQpDeltaSubdivIntra                          ( 0 )
+, m_cuQpDeltaSubdivInter                          ( 0 )
+, m_cuChromaQpOffsetSubdivIntra                   ( 0 )
+, m_cuChromaQpOffsetSubdivInter                   ( 0 )
+, m_enableTMVPFlag                                ( true )
+, m_mvdL1ZeroFlag                                 ( 0 )
+, m_maxNumMergeCand                               ( MRG_MAX_NUM_CANDS )
+, m_maxNumAffineMergeCand                         ( AFFINE_MRG_MAX_NUM_CANDS )
+, m_disFracMMVD                                   ( 0 )
+, m_disBdofFlag                                   ( 0 )
+, m_disDmvrFlag                                   ( 0 )
+, m_disProfFlag                                   ( 0 )
+, m_maxNumTriangleCand                            ( 0 )
+, m_maxNumIBCMergeCand                            ( IBC_MRG_MAX_NUM_CANDS )
+, m_jointCbCrSignFlag                             ( 0 )
+, m_saoEnabledPresentFlag                         ( 0 )
+, m_alfEnabledPresentFlag                         ( 0 )
+, m_numAlfAps                                     ( 0 )
+, m_alfApsId                                      ( 0 ) // NOTE(review): resized below, so presumably a container constructed with zero elements - confirm
+, m_alfChromaApsId                                ( 0 )
+, m_depQuantEnabledFlag                           ( 0 )
+, m_signDataHidingEnabledFlag                     ( 0 )
+, m_deblockingFilterOverridePresentFlag           ( 0 )
+, m_deblockingFilterOverrideFlag                  ( 0 )
+, m_deblockingFilterDisable                       ( 0 )
+, m_deblockingFilterBetaOffsetDiv2                ( 0 )
+, m_deblockingFilterTcOffsetDiv2                  ( 0 )
+, m_lmcsEnabledFlag                               ( 0 )
+, m_lmcsApsId                                     ( -1 )
+, m_lmcsAps                                       ( nullptr )
+, m_lmcsChromaResidualScaleFlag                   ( 0 )
+, m_scalingListPresentFlag                        ( 0 )
+, m_scalingListApsId                              ( -1 )
+, m_scalingListAps                                ( nullptr )
+{
+  memset(m_subPicId,                                0,    sizeof(m_subPicId)); // this and the following memsets zero-fill each member over its full sizeof
+  memset(m_virtualBoundariesPosX,                   0,    sizeof(m_virtualBoundariesPosX));
+  memset(m_virtualBoundariesPosY,                   0,    sizeof(m_virtualBoundariesPosY));
+  memset(m_saoEnabledFlag,                          0,    sizeof(m_saoEnabledFlag));
+  memset(m_alfEnabledFlag,                          0,    sizeof(m_alfEnabledFlag));
+  memset(m_minQT,                                   0,    sizeof(m_minQT));
+  memset(m_maxMTTHierarchyDepth,                    0,    sizeof(m_maxMTTHierarchyDepth));
+  memset(m_maxBTSize,                               0,    sizeof(m_maxBTSize));
+  memset(m_maxTTSize,                               0,    sizeof(m_maxTTSize));
+
+  m_localRPL0.setNumberOfActivePictures(0); // local RPL0 starts with zero active, short-term, long-term and inter-layer pictures
+  m_localRPL0.setNumberOfShorttermPictures(0);
+  m_localRPL0.setNumberOfLongtermPictures(0);
+  m_localRPL0.setLtrpInSliceHeaderFlag(0);
+  m_localRPL0.setNumberOfInterLayerPictures( 0 );
+
+  m_localRPL1.setNumberOfActivePictures(0); // same reset for local RPL1
+  m_localRPL1.setNumberOfShorttermPictures(0);
+  m_localRPL1.setNumberOfLongtermPictures(0);
+  m_localRPL1.setLtrpInSliceHeaderFlag(0);
+  m_localRPL1.setNumberOfInterLayerPictures( 0 );
+
+  m_alfApsId.resize(0); // NOTE(review): looks redundant after the ( 0 ) initializer above - confirm
+}
+
+PicHeader::~PicHeader()
+{
+  m_alfApsId.resize(0); // shrinks m_alfApsId to zero elements; the member itself is destroyed after this body runs
+}
+
+/**
+ - initialize picture header to default state
+ */
+void PicHeader::initPicHeader() // resets an existing header to the values the constructor establishes
+{
+  m_valid                                         = 0;
+  m_nonReferencePictureFlag                       = 0;
+  m_gdrPicFlag                                    = 0;
+  m_noOutputOfPriorPicsFlag                       = 0;
+  m_recoveryPocCnt                                = 0;
+  m_spsId                                         = -1;
+  m_ppsId                                         = -1;
+  m_subPicIdSignallingPresentFlag                 = 0;
+  m_subPicIdLen                                   = 0;
+  m_loopFilterAcrossVirtualBoundariesDisabledFlag = 0;
+  m_numVerVirtualBoundaries                       = 0;
+  m_numHorVirtualBoundaries                       = 0;
+  m_colourPlaneId                                 = 0;
+  m_picOutputFlag                                 = true;
+  m_picRplPresentFlag                             = 0;
+  m_pRPL0                                         = 0;
+  m_pRPL1                                         = 0;
+  m_rpl0Idx                                       = 0;
+  m_rpl1Idx                                       = 0;
+  m_splitConsOverrideFlag                         = 0;
+  m_cuQpDeltaSubdivIntra                          = 0;
+  m_cuQpDeltaSubdivInter                          = 0;
+  m_cuChromaQpOffsetSubdivIntra                   = 0;
+  m_cuChromaQpOffsetSubdivInter                   = 0;
+  m_enableTMVPFlag                                = true;
+  m_mvdL1ZeroFlag                                 = 0;
+  m_maxNumMergeCand                               = MRG_MAX_NUM_CANDS;
+  m_maxNumAffineMergeCand                         = AFFINE_MRG_MAX_NUM_CANDS;
+  m_disFracMMVD                                   = 0;
+  m_disBdofFlag                                   = 0;
+  m_disDmvrFlag                                   = 0;
+  m_disProfFlag                                   = 0;
+  m_maxNumTriangleCand                            = 0;
+  m_maxNumIBCMergeCand                            = IBC_MRG_MAX_NUM_CANDS;
+  m_jointCbCrSignFlag                             = 0;
+  m_saoEnabledPresentFlag                         = 0;
+  m_alfEnabledPresentFlag                         = 0;
+  m_numAlfAps                                     = 0;
+  m_alfChromaApsId                                = 0;
+  m_depQuantEnabledFlag                           = 0;
+  m_signDataHidingEnabledFlag                     = 0;
+  m_deblockingFilterOverridePresentFlag           = 0;
+  m_deblockingFilterOverrideFlag                  = 0;
+  m_deblockingFilterDisable                       = 0;
+  m_deblockingFilterBetaOffsetDiv2                = 0;
+  m_deblockingFilterTcOffsetDiv2                  = 0;
+  m_lmcsEnabledFlag                               = 0;
+  m_lmcsApsId                                     = -1;
+  m_lmcsAps                                       = nullptr;
+  m_lmcsChromaResidualScaleFlag                   = 0;
+  m_scalingListPresentFlag                        = 0;
+  m_scalingListApsId                              = -1;
+  m_scalingListAps                                = nullptr;
+  memset(m_subPicId,                                0,    sizeof(m_subPicId)); // this and the following memsets zero-fill each member over its full sizeof
+  memset(m_virtualBoundariesPosX,                   0,    sizeof(m_virtualBoundariesPosX));
+  memset(m_virtualBoundariesPosY,                   0,    sizeof(m_virtualBoundariesPosY));
+  memset(m_saoEnabledFlag,                          0,    sizeof(m_saoEnabledFlag));
+  memset(m_alfEnabledFlag,                          0,    sizeof(m_alfEnabledFlag));
+  memset(m_minQT,                                   0,    sizeof(m_minQT));
+  memset(m_maxMTTHierarchyDepth,                    0,    sizeof(m_maxMTTHierarchyDepth));
+  memset(m_maxBTSize,                               0,    sizeof(m_maxBTSize));
+  memset(m_maxTTSize,                               0,    sizeof(m_maxTTSize));
+
+  m_localRPL0.setNumberOfActivePictures(0);
+  m_localRPL0.setNumberOfShorttermPictures(0);
+  m_localRPL0.setNumberOfLongtermPictures(0);
+  m_localRPL0.setLtrpInSliceHeaderFlag(0);
+  m_localRPL0.setNumberOfInterLayerPictures( 0 ); // the constructor clears this too; without it a reused header keeps a stale inter-layer count
+  m_localRPL1.setNumberOfActivePictures(0);
+  m_localRPL1.setNumberOfShorttermPictures(0);
+  m_localRPL1.setNumberOfLongtermPictures(0);
+  m_localRPL1.setLtrpInSliceHeaderFlag(0);
+  m_localRPL1.setNumberOfInterLayerPictures( 0 ); // same reset for local RPL1, matching the constructor
+  m_alfApsId.resize(0);
+}
 
 // ------------------------------------------------------------------------------------------------
 // Sequence parameter set (SPS)
@@ -1748,90 +1731,80 @@ SPSRExt::SPSRExt()
 
 SPS::SPS()
 : m_SPSId                     (  0)
-#if !JVET_M0101_HLS
-, m_bIntraOnlyConstraintFlag  (false)
-, m_maxBitDepthConstraintIdc  (  0)
-, m_maxChromaFormatConstraintIdc(CHROMA_420)
-, m_bFrameConstraintFlag      (false)
-, m_bNoQtbttDualTreeIntraConstraintFlag(false)
-, m_bNoSaoConstraintFlag      (false)
-, m_bNoAlfConstraintFlag      (false)
-, m_bNoPcmConstraintFlag      (false)
-, m_bNoRefWraparoundConstraintFlag(false)
-, m_bNoTemporalMvpConstraintFlag(false)
-, m_bNoSbtmvpConstraintFlag   (false)
-, m_bNoAmvrConstraintFlag     (false)
-, m_bNoBdofConstraintFlag     (false)
-, m_bNoCclmConstraintFlag     (false)
-, m_bNoMtsConstraintFlag      (false)
-, m_bNoAffineMotionConstraintFlag(false)
-, m_bNoGbiConstraintFlag      (false)
-, m_bNoMhIntraConstraintFlag  (false)
-, m_bNoTriangleConstraintFlag (false)
-, m_bNoLadfConstraintFlag     (false)
-, m_bNoCurrPicRefConstraintFlag(false)
-, m_bNoQpDeltaConstraintFlag  (false)
-, m_bNoDepQuantConstraintFlag (false)
-, m_bNoSignDataHidingConstraintFlag(false)
-#endif
+, m_decodingParameterSetId    (  0 )
+, m_VPSId                     ( 0 )
 , m_affineAmvrEnabledFlag     ( false )
 , m_DMVR                      ( false )
+, m_MMVD                      ( false )
 , m_SBT                       ( false )
-, m_MaxSbtSize                ( 32 )
-#if HEVC_VPS
-, m_VPSId                     (  0)
-#endif
+, m_ISP                       ( false )
 , m_chromaFormatIdc           (CHROMA_420)
+, m_separateColourPlaneFlag(0)
 , m_uiMaxTLayers              (  1)
 // Structure
-, m_picWidthInLumaSamples     (352)
-, m_picHeightInLumaSamples    (288)
+, m_maxWidthInLumaSamples     (352)
+, m_maxHeightInLumaSamples    (288)
+, m_subPicPresentFlag         (0)
+, m_numSubPics(1)
+, m_subPicIdPresentFlag(0)
+, m_subPicIdSignallingPresentFlag(0)
+, m_subPicIdLen(16)
 , m_log2MinCodingBlockSize    (  0)
 , m_log2DiffMaxMinCodingBlockSize(0)
 , m_CTUSize(0)
 , m_minQT{ 0, 0, 0 }
-, m_maxBTDepth{ MAX_BT_DEPTH, MAX_BT_DEPTH_INTER, MAX_BT_DEPTH_C }
+, m_maxMTTHierarchyDepth{ MAX_BT_DEPTH, MAX_BT_DEPTH_INTER, MAX_BT_DEPTH_C }
 , m_maxBTSize{ MAX_BT_SIZE,  MAX_BT_SIZE_INTER,  MAX_BT_SIZE_C }
 , m_maxTTSize{ MAX_TT_SIZE,  MAX_TT_SIZE_INTER,  MAX_TT_SIZE_C }
 , m_uiMaxCUWidth              ( 32)
 , m_uiMaxCUHeight             ( 32)
 , m_uiMaxCodingDepth          (  3)
+, m_numRPL0                   ( 0 )
+, m_numRPL1                   ( 0 )
+, m_rpl1CopyFromRpl0Flag      ( false )
+, m_rpl1IdxPresentFlag        ( false )
+, m_allRplEntriesHasSameSignFlag ( true )
 , m_bLongTermRefsPresent      (false)
 // Tool list
-, m_pcmEnabledFlag            (false)
-, m_pcmLog2MaxSize            (  5)
-, m_uiPCMLog2MinSize          (  7)
-, m_bPCMFilterDisableFlag     (false)
+, m_transformSkipEnabledFlag  (false)
+, m_BDPCMEnabled              (0)
+, m_JointCbCrEnabledFlag      (false)
 , m_sbtmvpEnabledFlag         (false)
 , m_bdofEnabledFlag           (false)
-, m_disFracMmvdEnabledFlag    ( false )
+, m_fpelMmvdEnabledFlag       ( false )
+, m_BdofControlPresentFlag    ( false )
+, m_DmvrControlPresentFlag    ( false )
+, m_ProfControlPresentFlag    ( false )
 , m_uiBitsForPOC              (  8)
 , m_numLongTermRefPicSPS      (  0)
-#if MAX_TB_SIZE_SIGNALLING
 , m_log2MaxTbSize             (  6)
-#endif
+, m_useWeightPred             (false)
+, m_useWeightedBiPred         (false)
 , m_saoEnabledFlag            (false)
 , m_bTemporalIdNestingFlag    (false)
-#if HEVC_USE_SCALING_LISTS
 , m_scalingListEnabledFlag    (false)
-#endif
-#if HEVC_USE_INTRA_SMOOTHING_T32 || HEVC_USE_INTRA_SMOOTHING_T64
-, m_useStrongIntraSmoothing   (false)
-#endif
+, m_loopFilterAcrossVirtualBoundariesDisabledFlag(0)
+, m_numVerVirtualBoundaries(0)
+, m_numHorVirtualBoundaries(0)
+, m_hrdParametersPresentFlag  (false)
 , m_vuiParametersPresentFlag  (false)
 , m_vuiParameters             ()
 , m_wrapAroundEnabledFlag     (false)
 , m_wrapAroundOffset          (  0)
 , m_IBCFlag                   (  0)
-, m_lumaReshapeEnable         (false)
+, m_PLTMode                   (  0)
+, m_lmcsEnabled               (false)
 , m_AMVREnabledFlag                       ( false )
 , m_LMChroma                  ( false )
-, m_cclmCollocatedChromaFlag  ( false )
+, m_horCollocatedChromaFlag   ( true )
+, m_verCollocatedChromaFlag   ( false )
 , m_IntraMTS                  ( false )
 , m_InterMTS                  ( false )
+, m_LFNST                     ( false )
 , m_Affine                    ( false )
 , m_AffineType                ( false )
-, m_MHIntra                   ( false )
+, m_PROF                      ( false )
+, m_ciip                   ( false )
 , m_Triangle                  ( false )
 #if LUMA_ADAPTIVE_DEBLOCKING_FILTER_QP_OFFSET
 , m_LadfEnabled               ( false )
@@ -1839,11 +1812,16 @@ SPS::SPS()
 , m_LadfQpOffset              { 0 }
 , m_LadfIntervalLowerBound    { 0 }
 #endif
+, m_MRL                       ( false )
+, m_MIP                       ( false )
+, m_GDREnabledFlag            ( true )
+, m_SubLayerCbpParametersPresentFlag ( true )
+, m_rprEnabledFlag            ( false )
+
 {
   for(int ch=0; ch<MAX_NUM_CHANNEL_TYPE; ch++)
   {
     m_bitDepths.recon[ch] = 8;
-    m_pcmBitDepths[ch] = 8;
     m_qpBDOffset   [ch] = 0;
   }
 
@@ -1856,34 +1834,122 @@ SPS::SPS()
 
   ::memset(m_ltRefPicPocLsbSps, 0, sizeof(m_ltRefPicPocLsbSps));
   ::memset(m_usedByCurrPicLtSPSFlag, 0, sizeof(m_usedByCurrPicLtSPSFlag));
+  ::memset(m_virtualBoundariesPosX, 0, sizeof(m_virtualBoundariesPosX));
+  ::memset(m_virtualBoundariesPosY, 0, sizeof(m_virtualBoundariesPosY));
 }
 
 SPS::~SPS()
 {
-  m_RPSList.destroy();
 }
 
-void  SPS::createRPSList( int numRPS )
+void  SPS::createRPLList0(int numRPL) // (re)creates m_RPLList0 with numRPL entries and records the count in m_numRPL0
 {
-  m_RPSList.destroy();
-  m_RPSList.create(numRPS);
+  m_RPLList0.destroy(); // destroy-then-create; presumably so the list can be re-sized by calling this again
+  m_RPLList0.create(numRPL);
+  m_numRPL0 = numRPL;
+  m_rpl1IdxPresentFlag = (m_numRPL0 != m_numRPL1) ? true : false; // flag is true exactly when the two list counts differ
 }
+void  SPS::createRPLList1(int numRPL) // (re)creates m_RPLList1 with numRPL entries and records the count in m_numRPL1
+{
+  m_RPLList1.destroy(); // destroy-then-create; presumably so the list can be re-sized by calling this again
+  m_RPLList1.create(numRPL);
+  m_numRPL1 = numRPL;
 
+  m_rpl1IdxPresentFlag = (m_numRPL0 != m_numRPL1) ? true : false; // flag is true exactly when the two list counts differ
+}
 
 
 const int SPS::m_winUnitX[]={1,2,2,1};
 const int SPS::m_winUnitY[]={1,2,1,1};
 
+void ChromaQpMappingTable::setParams(const ChromaQpMappingTableParams &params, const int qpBdOffset) // copies the table description from params and stores qpBdOffset
+{
+  m_qpBdOffset = qpBdOffset;
+  m_sameCQPTableForAllChromaFlag = params.m_sameCQPTableForAllChromaFlag;
+  m_numQpTables = params.m_numQpTables;
+  // every one of the MAX_NUM_CQP_MAPPING_TABLES slots is copied, not only the first m_numQpTables
+  for (int i = 0; i < MAX_NUM_CQP_MAPPING_TABLES; i++)
+  {
+    m_numPtsInCQPTableMinus1[i] = params.m_numPtsInCQPTableMinus1[i];
+    m_deltaQpInValMinus1[i] = params.m_deltaQpInValMinus1[i];
+    m_qpTableStartMinus26[i] = params.m_qpTableStartMinus26[i];
+    m_deltaQpOutVal[i] = params.m_deltaQpOutVal[i];
+  }
+}
+void ChromaQpMappingTable::derivedChromaQPMappingTables() // fills m_chromaQpMappingTables[i] for each of the getNumQpTables() tables
+{
+  for (int i = 0; i < getNumQpTables(); i++)
+  {
+    const int qpBdOffsetC = m_qpBdOffset;
+    const int numPtsInCQPTableMinus1 = getNumPtsInCQPTableMinus1(i);
+    std::vector<int> qpInVal(numPtsInCQPTableMinus1 + 2), qpOutVal(numPtsInCQPTableMinus1 + 2);
+    // pivot 0: input and output QP both start at getQpTableStartMinus26(i) + 26; later pivots accumulate the stored deltas
+    qpInVal[0] = getQpTableStartMinus26(i) + 26;
+    qpOutVal[0] = qpInVal[0];
+    for (int j = 0; j <= getNumPtsInCQPTableMinus1(i); j++)
+    {
+      qpInVal[j + 1] = qpInVal[j] + getDeltaQpInValMinus1(i, j) + 1;
+      qpOutVal[j + 1] = qpOutVal[j] + getDeltaQpOutVal(i, j);
+    }
+    // NOTE(review): only pivots 0..numPtsInCQPTableMinus1 are range-checked; the last pivot (index numPtsInCQPTableMinus1 + 1) is not - confirm this is intended
+    for (int j = 0; j <= getNumPtsInCQPTableMinus1(i); j++)
+    {
+      CHECK(qpInVal[j]  < -qpBdOffsetC || qpInVal[j]  > MAX_QP, "qpInVal out of range");
+      CHECK(qpOutVal[j] < -qpBdOffsetC || qpOutVal[j] > MAX_QP, "qpOutVal out of range");
+    }
+    // entries from the first pivot down to -qpBdOffsetC: each is the entry above minus 1, passed through Clip3(-qpBdOffsetC, MAX_QP, .)
+    m_chromaQpMappingTables[i][qpInVal[0]] = qpOutVal[0];
+    for (int k = qpInVal[0] - 1; k >= -qpBdOffsetC; k--)
+    {
+      m_chromaQpMappingTables[i][k] = Clip3(-qpBdOffsetC, MAX_QP, m_chromaQpMappingTables[i][k + 1] - 1);
+    }
+    for (int j = 0; j <= numPtsInCQPTableMinus1; j++)
+    {
+      int sh = (getDeltaQpInValMinus1(i, j) + 1) >> 1; // half of the divisor used below, added to the numerator before dividing
+      for (int k = qpInVal[j] + 1, m = 1; k <= qpInVal[j + 1]; k++, m++) // entries after pivot j up to and including pivot j+1
+      {
+        m_chromaQpMappingTables[i][k] = m_chromaQpMappingTables[i][qpInVal[j]]
+          + ((qpOutVal[j + 1] - qpOutVal[j]) * m + sh) / (getDeltaQpInValMinus1(i, j) + 1);
+      }
+    }
+    for (int k = qpInVal[numPtsInCQPTableMinus1 + 1] + 1; k <= MAX_QP; k++) // entries above the last pivot: previous entry plus 1, through Clip3
+    {
+      m_chromaQpMappingTables[i][k] = Clip3(-qpBdOffsetC, MAX_QP, m_chromaQpMappingTables[i][k - 1] + 1);
+    }
+  }
+}
+
+SliceMap::SliceMap() // starts as an empty slice: ID 0, zero tiles, zero CTUs
+: m_sliceID              (0)
+, m_numTilesInSlice      (0)
+, m_numCtuInSlice        (0)
+{
+  m_ctuAddrInSlice.clear(); // leaves the CTU address list empty
+}
+
+SliceMap::~SliceMap() // zeroes the CTU count and empties the CTU address list
+{
+  m_numCtuInSlice = 0;
+  m_ctuAddrInSlice.clear();
+}
+
+RectSlice::RectSlice() // every field starts at 0
+: m_tileIdx            (0)
+, m_sliceWidthInTiles  (0)
+, m_sliceHeightInTiles (0)
+, m_numSlicesInTile    (0) // NOTE(review): PPS::initRectSliceMap() computes getNumSlicesInTile()-1 on an unsigned value - presumably this is always set to >= 1 before then
+, m_sliceHeightInCtu   (0)
+{
+}
+
+RectSlice::~RectSlice() // empty body: no cleanup is performed here
+{
+}
+
 PPSRExt::PPSRExt()
-: m_log2MaxTransformSkipBlockSize      (2)
-, m_crossComponentPredictionEnabledFlag(false)
-, m_cuChromaQpOffsetSubdiv             (0)
-, m_chromaQpOffsetListLen              (0)
-// m_ChromaQpAdjTableIncludingNullEntry initialized below
+: m_crossComponentPredictionEnabledFlag(false)
 // m_log2SaoOffsetScale initialized below
 {
-  m_ChromaQpAdjTableIncludingNullEntry[0].u.comp.CbOffset = 0; // Array includes entry [0] for the null offset used when cu_chroma_qp_offset_flag=0. This is initialised here and never subsequently changed.
-  m_ChromaQpAdjTableIncludingNullEntry[0].u.comp.CrOffset = 0;
   for(int ch=0; ch<MAX_NUM_CHANNEL_TYPE; ch++)
   {
     m_log2SaoOffsetScale[ch] = 0;
@@ -1895,215 +1961,447 @@ PPS::PPS()
 , m_SPSId                            (0)
 , m_picInitQPMinus26                 (0)
 , m_useDQP                           (false)
-, m_bConstrainedIntraPred            (false)
 , m_bSliceChromaQpFlag               (false)
-, m_cuQpDeltaSubdiv                  (0)
 , m_chromaCbQpOffset                 (0)
 , m_chromaCrQpOffset                 (0)
+, m_chromaCbCrQpOffset               (0)
+, m_chromaQpOffsetListLen              (0)
 , m_numRefIdxL0DefaultActive         (1)
 , m_numRefIdxL1DefaultActive         (1)
-, m_TransquantBypassEnabledFlag      (false)
-, m_useTransformSkip                 (false)
-#if HEVC_DEPENDENT_SLICES
-, m_dependentSliceSegmentsEnabledFlag(false)
-#endif
-#if HEVC_TILES_WPP
-, m_tilesEnabledFlag                 (false)
+, m_rpl1IdxPresentFlag               (false)
+, m_numSubPics                       (1)
+, m_subPicIdSignallingPresentFlag    (0)
+, m_subPicIdLen                      (16)
+, m_noPicPartitionFlag               (1)
+, m_log2CtuSize                      (0)
+, m_ctuSize                          (0)
+, m_picWidthInCtu                    (0)
+, m_picHeightInCtu                   (0)
+, m_numTileCols                      (1)
+, m_numTileRows                      (1)
+, m_rectSliceFlag                    (1)  
+  , m_singleSlicePerSubPicFlag       (0)
+, m_numSlicesInPic                   (1)
+, m_tileIdxDeltaPresentFlag          (0)
+, m_loopFilterAcrossTilesEnabledFlag (1)
+, m_loopFilterAcrossSlicesEnabledFlag(0)
+, m_log2MaxTransformSkipBlockSize    (2)
 , m_entropyCodingSyncEnabledFlag     (false)
-, m_loopFilterAcrossTilesEnabledFlag (true)
-, m_uniformSpacingFlag               (false)
-, m_numTileColumnsMinus1             (0)
-, m_numTileRowsMinus1                (0)
-#endif
+, m_constantSliceHeaderParamsEnabledFlag (false)
+, m_PPSDepQuantEnabledIdc            (0)
+, m_PPSRefPicListSPSIdc0             (0)
+, m_PPSRefPicListSPSIdc1             (0)
+, m_PPSMvdL1ZeroIdc                  (0)
+, m_PPSCollocatedFromL0Idc           (0)
+, m_PPSSixMinusMaxNumMergeCandPlus1  (0)
+, m_PPSMaxNumMergeCandMinusMaxNumTriangleCandPlus1 (0)
 , m_cabacInitPresentFlag             (false)
+, m_pictureHeaderExtensionPresentFlag(0)
 , m_sliceHeaderExtensionPresentFlag  (false)
-, m_loopFilterAcrossSlicesEnabledFlag(false)
 , m_listsModificationPresentFlag     (0)
-, m_numExtraSliceHeaderBits          (0)
+, m_picWidthInLumaSamples( 352 )
+, m_picHeightInLumaSamples( 288 )
 , m_ppsRangeExtension                ()
 , pcv                                (NULL)
 {
+  m_ChromaQpAdjTableIncludingNullEntry[0].u.comp.CbOffset = 0; // Array includes entry [0] for the null offset used when cu_chroma_qp_offset_flag=0. This is initialised here and never subsequently changed.
+  m_ChromaQpAdjTableIncludingNullEntry[0].u.comp.CrOffset = 0;
+  m_ChromaQpAdjTableIncludingNullEntry[0].u.comp.JointCbCrOffset = 0;
+  m_tileColWidth.clear();
+  m_tileRowHeight.clear();
+  m_tileColBd.clear();
+  m_tileRowBd.clear();
+  m_ctuToTileCol.clear();
+  m_ctuToTileRow.clear();
+  m_ctuToSubPicIdx.clear();
+  m_rectSlices.clear();
+  m_sliceMap.clear();
+}
+
+PPS::~PPS() // empties all tile/slice/CTU-mapping containers, then deletes pcv
+{
+  m_tileColWidth.clear();
+  m_tileRowHeight.clear();
+  m_tileColBd.clear();
+  m_tileRowBd.clear();
+  m_ctuToTileCol.clear();
+  m_ctuToTileRow.clear();
+  m_ctuToSubPicIdx.clear();
+  m_rectSlices.clear();
+  m_sliceMap.clear();
+
+  delete pcv; // pcv is deleted here, i.e. treated as owned by this PPS; the constructor initialises it to NULL
+}
+
+/**
+ - reset tile and slice parameters and lists (explicit and total tile counts and the slice count become 0, all tile/slice/CTU-mapping containers are emptied)
+ */
+void PPS::resetTileSliceInfo()
+{
+  m_numExpTileCols = 0;
+  m_numExpTileRows = 0;
+  m_numTileCols    = 0; // note: the constructor starts the tile counts and the slice count at 1, this reset uses 0
+  m_numTileRows    = 0;
+  m_numSlicesInPic = 0;
+  m_tileColWidth.clear();
+  m_tileRowHeight.clear();
+  m_tileColBd.clear();
+  m_tileRowBd.clear();
+  m_ctuToTileCol.clear();
+  m_ctuToTileRow.clear();
+  m_ctuToSubPicIdx.clear();
+  m_rectSlices.clear();
+  m_sliceMap.clear();
+}
+
+/**
+ - initialize tile row/column sizes and boundaries (needs >= 1 explicit column width and row height; appends to the boundary and CTU-mapping lists)
+ */
+void PPS::initTiles()
+{
+  int       colIdx, rowIdx, ctuX, ctuY;
+  CHECK(m_numExpTileCols < 1 || m_tileColWidth.size()  < m_numExpTileCols, "Missing explicit tile column width"); // [colIdx-1] below needs one
+  CHECK(m_numExpTileRows < 1 || m_tileRowHeight.size() < m_numExpTileRows, "Missing explicit tile row height");   // [rowIdx-1] below needs one
+  // check explicit tile column sizes
+  uint32_t  remainingWidthInCtu  = m_picWidthInCtu;
+  for( colIdx = 0; colIdx < m_numExpTileCols; colIdx++ )
+  {
+    CHECK(m_tileColWidth[colIdx] > remainingWidthInCtu,    "Tile column width exceeds picture width");
+    remainingWidthInCtu -= m_tileColWidth[colIdx];
+  }
+
+  // divide remaining picture width into uniform tile columns (width of the last explicit column; the final column is truncated to what is left)
+  uint32_t  uniformTileColWidth = m_tileColWidth[colIdx-1];
+  while( remainingWidthInCtu > 0 )
+  {
+    CHECK(colIdx >= MAX_TILE_COLS, "Number of tile columns exceeds valid range");
+    uniformTileColWidth = std::min(remainingWidthInCtu, uniformTileColWidth);
+    m_tileColWidth.push_back( uniformTileColWidth );
+    remainingWidthInCtu -= uniformTileColWidth;
+    colIdx++;
+  }
+  m_numTileCols = colIdx;
+
+  // check explicit tile row sizes
+  uint32_t  remainingHeightInCtu  = m_picHeightInCtu;
+  for( rowIdx = 0; rowIdx < m_numExpTileRows; rowIdx++ )
+  {
+    CHECK(m_tileRowHeight[rowIdx] > remainingHeightInCtu,     "Tile row height exceeds picture height");
+    remainingHeightInCtu -= m_tileRowHeight[rowIdx];
+  }
+
+  // divide remaining picture height into uniform tile rows (height of the last explicit row; the final row is truncated to what is left)
+  uint32_t  uniformTileRowHeight = m_tileRowHeight[rowIdx - 1];
+  while( remainingHeightInCtu > 0 )
+  {
+    CHECK(rowIdx >= MAX_TILE_ROWS, "Number of tile rows exceeds valid range");
+    uniformTileRowHeight = std::min(remainingHeightInCtu, uniformTileRowHeight);
+    m_tileRowHeight.push_back( uniformTileRowHeight );
+    remainingHeightInCtu -= uniformTileRowHeight;
+    rowIdx++;
+  }
+  m_numTileRows = rowIdx;
+
+  // set left column boundaries (m_numTileCols + 1 entries; the last one equals the accumulated picture width)
+  m_tileColBd.push_back( 0 );
+  for( colIdx = 0; colIdx < m_numTileCols; colIdx++ )
+  {
+    m_tileColBd.push_back( m_tileColBd[ colIdx ] + m_tileColWidth[ colIdx ] );
+  }
+
+  // set top row boundaries (m_numTileRows + 1 entries; the last one equals the accumulated picture height)
+  m_tileRowBd.push_back( 0 );
+  for( rowIdx = 0; rowIdx < m_numTileRows; rowIdx++ )
+  {
+    m_tileRowBd.push_back( m_tileRowBd[ rowIdx ] + m_tileRowHeight[ rowIdx ] );
+  }
+
+  // set mapping between horizontal CTU address and tile column index (loop is inclusive, so one extra entry for ctuX == m_picWidthInCtu)
+  colIdx = 0;
+  for( ctuX = 0; ctuX <= m_picWidthInCtu; ctuX++ )
+  {
+    if( ctuX == m_tileColBd[ colIdx + 1 ] )
+    {
+      colIdx++;
+    }
+    m_ctuToTileCol.push_back( colIdx );
+  }
+
+  // set mapping between vertical CTU address and tile row index (loop is inclusive, so one extra entry for ctuY == m_picHeightInCtu)
+  rowIdx = 0;
+  for( ctuY = 0; ctuY <= m_picHeightInCtu; ctuY++ )
+  {
+    if( ctuY == m_tileRowBd[ rowIdx + 1 ] )
+    {
+      rowIdx++;
+    }
+    m_ctuToTileRow.push_back( rowIdx );
+  }
+}
+
+/**
+ - initialize memory for rectangular slice parameters (m_rectSlices is resized to m_numSlicesInPic entries after checking against MAX_SLICES)
+ */
+void PPS::initRectSlices()
+{ 
+  CHECK(m_numSlicesInPic > MAX_SLICES, "Number of slices in picture exceeds valid range");
+  m_rectSlices.resize(m_numSlicesInPic);
+}
+
+/**
+ - initialize mapping between rectangular slices and CTUs (fills m_sliceMap, infers the size of the last slice, then runs checkSliceMap())
+ */
+void PPS::initRectSliceMap()
+{
+  uint32_t  ctuY;
+  uint32_t  tileX, tileY;
+    
+  // size the slice list to m_numSlicesInPic entries
+  CHECK(m_numSlicesInPic > MAX_SLICES, "Number of slices in picture exceeds valid range");
+  m_sliceMap.resize( m_numSlicesInPic );
+  if ((getNumSubPics() > 0) && getSingleSlicePerSubPicFlag()) // one slice per sub-picture: each CTU goes to the slice whose index is the CTU's sub-picture index
+  {
+    for (uint32_t i = 0; i <= getNumSubPics() - 1; i++) // NOTE(review): m_sliceMap has m_numSlicesInPic entries - assumes getNumSubPics() <= m_numSlicesInPic
+    {
+      m_sliceMap[i].initSliceMap();
+    }
+    uint32_t picSizeInCtu = getPicWidthInCtu() * getPicHeightInCtu();
+    uint32_t sliceIdx;
+    for (uint32_t i = 0; i < picSizeInCtu; i++)
+    {
+      sliceIdx = getCtuToSubPicIdx(i);
+      m_sliceMap[sliceIdx].pushToCtuAddrInSlice(i);
+    }
+  }
+  else
+  {
+  // generate CTU maps for all rectangular slices in picture
+  for( uint32_t i = 0; i < m_numSlicesInPic; i++ )
+  {
+    m_sliceMap[ i ].initSliceMap();
+
+    // get position of first tile in slice
+    tileX =  m_rectSlices[ i ].getTileIdx() % m_numTileCols;
+    tileY =  m_rectSlices[ i ].getTileIdx() / m_numTileCols;
+    
+    // infer slice size for last slice in picture (it extends to the last tile column and row)
+    if( i == m_numSlicesInPic-1 ) 
+    {
+      m_rectSlices[ i ].setSliceWidthInTiles ( m_numTileCols - tileX );
+      m_rectSlices[ i ].setSliceHeightInTiles( m_numTileRows - tileY );
+      m_rectSlices[ i ].setNumSlicesInTile( 1 );
+    }
+
+    // set slice index
+    m_sliceMap[ i ].setSliceID(i);
+    
+    // complete tiles within a single slice case
+    if( m_rectSlices[ i ].getSliceWidthInTiles( ) > 1 || m_rectSlices[ i ].getSliceHeightInTiles( ) > 1)
+    {
+      for( uint32_t j = 0; j < m_rectSlices[ i ].getSliceHeightInTiles( ); j++ )
+      {
+        for( uint32_t k = 0; k < m_rectSlices[ i ].getSliceWidthInTiles( ); k++ )
+        {
+          m_sliceMap[ i ].addCtusToSlice( getTileColumnBd(tileX + k), getTileColumnBd(tileX + k +1),
+                                          getTileRowBd(tileY + j), getTileRowBd(tileY + j +1), m_picWidthInCtu);
+        }
+      }
+    }
+    // multiple slices within a single tile case (also taken for a slice that is exactly one tile)
+    else 
+    {
+      uint32_t  numSlicesInTile = m_rectSlices[ i ].getNumSlicesInTile( );
+
+      ctuY = getTileRowBd( tileY );
+      for( uint32_t j = 0; j < numSlicesInTile-1; j++ ) // NOTE(review): unsigned arithmetic - a numSlicesInTile of 0 would wrap here; assumes >= 1
+      {
+        m_sliceMap[ i ].addCtusToSlice( getTileColumnBd(tileX), getTileColumnBd(tileX+1),
+                                        ctuY, ctuY + m_rectSlices[ i ].getSliceHeightInCtu(), m_picWidthInCtu);
+        ctuY += m_rectSlices[ i ].getSliceHeightInCtu();
+        i++; // advances the OUTER loop index: the slices of this tile occupy consecutive entries
+        m_sliceMap[ i ].setSliceID(i); // NOTE(review): initSliceMap() is not called for these entries and i is not re-checked against m_numSlicesInPic - confirm
+      }
+
+      // infer slice height for last slice in tile (whatever remains down to the bottom of the tile)
+      CHECK( ctuY >= getTileRowBd( tileY + 1 ), "Invalid rectangular slice signalling");
+      m_rectSlices[ i ].setSliceHeightInCtu( getTileRowBd( tileY + 1 ) - ctuY );
+      m_sliceMap[ i ].addCtusToSlice( getTileColumnBd(tileX), getTileColumnBd(tileX+1),
+                                      ctuY, getTileRowBd( tileY + 1 ), m_picWidthInCtu);
+    } 
+  }
+  }
+  // check for valid rectangular slice map
+  checkSliceMap();
+}
+
+void PPS::initRasterSliceMap( std::vector<uint32_t> numTilesInSlice ) // builds m_sliceMap for raster-scan slices: slice s takes the next numTilesInSlice[s] tiles in tile raster order
+{
+  uint32_t tileIdx = 0;
+  setNumSlicesInPic( (uint32_t) numTilesInSlice.size() ); // the slice count is taken from the length of the argument
+
+  // size the slice list to m_numSlicesInPic entries
+  CHECK(m_numSlicesInPic > MAX_SLICES, "Number of slices in picture exceeds valid range");
+  m_sliceMap.resize( m_numSlicesInPic );
+
+  for( uint32_t sliceIdx = 0; sliceIdx < numTilesInSlice.size(); sliceIdx++ ) 
+  {
+    m_sliceMap[sliceIdx].initSliceMap();
+    m_sliceMap[sliceIdx].setSliceID( tileIdx ); // the slice ID is the index of the slice's first tile, not sliceIdx
+    m_sliceMap[sliceIdx].setNumTilesInSlice( numTilesInSlice[sliceIdx] );
+    for( uint32_t idx = 0; idx < numTilesInSlice[sliceIdx]; idx++ )
+    {
+      uint32_t tileX = tileIdx % getNumTileColumns();
+      uint32_t tileY = tileIdx / getNumTileColumns();
+      CHECK(tileY >= getNumTileRows(), "Number of tiles in slice exceeds the remaining number of tiles in picture");
+
+      m_sliceMap[sliceIdx].addCtusToSlice(getTileColumnBd(tileX), getTileColumnBd(tileX + 1),
+                                          getTileRowBd(tileY), getTileRowBd(tileY + 1), 
+                                          getPicWidthInCtu());
+      tileIdx++;
+    }
+  }
+
+  // check for valid raster-scan slice map
+  checkSliceMap();
 }
 
-PPS::~PPS()
+/**
+ - check if slice map covers the entire picture without skipping or duplicating any CTU positions (failures are reported through CHECK)
+ */
+void PPS::checkSliceMap()
 {
-  delete pcv;
+  uint32_t i;
+  std::vector<uint32_t>  ctuList, sliceList;
+  uint32_t picSizeInCtu = getPicWidthInCtu() * getPicHeightInCtu();
+  for( i = 0; i < m_numSlicesInPic; i++ ) // concatenate the CTU address lists of all slices
+  {
+    sliceList = m_sliceMap[ i ].getCtuAddrList();
+    ctuList.insert( ctuList.end(), sliceList.begin(), sliceList.end() );
+  }  
+  CHECK( ctuList.size() < picSizeInCtu, "Slice map contains too few CTUs");
+  CHECK( ctuList.size() > picSizeInCtu, "Slice map contains too many CTUs");
+  std::sort( ctuList.begin(), ctuList.end() );   // sorted, so any gap or repeat shows up between neighbours
+  for( i = 1; i < ctuList.size(); i++ ) // NOTE(review): ctuList[0] is never compared against 0 - presumably all addresses are < picSizeInCtu, which then makes the check complete
+  {
+    CHECK( ctuList[i] > ctuList[i-1]+1, "CTU missing in slice map");
+    CHECK( ctuList[i] == ctuList[i-1],  "CTU duplicated in slice map");
+  }
 }
 
 APS::APS()
 : m_APSId(0)
+, m_temporalId( 0 )
+, m_layerId( 0 )
 {
 }
 
 APS::~APS()
 {
 }
-ReferencePictureSet::ReferencePictureSet()
-: m_numberOfPictures (0)
-, m_numberOfNegativePictures (0)
-, m_numberOfPositivePictures (0)
-, m_numberOfLongtermPictures (0)
-, m_interRPSPrediction (0)
-, m_deltaRIdxMinus1 (0)
-, m_deltaRPS (0)
-, m_numRefIdc (0)
-{
-  ::memset( m_deltaPOC, 0, sizeof(m_deltaPOC) );
-  ::memset( m_POC, 0, sizeof(m_POC) );
-  ::memset( m_used, 0, sizeof(m_used) );
-  ::memset( m_refIdc, 0, sizeof(m_refIdc) );
-  ::memset( m_bCheckLTMSB, 0, sizeof(m_bCheckLTMSB) );
-  ::memset( m_pocLSBLT, 0, sizeof(m_pocLSBLT) );
-  ::memset( m_deltaPOCMSBCycleLT, 0, sizeof(m_deltaPOCMSBCycleLT) );
-  ::memset( m_deltaPocMSBPresentFlag, 0, sizeof(m_deltaPocMSBPresentFlag) );
-}
 
-ReferencePictureSet::~ReferencePictureSet()
+ReferencePictureList::ReferencePictureList( const bool interLayerPicPresentFlag ) // empty list; only the inter-layer-present flag comes from the caller
+  : m_numberOfShorttermPictures(0)
+  , m_numberOfLongtermPictures(0)
+  , m_numberOfActivePictures(MAX_INT) // stays MAX_INT until setNumberOfActivePictures() is called
+  , m_ltrp_in_slice_header_flag(0)
+  , m_interLayerPresentFlag( interLayerPicPresentFlag )
+  , m_numberOfInterLayerPictures( 0 )
 {
+  ::memset(m_isLongtermRefPic, 0, sizeof(m_isLongtermRefPic));
+  ::memset(m_refPicIdentifier, 0, sizeof(m_refPicIdentifier));
+  ::memset(m_POC, 0, sizeof(m_POC));
+  ::memset( m_isInterLayerRefPic, 0, sizeof( m_isInterLayerRefPic ) );
+  ::memset( m_interLayerRefPicIdx, 0, sizeof( m_interLayerRefPicIdx ) ); // NOTE(review): m_deltaPocMSBPresentFlag / m_deltaPOCMSBCycleLT are not cleared here, only per entry in setRefPicIdentifier()
 }
 
-void ReferencePictureSet::setUsed(int bufferNum, bool used)
+ReferencePictureList::~ReferencePictureList()
 {
-  m_used[bufferNum] = used;
 }
 
-void ReferencePictureSet::setDeltaPOC(int bufferNum, int deltaPOC)
+void ReferencePictureList::setRefPicIdentifier( int idx, int identifier, bool isLongterm, bool isInterLayerRefPic, int interLayerIdx ) // stores entry idx; idx is used as an array index without a range check
 {
-  m_deltaPOC[bufferNum] = deltaPOC;
-}
+  m_refPicIdentifier[idx] = identifier;
+  m_isLongtermRefPic[idx] = isLongterm;
 
-void ReferencePictureSet::setNumberOfPictures(int numberOfPictures)
-{
-  m_numberOfPictures = numberOfPictures;
-}
+  m_deltaPocMSBPresentFlag[idx] = false; // the POC-MSB fields of the entry are reset on every call
+  m_deltaPOCMSBCycleLT[idx] = 0;
 
-int ReferencePictureSet::getUsed(int bufferNum) const
-{
-  return m_used[bufferNum];
+  m_isInterLayerRefPic[idx] = isInterLayerRefPic;
+  m_interLayerRefPicIdx[idx] = interLayerIdx;
 }
 
-int ReferencePictureSet::getDeltaPOC(int bufferNum) const
+int ReferencePictureList::getRefPicIdentifier(int idx) const
 {
-  return m_deltaPOC[bufferNum];
+  return m_refPicIdentifier[idx];
 }
 
-int ReferencePictureSet::getNumberOfPictures() const
-{
-  return m_numberOfPictures;
-}
 
-int ReferencePictureSet::getPOC(int bufferNum) const
+bool ReferencePictureList::isRefPicLongterm(int idx) const
 {
-  return m_POC[bufferNum];
+  return m_isLongtermRefPic[idx];
 }
 
-void ReferencePictureSet::setPOC(int bufferNum, int POC)
+void ReferencePictureList::setNumberOfShorttermPictures(int numberOfStrp)
 {
-  m_POC[bufferNum] = POC;
+  m_numberOfShorttermPictures = numberOfStrp;
 }
 
-bool ReferencePictureSet::getCheckLTMSBPresent(int bufferNum) const
+int ReferencePictureList::getNumberOfShorttermPictures() const
 {
-  return m_bCheckLTMSB[bufferNum];
+  return m_numberOfShorttermPictures;
 }
 
-void ReferencePictureSet::setCheckLTMSBPresent(int bufferNum, bool b)
+void ReferencePictureList::setNumberOfLongtermPictures(int numberOfLtrp)
 {
-  m_bCheckLTMSB[bufferNum] = b;
+  m_numberOfLongtermPictures = numberOfLtrp;
 }
 
-//! set the reference idc value at uiBufferNum entry to the value of iRefIdc
-void ReferencePictureSet::setRefIdc(int bufferNum, int refIdc)
+int ReferencePictureList::getNumberOfLongtermPictures() const
 {
-  m_refIdc[bufferNum] = refIdc;
+  return m_numberOfLongtermPictures;
 }
 
-//! get the reference idc value at uiBufferNum
-int  ReferencePictureSet::getRefIdc(int bufferNum) const
+void ReferencePictureList::setPOC(int idx, int POC)
 {
-  return m_refIdc[bufferNum];
+  m_POC[idx] = POC;
 }
 
-/** Sorts the deltaPOC and Used by current values in the RPS based on the deltaPOC values.
- *  deltaPOC values are sorted with -ve values before the +ve values.  -ve values are in decreasing order.
- *  +ve values are in increasing order.
- * \returns void
- */
-void ReferencePictureSet::sortDeltaPOC()
+int ReferencePictureList::getPOC(int idx) const
 {
-  // sort in increasing order (smallest first)
-  for(int j=1; j < getNumberOfPictures(); j++)
-  {
-    int deltaPOC = getDeltaPOC(j);
-    bool used = getUsed(j);
-    for (int k=j-1; k >= 0; k--)
-    {
-      int temp = getDeltaPOC(k);
-      if (deltaPOC < temp)
-      {
-        setDeltaPOC(k+1, temp);
-        setUsed(k+1, getUsed(k));
-        setDeltaPOC(k, deltaPOC);
-        setUsed(k, used);
-      }
-    }
-  }
-  // flip the negative values to largest first
-  int numNegPics = getNumberOfNegativePictures();
-  for(int j=0, k=numNegPics-1; j < numNegPics>>1; j++, k--)
-  {
-    int deltaPOC = getDeltaPOC(j);
-    bool used = getUsed(j);
-    setDeltaPOC(j, getDeltaPOC(k));
-    setUsed(j, getUsed(k));
-    setDeltaPOC(k, deltaPOC);
-    setUsed(k, used);
-  }
+  return m_POC[idx];
 }
 
-/** Prints the deltaPOC and RefIdc (if available) values in the RPS.
- *  A "*" is added to the deltaPOC value if it is Used bu current.
- * \returns void
- */
-void ReferencePictureSet::printDeltaPOC() const
+void ReferencePictureList::setNumberOfActivePictures(int numberActive)
 {
-  DTRACE( g_trace_ctx, D_RPSINFO, "DeltaPOC = { " );
-  for(int j=0; j < getNumberOfPictures(); j++)
-  {
-    DTRACE( g_trace_ctx, D_RPSINFO, "%d%s ", getDeltaPOC( j ), ( getUsed( j ) == 1 ) ? "*" : "" );
-  }
-  if (getInterRPSPrediction())
-  {
-    DTRACE( g_trace_ctx, D_RPSINFO, "}, RefIdc = { " );
-    for(int j=0; j < getNumRefIdc(); j++)
-    {
-      DTRACE( g_trace_ctx, D_RPSINFO, "%d ", getRefIdc( j ) );
-    }
-  }
-  DTRACE( g_trace_ctx, D_RPSINFO, "}\n" );
+  m_numberOfActivePictures = numberActive;
 }
 
-RefPicListModification::RefPicListModification()
-: m_refPicListModificationFlagL0 (false)
-, m_refPicListModificationFlagL1 (false)
+int ReferencePictureList::getNumberOfActivePictures() const
 {
-  ::memset( m_RefPicSetIdxL0, 0, sizeof(m_RefPicSetIdxL0) );
-  ::memset( m_RefPicSetIdxL1, 0, sizeof(m_RefPicSetIdxL1) );
+  return m_numberOfActivePictures;
 }
 
-RefPicListModification::~RefPicListModification()
+void ReferencePictureList::printRefPicInfo() const // prints the list to stdout with printf; the DTRACE variants are commented out
 {
+  //DTRACE(g_trace_ctx, D_RPSINFO, "RefPics = { ");
+  printf("RefPics = { ");
+  int numRefPic = getNumberOfShorttermPictures() + getNumberOfLongtermPictures(); // NOTE(review): m_numberOfInterLayerPictures is not added - confirm inter-layer entries should be left out
+  for (int ii = 0; ii < numRefPic; ii++)
+  {
+    //DTRACE(g_trace_ctx, D_RPSINFO, "%d%s ", m_refPicIdentifier[ii], (m_isLongtermRefPic[ii] == 1) ? "[LT]" : "[ST]");
+    printf("%d%s ", m_refPicIdentifier[ii], (m_isLongtermRefPic[ii] == 1) ? "[LT]" : "[ST]"); // identifier followed by its long-term/short-term tag
+  }
+  //DTRACE(g_trace_ctx, D_RPSINFO, "}\n");
+  printf("}\n");
 }
 
-#if HEVC_USE_SCALING_LISTS
 ScalingList::ScalingList()
 {
-  for(uint32_t sizeId = 0; sizeId < SCALING_LIST_SIZE_NUM; sizeId++)
+  m_disableScalingMatrixForLfnstBlks = true;
+  for (uint32_t scalingListId = 0; scalingListId < 28; scalingListId++) // 28 lists; the same literal bound is used by the other ScalingList loops
   {
-    for(uint32_t listId = 0; listId < SCALING_LIST_NUM; listId++)
-    {
-      m_scalingListCoef[sizeId][listId].resize(std::min<int>(MAX_MATRIX_COEF_NUM,(int)g_scalingListSize[sizeId]));
-    }
+    int matrixSize = (scalingListId < SCALING_LIST_1D_START_4x4) ? 2 : (scalingListId < SCALING_LIST_1D_START_8x8) ? 4 : 8; // stored matrix is 2x2, 4x4 or 8x8 depending on the id range
+    m_scalingListCoef[scalingListId].resize(matrixSize*matrixSize);
   }
 }
 
@@ -2111,34 +2409,40 @@ ScalingList::ScalingList()
 */
 void ScalingList::setDefaultScalingList()
 {
-  for(uint32_t sizeId = 0; sizeId < SCALING_LIST_SIZE_NUM; sizeId++)
+  for (uint32_t scalingListId = 0; scalingListId < 28; scalingListId++) // processDefaultMatrix() is applied to each of the 28 list ids
   {
-    for(uint32_t listId=0;listId<SCALING_LIST_NUM;listId++)
-    {
-      processDefaultMatrix(sizeId, listId);
-    }
+    processDefaultMatrix(scalingListId);
   }
 }
 /** check if use default quantization matrix
- * \returns true if use default quantization matrix in all size
+ * \returns true if any of the 28 lists (or, for ids from SCALING_LIST_1D_START_16x16 on, its DC value != 16) differs from the default quantization matrix
 */
-bool ScalingList::checkDefaultScalingList()
+bool ScalingList::isNotDefaultScalingList()
 {
-  uint32_t defaultCounter=0;
-
-  for( uint32_t sizeId = 0; sizeId < SCALING_LIST_SIZE_NUM; sizeId++ )
+  bool isAllDefault = true;
+  for (uint32_t scalingListId = 0; scalingListId < 28; scalingListId++)
   {
-    for(uint32_t listId=0;listId<SCALING_LIST_NUM;listId++)
+    int matrixSize = (scalingListId < SCALING_LIST_1D_START_4x4) ? 2 : (scalingListId < SCALING_LIST_1D_START_8x8) ? 4 : 8;
+    if (scalingListId < SCALING_LIST_1D_START_16x16) // smaller lists: only the matrixSize*matrixSize coefficients are compared
+    {
+      if (::memcmp(getScalingListAddress(scalingListId), getScalingListDefaultAddress(scalingListId), sizeof(int) * matrixSize * matrixSize))
+      {
+        isAllDefault = false;
+        break;
+      }
+    }
+    else // remaining lists: MAX_MATRIX_COEF_NUM coefficients plus the DC value are compared
     {
-      if( !::memcmp(getScalingListAddress(sizeId,listId), getScalingListDefaultAddress(sizeId, listId),sizeof(int)*std::min(MAX_MATRIX_COEF_NUM,(int)g_scalingListSize[sizeId])) // check value of matrix
-     && ((sizeId < SCALING_LIST_16x16) || (getScalingListDC(sizeId,listId) == 16))) // check DC value
+      if ((::memcmp(getScalingListAddress(scalingListId), getScalingListDefaultAddress(scalingListId), sizeof(int) * MAX_MATRIX_COEF_NUM)) || (getScalingListDC(scalingListId) != 16))
       {
-        defaultCounter++;
+        isAllDefault = false;
+        break;
       }
     }
+    if (!isAllDefault) break; // never taken: both branches above already break out of the loop when they clear the flag
   }
 
-  return (defaultCounter == (SCALING_LIST_NUM * SCALING_LIST_SIZE_NUM )) ? false : true;
+  return !isAllDefault;
 }
 
 /** get scaling matrix from RefMatrixID
@@ -2146,27 +2450,167 @@ bool ScalingList::checkDefaultScalingList()
  * \param listId    index of input matrix
  * \param refListId index of reference matrix
  */
-void ScalingList::processRefMatrix( uint32_t sizeId, uint32_t listId , uint32_t refListId )
+int ScalingList::lengthUvlc(int uiCode)
 {
-  ::memcpy(getScalingListAddress(sizeId, listId),((listId == refListId)? getScalingListDefaultAddress(sizeId, refListId): getScalingListAddress(sizeId, refListId)),sizeof(int)*std::min(MAX_MATRIX_COEF_NUM,(int)g_scalingListSize[sizeId]));
+  if (uiCode < 0) printf("Error UVLC! \n");
+
+  int uiLength = 1;
+  int uiTemp = ++uiCode;
+
+  CHECK(!uiTemp, "Integer overflow");
+
+  while (1 != uiTemp)
+  {
+    uiTemp >>= 1;
+    uiLength += 2;
+  }
+  return (uiLength >> 1) + ((uiLength + 1) >> 1);
 }
+int ScalingList::lengthSvlc(int uiCode)
+{
+  uint32_t uiCode2 = uint32_t(uiCode <= 0 ? (-uiCode) << 1 : (uiCode << 1) - 1);
+  int uiLength = 1;
+  int uiTemp = ++uiCode2;
 
-void ScalingList::checkPredMode(uint32_t sizeId, uint32_t listId)
+  CHECK(!uiTemp, "Integer overflow");
+
+  while (1 != uiTemp)
+  {
+    uiTemp >>= 1;
+    uiLength += 2;
+  }
+  return (uiLength >> 1) + ((uiLength + 1) >> 1);
+}
+void ScalingList::codePredScalingList(int* scalingList, const int* scalingListPred, int scalingListDC, int scalingListPredDC, int scalingListId, int& bitsCost) //scalingListId is the index of the current to-be-coded matrix
 {
-  int predListStep = (sizeId == SCALING_LIST_32x32? (SCALING_LIST_NUM/NUMBER_OF_PREDICTION_MODES) : 1); // if 32x32, skip over chroma entries.
+  int deltaValue = 0;
+  int matrixSize = (scalingListId < SCALING_LIST_1D_START_4x4) ? 2 : (scalingListId < SCALING_LIST_1D_START_8x8) ? 4 : 8;
+  int coefNum = matrixSize*matrixSize;
+  ScanElement *scan = g_scanOrder[SCAN_UNGROUPED][SCAN_DIAG][gp_sizeIdxInfo->idxFrom(matrixSize)][gp_sizeIdxInfo->idxFrom(matrixSize)];
+  int nextCoef = 0;
 
-  for(int predListIdx = (int)listId ; predListIdx >= 0; predListIdx-=predListStep)
+  int8_t data;
+  const int *src = scalingList;
+  const int *srcPred = scalingListPred;
+  if (scalingListDC!=-1 && scalingListPredDC!=-1)
   {
-    if( !::memcmp(getScalingListAddress(sizeId,listId),((listId == predListIdx) ?
-      getScalingListDefaultAddress(sizeId, predListIdx): getScalingListAddress(sizeId, predListIdx)),sizeof(int)*std::min(MAX_MATRIX_COEF_NUM,(int)g_scalingListSize[sizeId])) // check value of matrix
-     && ((sizeId < SCALING_LIST_16x16) || (getScalingListDC(sizeId,listId) == getScalingListDC(sizeId,predListIdx)))) // check DC value
-    {
-      setRefMatrixId(sizeId, listId, predListIdx);
-      setScalingListPredModeFlag(sizeId, listId, false);
+    bitsCost += lengthSvlc((int8_t)(scalingListDC - scalingListPredDC - nextCoef));
+    nextCoef =  scalingListDC - scalingListPredDC;
+  }
+  else if ((scalingListDC != -1 && scalingListPredDC == -1))
+  {
+    bitsCost += lengthSvlc((int8_t)(scalingListDC - srcPred[scan[0].idx] - nextCoef));
+    nextCoef =  scalingListDC - srcPred[scan[0].idx];
+  }
+  else if ((scalingListDC == -1 && scalingListPredDC == -1))
+  {
+  }
+  else
+  {
+    printf("Predictor DC mismatch! \n");
+  }
+  for (int i = 0; i < coefNum; i++)
+  {
+    if (scalingListId >= SCALING_LIST_1D_START_64x64 && scan[i].x >= 4 && scan[i].y >= 4)
+      continue;
+    deltaValue = (src[scan[i].idx] - srcPred[scan[i].idx]);
+    data = (int8_t)(deltaValue - nextCoef);
+    nextCoef = deltaValue;
+
+    bitsCost += lengthSvlc(data);
+  }
+}
+void ScalingList::codeScalingList(int* scalingList, int scalingListDC, int scalingListId, int& bitsCost) //scalingListId is the index of the current to-be-coded matrix
+{
+  int matrixSize = (scalingListId < SCALING_LIST_1D_START_4x4) ? 2 : (scalingListId < SCALING_LIST_1D_START_8x8) ? 4 : 8;
+  int coefNum = matrixSize * matrixSize;
+  ScanElement *scan = g_scanOrder[SCAN_UNGROUPED][SCAN_DIAG][gp_sizeIdxInfo->idxFrom(matrixSize)][gp_sizeIdxInfo->idxFrom(matrixSize)];
+  int nextCoef = SCALING_LIST_START_VALUE;
+  int8_t data;
+  const int *src = scalingList;
+
+  if (scalingListId >= SCALING_LIST_1D_START_16x16)
+  {
+    bitsCost += lengthSvlc(int8_t(getScalingListDC(scalingListId) - nextCoef));
+    nextCoef = getScalingListDC(scalingListId);
+  }
+
+  for (int i = 0; i < coefNum; i++)
+  {
+    if (scalingListId >= SCALING_LIST_1D_START_64x64 && scan[i].x >= 4 && scan[i].y >= 4)
+      continue;
+    data = int8_t(src[scan[i].idx] - nextCoef);
+    nextCoef = src[scan[i].idx];
+
+    bitsCost += lengthSvlc(data);
+  }
+}
+void ScalingList::CheckBestPredScalingList(int scalingListId, int predListId, int& BitsCount)
+{
+  //evaluate a previously coded matrix as predictor; the cost of the list-id delta is estimated with lengthUvlc()
+  int *scalingList = getScalingListAddress(scalingListId);
+  const int *scalingListPred = (scalingListId == predListId) ? ((predListId < SCALING_LIST_1D_START_8x8) ? g_quantTSDefault4x4 : g_quantIntraDefault8x8) : getScalingListAddress(predListId);
+  int scalingListDC = (scalingListId >= SCALING_LIST_1D_START_16x16) ? getScalingListDC(scalingListId) : -1;
+  int scalingListPredDC = (predListId >= SCALING_LIST_1D_START_16x16) ? ((scalingListId == predListId) ? 16 : getScalingListDC(predListId)) : -1;
+
+  int bitsCost = 0;
+  int matrixSize = (scalingListId < SCALING_LIST_1D_START_4x4) ? 2 : (scalingListId < SCALING_LIST_1D_START_8x8) ? 4 : 8;
+  int predMatrixSize = (predListId < SCALING_LIST_1D_START_4x4) ? 2 : (predListId < SCALING_LIST_1D_START_8x8) ? 4 : 8;
+
+  if (matrixSize != predMatrixSize) printf("Predictor size mismatch! \n");
+
+  bitsCost = 2 + lengthUvlc(scalingListId - predListId);
+  //copy-flag + predictor-mode-flag + deltaListId
+  codePredScalingList(scalingList, scalingListPred, scalingListDC, scalingListPredDC, scalingListId, bitsCost);
+  BitsCount = bitsCost;
+}
+void ScalingList::processRefMatrix(uint32_t scalinListId, uint32_t refListId)
+{
+  int matrixSize = (scalinListId < SCALING_LIST_1D_START_4x4) ? 2 : (scalinListId < SCALING_LIST_1D_START_8x8) ? 4 : 8;
+  ::memcpy(getScalingListAddress(scalinListId), ((scalinListId == refListId) ? getScalingListDefaultAddress(refListId) : getScalingListAddress(refListId)), sizeof(int)*matrixSize*matrixSize);
+}
+void ScalingList::checkPredMode(uint32_t scalingListId)
+{
+  int bestBitsCount = MAX_INT;
+  int BitsCount = 2;
+  setScalingListPreditorModeFlag(scalingListId, false);
+  codeScalingList(getScalingListAddress(scalingListId), ((scalingListId >= SCALING_LIST_1D_START_16x16) ? getScalingListDC(scalingListId) : -1), scalingListId, BitsCount);
+  bestBitsCount = BitsCount;
+
+  for (int predListIdx = (int)scalingListId; predListIdx >= 0; predListIdx--)
+  {
+
+    int matrixSize = (scalingListId < SCALING_LIST_1D_START_4x4) ? 2 : (scalingListId < SCALING_LIST_1D_START_8x8) ? 4 : 8;
+    int predMatrixSize = (predListIdx < SCALING_LIST_1D_START_4x4) ? 2 : (predListIdx < SCALING_LIST_1D_START_8x8) ? 4 : 8;
+    if (((scalingListId == SCALING_LIST_1D_START_2x2 || scalingListId == SCALING_LIST_1D_START_4x4 || scalingListId == SCALING_LIST_1D_START_8x8) && predListIdx != (int)scalingListId) || matrixSize != predMatrixSize)
+      continue;
+    const int* refScalingList = (scalingListId == predListIdx) ? getScalingListDefaultAddress(predListIdx) : getScalingListAddress(predListIdx);
+    const int refDC = (predListIdx < SCALING_LIST_1D_START_16x16) ? refScalingList[0] : (scalingListId == predListIdx) ? 16 : getScalingListDC(predListIdx);
+    if (!::memcmp(getScalingListAddress(scalingListId), refScalingList, sizeof(int)*matrixSize*matrixSize) // check value of matrix
+      // check DC value
+      && (scalingListId < SCALING_LIST_1D_START_16x16 || getScalingListDC(scalingListId) == refDC))
+    {
+      //copy mode
+      setRefMatrixId(scalingListId, predListIdx);
+      setScalingListCopyModeFlag(scalingListId, true);
+      setScalingListPreditorModeFlag(scalingListId, false);
       return;
     }
+    else
+    {
+      //predictor mode
+      //use previously coded matrix as a predictor
+      CheckBestPredScalingList(scalingListId, predListIdx, BitsCount);
+      if (BitsCount < bestBitsCount)
+      {
+        bestBitsCount = BitsCount;
+        setScalingListCopyModeFlag(scalingListId, false);
+        setScalingListPreditorModeFlag(scalingListId, true);
+        setRefMatrixId(scalingListId, predListIdx);
+      }
+    }
   }
-  setScalingListPredModeFlag(sizeId, listId, true);
+  setScalingListCopyModeFlag(scalingListId, false);
 }
 
 static void outputScalingListHelp(std::ostream &os)
@@ -2187,11 +2631,11 @@ static void outputScalingListHelp(std::ostream &os)
          "  <value>\n";
 
   os << "The permitted matrix names are:\n";
-  for(uint32_t sizeIdc = 0; sizeIdc < SCALING_LIST_SIZE_NUM; sizeIdc++)
+  for (uint32_t sizeIdc = SCALING_LIST_2x2; sizeIdc <= SCALING_LIST_64x64; sizeIdc++)
   {
-    for(uint32_t listIdc = 0; listIdc < SCALING_LIST_NUM; listIdc++)
+    for (uint32_t listIdc = 0; listIdc < SCALING_LIST_NUM; listIdc++)
     {
-      if ((sizeIdc!=SCALING_LIST_32x32) || (listIdc%(SCALING_LIST_NUM/NUMBER_OF_PREDICTION_MODES) == 0))
+      if (!(((sizeIdc == SCALING_LIST_64x64) && (listIdc % (SCALING_LIST_NUM / SCALING_LIST_PRED_MODES) != 0)) || ((sizeIdc == SCALING_LIST_2x2) && (listIdc % (SCALING_LIST_NUM / SCALING_LIST_PRED_MODES) == 0))))
       {
         os << "  " << MatrixType[sizeIdc][listIdc] << '\n';
       }
@@ -2201,14 +2645,15 @@ static void outputScalingListHelp(std::ostream &os)
 
 void ScalingList::outputScalingLists(std::ostream &os) const
 {
-  for(uint32_t sizeIdc = 0; sizeIdc < SCALING_LIST_SIZE_NUM; sizeIdc++)
+  int scalingListId = 0;
+  for (uint32_t sizeIdc = SCALING_LIST_2x2; sizeIdc <= SCALING_LIST_64x64; sizeIdc++)
   {
-    const uint32_t size = std::min(8,4<<(sizeIdc));
+    const uint32_t size = (sizeIdc == 1) ? 2 : ((sizeIdc == 2) ? 4 : 8);
     for(uint32_t listIdc = 0; listIdc < SCALING_LIST_NUM; listIdc++)
     {
-      if ((sizeIdc!=SCALING_LIST_32x32) || (listIdc%(SCALING_LIST_NUM/NUMBER_OF_PREDICTION_MODES) == 0))
+      if (!((sizeIdc== SCALING_LIST_64x64 && listIdc % (SCALING_LIST_NUM / SCALING_LIST_PRED_MODES) != 0) || (sizeIdc == SCALING_LIST_2x2 && listIdc < 4)))
       {
-        const int *src = getScalingListAddress(sizeIdc, listIdc);
+        const int *src = getScalingListAddress(scalingListId);
         os << (MatrixType[sizeIdc][listIdc]) << " =\n  ";
         for(uint32_t y=0; y<size; y++)
         {
@@ -2220,9 +2665,10 @@ void ScalingList::outputScalingLists(std::ostream &os) const
         }
         if(sizeIdc > SCALING_LIST_8x8)
         {
-          os << MatrixType_DC[sizeIdc][listIdc] << " = \n  " << std::setw(3) << getScalingListDC(sizeIdc, listIdc) << "\n";
+          os << MatrixType_DC[sizeIdc][listIdc] << " = \n  " << std::setw(3) << getScalingListDC(scalingListId) << "\n";
         }
         os << "\n";
+        scalingListId++;
       }
     }
   }
@@ -2248,25 +2694,21 @@ bool ScalingList::xParseScalingList(const std::string &fileName)
     return true;
   }
 
-  for(uint32_t sizeIdc = SCALING_LIST_FIRST_CODED; sizeIdc < SCALING_LIST_SIZE_NUM; sizeIdc++)
+  int scalingListId = 0;
+  for (uint32_t sizeIdc = SCALING_LIST_2x2; sizeIdc <= SCALING_LIST_64x64; sizeIdc++)//2x2-64x64
   {
     const uint32_t size = std::min(MAX_MATRIX_COEF_NUM,(int)g_scalingListSize[sizeIdc]);
 
     for(uint32_t listIdc = 0; listIdc < SCALING_LIST_NUM; listIdc++)
     {
-      int * const src = getScalingListAddress(sizeIdc, listIdc);
 
-      if ((sizeIdc==SCALING_LIST_32x32) && (listIdc%(SCALING_LIST_NUM/NUMBER_OF_PREDICTION_MODES) != 0)) // derive chroma32x32 from chroma16x16
+      if ((sizeIdc == SCALING_LIST_64x64 && listIdc % (SCALING_LIST_NUM / SCALING_LIST_PRED_MODES) != 0) || (sizeIdc == SCALING_LIST_2x2 && listIdc < 4))
       {
-        const int *srcNextSmallerSize = getScalingListAddress(sizeIdc-1, listIdc);
-        for(uint32_t i=0; i<size; i++)
-        {
-          src[i] = srcNextSmallerSize[i];
-        }
-        setScalingListDC(sizeIdc,listIdc,(sizeIdc > SCALING_LIST_8x8) ? getScalingListDC(sizeIdc-1, listIdc) : src[0]);
+        continue;
       }
       else
       {
+        int * const src = getScalingListAddress(scalingListId);
         {
           fseek(fp, 0, SEEK_SET);
           bool bFound=false;
@@ -2284,6 +2726,7 @@ bool ScalingList::xParseScalingList(const std::string &fileName)
           {
             msg( ERROR, "Error: cannot find Matrix %s from scaling list file %s\n", MatrixType[sizeIdc][listIdc], fileName.c_str());
             return true;
+
           }
         }
         for (uint32_t i=0; i<size; i++)
@@ -2303,7 +2746,7 @@ bool ScalingList::xParseScalingList(const std::string &fileName)
         }
 
         //set DC value for default matrix check
-        setScalingListDC(sizeIdc,listIdc,src[0]);
+        setScalingListDC(scalingListId, src[0]);
 
         if(sizeIdc > SCALING_LIST_8x8)
         {
@@ -2338,9 +2781,10 @@ bool ScalingList::xParseScalingList(const std::string &fileName)
             return true;
           }
           //overwrite DC value when size of matrix is larger than 16x16
-          setScalingListDC(sizeIdc,listIdc,data);
+          setScalingListDC(scalingListId, data);
         }
       }
+      scalingListId++;
     }
   }
 //  std::cout << "\n\nRead scaling lists of:\n\n";
@@ -2356,11 +2800,13 @@ bool ScalingList::xParseScalingList(const std::string &fileName)
  * \param listId list index
  * \returns pointer of quantization matrix
  */
-const int* ScalingList::getScalingListDefaultAddress(uint32_t sizeId, uint32_t listId)
+const int* ScalingList::getScalingListDefaultAddress(uint32_t scalingListId)
 {
   const int *src = 0;
-  switch(sizeId)
+  int sizeId = (scalingListId < SCALING_LIST_1D_START_8x8) ? 2 : 3;
+  switch (sizeId)
   {
+    case SCALING_LIST_1x1:
     case SCALING_LIST_2x2:
     case SCALING_LIST_4x4:
       src = g_quantTSDefault4x4;
@@ -2370,7 +2816,7 @@ const int* ScalingList::getScalingListDefaultAddress(uint32_t sizeId, uint32_t l
     case SCALING_LIST_32x32:
     case SCALING_LIST_64x64:
     case SCALING_LIST_128x128:
-      src = (listId < (SCALING_LIST_NUM/NUMBER_OF_PREDICTION_MODES) ) ? g_quantIntraDefault8x8 : g_quantInterDefault8x8;
+      src = g_quantInterDefault8x8;
       break;
     default:
       THROW( "Invalid scaling list" );
@@ -2384,43 +2830,36 @@ const int* ScalingList::getScalingListDefaultAddress(uint32_t sizeId, uint32_t l
  * \param sizeId size index
  * \param listId index of input matrix
  */
-void ScalingList::processDefaultMatrix(uint32_t sizeId, uint32_t listId)
+void ScalingList::processDefaultMatrix(uint32_t scalingListId)
 {
-  ::memcpy(getScalingListAddress(sizeId, listId),getScalingListDefaultAddress(sizeId,listId),sizeof(int)*std::min(MAX_MATRIX_COEF_NUM,(int)g_scalingListSize[sizeId]));
-  setScalingListDC(sizeId,listId,SCALING_LIST_DC);
+  int matrixSize = (scalingListId < SCALING_LIST_1D_START_4x4) ? 2 : (scalingListId < SCALING_LIST_1D_START_8x8) ? 4 : 8;
+  ::memcpy(getScalingListAddress(scalingListId), getScalingListDefaultAddress(scalingListId), sizeof(int)*matrixSize*matrixSize);
+  setScalingListDC(scalingListId, SCALING_LIST_DC);
 }
 
 /** check DC value of matrix for default matrix signaling
  */
 void ScalingList::checkDcOfMatrix()
 {
-  for(uint32_t sizeId = 0; sizeId < SCALING_LIST_SIZE_NUM; sizeId++)
+  for (uint32_t scalingListId = 0; scalingListId < 28; scalingListId++)
   {
-    for(uint32_t listId = 0; listId < SCALING_LIST_NUM; listId++)
+    //check default matrix?
+    if (getScalingListDC(scalingListId) == 0)
     {
-      //check default matrix?
-      if(getScalingListDC(sizeId,listId) == 0)
-      {
-        processDefaultMatrix(sizeId, listId);
-      }
+      processDefaultMatrix(scalingListId);
     }
   }
 }
-#endif
 
 ParameterSetManager::ParameterSetManager()
-#if HEVC_VPS
-: m_vpsMap(MAX_NUM_VPS)
-, m_spsMap(MAX_NUM_SPS)
-#else
 : m_spsMap(MAX_NUM_SPS)
-#endif
 , m_ppsMap(MAX_NUM_PPS)
-, m_apsMap(MAX_NUM_APS)
-#if HEVC_VPS
-, m_activeVPSId(-1)
-#endif
+, m_apsMap(MAX_NUM_APS * MAX_NUM_APS_TYPE)
+, m_dpsMap(MAX_NUM_DPS)
+, m_vpsMap(MAX_NUM_VPS)
+, m_activeDPSId(-1)
 , m_activeSPSId(-1)
+, m_activeVPSId(-1)
 {
 }
 
@@ -2456,11 +2895,7 @@ ParameterSetManager::~ParameterSetManager()
 //  return false;
 //}
 
-#if HEVC_VPS
-//! activate a PPS and depending on isIDR parameter also SPS and VPS
-#else
 //! activate a PPS and depending on isIDR parameter also SPS
-#endif
 //! \returns true, if activation is successful
 bool ParameterSetManager::activatePPS(int ppsId, bool isIRAP)
 {
@@ -2477,36 +2912,62 @@ bool ParameterSetManager::activatePPS(int ppsId, bool isIRAP)
       SPS *sps = m_spsMap.getPS(spsId);
       if (sps)
       {
-
-#if HEVC_VPS
-        int vpsId = sps->getVPSId();
-        if (!isIRAP && (vpsId != m_activeVPSId ))
+        int dpsId = sps->getDecodingParameterSetId();
+        if ((m_activeDPSId!=-1) && (dpsId != m_activeDPSId ))
         {
-          msg( WARNING, "Warning: tried to activate PPS referring to a inactive VPS at non-IDR.");
+          msg( WARNING, "Warning: tried to activate DPS with different ID than the currently active DPS. This should not happen within the same bitstream!");
         }
         else
         {
-#endif
-          m_spsMap.setActive(spsId);
-#if HEVC_VPS
-          VPS *vps =m_vpsMap.getPS(vpsId);
-          if (vps)
+          if (dpsId != 0)
           {
-            m_activeVPSId = vpsId;
-            m_activeSPSId = spsId;
-            m_ppsMap.setActive(ppsId);
-            return true;
+            DPS *dps =m_dpsMap.getPS(dpsId);
+            if (dps)
+            {
+              m_activeDPSId = dpsId;
+              m_dpsMap.setActive(dpsId);
+            }
+            else
+            {
+              msg( WARNING, "Warning: tried to activate PPS that refers to a non-existing DPS.");
+            }
           }
           else
           {
-            msg( WARNING, "Warning: tried to activate PPS that refers to a non-existing VPS.");
+            // set zero as active DPS ID (special reserved value, no actual DPS)
+            m_activeDPSId = dpsId;
+            m_dpsMap.setActive(dpsId);
           }
         }
-#else
+
+        int vpsId = sps->getVPSId();
+        if(vpsId != 0)
+        {
+          VPS *vps = m_vpsMap.getPS(vpsId);
+          if(vps)
+          {
+            m_activeVPSId = vpsId;
+            m_vpsMap.setActive(vpsId);
+          }  
+          else
+          {
+            msg( WARNING, "Warning: tried to activate PPS that refers to non-existing VPS." );
+          }
+        }
+        else
+        {
+          m_vpsMap.clear();
+          m_vpsMap.allocatePS(0);
+          m_activeVPSId = 0;
+          m_vpsMap.setActive(0);
+        }
+
+          m_spsMap.clear();
+          m_spsMap.setActive(spsId);
         m_activeSPSId = spsId;
+        m_ppsMap.clear();
         m_ppsMap.setActive(ppsId);
         return true;
-#endif
       }
       else
       {
@@ -2521,18 +2982,16 @@ bool ParameterSetManager::activatePPS(int ppsId, bool isIRAP)
 
   // Failed to activate if reach here.
   m_activeSPSId=-1;
-#if HEVC_VPS
-  m_activeVPSId=-1;
-#endif
+  m_activeDPSId=-1;
   return false;
 }
 
-bool ParameterSetManager::activateAPS(int apsId)
+bool ParameterSetManager::activateAPS(int apsId, int apsType)
 {
-  APS *aps = m_apsMap.getPS(apsId);
+  APS *aps = m_apsMap.getPS(apsId + (MAX_NUM_APS * apsType));
   if (aps)
   {
-    m_apsMap.setActive(apsId);
+    m_apsMap.setActive(apsId + (MAX_NUM_APS * apsType));
     return true;
   }
   else
@@ -2559,35 +3018,22 @@ void ParameterSetMap<SPS>::setID(SPS* parameterSet, const int psId)
   parameterSet->setSPSId(psId);
 }
 
-#if !JVET_M0101_HLS
-ProfileTierLevel::ProfileTierLevel()
-  : m_profileSpace    (0)
-  , m_tierFlag        (Level::MAIN)
-  , m_profileIdc      (Profile::NONE)
-  , m_levelIdc        (Level::NONE)
-  , m_progressiveSourceFlag  (false)
-  , m_interlacedSourceFlag   (false)
-  , m_nonPackedConstraintFlag(false)
-  , m_frameOnlyConstraintFlag(false)
+template <>
+void ParameterSetMap<VPS>::setID(VPS* parameterSet, const int psId)
 {
-  ::memset(m_profileCompatibilityFlag, 0, sizeof(m_profileCompatibilityFlag));
+  parameterSet->setVPSId(psId);
 }
 
-PTL::PTL()
-{
-  ::memset(m_subLayerProfilePresentFlag, 0, sizeof(m_subLayerProfilePresentFlag));
-  ::memset(m_subLayerLevelPresentFlag,   0, sizeof(m_subLayerLevelPresentFlag  ));
-}
-#else
 ProfileTierLevel::ProfileTierLevel()
   : m_tierFlag        (Level::MAIN)
   , m_profileIdc      (Profile::NONE)
+  , m_numSubProfile(0)
+  , m_subProfileIdc(0)
   , m_levelIdc        (Level::NONE)
 {
   ::memset(m_subLayerLevelPresentFlag,   0, sizeof(m_subLayerLevelPresentFlag  ));
   ::memset(m_subLayerLevelIdc, Level::NONE, sizeof(m_subLayerLevelIdc          ));
 }
-#endif
 
 void calculateParameterSetChangedFlag(bool &bChanged, const std::vector<uint8_t> *pOldData, const std::vector<uint8_t> *pNewData)
 {
@@ -2621,13 +3067,13 @@ void calculateParameterSetChangedFlag(bool &bChanged, const std::vector<uint8_t>
 
 uint32_t PreCalcValues::getValIdx( const Slice &slice, const ChannelType chType ) const
 {
-  return slice.isIRAP() ? ( ISingleTree ? 0 : ( chType << 1 ) ) : 1;
+  return slice.isIntra() ? ( ISingleTree ? 0 : ( chType << 1 ) ) : 1;
 }
 
 uint32_t PreCalcValues::getMaxBtDepth( const Slice &slice, const ChannelType chType ) const
 {
-  if ( slice.getSplitConsOverrideFlag() )
-    return (!slice.isIRAP() || isLuma(chType) || ISingleTree) ? slice.getMaxBTDepth() : slice.getMaxBTDepthIChroma();
+  if ( slice.getPicHeader()->getSplitConsOverrideFlag() )    
+    return slice.getPicHeader()->getMaxMTTHierarchyDepth( slice.getSliceType(), ISingleTree ? CHANNEL_TYPE_LUMA : chType);
   else
   return maxBtDepth[getValIdx( slice, chType )];
 }
@@ -2639,8 +3085,8 @@ uint32_t PreCalcValues::getMinBtSize( const Slice &slice, const ChannelType chTy
 
 uint32_t PreCalcValues::getMaxBtSize( const Slice &slice, const ChannelType chType ) const
 {
-  if (slice.getSplitConsOverrideFlag())
-    return (!slice.isIRAP() || isLuma(chType) || ISingleTree) ? slice.getMaxBTSize() : slice.getMaxBTSizeIChroma();
+  if (slice.getPicHeader()->getSplitConsOverrideFlag())
+    return slice.getPicHeader()->getMaxBTSize( slice.getSliceType(), ISingleTree ? CHANNEL_TYPE_LUMA : chType);
   else
     return maxBtSize[getValIdx(slice, chType)];
 }
@@ -2652,26 +3098,209 @@ uint32_t PreCalcValues::getMinTtSize( const Slice &slice, const ChannelType chTy
 
 uint32_t PreCalcValues::getMaxTtSize( const Slice &slice, const ChannelType chType ) const
 {
-  if ( slice.getSplitConsOverrideFlag() )
-    return (!slice.isIRAP() || isLuma(chType) || ISingleTree) ? slice.getMaxTTSize() : slice.getMaxTTSizeIChroma();
+  if (slice.getPicHeader()->getSplitConsOverrideFlag())
+    return slice.getPicHeader()->getMaxTTSize( slice.getSliceType(), ISingleTree ? CHANNEL_TYPE_LUMA : chType);
   else
   return maxTtSize[getValIdx( slice, chType )];
 }
 uint32_t PreCalcValues::getMinQtSize( const Slice &slice, const ChannelType chType ) const
 {
-  if ( slice.getSplitConsOverrideFlag() )
-    return (!slice.isIRAP() || isLuma(chType) || ISingleTree) ? slice.getMinQTSize() : slice.getMinQTSizeIChroma();
+  if (slice.getPicHeader()->getSplitConsOverrideFlag())
+    return slice.getPicHeader()->getMinQTSize( slice.getSliceType(), ISingleTree ? CHANNEL_TYPE_LUMA : chType);
   else
   return minQtSize[getValIdx( slice, chType )];
 }
 
+void Slice::scaleRefPicList( Picture *scaledRefPic[ ], PicHeader *picHeader, APS** apss, APS* lmcsAps, APS* scalingListAps, const bool isDecoder )
+{
+  int i;
+  const SPS* sps = getSPS();
+  const PPS* pps = getPPS();
+
+  bool refPicIsSameRes = false;
+   
+  // this is needed for IBC
+  m_pcPic->unscaledPic = m_pcPic;
+
+  if( m_eSliceType == I_SLICE )
+  {
+    return;
+  }
+
+  freeScaledRefPicList( scaledRefPic );
+
+  for( int refList = 0; refList < NUM_REF_PIC_LIST_01; refList++ )
+  {
+    if( refList == 1 && m_eSliceType != B_SLICE )
+    {
+      continue;
+    }
+
+    for( int rIdx = 0; rIdx < m_aiNumRefIdx[refList]; rIdx++ )
+    {
+      // rescale only if needed; otherwise just reuse the original picture pointer, which is needed for the motion field (a scaled picture would require a copy of the motion field as well)
+      // reference resampling for the whole picture is not applied at decoder
+
+      int xScale, yScale;
+      CU::getRprScaling( sps, pps, m_apcRefPicList[refList][rIdx], xScale, yScale );
+      m_scalingRatio[refList][rIdx] = std::pair<int, int>( xScale, yScale );
+
+#if JVET_Q0487_SCALING_WINDOW_ISSUES
+      if( m_apcRefPicList[refList][rIdx]->isRefScaled( pps ) == false )
+#else
+      if( m_scalingRatio[refList][rIdx] == SCALE_1X && pps->getPicWidthInLumaSamples() == m_apcRefPicList[refList][rIdx]->getPicWidthInLumaSamples() && pps->getPicHeightInLumaSamples() == m_apcRefPicList[refList][rIdx]->getPicHeightInLumaSamples() )
+#endif
+      {
+        refPicIsSameRes = true;
+      }
+
+      if( m_scalingRatio[refList][rIdx] == SCALE_1X || isDecoder )
+      {
+        m_scaledRefPicList[refList][rIdx] = m_apcRefPicList[refList][rIdx];
+      }
+      else
+      {
+        int poc = m_apcRefPicList[refList][rIdx]->getPOC();
+        // check whether the reference picture has already been scaled
+        for( i = 0; i < MAX_NUM_REF; i++ )
+        {
+          if( scaledRefPic[i] != nullptr && scaledRefPic[i]->poc == poc )
+          {
+            break;
+          }
+        }
+
+        if( i == MAX_NUM_REF )
+        {
+          int j;
+          // search for unused Picture structure in scaledRefPic
+          for( j = 0; j < MAX_NUM_REF; j++ )
+          {
+            if( scaledRefPic[j] == nullptr )
+            {
+              break;
+            }
+          }
+
+          CHECK( j >= MAX_NUM_REF, "scaledRefPic can not hold all reference pictures!" );
+
+          if( j >= MAX_NUM_REF )
+          {
+            j = 0;
+          }
+
+          if( scaledRefPic[j] == nullptr )
+          {
+            scaledRefPic[j] = new Picture;
+
+            scaledRefPic[j]->setBorderExtension( false );
+            scaledRefPic[j]->reconstructed = false;
+            scaledRefPic[j]->referenced = true;
+
+            scaledRefPic[j]->finalInit( m_pcPic->cs->vps, *sps, *pps, picHeader, apss, lmcsAps, scalingListAps );
+
+            scaledRefPic[j]->poc = -1;
+
+            scaledRefPic[j]->create( sps->getChromaFormatIdc(), Size( pps->getPicWidthInLumaSamples(), pps->getPicHeightInLumaSamples() ), sps->getMaxCUWidth(), sps->getMaxCUWidth() + 16, isDecoder, m_pcPic->layerId );
+          }
+
+          scaledRefPic[j]->poc = poc;
+          scaledRefPic[j]->longTerm = m_apcRefPicList[refList][rIdx]->longTerm;
+
+          // rescale the reference picture
+          const bool downsampling = m_apcRefPicList[refList][rIdx]->getRecoBuf().Y().width >= scaledRefPic[j]->getRecoBuf().Y().width && m_apcRefPicList[refList][rIdx]->getRecoBuf().Y().height >= scaledRefPic[j]->getRecoBuf().Y().height;
+          Picture::rescalePicture( m_scalingRatio[refList][rIdx], 
+                                   m_apcRefPicList[refList][rIdx]->getRecoBuf(), m_apcRefPicList[refList][rIdx]->slices[0]->getPPS()->getScalingWindow(), 
+                                   scaledRefPic[j]->getRecoBuf(), pps->getScalingWindow(), 
+                                   sps->getChromaFormatIdc(), sps->getBitDepths(), true, downsampling,
+                                   sps->getHorCollocatedChromaFlag(), sps->getVerCollocatedChromaFlag() );
+          scaledRefPic[j]->extendPicBorder();
+
+          m_scaledRefPicList[refList][rIdx] = scaledRefPic[j];
+        }
+        else
+        {
+          m_scaledRefPicList[refList][rIdx] = scaledRefPic[i];
+        }
+      }
+    }
+  }
+
+  // make the scaled reference picture list the default reference picture list
+  for( int refList = 0; refList < NUM_REF_PIC_LIST_01; refList++ )
+  {
+    if( refList == 1 && m_eSliceType != B_SLICE )
+    {
+      continue;
+    }
+
+    for( int rIdx = 0; rIdx < m_aiNumRefIdx[refList]; rIdx++ )
+    {
+      m_savedRefPicList[refList][rIdx] = m_apcRefPicList[refList][rIdx];
+      m_apcRefPicList[refList][rIdx] = m_scaledRefPicList[refList][rIdx];
+
+      // allow the access of the unscaled version in xPredInterBlk()
+      m_apcRefPicList[refList][rIdx]->unscaledPic = m_savedRefPicList[refList][rIdx];
+    }
+  }
+  
+  //Make sure that TMVP is disabled when there are no reference pictures with the same resolution
+  if(!refPicIsSameRes)
+  {
+    CHECK(getPicHeader()->getEnableTMVPFlag() != 0, "TMVP cannot be enabled in pictures that have no reference pictures with the same resolution")
+  }
+}
+
+void Slice::freeScaledRefPicList( Picture *scaledRefPic[] )
+{
+  if( m_eSliceType == I_SLICE )
+  {
+    return;
+  }
+  for( int i = 0; i < MAX_NUM_REF; i++ )
+  {
+    if( scaledRefPic[i] != nullptr )
+    {
+      scaledRefPic[i]->destroy();
+      scaledRefPic[i] = nullptr;
+    }
+  }
+}
+
+bool Slice::checkRPR()
+{
+  const PPS* pps = getPPS();
+
+  for( int refList = 0; refList < NUM_REF_PIC_LIST_01; refList++ )
+  {
+
+    if( refList == 1 && m_eSliceType != B_SLICE )
+    {
+      continue;
+    }
+
+    for( int rIdx = 0; rIdx < m_aiNumRefIdx[refList]; rIdx++ )
+    {
+      if( m_scaledRefPicList[refList][rIdx]->cs->pcv->lumaWidth != pps->getPicWidthInLumaSamples() || m_scaledRefPicList[refList][rIdx]->cs->pcv->lumaHeight != pps->getPicHeightInLumaSamples() )
+      {
+        return true;
+      }
+    }
+  }
+
+  return false;
+}
+
 #if ENABLE_TRACING
-#if HEVC_VPS
 void xTraceVPSHeader()
 {
   DTRACE( g_trace_ctx, D_HEADER, "=========== Video Parameter Set     ===========\n" );
 }
-#endif
+
+void xTraceDPSHeader()
+{
+  DTRACE( g_trace_ctx, D_HEADER, "=========== Decoding Parameter Set     ===========\n" );
+}
 
 void xTraceSPSHeader()
 {
@@ -2688,6 +3317,11 @@ void xTraceAPSHeader()
   DTRACE(g_trace_ctx, D_HEADER, "=========== Adaptation Parameter Set  ===========\n");
 }
 
+void xTracePictureHeader()
+{
+  DTRACE( g_trace_ctx, D_HEADER, "=========== Picture Header ===========\n" );
+}
+
 void xTraceSliceHeader()
 {
   DTRACE( g_trace_ctx, D_HEADER, "=========== Slice ===========\n" );
diff --git a/source/Lib/CommonLib/Slice.h b/source/Lib/CommonLib/Slice.h
index 3ccca6122a9690e81be248ce587ec8d6b9ddcedb..fa723a03d733cc43496646418f2c66d28ed43eac 100644
--- a/source/Lib/CommonLib/Slice.h
+++ b/source/Lib/CommonLib/Slice.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -46,6 +46,9 @@
 #include "Rom.h"
 #include "ChromaFormat.h"
 #include "Common.h"
+#include "HRD.h"
+#include <unordered_map>
+#include "AlfParameters.h"
 
 //! \ingroup CommonLib
 //! \{
@@ -68,130 +71,161 @@ typedef std::list<Picture*> PicList;
 // Class definition
 // ====================================================================================================================
 
-/// Reference Picture Set class
-class ReferencePictureSet
+
+class ReferencePictureList
 {
 private:
-  int  m_numberOfPictures;
-  int  m_numberOfNegativePictures;
-  int  m_numberOfPositivePictures;
-  int  m_numberOfLongtermPictures;
-  int  m_deltaPOC[MAX_NUM_REF_PICS];
-  int  m_POC[MAX_NUM_REF_PICS];
-  bool m_used[MAX_NUM_REF_PICS];
-  bool m_interRPSPrediction;
-  int  m_deltaRIdxMinus1;
-  int  m_deltaRPS;
-  int  m_numRefIdc;
-  int  m_refIdc[MAX_NUM_REF_PICS+1];
-  bool m_bCheckLTMSB[MAX_NUM_REF_PICS];
-  int  m_pocLSBLT[MAX_NUM_REF_PICS];
-  int  m_deltaPOCMSBCycleLT[MAX_NUM_REF_PICS];
-  bool m_deltaPocMSBPresentFlag[MAX_NUM_REF_PICS];
+  int   m_numberOfShorttermPictures;
+  int   m_numberOfLongtermPictures;
+  int   m_isLongtermRefPic[MAX_NUM_REF_PICS];
+  int   m_refPicIdentifier[MAX_NUM_REF_PICS];  //This can be delta POC for STRP or POC LSB for LTRP
+  int   m_POC[MAX_NUM_REF_PICS];
+  int   m_numberOfActivePictures;
+  bool  m_deltaPocMSBPresentFlag[MAX_NUM_REF_PICS];
+  int   m_deltaPOCMSBCycleLT[MAX_NUM_REF_PICS];
+  bool  m_ltrp_in_slice_header_flag;
+  bool  m_interLayerPresentFlag;
+  bool  m_isInterLayerRefPic[MAX_NUM_REF_PICS];
+  int   m_interLayerRefPicIdx[MAX_NUM_REF_PICS];
+  int   m_numberOfInterLayerPictures;
 
 public:
-          ReferencePictureSet();
-  virtual ~ReferencePictureSet();
-  int     getPocLSBLT(int i) const                     { return m_pocLSBLT[i];               }
-  void    setPocLSBLT(int i, int x)                    { m_pocLSBLT[i] = x;                  }
-  int     getDeltaPocMSBCycleLT(int i) const           { return m_deltaPOCMSBCycleLT[i];     }
-  void    setDeltaPocMSBCycleLT(int i, int x)          { m_deltaPOCMSBCycleLT[i] = x;        }
-  bool    getDeltaPocMSBPresentFlag(int i) const       { return m_deltaPocMSBPresentFlag[i]; }
-  void    setDeltaPocMSBPresentFlag(int i, bool x)     { m_deltaPocMSBPresentFlag[i] = x;    }
-  void    setUsed(int bufferNum, bool used);
-  void    setDeltaPOC(int bufferNum, int deltaPOC);
-  void    setPOC(int bufferNum, int deltaPOC);
-  void    setNumberOfPictures(int numberOfPictures);
-  void    setCheckLTMSBPresent(int bufferNum, bool b );
-  bool    getCheckLTMSBPresent(int bufferNum) const;
-
-  int     getUsed(int bufferNum) const;
-  int     getDeltaPOC(int bufferNum) const;
-  int     getPOC(int bufferNum) const;
-  int     getNumberOfPictures() const;
-
-  void    setNumberOfNegativePictures(int number)      { m_numberOfNegativePictures = number; }
-  int     getNumberOfNegativePictures() const          { return m_numberOfNegativePictures;   }
-  void    setNumberOfPositivePictures(int number)      { m_numberOfPositivePictures = number; }
-  int     getNumberOfPositivePictures() const          { return m_numberOfPositivePictures;   }
-  void    setNumberOfLongtermPictures(int number)      { m_numberOfLongtermPictures = number; }
-  int     getNumberOfLongtermPictures() const          { return m_numberOfLongtermPictures;   }
-
-  void    setInterRPSPrediction(bool flag)             { m_interRPSPrediction = flag;         }
-  bool    getInterRPSPrediction() const                { return m_interRPSPrediction;         }
-  void    setDeltaRIdxMinus1(int x)                    { m_deltaRIdxMinus1 = x;               }
-  int     getDeltaRIdxMinus1() const                   { return m_deltaRIdxMinus1;            }
-  void    setDeltaRPS(int x)                           { m_deltaRPS = x;                      }
-  int     getDeltaRPS() const                          { return m_deltaRPS;                   }
-  void    setNumRefIdc(int x)                          { m_numRefIdc = x;                     }
-  int     getNumRefIdc() const                         { return m_numRefIdc;                  }
-
-  void    setRefIdc(int bufferNum, int refIdc);
-  int     getRefIdc(int bufferNum) const ;
-
-  void    sortDeltaPOC();
-  void    printDeltaPOC() const;
+  ReferencePictureList( const bool interLayerPicPresentFlag = false );
+  virtual ~ReferencePictureList();
+
+  void    setRefPicIdentifier( int idx, int identifier, bool isLongterm, bool isInterLayerRefPic, int interLayerIdx );
+  int     getRefPicIdentifier(int idx) const;
+  bool    isRefPicLongterm(int idx) const;
+
+  void    setNumberOfShorttermPictures(int numberOfStrp);
+  int     getNumberOfShorttermPictures() const;
+
+  void    setNumberOfLongtermPictures(int numberOfLtrp);
+  int     getNumberOfLongtermPictures() const;
+
+  void    setLtrpInSliceHeaderFlag(bool flag) { m_ltrp_in_slice_header_flag = flag; }
+  bool    getLtrpInSliceHeaderFlag() const { return m_ltrp_in_slice_header_flag; }
+
+  void    setNumberOfInterLayerPictures( const int numberOfIlrp ) { m_numberOfInterLayerPictures = numberOfIlrp; }
+  int     getNumberOfInterLayerPictures() const { return m_numberOfInterLayerPictures; }
+
+  int     getNumRefEntries() const { return m_numberOfShorttermPictures + m_numberOfLongtermPictures + m_numberOfInterLayerPictures; }
+
+  void    setPOC(int idx, int POC);
+  int     getPOC(int idx) const;
+
+  void    setNumberOfActivePictures(int numberOfLtrp);
+  int     getNumberOfActivePictures() const;
+
+  int     getDeltaPocMSBCycleLT(int i) const { return m_deltaPOCMSBCycleLT[i]; }
+  void    setDeltaPocMSBCycleLT(int i, int x) { m_deltaPOCMSBCycleLT[i] = x; }
+  bool    getDeltaPocMSBPresentFlag(int i) const { return m_deltaPocMSBPresentFlag[i]; }
+  void    setDeltaPocMSBPresentFlag(int i, bool x) { m_deltaPocMSBPresentFlag[i] = x; }
+
+  void    printRefPicInfo() const;
+
+  bool      getInterLayerPresentFlag()                   const { return m_interLayerPresentFlag; }
+  void      setInterLayerPresentFlag( bool b )                 { m_interLayerPresentFlag = b; }
+  bool      isInterLayerRefPic( int idx )                const { return m_isInterLayerRefPic[idx]; }
+  int       getInterLayerRefPicIdx( int idx )            const { return m_interLayerRefPicIdx[idx]; }
+  void      setInterLayerRefPicIdx( int idx, int layerIdc )    { m_interLayerRefPicIdx[idx] = layerIdc; }
 };
 
-/// Reference Picture Set set class
-class RPSList
+/// Reference Picture List set class
+class RPLList
 {
 private:
-  std::vector<ReferencePictureSet> m_referencePictureSets;
+  std::vector<ReferencePictureList> m_referencePictureLists;
 
 public:
-                                 RPSList()                                            { }
-  virtual                        ~RPSList()                                           { }
+  RPLList() { }
+  virtual                        ~RPLList() { }
 
-  void                           create  (int numberOfEntries)                            { m_referencePictureSets.resize(numberOfEntries);         }
-  void                           destroy ()                                               { }
+  void                           create(int numberOfEntries) { m_referencePictureLists.resize(numberOfEntries); }
+  void                           destroy() { }
 
 
-  ReferencePictureSet*       getReferencePictureSet(int referencePictureSetNum)       { return &m_referencePictureSets[referencePictureSetNum]; }
-  const ReferencePictureSet* getReferencePictureSet(int referencePictureSetNum) const { return &m_referencePictureSets[referencePictureSetNum]; }
+  ReferencePictureList*          getReferencePictureList(int referencePictureListIdx) { return &m_referencePictureLists[referencePictureListIdx]; }
+  const ReferencePictureList*    getReferencePictureList(int referencePictureListIdx) const { return &m_referencePictureLists[referencePictureListIdx]; }
 
-  int                            getNumberOfReferencePictureSets() const                  { return int(m_referencePictureSets.size());              }
+  int                            getNumberOfReferencePictureLists() const { return int(m_referencePictureLists.size()); }
 };
 
-#if HEVC_USE_SCALING_LISTS
 /// SCALING_LIST class
 class ScalingList
 {
 public:
              ScalingList();
   virtual    ~ScalingList()                                                 { }
-  int*       getScalingListAddress(uint32_t sizeId, uint32_t listId)                    { return &(m_scalingListCoef[sizeId][listId][0]);            } //!< get matrix coefficient
-  const int* getScalingListAddress(uint32_t sizeId, uint32_t listId) const              { return &(m_scalingListCoef[sizeId][listId][0]);            } //!< get matrix coefficient
-  void       checkPredMode(uint32_t sizeId, uint32_t listId);
-
-  void       setRefMatrixId(uint32_t sizeId, uint32_t listId, uint32_t u)                   { m_refMatrixId[sizeId][listId] = u;                         } //!< set reference matrix ID
-  uint32_t       getRefMatrixId(uint32_t sizeId, uint32_t listId) const                     { return m_refMatrixId[sizeId][listId];                      } //!< get reference matrix ID
+  bool       getDisableScalingMatrixForLfnstBlks() const     { return m_disableScalingMatrixForLfnstBlks;}
+  void       setDisableScalingMatrixForLfnstBlks(bool flag)  { m_disableScalingMatrixForLfnstBlks = flag;}
+  int*       getScalingListAddress(uint32_t scalingListId)                    { return &(m_scalingListCoef[scalingListId][0]);            } //!< get matrix coefficient
+  const int* getScalingListAddress(uint32_t scalingListId) const              { return &(m_scalingListCoef[scalingListId][0]);            } //!< get matrix coefficient
+  void       checkPredMode(uint32_t scalingListId);
+
+  void       setRefMatrixId(uint32_t scalingListId, uint32_t u)               { m_refMatrixId[scalingListId] = u;                         } //!< set reference matrix ID
+  uint32_t       getRefMatrixId(uint32_t scalingListId) const                 { return m_refMatrixId[scalingListId];                      } //!< get reference matrix ID
+  
+  static const int* getScalingListDefaultAddress(uint32_t scalinListId);                                                                           //!< get default matrix coefficient
+  void       processDefaultMatrix(uint32_t scalinListId);
 
-  const int* getScalingListDefaultAddress(uint32_t sizeId, uint32_t listId);                                                                           //!< get default matrix coefficient
-  void       processDefaultMatrix(uint32_t sizeId, uint32_t listId);
+  void       setScalingListDC(uint32_t scalinListId, uint32_t u)              { m_scalingListDC[scalinListId] = u;                        } //!< set DC value
+  int        getScalingListDC(uint32_t scalinListId) const                    { return m_scalingListDC[scalinListId];                     } //!< get DC value
 
-  void       setScalingListDC(uint32_t sizeId, uint32_t listId, uint32_t u)                 { m_scalingListDC[sizeId][listId] = u;                       } //!< set DC value
-  int        getScalingListDC(uint32_t sizeId, uint32_t listId) const                   { return m_scalingListDC[sizeId][listId];                    } //!< get DC value
+  void       setScalingListCopyModeFlag(uint32_t scalinListId, bool bIsCopy)  { m_scalingListPredModeFlagIsCopy[scalinListId] = bIsCopy;  }
+  bool       getScalingListCopyModeFlag(uint32_t scalinListId) const          { return m_scalingListPredModeFlagIsCopy[scalinListId];     } //getScalingListPredModeFlag
+  void       processRefMatrix(uint32_t scalingListId, uint32_t refListId);
 
-  void       setScalingListPredModeFlag(uint32_t sizeId, uint32_t listId, bool bIsDPCM) { m_scalingListPredModeFlagIsDPCM[sizeId][listId] = bIsDPCM; }
-  bool       getScalingListPredModeFlag(uint32_t sizeId, uint32_t listId) const         { return m_scalingListPredModeFlagIsDPCM[sizeId][listId];    }
+  int        lengthUvlc(int uiCode);
+  int        lengthSvlc(int uiCode);
+  void       CheckBestPredScalingList(int scalingListId, int predListIdx, int& BitsCount);
+  void       codePredScalingList(int* scalingList, const int* scalingListPred, int scalingListDC, int scalingListPredDC, int scalinListId, int& bitsCost);
+  void       codeScalingList(int* scalingList, int scalingListDC, int scalinListId, int& bitsCost);
+  void       setScalingListPreditorModeFlag(uint32_t scalingListId, bool bIsPred) { m_scalingListPreditorModeFlag[scalingListId] = bIsPred; }
+  bool       getScalingListPreditorModeFlag(uint32_t scalingListId) const { return m_scalingListPreditorModeFlag[scalingListId]; }
 
   void       checkDcOfMatrix();
-  void       processRefMatrix(uint32_t sizeId, uint32_t listId , uint32_t refListId );
   bool       xParseScalingList(const std::string &fileName);
   void       setDefaultScalingList();
-  bool       checkDefaultScalingList();
+  bool       isNotDefaultScalingList();
+
+  bool operator==( const ScalingList& other )
+  {
+    if (memcmp(m_scalingListPredModeFlagIsCopy, other.m_scalingListPredModeFlagIsCopy, sizeof(m_scalingListPredModeFlagIsCopy)))
+    {
+      return false;
+    }
+    if( memcmp( m_scalingListDC, other.m_scalingListDC, sizeof( m_scalingListDC ) ) )
+    {
+      return false;
+    }
+    if( memcmp( m_refMatrixId, other.m_refMatrixId, sizeof( m_refMatrixId ) ) )
+    {
+      return false;
+    }
+    if( memcmp( m_scalingListCoef, other.m_scalingListCoef, sizeof( m_scalingListCoef ) ) )
+    {
+      return false;
+    }
+
+    return true;
+  }
+
+  bool operator!=( const ScalingList& other )
+  {
+    return !( *this == other );
+  }
 
 private:
-  void       outputScalingLists(std::ostream &os) const;
-  bool             m_scalingListPredModeFlagIsDPCM [SCALING_LIST_SIZE_NUM][SCALING_LIST_NUM]; //!< reference list index
-  int              m_scalingListDC                 [SCALING_LIST_SIZE_NUM][SCALING_LIST_NUM]; //!< the DC value of the matrix coefficient for 16x16
-  uint32_t             m_refMatrixId                   [SCALING_LIST_SIZE_NUM][SCALING_LIST_NUM]; //!< RefMatrixID
-  std::vector<int> m_scalingListCoef               [SCALING_LIST_SIZE_NUM][SCALING_LIST_NUM]; //!< quantization matrix
+  void             outputScalingLists(std::ostream &os) const;
+  bool             m_disableScalingMatrixForLfnstBlks;
+  bool             m_scalingListPredModeFlagIsCopy [30]; //!< reference list index
+  int              m_scalingListDC                 [30]; //!< the DC value of the matrix coefficient for 16x16
+  uint32_t         m_refMatrixId                   [30]; //!< RefMatrixID
+  bool             m_scalingListPreditorModeFlag   [30]; //!< reference list index
+  std::vector<int> m_scalingListCoef               [30]; //!< quantization matrix
 };
-#endif
 
-#if JVET_M0101_HLS
 class ConstraintInfo
 {
   bool              m_progressiveSourceFlag;
@@ -205,25 +239,39 @@ class ConstraintInfo
   bool              m_lowerBitRateConstraintFlag;
 
   bool              m_noQtbttDualTreeIntraConstraintFlag;
+  bool              m_noPartitionConstraintsOverrideConstraintFlag;
   bool              m_noSaoConstraintFlag;
   bool              m_noAlfConstraintFlag;
-  bool              m_noPcmConstraintFlag;
   bool              m_noRefWraparoundConstraintFlag;
   bool              m_noTemporalMvpConstraintFlag;
   bool              m_noSbtmvpConstraintFlag;
   bool              m_noAmvrConstraintFlag;
   bool              m_noBdofConstraintFlag;
+  bool              m_noDmvrConstraintFlag;
   bool              m_noCclmConstraintFlag;
   bool              m_noMtsConstraintFlag;
+  bool              m_noSbtConstraintFlag;
   bool              m_noAffineMotionConstraintFlag;
-  bool              m_noGbiConstraintFlag;
-  bool              m_noMhIntraConstraintFlag;
+  bool              m_noBcwConstraintFlag;
+  bool              m_noIbcConstraintFlag;
+  bool              m_noCiipConstraintFlag;
+  bool              m_noFPelMmvdConstraintFlag;
   bool              m_noTriangleConstraintFlag;
   bool              m_noLadfConstraintFlag;
-  bool              m_noCurrPicRefConstraintFlag;
+  bool              m_noTransformSkipConstraintFlag;
+  bool              m_noBDPCMConstraintFlag;
+  bool              m_noJointCbCrConstraintFlag;
   bool              m_noQpDeltaConstraintFlag;
   bool              m_noDepQuantConstraintFlag;
   bool              m_noSignDataHidingConstraintFlag;
+  bool              m_noTrailConstraintFlag;
+  bool              m_noStsaConstraintFlag;
+  bool              m_noRaslConstraintFlag;
+  bool              m_noRadlConstraintFlag;
+  bool              m_noIdrConstraintFlag;
+  bool              m_noCraConstraintFlag;
+  bool              m_noGdrConstraintFlag;
+  bool              m_noApsConstraintFlag;
 
 public:
   ConstraintInfo()
@@ -235,27 +283,41 @@ public:
     , m_maxBitDepthConstraintIdc  (  0)
     , m_maxChromaFormatConstraintIdc(CHROMA_420)
     , m_noQtbttDualTreeIntraConstraintFlag(false)
+    , m_noPartitionConstraintsOverrideConstraintFlag(false)
     , m_noSaoConstraintFlag      (false)
     , m_noAlfConstraintFlag      (false)
-    , m_noPcmConstraintFlag      (false)
     , m_noRefWraparoundConstraintFlag(false)
     , m_noTemporalMvpConstraintFlag(false)
     , m_noSbtmvpConstraintFlag   (false)
     , m_noAmvrConstraintFlag     (false)
     , m_noBdofConstraintFlag     (false)
+    , m_noDmvrConstraintFlag     (false)
     , m_noCclmConstraintFlag     (false)
     , m_noMtsConstraintFlag      (false)
+    , m_noSbtConstraintFlag      (false)
     , m_noAffineMotionConstraintFlag(false)
-    , m_noGbiConstraintFlag      (false)
-    , m_noMhIntraConstraintFlag  (false)
+    , m_noBcwConstraintFlag      (false)
+    , m_noIbcConstraintFlag      (false)
+    , m_noCiipConstraintFlag  (false)
+    , m_noFPelMmvdConstraintFlag (false)
     , m_noTriangleConstraintFlag (false)
     , m_noLadfConstraintFlag     (false)
-    , m_noCurrPicRefConstraintFlag(false)
+    , m_noTransformSkipConstraintFlag(false)
+    , m_noBDPCMConstraintFlag    (false)
+    , m_noJointCbCrConstraintFlag (false)
     , m_noQpDeltaConstraintFlag  (false)
     , m_noDepQuantConstraintFlag (false)
     , m_noSignDataHidingConstraintFlag(false)
+    , m_noTrailConstraintFlag (false)
+    , m_noStsaConstraintFlag (false)
+    , m_noRaslConstraintFlag (false)
+    , m_noRadlConstraintFlag (false)
+    , m_noIdrConstraintFlag (false)
+    , m_noCraConstraintFlag (false)
+    , m_noGdrConstraintFlag (false)
+    , m_noApsConstraintFlag (false)
   {}
-  
+
   bool          getProgressiveSourceFlag() const { return m_progressiveSourceFlag; }
   void          setProgressiveSourceFlag(bool b) { m_progressiveSourceFlag = b; }
 
@@ -285,12 +347,14 @@ public:
 
   bool          getNoQtbttDualTreeIntraConstraintFlag() const { return m_noQtbttDualTreeIntraConstraintFlag; }
   void          setNoQtbttDualTreeIntraConstraintFlag(bool bVal) { m_noQtbttDualTreeIntraConstraintFlag = bVal; }
+  bool          getNoPartitionConstraintsOverrideConstraintFlag() const { return m_noPartitionConstraintsOverrideConstraintFlag; }
+  void          setNoPartitionConstraintsOverrideConstraintFlag(bool bVal) { m_noPartitionConstraintsOverrideConstraintFlag = bVal; }
   bool          getNoSaoConstraintFlag() const { return m_noSaoConstraintFlag; }
   void          setNoSaoConstraintFlag(bool bVal) { m_noSaoConstraintFlag = bVal; }
   bool          getNoAlfConstraintFlag() const { return m_noAlfConstraintFlag; }
   void          setNoAlfConstraintFlag(bool bVal) { m_noAlfConstraintFlag = bVal; }
-  bool          getNoPcmConstraintFlag() const { return m_noPcmConstraintFlag; }
-  void          setNoPcmConstraintFlag(bool bVal) { m_noPcmConstraintFlag = bVal; }
+  bool          getNoJointCbCrConstraintFlag() const { return m_noJointCbCrConstraintFlag; }
+  void          setNoJointCbCrConstraintFlag(bool bVal) { m_noJointCbCrConstraintFlag = bVal; }
   bool          getNoRefWraparoundConstraintFlag() const { return m_noRefWraparoundConstraintFlag; }
   void          setNoRefWraparoundConstraintFlag(bool bVal) { m_noRefWraparoundConstraintFlag = bVal; }
   bool          getNoTemporalMvpConstraintFlag() const { return m_noTemporalMvpConstraintFlag; }
@@ -301,65 +365,70 @@ public:
   void          setNoAmvrConstraintFlag(bool bVal) { m_noAmvrConstraintFlag = bVal; }
   bool          getNoBdofConstraintFlag() const { return m_noBdofConstraintFlag; }
   void          setNoBdofConstraintFlag(bool bVal) { m_noBdofConstraintFlag = bVal; }
+  bool          getNoDmvrConstraintFlag() const { return m_noDmvrConstraintFlag; }
+  void          setNoDmvrConstraintFlag(bool bVal) { m_noDmvrConstraintFlag = bVal; }
   bool          getNoCclmConstraintFlag() const { return m_noCclmConstraintFlag; }
   void          setNoCclmConstraintFlag(bool bVal) { m_noCclmConstraintFlag = bVal; }
   bool          getNoMtsConstraintFlag() const { return m_noMtsConstraintFlag; }
   void          setNoMtsConstraintFlag(bool bVal) { m_noMtsConstraintFlag = bVal; }
+  bool          getNoSbtConstraintFlag() const { return m_noSbtConstraintFlag; }
+  void          setNoSbtConstraintFlag(bool bVal) { m_noSbtConstraintFlag = bVal; }
   bool          getNoAffineMotionConstraintFlag() const { return m_noAffineMotionConstraintFlag; }
   void          setNoAffineMotionConstraintFlag(bool bVal) { m_noAffineMotionConstraintFlag = bVal; }
-  bool          getNoGbiConstraintFlag() const { return m_noGbiConstraintFlag; }
-  void          setNoGbiConstraintFlag(bool bVal) { m_noGbiConstraintFlag = bVal; }
-  bool          getNoMhIntraConstraintFlag() const { return m_noMhIntraConstraintFlag; }
-  void          setNoMhIntraConstraintFlag(bool bVal) { m_noMhIntraConstraintFlag = bVal; }
+  bool          getNoBcwConstraintFlag() const { return m_noBcwConstraintFlag; }
+  void          setNoBcwConstraintFlag(bool bVal) { m_noBcwConstraintFlag = bVal; }
+  bool          getNoIbcConstraintFlag() const { return m_noIbcConstraintFlag; }
+  void          setNoIbcConstraintFlag(bool bVal) { m_noIbcConstraintFlag = bVal; }
+  bool          getNoCiipConstraintFlag() const { return m_noCiipConstraintFlag; }
+  void          setNoCiipConstraintFlag(bool bVal) { m_noCiipConstraintFlag = bVal; }
+  bool          getNoFPelMmvdConstraintFlag() const { return m_noFPelMmvdConstraintFlag; }
+  void          setNoFPelMmvdConstraintFlag(bool bVal) { m_noFPelMmvdConstraintFlag = bVal; }
   bool          getNoTriangleConstraintFlag() const { return m_noTriangleConstraintFlag; }
   void          setNoTriangleConstraintFlag(bool bVal) { m_noTriangleConstraintFlag = bVal; }
   bool          getNoLadfConstraintFlag() const { return m_noLadfConstraintFlag; }
   void          setNoLadfConstraintFlag(bool bVal) { m_noLadfConstraintFlag = bVal; }
-  bool          getNoCurrPicRefConstraintFlag() const { return m_noCurrPicRefConstraintFlag; }
-  void          setNoCurrPicRefConstraintFlag(bool bVal) { m_noCurrPicRefConstraintFlag = bVal; }
+  bool          getNoTransformSkipConstraintFlag() const { return m_noTransformSkipConstraintFlag; }
+  void          setNoTransformSkipConstraintFlag(bool bVal) { m_noTransformSkipConstraintFlag = bVal; }
+  bool          getNoBDPCMConstraintFlag() const { return m_noBDPCMConstraintFlag; }
+  void          setNoBDPCMConstraintFlag(bool bVal) { m_noBDPCMConstraintFlag = bVal; }
   bool          getNoQpDeltaConstraintFlag() const { return m_noQpDeltaConstraintFlag; }
   void          setNoQpDeltaConstraintFlag(bool bVal) { m_noQpDeltaConstraintFlag = bVal; }
   bool          getNoDepQuantConstraintFlag() const { return m_noDepQuantConstraintFlag; }
   void          setNoDepQuantConstraintFlag(bool bVal) { m_noDepQuantConstraintFlag = bVal; }
   bool          getNoSignDataHidingConstraintFlag() const { return m_noSignDataHidingConstraintFlag; }
   void          setNoSignDataHidingConstraintFlag(bool bVal) { m_noSignDataHidingConstraintFlag = bVal; }
+  bool          getNoTrailConstraintFlag() const { return m_noTrailConstraintFlag; }
+  void          setNoTrailConstraintFlag(bool bVal) { m_noTrailConstraintFlag = bVal; }
+  bool          getNoStsaConstraintFlag() const { return m_noStsaConstraintFlag; }
+  void          setNoStsaConstraintFlag(bool bVal) { m_noStsaConstraintFlag = bVal; }
+  bool          getNoRaslConstraintFlag() const { return m_noRaslConstraintFlag; }
+  void          setNoRaslConstraintFlag(bool bVal) { m_noRaslConstraintFlag = bVal; }
+  bool          getNoRadlConstraintFlag() const { return m_noRadlConstraintFlag; }
+  void          setNoRadlConstraintFlag(bool bVal) { m_noRadlConstraintFlag = bVal; }
+  bool          getNoIdrConstraintFlag() const { return m_noIdrConstraintFlag; }
+  void          setNoIdrConstraintFlag(bool bVal) { m_noIdrConstraintFlag = bVal; }
+  bool          getNoCraConstraintFlag() const { return m_noCraConstraintFlag; }
+  void          setNoCraConstraintFlag(bool bVal) { m_noCraConstraintFlag = bVal; }
+  bool          getNoGdrConstraintFlag() const { return m_noGdrConstraintFlag; }
+  void          setNoGdrConstraintFlag(bool bVal) { m_noGdrConstraintFlag = bVal; }
+  bool          getNoApsConstraintFlag() const { return m_noApsConstraintFlag; }
+  void          setNoApsConstraintFlag(bool bVal) { m_noApsConstraintFlag = bVal; }
 };
-#endif
 
 class ProfileTierLevel
 {
-#if !JVET_M0101_HLS
-  int               m_profileSpace;
-#endif
   Level::Tier       m_tierFlag;
   Profile::Name     m_profileIdc;
-#if !JVET_M0101_HLS
-  bool              m_profileCompatibilityFlag[32];
-#endif
+  uint8_t           m_numSubProfile;
+  std::vector<uint32_t>          m_subProfileIdc;
   Level::Name       m_levelIdc;
 
-#if !JVET_M0101_HLS
-  bool              m_progressiveSourceFlag;
-  bool              m_interlacedSourceFlag;
-  bool              m_nonPackedConstraintFlag;
-  bool              m_frameOnlyConstraintFlag;
-  uint32_t              m_bitDepthConstraintValue;
-  ChromaFormat      m_chromaFormatConstraintValue;
-  bool              m_intraConstraintFlag;
-  bool              m_onePictureOnlyConstraintFlag;
-  bool              m_lowerBitRateConstraintFlag;
-#else
   ConstraintInfo    m_constraintInfo;
   bool              m_subLayerLevelPresentFlag[MAX_TLAYER - 1];
-  Level::Name       m_subLayerLevelIdc[MAX_TLAYER - 1];
-#endif
+  Level::Name       m_subLayerLevelIdc[MAX_TLAYER];
 
 public:
                 ProfileTierLevel();
-#if !JVET_M0101_HLS
-  int           getProfileSpace() const                     { return m_profileSpace;                }
-  void          setProfileSpace(int x)                      { m_profileSpace = x;                   }
-#endif
 
   Level::Tier   getTierFlag() const                         { return m_tierFlag;                    }
   void          setTierFlag(Level::Tier x)                  { m_tierFlag = x;                       }
@@ -367,44 +436,16 @@ public:
   Profile::Name getProfileIdc() const                       { return m_profileIdc;                  }
   void          setProfileIdc(Profile::Name x)              { m_profileIdc = x;                     }
 
-#if !JVET_M0101_HLS
-  bool          getProfileCompatibilityFlag(int i) const    { return m_profileCompatibilityFlag[i]; }
-  void          setProfileCompatibilityFlag(int i, bool x)  { m_profileCompatibilityFlag[i] = x;    }
-#endif
+  uint32_t      getSubProfileIdc(int i) const               { return m_subProfileIdc[i]; }
+  void          setSubProfileIdc(int i, uint32_t x)         { m_subProfileIdc[i] = x; }
+
+  uint8_t       getNumSubProfile() const                    { return m_numSubProfile; }
+  void          setNumSubProfile(uint8_t x)                 { m_numSubProfile = x; m_subProfileIdc.resize(m_numSubProfile); }
 
   Level::Name   getLevelIdc() const                         { return m_levelIdc;                    }
   void          setLevelIdc(Level::Name x)                  { m_levelIdc = x;                       }
 
-#if !JVET_M0101_HLS
-  bool          getProgressiveSourceFlag() const            { return m_progressiveSourceFlag;       }
-  void          setProgressiveSourceFlag(bool b)            { m_progressiveSourceFlag = b;          }
-
-  bool          getInterlacedSourceFlag() const             { return m_interlacedSourceFlag;        }
-  void          setInterlacedSourceFlag(bool b)             { m_interlacedSourceFlag = b;           }
-
-  bool          getNonPackedConstraintFlag() const          { return m_nonPackedConstraintFlag;     }
-  void          setNonPackedConstraintFlag(bool b)          { m_nonPackedConstraintFlag = b;        }
-
-  bool          getFrameOnlyConstraintFlag() const          { return m_frameOnlyConstraintFlag;     }
-  void          setFrameOnlyConstraintFlag(bool b)          { m_frameOnlyConstraintFlag = b;        }
-
-  uint32_t          getBitDepthConstraint() const               { return m_bitDepthConstraintValue;     }
-  void          setBitDepthConstraint(uint32_t bitDepth)        { m_bitDepthConstraintValue=bitDepth;   }
 
-  ChromaFormat  getChromaFormatConstraint() const           { return m_chromaFormatConstraintValue; }
-  void          setChromaFormatConstraint(ChromaFormat fmt) { m_chromaFormatConstraintValue=fmt;    }
-
-  bool          getIntraConstraintFlag() const              { return m_intraConstraintFlag;         }
-  void          setIntraConstraintFlag(bool b)              { m_intraConstraintFlag = b;            }
-
-  bool          getOnePictureOnlyConstraintFlag() const     { return m_onePictureOnlyConstraintFlag;}
-  void          setOnePictureOnlyConstraintFlag(bool b)     { m_onePictureOnlyConstraintFlag = b;   }
-
-  bool          getLowerBitRateConstraintFlag() const       { return m_lowerBitRateConstraintFlag;  }
-  void          setLowerBitRateConstraintFlag(bool b)       { m_lowerBitRateConstraintFlag = b;     }
-#endif
-
-#if JVET_M0101_HLS
   ConstraintInfo*         getConstraintInfo()              { return &m_constraintInfo; }
   const ConstraintInfo*   getConstraintInfo() const        { return &m_constraintInfo; }
 
@@ -413,47 +454,10 @@ public:
 
   Level::Name             getSubLayerLevelIdc(int i) const             { return m_subLayerLevelIdc[i];   }
   void                    setSubLayerLevelIdc(int i, Level::Name x)    { m_subLayerLevelIdc[i] = x;      }
-#endif
 
 };
 
 
-#if !JVET_M0101_HLS
-class PTL
-{
-  ProfileTierLevel m_generalPTL;
-  ProfileTierLevel m_subLayerPTL    [MAX_TLAYER-1];      // max. value of max_sub_layers_minus1 is MAX_TLAYER-1 (= 6)
-  bool m_subLayerProfilePresentFlag [MAX_TLAYER-1];
-  bool m_subLayerLevelPresentFlag   [MAX_TLAYER-1];
-
-public:
-                          PTL();
-  bool                    getSubLayerProfilePresentFlag(int i) const   { return m_subLayerProfilePresentFlag[i]; }
-  void                    setSubLayerProfilePresentFlag(int i, bool x) { m_subLayerProfilePresentFlag[i] = x;    }
-
-  bool                    getSubLayerLevelPresentFlag(int i) const     { return m_subLayerLevelPresentFlag[i];   }
-  void                    setSubLayerLevelPresentFlag(int i, bool x)   { m_subLayerLevelPresentFlag[i] = x;      }
-
-  ProfileTierLevel*       getGeneralPTL()                              { return &m_generalPTL;                   }
-  const ProfileTierLevel* getGeneralPTL() const                        { return &m_generalPTL;                   }
-  ProfileTierLevel*       getSubLayerPTL(int i)                        { return &m_subLayerPTL[i];               }
-  const ProfileTierLevel* getSubLayerPTL(int i) const                  { return &m_subLayerPTL[i];               }
-};
-#endif
-
-struct HrdSubLayerInfo
-{
-  bool fixedPicRateFlag;
-  bool fixedPicRateWithinCvsFlag;
-  uint32_t picDurationInTcMinus1;
-  bool lowDelayHrdFlag;
-  uint32_t cpbCntMinus1;
-  uint32_t bitRateValueMinus1[MAX_CPB_CNT][2];
-  uint32_t cpbSizeValue      [MAX_CPB_CNT][2];
-  uint32_t ducpbSizeValue    [MAX_CPB_CNT][2];
-  bool cbrFlag           [MAX_CPB_CNT][2];
-  uint32_t duBitRateValue    [MAX_CPB_CNT][2];
-};
 
 class SliceReshapeInfo
 {
@@ -465,19 +469,65 @@ public:
   uint32_t  reshaperModelMaxBinIdx;
   int       reshaperModelBinCWDelta[PIC_CODE_CW_BINS];
   int       maxNbitsNeededDeltaCW;
+  int       chrResScalingOffset;
   void      setUseSliceReshaper(bool b)                                { sliceReshaperEnableFlag = b;            }
   bool      getUseSliceReshaper() const                                { return sliceReshaperEnableFlag;         }
   void      setSliceReshapeModelPresentFlag(bool b)                    { sliceReshaperModelPresentFlag = b;      }
   bool      getSliceReshapeModelPresentFlag() const                    { return   sliceReshaperModelPresentFlag; }
   void      setSliceReshapeChromaAdj(unsigned adj)                     { enableChromaAdj = adj;                  }
   unsigned  getSliceReshapeChromaAdj() const                           { return enableChromaAdj;                 }
+
+  bool operator==( const SliceReshapeInfo& other ) const   //!< member-wise equality; const-qualified so const objects can be compared
+  {
+    if( sliceReshaperEnableFlag != other.sliceReshaperEnableFlag )
+    {
+      return false;
+    }
+    if( sliceReshaperModelPresentFlag != other.sliceReshaperModelPresentFlag )
+    {
+      return false;
+    }
+    if( enableChromaAdj != other.enableChromaAdj )
+    {
+      return false;
+    }
+    if( reshaperModelMinBinIdx != other.reshaperModelMinBinIdx )
+    {
+      return false;
+    }
+    if( reshaperModelMaxBinIdx != other.reshaperModelMaxBinIdx )
+    {
+      return false;
+    }
+    if( maxNbitsNeededDeltaCW != other.maxNbitsNeededDeltaCW )
+    {
+      return false;
+    }
+    if (chrResScalingOffset != other.chrResScalingOffset)
+    {
+      return false;
+    }
+    if( memcmp( reshaperModelBinCWDelta, other.reshaperModelBinCWDelta, sizeof( reshaperModelBinCWDelta ) ) )   // plain int array, compared bytewise over its full size
+    {
+      return false;
+    }
+
+    return true;
+  }
+
+  bool operator!=( const SliceReshapeInfo& other ) const   //!< logical negation of operator==
+  {
+    return !( *this == other );
+  }
 };
 
 struct ReshapeCW
 {
   std::vector<uint32_t> binCW;
+  int       updateCtrl;
+  int       adpOption;
+  uint32_t  initialCW;
   int rspPicSize;
-  int rspIntraPeriod;
   int rspFps;
   int rspBaseQP;
   int rspTid;
@@ -485,235 +535,234 @@ struct ReshapeCW
   int rspFpsToIp;
 };
 
-class HRD
+struct ChromaQpAdj
 {
-private:
-  bool m_nalHrdParametersPresentFlag;
-  bool m_vclHrdParametersPresentFlag;
-  bool m_subPicCpbParamsPresentFlag;
-  uint32_t m_tickDivisorMinus2;
-  uint32_t m_duCpbRemovalDelayLengthMinus1;
-  bool m_subPicCpbParamsInPicTimingSEIFlag;
-  uint32_t m_dpbOutputDelayDuLengthMinus1;
-  uint32_t m_bitRateScale;
-  uint32_t m_cpbSizeScale;
-  uint32_t m_ducpbSizeScale;
-  uint32_t m_initialCpbRemovalDelayLengthMinus1;
-  uint32_t m_cpbRemovalDelayLengthMinus1;
-  uint32_t m_dpbOutputDelayLengthMinus1;
-  HrdSubLayerInfo m_HRD[MAX_TLAYER];
-
-public:
-  HRD()
-  :m_nalHrdParametersPresentFlag       (0)
-  ,m_vclHrdParametersPresentFlag       (0)
-  ,m_subPicCpbParamsPresentFlag        (false)
-  ,m_tickDivisorMinus2                 (0)
-  ,m_duCpbRemovalDelayLengthMinus1     (0)
-  ,m_subPicCpbParamsInPicTimingSEIFlag (false)
-  ,m_dpbOutputDelayDuLengthMinus1      (0)
-  ,m_bitRateScale                      (0)
-  ,m_cpbSizeScale                      (0)
-  ,m_initialCpbRemovalDelayLengthMinus1(23)
-  ,m_cpbRemovalDelayLengthMinus1       (23)
-  ,m_dpbOutputDelayLengthMinus1        (23)
-  {}
-
-  virtual ~HRD() {}
-
-  void    setNalHrdParametersPresentFlag( bool flag )                                { m_nalHrdParametersPresentFlag = flag;                      }
-  bool    getNalHrdParametersPresentFlag( ) const                                    { return m_nalHrdParametersPresentFlag;                      }
-
-  void    setVclHrdParametersPresentFlag( bool flag )                                { m_vclHrdParametersPresentFlag = flag;                      }
-  bool    getVclHrdParametersPresentFlag( ) const                                    { return m_vclHrdParametersPresentFlag;                      }
-
-  void    setSubPicCpbParamsPresentFlag( bool flag )                                 { m_subPicCpbParamsPresentFlag = flag;                       }
-  bool    getSubPicCpbParamsPresentFlag( ) const                                     { return m_subPicCpbParamsPresentFlag;                       }
-
-  void    setTickDivisorMinus2( uint32_t value )                                         { m_tickDivisorMinus2 = value;                               }
-  uint32_t    getTickDivisorMinus2( ) const                                              { return m_tickDivisorMinus2;                                }
-
-  void    setDuCpbRemovalDelayLengthMinus1( uint32_t value )                             { m_duCpbRemovalDelayLengthMinus1 = value;                   }
-  uint32_t    getDuCpbRemovalDelayLengthMinus1( ) const                                  { return m_duCpbRemovalDelayLengthMinus1;                    }
-
-  void    setSubPicCpbParamsInPicTimingSEIFlag( bool flag)                           { m_subPicCpbParamsInPicTimingSEIFlag = flag;                }
-  bool    getSubPicCpbParamsInPicTimingSEIFlag( ) const                              { return m_subPicCpbParamsInPicTimingSEIFlag;                }
-
-  void    setDpbOutputDelayDuLengthMinus1(uint32_t value )                               { m_dpbOutputDelayDuLengthMinus1 = value;                    }
-  uint32_t    getDpbOutputDelayDuLengthMinus1( ) const                                   { return m_dpbOutputDelayDuLengthMinus1;                     }
-
-  void    setBitRateScale( uint32_t value )                                              { m_bitRateScale = value;                                    }
-  uint32_t    getBitRateScale( ) const                                                   { return m_bitRateScale;                                     }
-
-  void    setCpbSizeScale( uint32_t value )                                              { m_cpbSizeScale = value;                                    }
-  uint32_t    getCpbSizeScale( ) const                                                   { return m_cpbSizeScale;                                     }
-  void    setDuCpbSizeScale( uint32_t value )                                            { m_ducpbSizeScale = value;                                  }
-  uint32_t    getDuCpbSizeScale( ) const                                                 { return m_ducpbSizeScale;                                   }
-
-  void    setInitialCpbRemovalDelayLengthMinus1( uint32_t value )                        { m_initialCpbRemovalDelayLengthMinus1 = value;              }
-  uint32_t    getInitialCpbRemovalDelayLengthMinus1( ) const                             { return m_initialCpbRemovalDelayLengthMinus1;               }
-
-  void    setCpbRemovalDelayLengthMinus1( uint32_t value )                               { m_cpbRemovalDelayLengthMinus1 = value;                     }
-  uint32_t    getCpbRemovalDelayLengthMinus1( ) const                                    { return m_cpbRemovalDelayLengthMinus1;                      }
-
-  void    setDpbOutputDelayLengthMinus1( uint32_t value )                                { m_dpbOutputDelayLengthMinus1 = value;                      }
-  uint32_t    getDpbOutputDelayLengthMinus1( ) const                                     { return m_dpbOutputDelayLengthMinus1;                       }
-
-  void    setFixedPicRateFlag( int layer, bool flag )                                { m_HRD[layer].fixedPicRateFlag = flag;                      }
-  bool    getFixedPicRateFlag( int layer ) const                                     { return m_HRD[layer].fixedPicRateFlag;                      }
-
-  void    setFixedPicRateWithinCvsFlag( int layer, bool flag )                       { m_HRD[layer].fixedPicRateWithinCvsFlag = flag;             }
-  bool    getFixedPicRateWithinCvsFlag( int layer ) const                            { return m_HRD[layer].fixedPicRateWithinCvsFlag;             }
-
-  void    setPicDurationInTcMinus1( int layer, uint32_t value )                          { m_HRD[layer].picDurationInTcMinus1 = value;                }
-  uint32_t    getPicDurationInTcMinus1( int layer ) const                                { return m_HRD[layer].picDurationInTcMinus1;                 }
+  union
+  {
+    struct {
+      int CbOffset;
+      int CrOffset;
+      int JointCbCrOffset;
+    } comp;
+    int offset[3];
+  } u;
+};
+struct ChromaQpMappingTableParams {
+  int               m_qpBdOffset;
+  bool              m_sameCQPTableForAllChromaFlag;
+  int               m_numQpTables;
+  int               m_qpTableStartMinus26[MAX_NUM_CQP_MAPPING_TABLES];
+  int               m_numPtsInCQPTableMinus1[MAX_NUM_CQP_MAPPING_TABLES];
+  std::vector<int>  m_deltaQpInValMinus1[MAX_NUM_CQP_MAPPING_TABLES];
+  std::vector<int>  m_deltaQpOutVal[MAX_NUM_CQP_MAPPING_TABLES];
+
+  ChromaQpMappingTableParams()
+  {
+    m_qpBdOffset = 12;
+    m_sameCQPTableForAllChromaFlag = true;
+    m_numQpTables = 1;
+    m_numPtsInCQPTableMinus1[0] = 0;
+    m_qpTableStartMinus26[0] = 0;
+    m_deltaQpInValMinus1[0] = { 0 };
+    m_deltaQpOutVal[0] = { 0 };
+  }
 
-  void    setLowDelayHrdFlag( int layer, bool flag )                                 { m_HRD[layer].lowDelayHrdFlag = flag;                       }
-  bool    getLowDelayHrdFlag( int layer ) const                                      { return m_HRD[layer].lowDelayHrdFlag;                       }
+  void      setSameCQPTableForAllChromaFlag(bool b)                             { m_sameCQPTableForAllChromaFlag = b; }
+  bool      getSameCQPTableForAllChromaFlag()                             const { return m_sameCQPTableForAllChromaFlag; }
+  void      setNumQpTables(int n)                                     { m_numQpTables = n; }
+  int       getNumQpTables()                                     const { return m_numQpTables; }
+  void      setQpTableStartMinus26(int tableIdx, int n)                         { m_qpTableStartMinus26[tableIdx] = n; }
+  int       getQpTableStartMinus26(int tableIdx)                          const { return m_qpTableStartMinus26[tableIdx]; }
+  void      setNumPtsInCQPTableMinus1(int tableIdx, int n)                      { m_numPtsInCQPTableMinus1[tableIdx] = n; }
+  int       getNumPtsInCQPTableMinus1(int tableIdx)                       const { return m_numPtsInCQPTableMinus1[tableIdx]; }
+  void      setDeltaQpInValMinus1(int tableIdx, const std::vector<int> &inVals) { m_deltaQpInValMinus1[tableIdx] = inVals; } //!< copies inVals into table tableIdx; the argument is not modified
+  void      setDeltaQpInValMinus1(int tableIdx, int idx, int n)                 { m_deltaQpInValMinus1[tableIdx][idx] = n; }
+  int       getDeltaQpInValMinus1(int tableIdx, int idx)                  const { return m_deltaQpInValMinus1[tableIdx][idx]; }
+  void      setDeltaQpOutVal(int tableIdx, const std::vector<int> &outVals)     { m_deltaQpOutVal[tableIdx] = outVals; } //!< copies outVals into table tableIdx; the argument is not modified
+  void      setDeltaQpOutVal(int tableIdx, int idx, int n)                      { m_deltaQpOutVal[tableIdx][idx] = n; }
+  int       getDeltaQpOutVal(int tableIdx, int idx)                       const { return m_deltaQpOutVal[tableIdx][idx]; }
+};
+struct ChromaQpMappingTable : ChromaQpMappingTableParams
+{
+  std::map<int, int> m_chromaQpMappingTables[MAX_NUM_CQP_MAPPING_TABLES];
 
-  void    setCpbCntMinus1( int layer, uint32_t value )                                   { m_HRD[layer].cpbCntMinus1 = value;                         }
-  uint32_t    getCpbCntMinus1( int layer ) const                                         { return m_HRD[layer].cpbCntMinus1;                          }
+  int       getMappedChromaQpValue(ComponentID compID, const int qpVal)  const { return m_chromaQpMappingTables[m_sameCQPTableForAllChromaFlag ? 0 : (int)compID - 1].at(qpVal); }
+  void      derivedChromaQPMappingTables();
+  void      setParams(const ChromaQpMappingTableParams &params, const int qpBdOffset);
+};
 
-  void    setBitRateValueMinus1( int layer, int cpbcnt, int nalOrVcl, uint32_t value )   { m_HRD[layer].bitRateValueMinus1[cpbcnt][nalOrVcl] = value; }
-  uint32_t    getBitRateValueMinus1( int layer, int cpbcnt, int nalOrVcl ) const         { return m_HRD[layer].bitRateValueMinus1[cpbcnt][nalOrVcl];  }
+class SliceMap
+{
+private:
+  uint32_t               m_sliceID;                           //!< slice identifier (slice index for rectangular slices, slice address for raster-scan slices)
+  uint32_t               m_numTilesInSlice;                   //!< number of tiles in slice (raster-scan slices only)
+  uint32_t               m_numCtuInSlice;                     //!< number of CTUs in the slice
+  std::vector<uint32_t>  m_ctuAddrInSlice;                    //!< raster-scan addresses of all the CTUs in the slice
 
-  void    setCpbSizeValueMinus1( int layer, int cpbcnt, int nalOrVcl, uint32_t value )   { m_HRD[layer].cpbSizeValue[cpbcnt][nalOrVcl] = value;       }
-  uint32_t    getCpbSizeValueMinus1( int layer, int cpbcnt, int nalOrVcl ) const         { return m_HRD[layer].cpbSizeValue[cpbcnt][nalOrVcl];        }
-  void    setDuCpbSizeValueMinus1( int layer, int cpbcnt, int nalOrVcl, uint32_t value ) { m_HRD[layer].ducpbSizeValue[cpbcnt][nalOrVcl] = value;     }
-  uint32_t    getDuCpbSizeValueMinus1( int layer, int cpbcnt, int nalOrVcl ) const       { return m_HRD[layer].ducpbSizeValue[cpbcnt][nalOrVcl];      }
-  void    setDuBitRateValueMinus1( int layer, int cpbcnt, int nalOrVcl, uint32_t value ) { m_HRD[layer].duBitRateValue[cpbcnt][nalOrVcl] = value;     }
-  uint32_t    getDuBitRateValueMinus1(int layer, int cpbcnt, int nalOrVcl ) const        { return m_HRD[layer].duBitRateValue[cpbcnt][nalOrVcl];      }
-  void    setCbrFlag( int layer, int cpbcnt, int nalOrVcl, bool value )              { m_HRD[layer].cbrFlag[cpbcnt][nalOrVcl] = value;            }
-  bool    getCbrFlag( int layer, int cpbcnt, int nalOrVcl ) const                    { return m_HRD[layer].cbrFlag[cpbcnt][nalOrVcl];             }
+public:
+  SliceMap();
+  virtual ~SliceMap();
+
+  void                   setSliceID( uint32_t u )             { m_sliceID = u;            }
+  uint32_t               getSliceID() const                   { return m_sliceID;         }
+  void                   setNumTilesInSlice( uint32_t u )     { m_numTilesInSlice = u;    }
+  uint32_t               getNumTilesInSlice() const           { return m_numTilesInSlice; }
+  void                   setNumCtuInSlice( uint32_t u )       { m_numCtuInSlice = u;      }
+  uint32_t               getNumCtuInSlice() const             { return m_numCtuInSlice;   }
+  std::vector<uint32_t>  getCtuAddrList( ) const              { return m_ctuAddrInSlice;  }
+  uint32_t               getCtuAddrInSlice( int idx ) const   { CHECK(idx >= m_ctuAddrInSlice.size(), "CTU index exceeds number of CTUs in slice."); return m_ctuAddrInSlice[idx]; }
+  void                   pushToCtuAddrInSlice( uint32_t u )   { m_ctuAddrInSlice.push_back(u); m_numCtuInSlice++;}
+
+  void  initSliceMap() 
+  {
+    m_sliceID = 0;
+    m_numTilesInSlice = 0;
+    m_numCtuInSlice = 0;
+    m_ctuAddrInSlice.clear();
+  }
 
-  bool    getCpbDpbDelaysPresentFlag( ) const                      { return getNalHrdParametersPresentFlag() || getVclHrdParametersPresentFlag(); }
+  void  addCtusToSlice( uint32_t startX, uint32_t stopX, uint32_t startY, uint32_t stopY, uint32_t picWidthInCtbsY ) 
+  {
+    CHECK( startX >= stopX || startY >= stopY, "Invalid slice definition");
+    for( uint32_t ctbY = startY; ctbY < stopY; ctbY++ ) 
+    {
+      for( uint32_t ctbX = startX; ctbX < stopX; ctbX++ ) 
+      {
+        m_ctuAddrInSlice.push_back( ctbY * picWidthInCtbsY + ctbX );
+        m_numCtuInSlice++;
+      }
+    }
+  }
 };
 
-class TimingInfo
+class RectSlice
 {
-  bool m_timingInfoPresentFlag;
-  uint32_t m_numUnitsInTick;
-  uint32_t m_timeScale;
-  bool m_pocProportionalToTimingFlag;
-  int  m_numTicksPocDiffOneMinus1;
+private:
+  uint32_t         m_tileIdx;                           //!< tile index corresponding to the first CTU in the slice
+  uint32_t         m_sliceWidthInTiles;                 //!< slice width in units of tiles
+  uint32_t         m_sliceHeightInTiles;                //!< slice height in units of tiles
+  uint32_t         m_numSlicesInTile;                   //!< number of slices in current tile for the special case of multiple slices inside a single tile
+  uint32_t         m_sliceHeightInCtu;                  //!< slice height in units of CTUs for the special case of multiple slices inside a single tile
+  
 public:
-  TimingInfo()
-  : m_timingInfoPresentFlag      (false)
-  , m_numUnitsInTick             (1001)
-  , m_timeScale                  (60000)
-  , m_pocProportionalToTimingFlag(false)
-  , m_numTicksPocDiffOneMinus1   (0)
-  {}
+  RectSlice();
+  virtual ~RectSlice();
+  
+  void             setSliceWidthInTiles( uint32_t u )   { m_sliceWidthInTiles = u;      }
+  uint32_t         getSliceWidthInTiles( ) const        { return  m_sliceWidthInTiles;  }
+  void             setSliceHeightInTiles( uint32_t u )  { m_sliceHeightInTiles = u;     }
+  uint32_t         getSliceHeightInTiles( ) const       { return  m_sliceHeightInTiles; }
+  void             setNumSlicesInTile( uint32_t u )     { m_numSlicesInTile = u;        }
+  uint32_t         getNumSlicesInTile( ) const          { return  m_numSlicesInTile;    }
+  void             setSliceHeightInCtu( uint32_t u )    { m_sliceHeightInCtu = u;       }
+  uint32_t         getSliceHeightInCtu( ) const         { return  m_sliceHeightInCtu;   }
+  void             setTileIdx( uint32_t u )             { m_tileIdx = u;                }
+  uint32_t         getTileIdx( ) const                  { return  m_tileIdx;            }
 
-  void setTimingInfoPresentFlag( bool flag )   { m_timingInfoPresentFlag = flag;       }
-  bool getTimingInfoPresentFlag( ) const       { return m_timingInfoPresentFlag;       }
+};
 
-  void setNumUnitsInTick( uint32_t value )         { m_numUnitsInTick = value;             }
-  uint32_t getNumUnitsInTick( ) const              { return m_numUnitsInTick;              }
+class DPS
+{
+private:
+  int m_decodingParameterSetId;
+  int m_maxSubLayersMinus1;
+  std::vector<ProfileTierLevel> m_profileTierLevel;
 
-  void setTimeScale( uint32_t value )              { m_timeScale = value;                  }
-  uint32_t getTimeScale( ) const                   { return m_timeScale;                   }
+public:
+  DPS()
+    : m_decodingParameterSetId(-1)
+    , m_maxSubLayersMinus1 (0)
+  {};
 
-  void setPocProportionalToTimingFlag(bool x)  { m_pocProportionalToTimingFlag = x;    }
-  bool getPocProportionalToTimingFlag( ) const { return m_pocProportionalToTimingFlag; }
+  virtual ~DPS() {};
 
-  void setNumTicksPocDiffOneMinus1(int x)      { m_numTicksPocDiffOneMinus1 = x;       }
-  int  getNumTicksPocDiffOneMinus1( ) const    { return m_numTicksPocDiffOneMinus1;    }
-};
+  int  getDecodingParameterSetId() const { return m_decodingParameterSetId; }
+  void setDecodingParameterSetId(int val) { m_decodingParameterSetId = val; }
+  int  getMaxSubLayersMinus1() const { return m_maxSubLayersMinus1; }
+  void setMaxSubLayersMinus1(int val) { m_maxSubLayersMinus1 = val; }
 
-struct ChromaQpAdj
-{
-  union
-  {
-    struct {
-      int CbOffset;
-      int CrOffset;
-    } comp;
-    int offset[2]; /* two chroma components */
-  } u;
+  size_t getNumPTLs() const { return m_profileTierLevel.size(); }
+  void  setProfileTierLevel(const std::vector<ProfileTierLevel> &val)   { m_profileTierLevel = val; }
+  const ProfileTierLevel& getProfileTierLevel(int idx) const            { return m_profileTierLevel[idx]; }
 };
 
-#if HEVC_VPS
+
+
 class VPS
 {
 private:
   int                   m_VPSId;
-  uint32_t                  m_uiMaxTLayers;
-  uint32_t                  m_uiMaxLayers;
-  bool                  m_bTemporalIdNestingFlag;
-
-  uint32_t                  m_numReorderPics[MAX_TLAYER];
-  uint32_t                  m_uiMaxDecPicBuffering[MAX_TLAYER];
-  uint32_t                  m_uiMaxLatencyIncrease[MAX_TLAYER]; // Really max latency increase plus 1 (value 0 expresses no limit)
-
-  uint32_t                  m_numHrdParameters;
-  uint32_t                  m_maxNuhReservedZeroLayerId;
-  std::vector<HRD>      m_hrdParameters;
-  std::vector<uint32_t>     m_hrdOpSetIdx;
-  std::vector<bool>     m_cprmsPresentFlag;
-  uint32_t                  m_numOpSets;
-  bool                  m_layerIdIncludedFlag[MAX_VPS_OP_SETS_PLUS1][MAX_VPS_NUH_RESERVED_ZERO_LAYER_ID_PLUS1];
-
-  PTL                   m_pcPTL;
-  TimingInfo            m_timingInfo;
+  uint32_t              m_uiMaxLayers;
+
+  uint32_t              m_vpsMaxSubLayers;
+  uint32_t              m_vpsLayerId[MAX_VPS_LAYERS];
+  bool                  m_vpsAllLayersSameNumSubLayersFlag;
+  bool                  m_vpsAllIndependentLayersFlag;
+  bool                  m_vpsIndependentLayerFlag[MAX_VPS_LAYERS];
+  bool                  m_vpsDirectRefLayerFlag[MAX_VPS_LAYERS][MAX_VPS_LAYERS];
+  bool                  m_vpsEachLayerIsAnOlsFlag;
+  uint32_t              m_vpsOlsModeIdc;
+  uint32_t              m_vpsNumOutputLayerSets;
+  bool                  m_vpsOlsOutputLayerFlag[MAX_NUM_OLSS][MAX_VPS_LAYERS];
+  uint32_t              m_directRefLayerIdx[MAX_VPS_LAYERS][MAX_VPS_LAYERS];
+  uint32_t              m_generalLayerIdx[MAX_VPS_LAYERS];
+
+  // stores index ( ilrp_idx within 0 .. NumDirectRefLayers ) of the dependent reference layers 
+  uint32_t              m_interLayerRefIdx[MAX_VPS_LAYERS][MAX_VPS_LAYERS];
+  bool                  m_vpsExtensionFlag;
 
 public:
                     VPS();
 
   virtual           ~VPS();
 
-  void              createHrdParamBuffer()
-  {
-    m_hrdParameters   .resize(getNumHrdParameters());
-    m_hrdOpSetIdx     .resize(getNumHrdParameters());
-    m_cprmsPresentFlag.resize(getNumHrdParameters());
-  }
-
-  HRD*              getHrdParameters( uint32_t i )                           { return &m_hrdParameters[ i ];                                    }
-  const HRD*        getHrdParameters( uint32_t i ) const                     { return &m_hrdParameters[ i ];                                    }
-  uint32_t              getHrdOpSetIdx( uint32_t i ) const                       { return m_hrdOpSetIdx[ i ];                                       }
-  void              setHrdOpSetIdx( uint32_t val, uint32_t i )                   { m_hrdOpSetIdx[ i ] = val;                                        }
-  bool              getCprmsPresentFlag( uint32_t i ) const                  { return m_cprmsPresentFlag[ i ];                                  }
-  void              setCprmsPresentFlag( bool val, uint32_t i )              { m_cprmsPresentFlag[ i ] = val;                                   }
-
   int               getVPSId() const                                     { return m_VPSId;                                                  }
   void              setVPSId(int i)                                      { m_VPSId = i;                                                     }
 
-  uint32_t              getMaxTLayers() const                                { return m_uiMaxTLayers;                                           }
-  void              setMaxTLayers(uint32_t t)                                { m_uiMaxTLayers = t;                                              }
+  uint32_t          getMaxLayers() const                                 { return m_uiMaxLayers;                                            }
+  void              setMaxLayers(uint32_t l)                             { m_uiMaxLayers = l;                                               }
+
+  uint32_t          getMaxSubLayers() const                              { return m_vpsMaxSubLayers;                                        }
+  void              setMaxSubLayers(uint32_t value)                      { m_vpsMaxSubLayers = value;                                       }
+  bool              getAllLayersSameNumSublayersFlag() const { return m_vpsAllLayersSameNumSubLayersFlag; }
+  void              setAllLayersSameNumSublayersFlag(bool t) { m_vpsAllLayersSameNumSubLayersFlag = t; }
+
+  uint32_t          getLayerId(uint32_t layerIdx) const { return m_vpsLayerId[layerIdx]; }
+  void              setLayerId(uint32_t layerIdx, uint32_t layerId) { m_vpsLayerId[layerIdx] = layerId; }
+
+  bool              getAllIndependentLayersFlag() const { return m_vpsAllIndependentLayersFlag; }
+  void              setAllIndependentLayersFlag(bool t) { m_vpsAllIndependentLayersFlag = t; }
+
+  bool              getIndependentLayerFlag(uint32_t layerIdx) const { return m_vpsIndependentLayerFlag[layerIdx]; }
+  void              setIndependentLayerFlag(uint32_t layerIdx, bool t) { m_vpsIndependentLayerFlag[layerIdx] = t; }
 
-  uint32_t              getMaxLayers() const                                 { return m_uiMaxLayers;                                            }
-  void              setMaxLayers(uint32_t l)                                 { m_uiMaxLayers = l;                                               }
+  bool              getDirectRefLayerFlag(uint32_t layerIdx, uint32_t refLayerIdx) const { return m_vpsDirectRefLayerFlag[layerIdx][refLayerIdx]; }
+  void              setDirectRefLayerFlag(uint32_t layerIdx, uint32_t refLayerIdx, bool t) { m_vpsDirectRefLayerFlag[layerIdx][refLayerIdx] = t; }
 
-  bool              getTemporalNestingFlag() const                       { return m_bTemporalIdNestingFlag;                                 }
-  void              setTemporalNestingFlag(bool t)                       { m_bTemporalIdNestingFlag = t;                                    }
+  uint32_t          getDirectRefLayerIdx( uint32_t layerIdx, uint32_t refLayerIdc ) const { return m_directRefLayerIdx[layerIdx][refLayerIdc]; }
+  void              setDirectRefLayerIdx( uint32_t layerIdx, uint32_t refLayerIdc, uint32_t refLayerIdx ) { m_directRefLayerIdx[layerIdx][refLayerIdc] = refLayerIdx; }
 
-  void              setNumReorderPics(uint32_t v, uint32_t tLayer)               { m_numReorderPics[tLayer] = v;                                    }
-  uint32_t              getNumReorderPics(uint32_t tLayer) const                 { return m_numReorderPics[tLayer];                                 }
+  uint32_t          getInterLayerRefIdc( uint32_t layerIdx, uint32_t refLayerIdx ) const { return m_interLayerRefIdx[layerIdx][refLayerIdx]; }
+  void              setInterLayerRefIdc( uint32_t layerIdx, uint32_t refLayerIdx, uint32_t refLayerIdc ) { m_interLayerRefIdx[layerIdx][refLayerIdx] = refLayerIdc; }
 
-  void              setMaxDecPicBuffering(uint32_t v, uint32_t tLayer)           { CHECK(tLayer >= MAX_TLAYER, "Invalid T-layer"); m_uiMaxDecPicBuffering[tLayer] = v; }
-  uint32_t              getMaxDecPicBuffering(uint32_t tLayer) const             { return m_uiMaxDecPicBuffering[tLayer];                           }
+  uint32_t          getGeneralLayerIdx(uint32_t layerId) const { return m_generalLayerIdx[layerId]; }
+  void              setGeneralLayerIdx(uint32_t layerId, uint32_t layerIdc) { m_generalLayerIdx[layerId] = layerIdc; }
 
-  void              setMaxLatencyIncrease(uint32_t v, uint32_t tLayer)           { m_uiMaxLatencyIncrease[tLayer] = v;                              }
-  uint32_t              getMaxLatencyIncrease(uint32_t tLayer) const             { return m_uiMaxLatencyIncrease[tLayer];                           }
+  bool              getEachLayerIsAnOlsFlag() const { return m_vpsEachLayerIsAnOlsFlag; }
+  void              setEachLayerIsAnOlsFlag(bool t) { m_vpsEachLayerIsAnOlsFlag = t; }
 
-  uint32_t              getNumHrdParameters() const                          { return m_numHrdParameters;                                       }
-  void              setNumHrdParameters(uint32_t v)                          { m_numHrdParameters = v;                                          }
+  uint32_t          getOlsModeIdc() const { return m_vpsOlsModeIdc; }
+  void              setOlsModeIdc(uint32_t t) { m_vpsOlsModeIdc = t; }
 
-  uint32_t              getMaxNuhReservedZeroLayerId() const                 { return m_maxNuhReservedZeroLayerId;                              }
-  void              setMaxNuhReservedZeroLayerId(uint32_t v)                 { m_maxNuhReservedZeroLayerId = v;                                 }
+  uint32_t          getNumOutputLayerSets() const { return m_vpsNumOutputLayerSets; }
+  void              setNumOutputLayerSets(uint32_t t) { m_vpsNumOutputLayerSets = t; } //!< uint32_t matches the member and getter; a uint8_t parameter would wrap a count of 256 to 0
 
-  uint32_t              getMaxOpSets() const                                 { return m_numOpSets;                                              }
-  void              setMaxOpSets(uint32_t v)                                 { m_numOpSets = v;                                                 }
-  bool              getLayerIdIncludedFlag(uint32_t opsIdx, uint32_t id) const   { return m_layerIdIncludedFlag[opsIdx][id];                        }
-  void              setLayerIdIncludedFlag(bool v, uint32_t opsIdx, uint32_t id) { m_layerIdIncludedFlag[opsIdx][id] = v;                           }
+  bool              getOlsOutputLayerFlag(uint32_t ols, uint32_t layer) const { return m_vpsOlsOutputLayerFlag[ols][layer]; }
+  void              setOlsOutputLayerFlag(uint32_t ols, uint32_t layer, bool t) { m_vpsOlsOutputLayerFlag[ols][layer] = t; }
 
-  PTL*              getPTL()                                             { return &m_pcPTL;                                                 }
-  const PTL*        getPTL() const                                       { return &m_pcPTL;                                                 }
-  TimingInfo*       getTimingInfo()                                      { return &m_timingInfo;                                            }
-  const TimingInfo* getTimingInfo() const                                { return &m_timingInfo;                                            }
+  bool              getVPSExtensionFlag() const                          { return m_vpsExtensionFlag;                                 }
+  void              setVPSExtensionFlag(bool t)                          { m_vpsExtensionFlag = t;                                    }
 };
-#endif
 
 class Window
 {
@@ -757,79 +806,50 @@ class VUI
 {
 private:
   bool       m_aspectRatioInfoPresentFlag;
+  bool       m_aspectRatioConstantFlag;
   int        m_aspectRatioIdc;
   int        m_sarWidth;
   int        m_sarHeight;
-  bool       m_overscanInfoPresentFlag;
-  bool       m_overscanAppropriateFlag;
-  bool       m_videoSignalTypePresentFlag;
-  int        m_videoFormat;
-  bool       m_videoFullRangeFlag;
   bool       m_colourDescriptionPresentFlag;
   int        m_colourPrimaries;
   int        m_transferCharacteristics;
   int        m_matrixCoefficients;
+  bool       m_fieldSeqFlag;
   bool       m_chromaLocInfoPresentFlag;
   int        m_chromaSampleLocTypeTopField;
   int        m_chromaSampleLocTypeBottomField;
-  bool       m_neutralChromaIndicationFlag;
-  bool       m_fieldSeqFlag;
-  Window     m_defaultDisplayWindow;
-  bool       m_frameFieldInfoPresentFlag;
-  bool       m_hrdParametersPresentFlag;
-  bool       m_bitstreamRestrictionFlag;
-#if HEVC_TILES_WPP
-  bool       m_tilesFixedStructureFlag;
-#endif
-  bool       m_motionVectorsOverPicBoundariesFlag;
-  bool       m_restrictedRefPicListsFlag;
-  int        m_minSpatialSegmentationIdc;
-  int        m_maxBytesPerPicDenom;
-  int        m_maxBitsPerMinCuDenom;
-  int        m_log2MaxMvLengthHorizontal;
-  int        m_log2MaxMvLengthVertical;
-  HRD    m_hrdParameters;
-  TimingInfo m_timingInfo;
+  int        m_chromaSampleLocType;
+  bool       m_overscanInfoPresentFlag;
+  bool       m_overscanAppropriateFlag;
+  bool       m_videoFullRangeFlag;
 
 public:
   VUI()
     : m_aspectRatioInfoPresentFlag        (false) //TODO: This initialiser list contains magic numbers
+    , m_aspectRatioConstantFlag           (true)
     , m_aspectRatioIdc                    (0)
     , m_sarWidth                          (0)
     , m_sarHeight                         (0)
-    , m_overscanInfoPresentFlag           (false)
-    , m_overscanAppropriateFlag           (false)
-    , m_videoSignalTypePresentFlag        (false)
-    , m_videoFormat                       (5)
-    , m_videoFullRangeFlag                (false)
     , m_colourDescriptionPresentFlag      (false)
     , m_colourPrimaries                   (2)
     , m_transferCharacteristics           (2)
     , m_matrixCoefficients                (2)
+    , m_fieldSeqFlag                      (false)
     , m_chromaLocInfoPresentFlag          (false)
     , m_chromaSampleLocTypeTopField       (0)
     , m_chromaSampleLocTypeBottomField    (0)
-    , m_neutralChromaIndicationFlag       (false)
-    , m_fieldSeqFlag                      (false)
-    , m_frameFieldInfoPresentFlag         (false)
-    , m_hrdParametersPresentFlag          (false)
-    , m_bitstreamRestrictionFlag          (false)
-#if HEVC_TILES_WPP
-    , m_tilesFixedStructureFlag           (false)
-#endif
-    , m_motionVectorsOverPicBoundariesFlag(true)
-    , m_restrictedRefPicListsFlag         (1)
-    , m_minSpatialSegmentationIdc         (0)
-    , m_maxBytesPerPicDenom               (2)
-    , m_maxBitsPerMinCuDenom              (1)
-    , m_log2MaxMvLengthHorizontal         (15)
-    , m_log2MaxMvLengthVertical           (15)
+    , m_chromaSampleLocType               (0)
+    , m_overscanInfoPresentFlag           (false)
+    , m_overscanAppropriateFlag           (false)
+    , m_videoFullRangeFlag                (false)
   {}
 
   virtual           ~VUI() {}
 
   bool              getAspectRatioInfoPresentFlag() const                  { return m_aspectRatioInfoPresentFlag;           }
   void              setAspectRatioInfoPresentFlag(bool i)                  { m_aspectRatioInfoPresentFlag = i;              }
+  bool              getAspectRatioConstantFlag() const                     { return m_aspectRatioConstantFlag;           }
+  void              setAspectRatioConstantFlag(bool b)                     { m_aspectRatioConstantFlag = b;              }
 
   int               getAspectRatioIdc() const                              { return m_aspectRatioIdc;                       }
   void              setAspectRatioIdc(int i)                               { m_aspectRatioIdc = i;                          }
@@ -840,21 +860,6 @@ public:
   int               getSarHeight() const                                   { return m_sarHeight;                            }
   void              setSarHeight(int i)                                    { m_sarHeight = i;                               }
 
-  bool              getOverscanInfoPresentFlag() const                     { return m_overscanInfoPresentFlag;              }
-  void              setOverscanInfoPresentFlag(bool i)                     { m_overscanInfoPresentFlag = i;                 }
-
-  bool              getOverscanAppropriateFlag() const                     { return m_overscanAppropriateFlag;              }
-  void              setOverscanAppropriateFlag(bool i)                     { m_overscanAppropriateFlag = i;                 }
-
-  bool              getVideoSignalTypePresentFlag() const                  { return m_videoSignalTypePresentFlag;           }
-  void              setVideoSignalTypePresentFlag(bool i)                  { m_videoSignalTypePresentFlag = i;              }
-
-  int               getVideoFormat() const                                 { return m_videoFormat;                          }
-  void              setVideoFormat(int i)                                  { m_videoFormat = i;                             }
-
-  bool              getVideoFullRangeFlag() const                          { return m_videoFullRangeFlag;                   }
-  void              setVideoFullRangeFlag(bool i)                          { m_videoFullRangeFlag = i;                      }
-
   bool              getColourDescriptionPresentFlag() const                { return m_colourDescriptionPresentFlag;         }
   void              setColourDescriptionPresentFlag(bool i)                { m_colourDescriptionPresentFlag = i;            }
 
@@ -867,6 +872,9 @@ public:
   int               getMatrixCoefficients() const                          { return m_matrixCoefficients;                   }
   void              setMatrixCoefficients(int i)                           { m_matrixCoefficients = i;                      }
 
+  bool              getFieldSeqFlag() const                                { return m_fieldSeqFlag;                         }
+  void              setFieldSeqFlag(bool i)                                { m_fieldSeqFlag = i;                            }
+
   bool              getChromaLocInfoPresentFlag() const                    { return m_chromaLocInfoPresentFlag;             }
   void              setChromaLocInfoPresentFlag(bool i)                    { m_chromaLocInfoPresentFlag = i;                }
 
@@ -876,56 +884,18 @@ public:
   int               getChromaSampleLocTypeBottomField() const              { return m_chromaSampleLocTypeBottomField;       }
   void              setChromaSampleLocTypeBottomField(int i)               { m_chromaSampleLocTypeBottomField = i;          }
 
-  bool              getNeutralChromaIndicationFlag() const                 { return m_neutralChromaIndicationFlag;          }
-  void              setNeutralChromaIndicationFlag(bool i)                 { m_neutralChromaIndicationFlag = i;             }
-
-  bool              getFieldSeqFlag() const                                { return m_fieldSeqFlag;                         }
-  void              setFieldSeqFlag(bool i)                                { m_fieldSeqFlag = i;                            }
-
-  bool              getFrameFieldInfoPresentFlag() const                   { return m_frameFieldInfoPresentFlag;            }
-  void              setFrameFieldInfoPresentFlag(bool i)                   { m_frameFieldInfoPresentFlag = i;               }
-
-  Window&           getDefaultDisplayWindow()                              { return m_defaultDisplayWindow;                 }
-  const Window&     getDefaultDisplayWindow() const                        { return m_defaultDisplayWindow;                 }
-  void              setDefaultDisplayWindow(Window& defaultDisplayWindow ) { m_defaultDisplayWindow = defaultDisplayWindow; }
-
-  bool              getHrdParametersPresentFlag() const                    { return m_hrdParametersPresentFlag;             }
-  void              setHrdParametersPresentFlag(bool i)                    { m_hrdParametersPresentFlag = i;                }
-
-  bool              getBitstreamRestrictionFlag() const                    { return m_bitstreamRestrictionFlag;             }
-  void              setBitstreamRestrictionFlag(bool i)                    { m_bitstreamRestrictionFlag = i;                }
-
-#if HEVC_TILES_WPP
-  bool              getTilesFixedStructureFlag() const                     { return m_tilesFixedStructureFlag;              }
-  void              setTilesFixedStructureFlag(bool i)                     { m_tilesFixedStructureFlag = i;                 }
-#endif
-
-  bool              getMotionVectorsOverPicBoundariesFlag() const          { return m_motionVectorsOverPicBoundariesFlag;   }
-  void              setMotionVectorsOverPicBoundariesFlag(bool i)          { m_motionVectorsOverPicBoundariesFlag = i;      }
-
-  bool              getRestrictedRefPicListsFlag() const                   { return m_restrictedRefPicListsFlag;            }
-  void              setRestrictedRefPicListsFlag(bool b)                   { m_restrictedRefPicListsFlag = b;               }
-
-  int               getMinSpatialSegmentationIdc() const                   { return m_minSpatialSegmentationIdc;            }
-  void              setMinSpatialSegmentationIdc(int i)                    { m_minSpatialSegmentationIdc = i;               }
-
-  int               getMaxBytesPerPicDenom() const                         { return m_maxBytesPerPicDenom;                  }
-  void              setMaxBytesPerPicDenom(int i)                          { m_maxBytesPerPicDenom = i;                     }
+  int               getChromaSampleLocType() const                         { return m_chromaSampleLocType;          }
+  void              setChromaSampleLocType(int i)                          { m_chromaSampleLocType = i;             }
 
-  int               getMaxBitsPerMinCuDenom() const                        { return m_maxBitsPerMinCuDenom;                 }
-  void              setMaxBitsPerMinCuDenom(int i)                         { m_maxBitsPerMinCuDenom = i;                    }
-
-  int               getLog2MaxMvLengthHorizontal() const                   { return m_log2MaxMvLengthHorizontal;            }
-  void              setLog2MaxMvLengthHorizontal(int i)                    { m_log2MaxMvLengthHorizontal = i;               }
+  bool              getOverscanInfoPresentFlag() const                     { return m_overscanInfoPresentFlag;              }
+  void              setOverscanInfoPresentFlag(bool i)                     { m_overscanInfoPresentFlag = i;                 }
 
-  int               getLog2MaxMvLengthVertical() const                     { return m_log2MaxMvLengthVertical;              }
-  void              setLog2MaxMvLengthVertical(int i)                      { m_log2MaxMvLengthVertical = i;                 }
+  bool              getOverscanAppropriateFlag() const                     { return m_overscanAppropriateFlag;              }
+  void              setOverscanAppropriateFlag(bool i)                     { m_overscanAppropriateFlag = i;                 }
 
-  HRD*              getHrdParameters()                                     { return &m_hrdParameters;                       }
-  const HRD*        getHrdParameters()  const                              { return &m_hrdParameters;                       }
+  bool              getVideoFullRangeFlag() const                          { return m_videoFullRangeFlag;                   }
+  void              setVideoFullRangeFlag(bool i)                          { m_videoFullRangeFlag = i;                      }
 
-  TimingInfo*       getTimingInfo()                                        { return &m_timingInfo;                          }
-  const TimingInfo* getTimingInfo() const                                  { return &m_timingInfo;                          }
 };
 
 /// SPS RExt class
@@ -989,105 +959,102 @@ class SPS
 {
 private:
   int               m_SPSId;
-#if !JVET_M0101_HLS
-  bool              m_bIntraOnlyConstraintFlag;
-  uint32_t          m_maxBitDepthConstraintIdc;
-  uint32_t          m_maxChromaFormatConstraintIdc;
-  bool              m_bFrameConstraintFlag;
-  bool              m_bNoQtbttDualTreeIntraConstraintFlag;
-  bool              m_bNoSaoConstraintFlag;
-  bool              m_bNoAlfConstraintFlag;
-  bool              m_bNoPcmConstraintFlag;
-  bool              m_bNoRefWraparoundConstraintFlag;
-  bool              m_bNoTemporalMvpConstraintFlag;
-  bool              m_bNoSbtmvpConstraintFlag;
-  bool              m_bNoAmvrConstraintFlag;
-  bool              m_bNoBdofConstraintFlag;
-  bool              m_bNoCclmConstraintFlag;
-  bool              m_bNoMtsConstraintFlag;
-  bool              m_bNoAffineMotionConstraintFlag;
-  bool              m_bNoGbiConstraintFlag;
-  bool              m_bNoMhIntraConstraintFlag;
-  bool              m_bNoTriangleConstraintFlag;
-  bool              m_bNoLadfConstraintFlag;
-  bool              m_bNoCurrPicRefConstraintFlag;
-  bool              m_bNoQpDeltaConstraintFlag;
-  bool              m_bNoDepQuantConstraintFlag;
-  bool              m_bNoSignDataHidingConstraintFlag;
-#endif
+  int               m_decodingParameterSetId;
+  int               m_VPSId;
 
   bool              m_affineAmvrEnabledFlag;
   bool              m_DMVR;
+  bool              m_MMVD;
   bool              m_SBT;
-  uint8_t           m_MaxSbtSize;
-#if HEVC_VPS
-  int               m_VPSId;
-#endif
+  bool              m_ISP;
   ChromaFormat      m_chromaFormatIdc;
+  bool              m_separateColourPlaneFlag;     //!< separate colour plane flag
 
   uint32_t              m_uiMaxTLayers;           // maximum number of temporal layers
 
   // Structure
-  uint32_t              m_picWidthInLumaSamples;
-  uint32_t              m_picHeightInLumaSamples;
+  uint32_t              m_maxWidthInLumaSamples;
+  uint32_t              m_maxHeightInLumaSamples;
+  bool                  m_subPicPresentFlag;                    // indicates the presence of sub-pictures
+  uint8_t               m_numSubPics;                        //!< number of sub-pictures used
+  uint32_t              m_subPicCtuTopLeftX[MAX_NUM_SUB_PICS];                //!< per-sub-picture top-left X (presumably in CTU units, per the name - TODO confirm)
+  uint32_t              m_subPicCtuTopLeftY[MAX_NUM_SUB_PICS];                //!< per-sub-picture top-left Y (presumably in CTU units, per the name - TODO confirm)
+  uint32_t              m_SubPicWidth[MAX_NUM_SUB_PICS];                      //!< per-sub-picture width (units not visible here - TODO confirm)
+  uint32_t              m_SubPicHeight[MAX_NUM_SUB_PICS];                     //!< per-sub-picture height (units not visible here - TODO confirm)
+  bool                  m_subPicTreatedAsPicFlag[MAX_NUM_SUB_PICS];           //!< per-sub-picture flag; presumably "treat this sub-picture as a picture" - TODO confirm semantics
+  bool                  m_loopFilterAcrossSubpicEnabledFlag[MAX_NUM_SUB_PICS];//!< per-sub-picture flag; presumably allows loop filtering across its boundaries - TODO confirm
+  bool                  m_subPicIdPresentFlag;               //!< indicates the presence of sub-picture IDs
+  bool                  m_subPicIdSignallingPresentFlag;     //!< indicates the presence of sub-picture ID signalling in the SPS
+  uint32_t              m_subPicIdLen;                       //!< sub-picture ID length in bits
+  uint8_t               m_subPicId[MAX_NUM_SUB_PICS];        //!< sub-picture ID for each sub-picture in the sequence; NOTE(review): 8-bit storage while m_subPicIdLen is a bit length - confirm IDs never exceed 8 bits
 
   int               m_log2MinCodingBlockSize;
   int               m_log2DiffMaxMinCodingBlockSize;
   unsigned    m_CTUSize;
   unsigned    m_partitionOverrideEnalbed;       // enable partition constraints override function
   unsigned    m_minQT[3];   // 0: I slice luma; 1: P/B slice; 2: I slice chroma
-  unsigned    m_maxBTDepth[3];
+  unsigned    m_maxMTTHierarchyDepth[3];
   unsigned    m_maxBTSize[3];
   unsigned    m_maxTTSize[3];
+  bool        m_idrRefParamList;
   unsigned    m_dualITree;
   uint32_t              m_uiMaxCUWidth;
   uint32_t              m_uiMaxCUHeight;
   uint32_t              m_uiMaxCodingDepth; ///< Total CU depth, relative to the smallest possible transform block size.
 
-  Window            m_conformanceWindow;
 
-  RPSList           m_RPSList;
+  RPLList           m_RPLList0;
+  RPLList           m_RPLList1;
+  uint32_t          m_numRPL0;
+  uint32_t          m_numRPL1;
+  bool              m_rpl1CopyFromRpl0Flag;
+  bool              m_rpl1IdxPresentFlag;
+  bool              m_allRplEntriesHasSameSignFlag;
   bool              m_bLongTermRefsPresent;
   bool              m_SPSTemporalMVPEnabledFlag;
   int               m_numReorderPics[MAX_TLAYER];
 
   // Tool list
-  bool                  m_pcmEnabledFlag;
-  uint32_t              m_pcmLog2MaxSize;
-  uint32_t              m_uiPCMLog2MinSize;
 
+  bool              m_transformSkipEnabledFlag;
+  int               m_BDPCMEnabled;
+  bool              m_JointCbCrEnabledFlag;
   // Parameter
   BitDepths         m_bitDepths;
   int               m_qpBDOffset[MAX_NUM_CHANNEL_TYPE];
-  int               m_pcmBitDepths[MAX_NUM_CHANNEL_TYPE];
-  bool              m_bPCMFilterDisableFlag;
+  int               m_minQpMinus4[MAX_NUM_CHANNEL_TYPE]; //  QP_internal - QP_input;
 
   bool              m_sbtmvpEnabledFlag;
   bool              m_bdofEnabledFlag;
-  bool              m_disFracMmvdEnabledFlag;
+  bool              m_fpelMmvdEnabledFlag;
+  bool              m_BdofControlPresentFlag;
+  bool              m_DmvrControlPresentFlag;
+  bool              m_ProfControlPresentFlag;
   uint32_t              m_uiBitsForPOC;
   uint32_t              m_numLongTermRefPicSPS;
   uint32_t              m_ltRefPicPocLsbSps[MAX_NUM_LONG_TERM_REF_PICS];
   bool              m_usedByCurrPicLtSPSFlag[MAX_NUM_LONG_TERM_REF_PICS];
-#if MAX_TB_SIZE_SIGNALLING
   uint32_t          m_log2MaxTbSize;
-#endif
+  bool             m_useWeightPred;                     //!< Use of Weighting Prediction (P_SLICE)
+  bool             m_useWeightedBiPred;                 //!< Use of Weighting Bi-Prediction (B_SLICE)
 
   bool              m_saoEnabledFlag;
 
   bool              m_bTemporalIdNestingFlag; // temporal_id_nesting_flag
 
-#if HEVC_USE_SCALING_LISTS
   bool              m_scalingListEnabledFlag;
-  bool              m_scalingListPresentFlag;
-  ScalingList       m_scalingList;
-#endif
+  bool              m_loopFilterAcrossVirtualBoundariesDisabledFlag;   //!< disable loop filtering across virtual boundaries
+  unsigned          m_numVerVirtualBoundaries;                         //!< number of vertical virtual boundaries
+  unsigned          m_numHorVirtualBoundaries;                         //!< number of horizontal virtual boundaries
+  unsigned          m_virtualBoundariesPosX[3];                        //!< horizontal position of each vertical virtual boundary
+  unsigned          m_virtualBoundariesPosY[3];                        //!< vertical position of each horizontal virtual boundary
   uint32_t              m_uiMaxDecPicBuffering[MAX_TLAYER];
   uint32_t              m_uiMaxLatencyIncreasePlus1[MAX_TLAYER];
 
-#if HEVC_USE_INTRA_SMOOTHING_T32 || HEVC_USE_INTRA_SMOOTHING_T64
-  bool              m_useStrongIntraSmoothing;
-#endif
+
+  TimingInfo        m_timingInfo;
+  bool              m_hrdParametersPresentFlag;
+  HRDParameters     m_hrdParameters;
 
   bool              m_vuiParametersPresentFlag;
   VUI               m_vuiParameters;
@@ -1096,29 +1063,31 @@ private:
 
   static const int  m_winUnitX[NUM_CHROMA_FORMAT];
   static const int  m_winUnitY[NUM_CHROMA_FORMAT];
-#if !JVET_M0101_HLS
-  PTL               m_pcPTL;
-#else
   ProfileTierLevel  m_profileTierLevel;
-#endif
 
   bool              m_alfEnabledFlag;
 
   bool              m_wrapAroundEnabledFlag;
   unsigned          m_wrapAroundOffset;
   unsigned          m_IBCFlag;
+  bool              m_useColorTrans;
+  unsigned          m_PLTMode;
 
-  bool              m_lumaReshapeEnable;
+  bool              m_lmcsEnabled;
   bool              m_AMVREnabledFlag;
   bool              m_LMChroma;
-  bool              m_cclmCollocatedChromaFlag;
+  bool              m_horCollocatedChromaFlag;
+  bool              m_verCollocatedChromaFlag;
   bool              m_MTS;
   bool              m_IntraMTS;                   // 18
   bool              m_InterMTS;                   // 19
+  bool              m_LFNST;
+  bool              m_SMVD;
   bool              m_Affine;
   bool              m_AffineType;
-  bool              m_GBi;                        //
-  bool              m_MHIntra;
+  bool              m_PROF;
+  bool              m_bcw;                        //
+  bool              m_ciip;
   bool              m_Triangle;
 #if LUMA_ADAPTIVE_DEBLOCKING_FILTER_QP_OFFSET
   bool              m_LadfEnabled;
@@ -1126,84 +1095,66 @@ private:
   int               m_LadfQpOffset[MAX_LADF_INTERVALS];
   int               m_LadfIntervalLowerBound[MAX_LADF_INTERVALS];
 #endif
+  bool              m_MRL;
+  bool              m_MIP;
+  ChromaQpMappingTable m_chromaQpMappingTable;
+  bool m_GDREnabledFlag;
+  bool              m_SubLayerCbpParametersPresentFlag;
 
+  bool              m_rprEnabledFlag;
+  bool              m_interLayerPresentFlag;
 
 public:
 
   SPS();
   virtual                 ~SPS();
 
-#if HEVC_VPS
-  int                     getVPSId() const                                                                { return m_VPSId;                                                      }
-  void                    setVPSId(int i)                                                                 { m_VPSId = i;                                                         }
-#endif
-#if !JVET_M0101_HLS
-  bool                    getIntraOnlyConstraintFlag() const                                              { return m_bIntraOnlyConstraintFlag;                                   }
-  void                    setIntraOnlyConstraintFlag(bool bVal)                                           { m_bIntraOnlyConstraintFlag = bVal;                                   }
-  uint32_t                getMaxBitDepthConstraintIdc() const                                             { return m_maxBitDepthConstraintIdc;                                   }
-  void                    setMaxBitDepthConstraintIdc(uint32_t u)                                         { m_maxBitDepthConstraintIdc = u;                                      }
-  uint32_t                getMaxChromaFormatConstraintIdc() const                                         { return m_maxChromaFormatConstraintIdc;                               }
-  void                    setMaxChromaFormatConstraintIdc(uint32_t u)                                     { m_maxChromaFormatConstraintIdc = u;                                  }
-  bool                    getFrameConstraintFlag() const                                                  { return m_bFrameConstraintFlag;                                       }
-  void                    setFrameConstraintFlag(bool bVal)                                               { m_bFrameConstraintFlag = bVal;                                       }
-  bool                    getNoQtbttDualTreeIntraConstraintFlag() const                                   { return m_bNoQtbttDualTreeIntraConstraintFlag;                        }
-  void                    setNoQtbttDualTreeIntraConstraintFlag(bool bVal)                                { m_bNoQtbttDualTreeIntraConstraintFlag = bVal;                        }
-  bool                    getNoSaoConstraintFlag() const                                                  { return m_bNoSaoConstraintFlag;                                       }
-  void                    setNoSaoConstraintFlag(bool bVal)                                               { m_bNoSaoConstraintFlag = bVal;                                       }
-  bool                    getNoAlfConstraintFlag() const                                                  { return m_bNoAlfConstraintFlag;                                       }
-  void                    setNoAlfConstraintFlag(bool bVal)                                               { m_bNoAlfConstraintFlag = bVal;                                       }
-  bool                    getNoPcmConstraintFlag() const                                                  { return m_bNoPcmConstraintFlag;                                       }
-  void                    setNoPcmConstraintFlag(bool bVal)                                               { m_bNoPcmConstraintFlag = bVal;                                       }
-  bool                    getNoRefWraparoundConstraintFlag() const                                        { return m_bNoRefWraparoundConstraintFlag;                             }
-  void                    setNoRefWraparoundConstraintFlag(bool bVal)                                     { m_bNoRefWraparoundConstraintFlag= bVal;                              }
-  bool                    getNoTemporalMvpConstraintFlag() const                                          { return m_bNoTemporalMvpConstraintFlag;                               }
-  void                    setNoTemporalMvpConstraintFlag(bool bVal)                                       { m_bNoTemporalMvpConstraintFlag = bVal;                               }
-  bool                    getNoSbtmvpConstraintFlag() const                                               { return m_bNoSbtmvpConstraintFlag;                                    }
-  void                    setNoSbtmvpConstraintFlag(bool bVal)                                            { m_bNoSbtmvpConstraintFlag = bVal;                                    }
-  bool                    getNoAmvrConstraintFlag() const                                                 { return m_bNoAmvrConstraintFlag;                                      }
-  void                    setNoAmvrConstraintFlag(bool bVal)                                              { m_bNoAmvrConstraintFlag = bVal;                                      }
-  bool                    getNoBdofConstraintFlag() const                                                 { return m_bNoBdofConstraintFlag;                                      }
-  void                    setNoBdofConstraintFlag(bool bVal)                                              { m_bNoBdofConstraintFlag = bVal;                                      }
-  bool                    getNoCclmConstraintFlag() const                                                 { return m_bNoCclmConstraintFlag;                                      }
-  void                    setNoCclmConstraintFlag(bool bVal)                                              { m_bNoCclmConstraintFlag = bVal;                                      }
-  bool                    getNoMtsConstraintFlag() const                                                  { return m_bNoMtsConstraintFlag;                                       }
-  void                    setNoMtsConstraintFlag(bool bVal)                                               { m_bNoMtsConstraintFlag = bVal;                                       }
-  bool                    getNoAffineMotionConstraintFlag() const                                         { return m_bNoAffineMotionConstraintFlag;                              }
-  void                    setNoAffineMotionConstraintFlag(bool bVal)                                      { m_bNoAffineMotionConstraintFlag = bVal;                              }
-  bool                    getNoGbiConstraintFlag() const                                                  { return m_bNoGbiConstraintFlag;                                       }
-  void                    setNoGbiConstraintFlag(bool bVal)                                               { m_bNoGbiConstraintFlag = bVal;                                       }
-  bool                    getNoMhIntraConstraintFlag() const                                              { return m_bNoMhIntraConstraintFlag;                                   }
-  void                    setNoMhIntraConstraintFlag(bool bVal)                                           { m_bNoMhIntraConstraintFlag = bVal;                                   }
-  bool                    getNoTriangleConstraintFlag() const                                             { return m_bNoTriangleConstraintFlag;                                  }
-  void                    setNoTriangleConstraintFlag(bool bVal)                                          { m_bNoTriangleConstraintFlag = bVal;                                  }
-  bool                    getNoLadfConstraintFlag() const                                                 { return m_bNoLadfConstraintFlag;                                      }
-  void                    setNoLadfConstraintFlag(bool bVal)                                              { m_bNoLadfConstraintFlag = bVal;                                      }
-  bool                    getNoCurrPicRefConstraintFlag() const                                           { return m_bNoCurrPicRefConstraintFlag;                                }
-  void                    setNoCurrPicRefConstraintFlag(bool bVal)                                        { m_bNoCurrPicRefConstraintFlag = bVal;                                }
-  bool                    getNoQpDeltaConstraintFlag() const                                              { return m_bNoQpDeltaConstraintFlag;                                   }
-  void                    setNoQpDeltaConstraintFlag(bool bVal)                                           { m_bNoQpDeltaConstraintFlag = bVal;                                   }
-  bool                    getNoDepQuantConstraintFlag() const                                             { return m_bNoDepQuantConstraintFlag;                                  }
-  void                    setNoDepQuantConstraintFlag(bool bVal)                                          { m_bNoDepQuantConstraintFlag = bVal;                                  }
-  bool                    getNoSignDataHidingConstraintFlag() const                                       { return m_bNoSignDataHidingConstraintFlag;                            }
-  void                    setNoSignDataHidingConstraintFlag(bool bVal)                                    { m_bNoSignDataHidingConstraintFlag = bVal;                            }
-#endif
   int                     getSPSId() const                                                                { return m_SPSId;                                                      }
   void                    setSPSId(int i)                                                                 { m_SPSId = i;                                                         }
+  void                    setDecodingParameterSetId(int val)                                              { m_decodingParameterSetId = val; }
+  int                     getDecodingParameterSetId() const                                               { return m_decodingParameterSetId; }
+  int                     getVPSId() const                                                                { return m_VPSId; }
+  void                    setVPSId(int i)                                                                 { m_VPSId = i; }
+
   ChromaFormat            getChromaFormatIdc () const                                                     { return m_chromaFormatIdc;                                            }
   void                    setChromaFormatIdc (ChromaFormat i)                                             { m_chromaFormatIdc = i;                                               }
+  void                    setSeparateColourPlaneFlag ( bool b )                                           { m_separateColourPlaneFlag = b;                                       }
+  bool                    getSeparateColourPlaneFlag () const                                             { return m_separateColourPlaneFlag;                                    }
 
   static int              getWinUnitX (int chromaFormatIdc)                                               { CHECK(chromaFormatIdc < 0 || chromaFormatIdc >= NUM_CHROMA_FORMAT, "Invalid chroma format parameter"); return m_winUnitX[chromaFormatIdc]; }
   static int              getWinUnitY (int chromaFormatIdc)                                               { CHECK(chromaFormatIdc < 0 || chromaFormatIdc >= NUM_CHROMA_FORMAT, "Invalid chroma format parameter"); return m_winUnitY[chromaFormatIdc]; }
 
   // structure
-  void                    setPicWidthInLumaSamples( uint32_t u )                                              { m_picWidthInLumaSamples = u;                                         }
-  uint32_t                    getPicWidthInLumaSamples() const                                                { return  m_picWidthInLumaSamples;                                     }
-  void                    setPicHeightInLumaSamples( uint32_t u )                                             { m_picHeightInLumaSamples = u;                                        }
-  uint32_t                    getPicHeightInLumaSamples() const                                               { return  m_picHeightInLumaSamples;                                    }
-
-  Window&                 getConformanceWindow()                                                          { return  m_conformanceWindow;                                         }
-  const Window&           getConformanceWindow() const                                                    { return  m_conformanceWindow;                                         }
-  void                    setConformanceWindow(Window& conformanceWindow )                                { m_conformanceWindow = conformanceWindow;                             }
+  void                    setMaxPicWidthInLumaSamples( uint32_t u )                                       { m_maxWidthInLumaSamples = u; }
+  uint32_t                getMaxPicWidthInLumaSamples() const                                             { return  m_maxWidthInLumaSamples; }
+  void                    setMaxPicHeightInLumaSamples( uint32_t u )                                      { m_maxHeightInLumaSamples = u; }
+  uint32_t                getMaxPicHeightInLumaSamples() const                                            { return  m_maxHeightInLumaSamples; }
+
+  void                    setSubPicPresentFlag(bool b)                                                    { m_subPicPresentFlag = b;                }
+  bool                    getSubPicPresentFlag() const                                                    { return m_subPicPresentFlag;             }
+
+  void                    setNumSubPics( uint8_t u )                                                      { m_numSubPics = u;                        }
+  uint8_t                 getNumSubPics( ) const                                                          { return  m_numSubPics;                    }
+  void                    setSubPicCtuTopLeftX( int i, uint32_t u )                                       { CHECK( i < 0 || i >= MAX_NUM_SUB_PICS, "Sub-picture index exceeds valid range" ); m_subPicCtuTopLeftX[i] = u;                     } //!< indexed sub-picture accessors below reject i outside [0, MAX_NUM_SUB_PICS)
+  uint32_t                getSubPicCtuTopLeftX( int i ) const                                             { CHECK( i < 0 || i >= MAX_NUM_SUB_PICS, "Sub-picture index exceeds valid range" ); return  m_subPicCtuTopLeftX[i];                 }
+  void                    setSubPicCtuTopLeftY( int i, uint32_t u )                                       { CHECK( i < 0 || i >= MAX_NUM_SUB_PICS, "Sub-picture index exceeds valid range" ); m_subPicCtuTopLeftY[i] = u;                     }
+  uint32_t                getSubPicCtuTopLeftY( int i ) const                                             { CHECK( i < 0 || i >= MAX_NUM_SUB_PICS, "Sub-picture index exceeds valid range" ); return  m_subPicCtuTopLeftY[i];                 }
+  void                    setSubPicWidth( int i, uint32_t u )                                             { CHECK( i < 0 || i >= MAX_NUM_SUB_PICS, "Sub-picture index exceeds valid range" ); m_SubPicWidth[i] = u;                           }
+  uint32_t                getSubPicWidth( int i ) const                                                   { CHECK( i < 0 || i >= MAX_NUM_SUB_PICS, "Sub-picture index exceeds valid range" ); return  m_SubPicWidth[i];                       }
+  void                    setSubPicHeight( int i, uint32_t u )                                            { CHECK( i < 0 || i >= MAX_NUM_SUB_PICS, "Sub-picture index exceeds valid range" ); m_SubPicHeight[i] = u;                          }
+  uint32_t                getSubPicHeight( int i ) const                                                  { CHECK( i < 0 || i >= MAX_NUM_SUB_PICS, "Sub-picture index exceeds valid range" ); return  m_SubPicHeight[i];                      }
+  void                    setSubPicTreatedAsPicFlag( int i, bool u )                                      { CHECK( i < 0 || i >= MAX_NUM_SUB_PICS, "Sub-picture index exceeds valid range" ); m_subPicTreatedAsPicFlag[i] = u;                }
+  bool                    getSubPicTreatedAsPicFlag( int i ) const                                        { CHECK( i < 0 || i >= MAX_NUM_SUB_PICS, "Sub-picture index exceeds valid range" ); return  m_subPicTreatedAsPicFlag[i];            }
+  void                    setLoopFilterAcrossSubpicEnabledFlag( int i, bool u )                           { CHECK( i < 0 || i >= MAX_NUM_SUB_PICS, "Sub-picture index exceeds valid range" ); m_loopFilterAcrossSubpicEnabledFlag[i] = u;     }
+  bool                    getLoopFilterAcrossSubpicEnabledFlag( int i ) const                             { CHECK( i < 0 || i >= MAX_NUM_SUB_PICS, "Sub-picture index exceeds valid range" ); return  m_loopFilterAcrossSubpicEnabledFlag[i]; }
+  void                    setSubPicIdPresentFlag( bool b )                                                { m_subPicIdPresentFlag = b;               }
+  bool                    getSubPicIdPresentFlag() const                                                  { return  m_subPicIdPresentFlag;           }
+  void                    setSubPicIdSignallingPresentFlag( bool b )                                      { m_subPicIdSignallingPresentFlag = b;     }
+  bool                    getSubPicIdSignallingPresentFlag() const                                        { return  m_subPicIdSignallingPresentFlag; }
+  void                    setSubPicIdLen( uint32_t u )                                                    { m_subPicIdLen = u;                       }
+  uint32_t                getSubPicIdLen() const                                                          { return  m_subPicIdLen;                   }
+  void                    setSubPicId( int i, uint8_t u )                                                 { CHECK( i < 0 || i >= MAX_NUM_SUB_PICS, "Sub-picture index exceeds valid range" ); m_subPicId[i] = u;     }
+  uint8_t                 getSubPicId( int i ) const                                                      { CHECK( i < 0 || i >= MAX_NUM_SUB_PICS, "Sub-picture index exceeds valid range" ); return  m_subPicId[i]; }
 
   uint32_t                    getNumLongTermRefPicSPS() const                                                 { return m_numLongTermRefPicSPS;                                       }
   void                    setNumLongTermRefPicSPS(uint32_t val)                                               { m_numLongTermRefPicSPS = val;                                        }
@@ -1226,13 +1177,13 @@ public:
   unsigned                getMinQTSize(SliceType   slicetype,
                                        ChannelType chType = CHANNEL_TYPE_LUMA)
                                                                                                     const { return slicetype == I_SLICE ? (chType == CHANNEL_TYPE_LUMA ? m_minQT[0] : m_minQT[2]) : m_minQT[1]; }
-  void                    setMaxBTDepth(unsigned    maxBTDepth,
-                                        unsigned    maxBTDepthI,
-                                        unsigned    maxBTDepthIChroma)
-                                                                                                          { m_maxBTDepth[1] = maxBTDepth; m_maxBTDepth[0] = maxBTDepthI; m_maxBTDepth[2] = maxBTDepthIChroma; }
-  unsigned                getMaxBTDepth()                                                           const { return m_maxBTDepth[1]; }
-  unsigned                getMaxBTDepthI()                                                          const { return m_maxBTDepth[0]; }
-  unsigned                getMaxBTDepthIChroma()                                                    const { return m_maxBTDepth[2]; }
+  void                    setMaxMTTHierarchyDepth(unsigned    maxMTTHierarchyDepth,        // stored in slot [1]
+                                        unsigned    maxMTTHierarchyDepthI,       // stored in slot [0]
+                                        unsigned    maxMTTHierarchyDepthIChroma) // stored in slot [2]
+                                                                                                          { m_maxMTTHierarchyDepth[0] = maxMTTHierarchyDepthI; m_maxMTTHierarchyDepth[1] = maxMTTHierarchyDepth; m_maxMTTHierarchyDepth[2] = maxMTTHierarchyDepthIChroma; }
+  unsigned                getMaxMTTHierarchyDepth()                                                 const { return m_maxMTTHierarchyDepth[1]; }
+  unsigned                getMaxMTTHierarchyDepthI()                                                const { return m_maxMTTHierarchyDepth[0]; }
+  unsigned                getMaxMTTHierarchyDepthIChroma()                                          const { return m_maxMTTHierarchyDepth[2]; }
   void                    setMaxBTSize(unsigned    maxBTSize,
                                        unsigned    maxBTSizeI,
                                        unsigned    maxBTSizeC)
@@ -1247,7 +1198,12 @@ public:
   unsigned                getMaxTTSize()                                                            const { return m_maxTTSize[1]; }
   unsigned                getMaxTTSizeI()                                                           const { return m_maxTTSize[0]; }
   unsigned                getMaxTTSizeIChroma()                                                     const { return m_maxTTSize[2]; }
-
+  unsigned*               getMinQTSizes()                                                          const { return const_cast<unsigned*>( m_minQT );                } // const accessor that deliberately hands out a writable pointer to the whole array
+  unsigned*               getMaxMTTHierarchyDepths()                                               const { return const_cast<unsigned*>( m_maxMTTHierarchyDepth ); } // const accessor that deliberately hands out a writable pointer to the whole array
+  unsigned*               getMaxBTSizes()                                                          const { return const_cast<unsigned*>( m_maxBTSize );            } // const accessor that deliberately hands out a writable pointer to the whole array
+  unsigned*               getMaxTTSizes()                                                          const { return const_cast<unsigned*>( m_maxTTSize );            } // const accessor that deliberately hands out a writable pointer to the whole array
+  void                    setIDRRefParamListPresent(bool b)                             { m_idrRefParamList = b; }
+  bool                    getIDRRefParamListPresent()                             const { return m_idrRefParamList; }
   void                    setUseDualITree(bool b) { m_dualITree = b; }
   bool                    getUseDualITree()                                      const { return m_dualITree; }
 
@@ -1257,28 +1213,37 @@ public:
   uint32_t                    getMaxCUHeight() const                                                          { return  m_uiMaxCUHeight;                                             }
   void                    setMaxCodingDepth( uint32_t u )                                                     { m_uiMaxCodingDepth = u;                                              }
   uint32_t                    getMaxCodingDepth() const                                                       { return  m_uiMaxCodingDepth;                                          }
-  void                    setPCMEnabledFlag( bool b )                                                         { m_pcmEnabledFlag = b;                                                }
-  bool                    getPCMEnabledFlag() const                                                           { return m_pcmEnabledFlag;                                             }
-  void                    setPCMLog2MaxSize( uint32_t u )                                                     { m_pcmLog2MaxSize = u;                                                }
-  uint32_t                    getPCMLog2MaxSize() const                                                       { return  m_pcmLog2MaxSize;                                            }
-  void                    setPCMLog2MinSize( uint32_t u )                                                     { m_uiPCMLog2MinSize = u;                                              }
-  uint32_t                    getPCMLog2MinSize() const                                                       { return  m_uiPCMLog2MinSize;                                          }
+  bool                    getTransformSkipEnabledFlag() const                                                 { return m_transformSkipEnabledFlag;                                   }
+  void                    setTransformSkipEnabledFlag( bool b )                                               { m_transformSkipEnabledFlag = b;                                      }
+  int                     getBDPCMEnabled() const                                                             { return m_BDPCMEnabled;                                               }
+  void                    setBDPCMEnabled(int val)                                                            { m_BDPCMEnabled = val;                                                }
   void                    setBitsForPOC( uint32_t u )                                                         { m_uiBitsForPOC = u;                                                  }
   uint32_t                    getBitsForPOC() const                                                           { return m_uiBitsForPOC;                                               }
   void                    setNumReorderPics(int i, uint32_t tlayer)                                           { m_numReorderPics[tlayer] = i;                                        }
   int                     getNumReorderPics(uint32_t tlayer) const                                            { return m_numReorderPics[tlayer];                                     }
-  void                    createRPSList( int numRPS );
-  const RPSList*          getRPSList() const                                                              { return &m_RPSList;                                                   }
-  RPSList*                getRPSList()                                                                    { return &m_RPSList;                                                   }
+  void                    createRPLList0(int numRPL);
+  void                    createRPLList1(int numRPL);
+  const RPLList*          getRPLList( bool b ) const                                                          { return b ? &m_RPLList1 : &m_RPLList0;                                } // true selects list 1, false selects list 0
+  RPLList*                getRPLList( bool b )                                                                { return b ? &m_RPLList1 : &m_RPLList0;                                } // true selects list 1, false selects list 0
+  uint32_t                getNumRPL( bool b ) const                                                           { return b ? m_numRPL1   : m_numRPL0;                                  } // true selects the list-1 count, false the list-0 count
+  const RPLList*          getRPLList0() const                                                                 { return &m_RPLList0;                                                  }
+  RPLList*                getRPLList0()                                                                       { return &m_RPLList0;                                                  }
+  const RPLList*          getRPLList1() const                                                                 { return &m_RPLList1;                                                  }
+  RPLList*                getRPLList1()                                                                       { return &m_RPLList1;                                                  }
+  uint32_t                getNumRPL0() const                                                                  { return m_numRPL0;                                                    }
+  uint32_t                getNumRPL1() const                                                                  { return m_numRPL1;                                                    }
+  void                    setRPL1CopyFromRPL0Flag(bool isCopy)                                                { m_rpl1CopyFromRpl0Flag = isCopy;                                     }
+  bool                    getRPL1CopyFromRPL0Flag() const                                                     { return m_rpl1CopyFromRpl0Flag;                                       }
+  bool                    getRPL1IdxPresentFlag() const                                                       { return m_rpl1IdxPresentFlag;                                         } // NOTE(review): no matching setter among the neighbouring accessors - confirm where m_rpl1IdxPresentFlag is written
+  void                    setAllActiveRplEntriesHasSameSignFlag(bool isAllSame)                               { m_allRplEntriesHasSameSignFlag = isAllSame;                          }
+  bool                    getAllActiveRplEntriesHasSameSignFlag() const                                       { return m_allRplEntriesHasSameSignFlag;                               }
   bool                    getLongTermRefsPresent() const                                                  { return m_bLongTermRefsPresent;                                       }
   void                    setLongTermRefsPresent(bool b)                                                  { m_bLongTermRefsPresent=b;                                            }
   bool                    getSPSTemporalMVPEnabledFlag() const                                            { return m_SPSTemporalMVPEnabledFlag;                                  }
   void                    setSPSTemporalMVPEnabledFlag(bool b)                                            { m_SPSTemporalMVPEnabledFlag=b;                                       }
-#if MAX_TB_SIZE_SIGNALLING
   void                    setLog2MaxTbSize( uint32_t u )                                                  { m_log2MaxTbSize = u;                                                 }
   uint32_t                getLog2MaxTbSize() const                                                        { return  m_log2MaxTbSize;                                             }
   uint32_t                getMaxTbSize() const                                                            { return  1 << m_log2MaxTbSize;                                        }
-#endif
   // Bit-depth
   int                     getBitDepth(ChannelType type) const                                             { return m_bitDepths.recon[type];                                      }
   void                    setBitDepth(ChannelType type, int u )                                           { m_bitDepths.recon[type] = u;                                         }
@@ -1288,12 +1253,16 @@ public:
   int                     getDifferentialLumaChromaBitDepth() const                                       { return int(m_bitDepths.recon[CHANNEL_TYPE_LUMA]) - int(m_bitDepths.recon[CHANNEL_TYPE_CHROMA]); }
   int                     getQpBDOffset(ChannelType type) const                                           { return m_qpBDOffset[type];                                           }
   void                    setQpBDOffset(ChannelType type, int i)                                          { m_qpBDOffset[type] = i;                                              }
+  int                     getMinQpPrimeTsMinus4(ChannelType type) const                                         { return m_minQpMinus4[type];                                           }
+  void                    setMinQpPrimeTsMinus4(ChannelType type, int i)                                        { m_minQpMinus4[type] = i;                                              }
 
   void                    setSAOEnabledFlag(bool bVal)                                                    { m_saoEnabledFlag = bVal;                                                    }
   bool                    getSAOEnabledFlag() const                                                       { return m_saoEnabledFlag;                                                    }
 
   bool                    getALFEnabledFlag() const                                                       { return m_alfEnabledFlag; }
   void                    setALFEnabledFlag( bool b )                                                     { m_alfEnabledFlag = b; }
+  void                    setJointCbCrEnabledFlag(bool bVal)                                              { m_JointCbCrEnabledFlag = bVal; }
+  bool                    getJointCbCrEnabledFlag() const                                                 { return m_JointCbCrEnabledFlag; }
 
   bool                    getSBTMVPEnabledFlag() const                                                    { return m_sbtmvpEnabledFlag; }
   void                    setSBTMVPEnabledFlag(bool b)                                                    { m_sbtmvpEnabledFlag = b; }
@@ -1301,51 +1270,57 @@ public:
   void                    setBDOFEnabledFlag(bool b)                                                      { m_bdofEnabledFlag = b; }
   bool                    getBDOFEnabledFlag() const                                                      { return m_bdofEnabledFlag; }
 
-  bool                    getDisFracMmvdEnabledFlag() const                                               { return m_disFracMmvdEnabledFlag; }
-  void                    setDisFracMmvdEnabledFlag( bool b )                                             { m_disFracMmvdEnabledFlag = b;    }
+  bool                    getFpelMmvdEnabledFlag() const                                                  { return m_fpelMmvdEnabledFlag; }
+  void                    setFpelMmvdEnabledFlag( bool b )                                                { m_fpelMmvdEnabledFlag = b;    }
   bool                    getUseDMVR()const                                                               { return m_DMVR; }
   void                    setUseDMVR(bool b)                                                              { m_DMVR = b;    }
+  bool                    getUseMMVD()const                                                               { return m_MMVD; }
+  void                    setUseMMVD(bool b)                                                              { m_MMVD = b;    }
+  bool                    getBdofControlPresentFlag()const                                                { return m_BdofControlPresentFlag; }
+  void                    setBdofControlPresentFlag(bool b)                                               { m_BdofControlPresentFlag = b;    }
+
+  bool                    getDmvrControlPresentFlag()const                                                { return m_DmvrControlPresentFlag; }
+  void                    setDmvrControlPresentFlag(bool b)                                               { m_DmvrControlPresentFlag = b;    }
+
+  bool                    getProfControlPresentFlag()const                                                { return m_ProfControlPresentFlag; }
+  void                    setProfControlPresentFlag(bool b)                                               { m_ProfControlPresentFlag = b;    }
   uint32_t                getMaxTLayers() const                                                           { return m_uiMaxTLayers; }
   void                    setMaxTLayers( uint32_t uiMaxTLayers )                                          { CHECK( uiMaxTLayers > MAX_TLAYER, "Invalid number T-layers" ); m_uiMaxTLayers = uiMaxTLayers; }
 
   bool                    getTemporalIdNestingFlag() const                                                { return m_bTemporalIdNestingFlag;                                     }
   void                    setTemporalIdNestingFlag( bool bValue )                                         { m_bTemporalIdNestingFlag = bValue;                                   }
-  uint32_t                    getPCMBitDepth(ChannelType type) const                                          { return m_pcmBitDepths[type];                                         }
-  void                    setPCMBitDepth(ChannelType type, uint32_t u)                                        { m_pcmBitDepths[type] = u;                                            }
-  void                    setPCMFilterDisableFlag( bool bValue )                                          { m_bPCMFilterDisableFlag = bValue;                                    }
-  bool                    getPCMFilterDisableFlag() const                                                 { return m_bPCMFilterDisableFlag;                                      }
 
-#if HEVC_USE_SCALING_LISTS
   bool                    getScalingListFlag() const                                                      { return m_scalingListEnabledFlag;                                     }
   void                    setScalingListFlag( bool b )                                                    { m_scalingListEnabledFlag  = b;                                       }
-  bool                    getScalingListPresentFlag() const                                               { return m_scalingListPresentFlag;                                     }
-  void                    setScalingListPresentFlag( bool b )                                             { m_scalingListPresentFlag  = b;                                       }
-  ScalingList&            getScalingList()                                                                { return m_scalingList; }
-  const ScalingList&      getScalingList() const                                                          { return m_scalingList; }
-#endif
+  void                    setLoopFilterAcrossVirtualBoundariesDisabledFlag(bool b)                        { m_loopFilterAcrossVirtualBoundariesDisabledFlag = b;                 }
+  bool                    getLoopFilterAcrossVirtualBoundariesDisabledFlag() const                        { return m_loopFilterAcrossVirtualBoundariesDisabledFlag;              }
+  void                    setNumVerVirtualBoundaries(unsigned u)                                          { m_numVerVirtualBoundaries = u;                                       }
+  unsigned                getNumVerVirtualBoundaries() const                                              { return m_numVerVirtualBoundaries;                                    }
+  void                    setNumHorVirtualBoundaries(unsigned u)                                          { m_numHorVirtualBoundaries = u;                                       }
+  unsigned                getNumHorVirtualBoundaries() const                                              { return m_numHorVirtualBoundaries;                                    }
+  void                    setVirtualBoundariesPosX(unsigned u, unsigned idx)                              { CHECK( idx >= 3, "virtual boundary index exceeds valid range" ); m_virtualBoundariesPosX[idx] = u;    } // argument order is (value, index); idx must be below 3
+  unsigned                getVirtualBoundariesPosX(unsigned idx) const                                    { CHECK( idx >= 3, "virtual boundary index exceeds valid range" ); return m_virtualBoundariesPosX[idx]; } // idx must be below 3
+  void                    setVirtualBoundariesPosY(unsigned u, unsigned idx)                              { CHECK( idx >= 3, "virtual boundary index exceeds valid range" ); m_virtualBoundariesPosY[idx] = u;    } // argument order is (value, index); idx must be below 3
+  unsigned                getVirtualBoundariesPosY(unsigned idx) const                                    { CHECK( idx >= 3, "virtual boundary index exceeds valid range" ); return m_virtualBoundariesPosY[idx]; } // idx must be below 3
   uint32_t                    getMaxDecPicBuffering(uint32_t tlayer) const                                        { return m_uiMaxDecPicBuffering[tlayer];                               }
   void                    setMaxDecPicBuffering( uint32_t ui, uint32_t tlayer )                                   { CHECK(tlayer >= MAX_TLAYER, "Invalid T-layer"); m_uiMaxDecPicBuffering[tlayer] = ui;    }
   uint32_t                    getMaxLatencyIncreasePlus1(uint32_t tlayer) const                                   { return m_uiMaxLatencyIncreasePlus1[tlayer];                          }
   void                    setMaxLatencyIncreasePlus1( uint32_t ui , uint32_t tlayer)                              { m_uiMaxLatencyIncreasePlus1[tlayer] = ui;                            }
 
-#if HEVC_USE_INTRA_SMOOTHING_T32 || HEVC_USE_INTRA_SMOOTHING_T64
-  void                    setUseStrongIntraSmoothing(bool bVal)                                           { m_useStrongIntraSmoothing = bVal;                                    }
-  bool                    getUseStrongIntraSmoothing() const                                              { return m_useStrongIntraSmoothing;                                    }
-
-#endif
   void                    setAffineAmvrEnabledFlag( bool val )                                            { m_affineAmvrEnabledFlag = val;                                       }
   bool                    getAffineAmvrEnabledFlag() const                                                { return m_affineAmvrEnabledFlag;                                      }
+  TimingInfo*             getTimingInfo()                                                                 { return &m_timingInfo; }
+  const TimingInfo*       getTimingInfo() const                                                           { return &m_timingInfo; }
+  bool                    getHrdParametersPresentFlag() const                                             { return m_hrdParametersPresentFlag; }
+  void                    setHrdParametersPresentFlag(bool b)                                             { m_hrdParametersPresentFlag = b; }
+  HRDParameters*          getHrdParameters()                                                              { return &m_hrdParameters; }
+  const HRDParameters*    getHrdParameters() const                                                        { return &m_hrdParameters; }
   bool                    getVuiParametersPresentFlag() const                                             { return m_vuiParametersPresentFlag;                                   }
   void                    setVuiParametersPresentFlag(bool b)                                             { m_vuiParametersPresentFlag = b;                                      }
   VUI*                    getVuiParameters()                                                              { return &m_vuiParameters;                                             }
   const VUI*              getVuiParameters() const                                                        { return &m_vuiParameters;                                             }
-#if !JVET_M0101_HLS
-  const PTL*              getPTL() const                                                                  { return &m_pcPTL;                                                     }
-  PTL*                    getPTL()                                                                        { return &m_pcPTL;                                                     }
-#else
   const ProfileTierLevel* getProfileTierLevel() const                                                     { return &m_profileTierLevel; }
   ProfileTierLevel*       getProfileTierLevel()                                                           { return &m_profileTierLevel; }
-#endif
 
   const SPSRExt&          getSpsRangeExtension() const                                                    { return m_spsRangeExtension;                                          }
   SPSRExt&                getSpsRangeExtension()                                                          { return m_spsRangeExtension;                                          }
@@ -1354,14 +1329,18 @@ public:
   bool                    getWrapAroundEnabledFlag() const                                                { return m_wrapAroundEnabledFlag;                                      }
   void                    setWrapAroundOffset(unsigned offset)                                            { m_wrapAroundOffset = offset;                                         }
   unsigned                getWrapAroundOffset() const                                                     { return m_wrapAroundOffset;                                           }
-  void                    setUseReshaper(bool b)                                                          { m_lumaReshapeEnable = b;                                                   }
-  bool                    getUseReshaper() const                                                          { return m_lumaReshapeEnable;                                                }
+  void                    setUseLmcs(bool b)                                                              { m_lmcsEnabled = b;                                                   }
+  bool                    getUseLmcs() const                                                              { return m_lmcsEnabled;                                                }
   void                    setIBCFlag(unsigned IBCFlag)                                                    { m_IBCFlag = IBCFlag; }
   unsigned                getIBCFlag() const                                                              { return m_IBCFlag; }
+  void                    setUseColorTrans(bool value) { m_useColorTrans = value; }
+  bool                    getUseColorTrans() const { return m_useColorTrans; }
+  void                    setPLTMode(unsigned PLTMode)                                                    { m_PLTMode = PLTMode; }
+  unsigned                getPLTMode() const                                                              { return m_PLTMode; }
   void                    setUseSBT( bool b )                                                             { m_SBT = b; }
   bool                    getUseSBT() const                                                               { return m_SBT; }
-  void                    setMaxSbtSize( uint8_t val )                                                    { m_MaxSbtSize = val; }
-  uint8_t                 getMaxSbtSize() const                                                           { return m_MaxSbtSize; }
+  void                    setUseISP( bool b )                                                             { m_ISP = b; }
+  bool                    getUseISP() const                                                               { return m_ISP; }
 
   void      setAMVREnabledFlag    ( bool b )                                        { m_AMVREnabledFlag = b; }
   bool      getAMVREnabledFlag    ()                                      const     { return m_AMVREnabledFlag; }
@@ -1369,19 +1348,28 @@ public:
   bool      getUseAffine          ()                                      const     { return m_Affine; }
   void      setUseAffineType      ( bool b )                                        { m_AffineType = b; }
   bool      getUseAffineType      ()                                      const     { return m_AffineType; }
+  void      setUsePROF            ( bool b )                                        { m_PROF = b; }
+  bool      getUsePROF            ()                                      const     { return m_PROF; }
   void      setUseLMChroma        ( bool b )                                        { m_LMChroma = b; }
   bool      getUseLMChroma        ()                                      const     { return m_LMChroma; }
-  void      setCclmCollocatedChromaFlag( bool b )                                   { m_cclmCollocatedChromaFlag = b; }
-  bool      getCclmCollocatedChromaFlag()                                 const     { return m_cclmCollocatedChromaFlag; }
+  void      setHorCollocatedChromaFlag( bool b )                                    { m_horCollocatedChromaFlag = b;    }
+  bool      getHorCollocatedChromaFlag()                                  const     { return m_horCollocatedChromaFlag; }
+  void      setVerCollocatedChromaFlag( bool b )                                    { m_verCollocatedChromaFlag = b;    }
+  bool      getVerCollocatedChromaFlag()                                  const     { return m_verCollocatedChromaFlag; }
+  bool      getCclmCollocatedChromaFlag()                                 const     { return m_verCollocatedChromaFlag; } // yields the same member as getVerCollocatedChromaFlag()
   void      setUseMTS             ( bool b )                                        { m_MTS = b; }
   bool      getUseMTS             ()                                      const     { return m_MTS; }
-  bool      getUseImplicitMTS     ()                                      const     { return m_MTS && !m_IntraMTS && !m_InterMTS; }
+  bool      getUseImplicitMTS     ()                                      const     { return m_MTS && !m_IntraMTS; } // true when m_MTS is set and m_IntraMTS is clear; m_InterMTS is not consulted
   void      setUseIntraMTS        ( bool b )                                        { m_IntraMTS = b; }
   bool      getUseIntraMTS        ()                                      const     { return m_IntraMTS; }
   void      setUseInterMTS        ( bool b )                                        { m_InterMTS = b; }
   bool      getUseInterMTS        ()                                      const     { return m_InterMTS; }
-  void      setUseGBi             ( bool b )                                        { m_GBi = b; }
-  bool      getUseGBi             ()                                      const     { return m_GBi; }
+  void      setUseLFNST           ( bool b )                                        { m_LFNST = b; }
+  bool      getUseLFNST           ()                                      const     { return m_LFNST; }
+  void      setUseSMVD(bool b)                                                      { m_SMVD = b; }
+  bool      getUseSMVD()                                                  const     { return m_SMVD; }
+  void      setUseBcw             ( bool b )                                        { m_bcw = b; }
+  bool      getUseBcw             ()                                      const     { return m_bcw; }
 #if LUMA_ADAPTIVE_DEBLOCKING_FILTER_QP_OFFSET
   void      setLadfEnabled        ( bool b )                                        { m_LadfEnabled = b; }
   bool      getLadfEnabled        ()                                      const     { return m_LadfEnabled; }
@@ -1393,51 +1381,45 @@ public:
   int       getLadfIntervalLowerBound( int idx )                          const     { return m_LadfIntervalLowerBound[ idx ]; }
 #endif
 
-  void      setUseMHIntra         ( bool b )                                        { m_MHIntra = b; }
-  bool      getUseMHIntra         ()                                      const     { return m_MHIntra; }
+  void      setUseCiip         ( bool b )                                        { m_ciip = b; }
+  bool      getUseCiip         ()                                      const     { return m_ciip; }
   void      setUseTriangle        ( bool b )                                        { m_Triangle = b; }
   bool      getUseTriangle        ()                                      const     { return m_Triangle; }
+  void      setUseMRL             ( bool b )                                        { m_MRL = b; }
+  bool      getUseMRL             ()                                      const     { return m_MRL; }
+  void      setUseMIP             ( bool b )                                        { m_MIP = b; }
+  bool      getUseMIP             ()                                      const     { return m_MIP; }
+
+  bool      getUseWP              ()                                      const     { return m_useWeightPred; }
+  bool      getUseWPBiPred        ()                                      const     { return m_useWeightedBiPred; }
+  void      setUseWP              ( bool b )                                        { m_useWeightPred = b; }
+  void      setUseWPBiPred        ( bool b )                                        { m_useWeightedBiPred = b; }
+  void      setChromaQpMappingTableFromParams(const ChromaQpMappingTableParams &params, const int qpBdOffset)   { m_chromaQpMappingTable.setParams(params, qpBdOffset); }
+  void      derivedChromaQPMappingTables()                                          { m_chromaQpMappingTable.derivedChromaQPMappingTables(); }
+  const ChromaQpMappingTable& getChromaQpMappingTable()                   const     { return m_chromaQpMappingTable;}
+  int       getMappedChromaQpValue(ComponentID compID, int qpVal)         const     { return m_chromaQpMappingTable.getMappedChromaQpValue(compID, qpVal); }
+  void setGDREnabledFlag(bool flag) { m_GDREnabledFlag = flag; }
+  bool getGDREnabledFlag() const { return m_GDREnabledFlag; }
+  void      setSubLayerParametersPresentFlag(bool flag)                             { m_SubLayerCbpParametersPresentFlag = flag; }
+  bool      getSubLayerParametersPresentFlag()                            const     { return m_SubLayerCbpParametersPresentFlag;  }
+
+  bool      getRprEnabledFlag()                                           const     { return m_rprEnabledFlag; }
+  void      setRprEnabledFlag( bool flag )                                          { m_rprEnabledFlag = flag; }
+  bool      getInterLayerPresentFlag()                                        const { return m_interLayerPresentFlag; }
+  void      setInterLayerPresentFlag( bool b )                                      { m_interLayerPresentFlag = b; }
+
 };
 
 
 /// Reference Picture Lists class
 
-class RefPicListModification
-{
-private:
-  bool m_refPicListModificationFlagL0;
-  bool m_refPicListModificationFlagL1;
-  uint32_t m_RefPicSetIdxL0[REF_PIC_LIST_NUM_IDX];
-  uint32_t m_RefPicSetIdxL1[REF_PIC_LIST_NUM_IDX];
-
-public:
-          RefPicListModification();
-  virtual ~RefPicListModification();
-
-  bool    getRefPicListModificationFlagL0() const        { return m_refPicListModificationFlagL0;                                  }
-  void    setRefPicListModificationFlagL0(bool flag)     { m_refPicListModificationFlagL0 = flag;                                  }
-  bool    getRefPicListModificationFlagL1() const        { return m_refPicListModificationFlagL1;                                  }
-  void    setRefPicListModificationFlagL1(bool flag)     { m_refPicListModificationFlagL1 = flag;                                  }
-  uint32_t    getRefPicSetIdxL0(uint32_t idx) const              { CHECK(idx>=REF_PIC_LIST_NUM_IDX, "Invalid ref-pic-list index"); return m_RefPicSetIdxL0[idx];         }
-  void    setRefPicSetIdxL0(uint32_t idx, uint32_t refPicSetIdx) { CHECK(idx>=REF_PIC_LIST_NUM_IDX, "Invalid ref-pic-list index"); m_RefPicSetIdxL0[idx] = refPicSetIdx; }
-  uint32_t    getRefPicSetIdxL1(uint32_t idx) const              { CHECK(idx>=REF_PIC_LIST_NUM_IDX, "Invalid ref-pic-list index"); return m_RefPicSetIdxL1[idx];         }
-  void    setRefPicSetIdxL1(uint32_t idx, uint32_t refPicSetIdx) { CHECK(idx>=REF_PIC_LIST_NUM_IDX, "Invalid ref-pic-list index"); m_RefPicSetIdxL1[idx] = refPicSetIdx; }
-};
-
-
 
 /// PPS RExt class
 class PPSRExt // Names aligned to text specification
 {
 private:
-  int              m_log2MaxTransformSkipBlockSize;
   bool             m_crossComponentPredictionEnabledFlag;
 
-  // Chroma QP Adjustments
-  int              m_cuChromaQpOffsetSubdiv;
-  int              m_chromaQpOffsetListLen; // size (excludes the null entry used in the following array).
-  ChromaQpAdj      m_ChromaQpAdjTableIncludingNullEntry[1+MAX_QP_OFFSET_LIST_SIZE]; //!< Array includes entry [0] for the null offset used when cu_chroma_qp_offset_flag=0, and entries [cu_chroma_qp_offset_idx+1...] otherwise
-
   uint32_t             m_log2SaoOffsetScale[MAX_NUM_CHANNEL_TYPE];
 
 public:
@@ -1445,41 +1427,14 @@ public:
 
   bool settingsDifferFromDefaults(const bool bTransformSkipEnabledFlag) const
   {
-    return (bTransformSkipEnabledFlag && (getLog2MaxTransformSkipBlockSize() !=2))
-        || (getCrossComponentPredictionEnabledFlag() )
-        || (getChromaQpOffsetListEnabledFlag() )
+    return (getCrossComponentPredictionEnabledFlag() )
         || (getLog2SaoOffsetScale(CHANNEL_TYPE_LUMA) !=0 )
         || (getLog2SaoOffsetScale(CHANNEL_TYPE_CHROMA) !=0 );
   }
 
-  uint32_t                   getLog2MaxTransformSkipBlockSize() const                         { return m_log2MaxTransformSkipBlockSize;         }
-  void                   setLog2MaxTransformSkipBlockSize( uint32_t u )                       { m_log2MaxTransformSkipBlockSize  = u;           }
-
   bool                   getCrossComponentPredictionEnabledFlag() const                   { return m_crossComponentPredictionEnabledFlag;   }
   void                   setCrossComponentPredictionEnabledFlag(bool value)               { m_crossComponentPredictionEnabledFlag = value;  }
 
-  void                   clearChromaQpOffsetList()                                        { m_chromaQpOffsetListLen = 0;                    }
-
-  uint32_t               getCuChromaQpOffsetSubdiv () const                               { return m_cuChromaQpOffsetSubdiv;                }
-  void                   setCuChromaQpOffsetSubdiv ( uint32_t u )                         { m_cuChromaQpOffsetSubdiv = u;                   }
-
-  bool                   getChromaQpOffsetListEnabledFlag() const                         { return getChromaQpOffsetListLen()>0;            }
-  int                    getChromaQpOffsetListLen() const                                 { return m_chromaQpOffsetListLen;                 }
-
-  const ChromaQpAdj&     getChromaQpOffsetListEntry( int cuChromaQpOffsetIdxPlus1 ) const
-  {
-    CHECK(cuChromaQpOffsetIdxPlus1 >= m_chromaQpOffsetListLen+1, "Invalid chroma QP offset");
-    return m_ChromaQpAdjTableIncludingNullEntry[cuChromaQpOffsetIdxPlus1]; // Array includes entry [0] for the null offset used when cu_chroma_qp_offset_flag=0, and entries [cu_chroma_qp_offset_idx+1...] otherwise
-  }
-
-  void                   setChromaQpOffsetListEntry( int cuChromaQpOffsetIdxPlus1, int cbOffset, int crOffset )
-  {
-    CHECK(cuChromaQpOffsetIdxPlus1 == 0 || cuChromaQpOffsetIdxPlus1 > MAX_QP_OFFSET_LIST_SIZE, "Invalid chroma QP offset");
-    m_ChromaQpAdjTableIncludingNullEntry[cuChromaQpOffsetIdxPlus1].u.comp.CbOffset = cbOffset; // Array includes entry [0] for the null offset used when cu_chroma_qp_offset_flag=0, and entries [cu_chroma_qp_offset_idx+1...] otherwise
-    m_ChromaQpAdjTableIncludingNullEntry[cuChromaQpOffsetIdxPlus1].u.comp.CrOffset = crOffset;
-    m_chromaQpOffsetListLen = std::max(m_chromaQpOffsetListLen, cuChromaQpOffsetIdxPlus1);
-  }
-
   // Now: getPpsRangeExtension().getLog2SaoOffsetScale and getPpsRangeExtension().setLog2SaoOffsetScale
   uint32_t                   getLog2SaoOffsetScale(ChannelType type) const                    { return m_log2SaoOffsetScale[type];             }
   void                   setLog2SaoOffsetScale(ChannelType type, uint32_t uiBitShift)         { m_log2SaoOffsetScale[type] = uiBitShift;       }
@@ -1495,54 +1450,86 @@ private:
   int              m_SPSId;                    // seq_parameter_set_id
   int              m_picInitQPMinus26;
   bool             m_useDQP;
-  bool             m_bConstrainedIntraPred;    // constrained_intra_pred_flag
   bool             m_bSliceChromaQpFlag;       // slicelevel_chroma_qp_flag
 
+  int              m_layerId;
+  int              m_temporalId;
+
   // access channel
-  uint32_t         m_cuQpDeltaSubdiv;           // cu_qp_delta_subdiv
 
   int              m_chromaCbQpOffset;
   int              m_chromaCrQpOffset;
+  bool             m_chromaJointCbCrQpOffsetPresentFlag;
+  int              m_chromaCbCrQpOffset;
+
+  // Chroma QP Adjustments
+  int              m_chromaQpOffsetListLen; // size (excludes the null entry used in the following array).
+  ChromaQpAdj      m_ChromaQpAdjTableIncludingNullEntry[1+MAX_QP_OFFSET_LIST_SIZE]; //!< Array includes entry [0] for the null offset used when cu_chroma_qp_offset_flag=0, and entries [cu_chroma_qp_offset_idx+1...] otherwise
 
   uint32_t             m_numRefIdxL0DefaultActive;
   uint32_t             m_numRefIdxL1DefaultActive;
 
+  bool             m_rpl1IdxPresentFlag;
+
   bool             m_bUseWeightPred;                    //!< Use of Weighting Prediction (P_SLICE)
   bool             m_useWeightedBiPred;                 //!< Use of Weighting Bi-Prediction (B_SLICE)
   bool             m_OutputFlagPresentFlag;             //!< Indicates the presence of output_flag in slice header
-  bool             m_TransquantBypassEnabledFlag;       //!< Indicates presence of cu_transquant_bypass_flag in CUs.
-  bool             m_useTransformSkip;
-#if HEVC_DEPENDENT_SLICES
-  bool             m_dependentSliceSegmentsEnabledFlag; //!< Indicates the presence of dependent slices
-#endif
-#if HEVC_TILES_WPP
-  bool             m_tilesEnabledFlag;                  //!< Indicates the presence of tiles
+  uint8_t          m_numSubPics;                        //!< number of sub-pictures used - must match SPS
+  bool             m_subPicIdSignallingPresentFlag;     //!< indicates the presence of sub-picture ID signalling in the PPS
+  uint32_t         m_subPicIdLen;                       //!< sub-picture ID length in bits
+  uint8_t          m_subPicId[MAX_NUM_SUB_PICS];        //!< sub-picture ID for each sub-picture in the sequence
+  bool             m_noPicPartitionFlag;                //!< no picture partitioning flag - single slice, single tile
+  uint8_t          m_log2CtuSize;                       //!< log2 of the CTU size - required to match corresponding value in SPS
+  uint8_t          m_ctuSize;                           //!< CTU size
+  uint32_t         m_picWidthInCtu;                     //!< picture width in units of CTUs
+  uint32_t         m_picHeightInCtu;                    //!< picture height in units of CTUs
+  uint32_t         m_numExpTileCols;                    //!< number of explicitly specified tile columns
+  uint32_t         m_numExpTileRows;                    //!< number of explicitly specified tile rows
+  uint32_t         m_numTileCols;                       //!< number of tile columns
+  uint32_t         m_numTileRows;                       //!< number of tile rows
+  std::vector<uint32_t> m_tileColWidth;                 //!< tile column widths in units of CTUs
+  std::vector<uint32_t> m_tileRowHeight;                //!< tile row heights in units of CTUs
+  std::vector<uint32_t> m_tileColBd;                    //!< tile column left-boundaries in units of CTUs
+  std::vector<uint32_t> m_tileRowBd;                    //!< tile row top-boundaries in units of CTUs  
+  std::vector<uint32_t> m_ctuToTileCol;                 //!< mapping between CTU horizontal address and tile column index
+  std::vector<uint32_t> m_ctuToTileRow;                 //!< mapping between CTU vertical address and tile row index
+  bool             m_rectSliceFlag;                     //!< rectangular slice flag  
+  bool             m_singleSlicePerSubPicFlag;          //!< single slice per sub-picture flag
+  std::vector<uint32_t> m_ctuToSubPicIdx;               //!< mapping between CTU and Sub-picture index
+  uint32_t         m_numSlicesInPic;                    //!< number of rectangular slices in the picture (raster-scan slice specified at slice level)
+  bool             m_tileIdxDeltaPresentFlag;           //!< tile index delta present flag
+  std::vector<RectSlice> m_rectSlices;                  //!< list of rectangular slice signalling parameters
+  std::vector<SliceMap>  m_sliceMap;                    //!< list of CTU maps for each slice in the picture
+  bool             m_loopFilterAcrossTilesEnabledFlag;  //!< loop filtering applied across tiles flag
+  bool             m_loopFilterAcrossSlicesEnabledFlag; //!< loop filtering applied across slices flag
+  int              m_log2MaxTransformSkipBlockSize;
   bool             m_entropyCodingSyncEnabledFlag;      //!< Indicates the presence of wavefronts
 
-  bool             m_loopFilterAcrossTilesEnabledFlag;
-  bool             m_uniformSpacingFlag;
-  int              m_numTileColumnsMinus1;
-  int              m_numTileRowsMinus1;
-  std::vector<int> m_tileColumnWidth;
-  std::vector<int> m_tileRowHeight;
-#endif
+  bool              m_constantSliceHeaderParamsEnabledFlag;
+  int               m_PPSDepQuantEnabledIdc;
+  int               m_PPSRefPicListSPSIdc0;
+  int               m_PPSRefPicListSPSIdc1;
+  int               m_PPSMvdL1ZeroIdc;
+  int               m_PPSCollocatedFromL0Idc;
+  uint32_t          m_PPSSixMinusMaxNumMergeCandPlus1;
+  uint32_t          m_PPSMaxNumMergeCandMinusMaxNumTriangleCandPlus1;
 
   bool             m_cabacInitPresentFlag;
 
+  bool             m_pictureHeaderExtensionPresentFlag;   //!< picture header extension flags present in picture headers or not
   bool             m_sliceHeaderExtensionPresentFlag;
-  bool             m_loopFilterAcrossSlicesEnabledFlag;
   bool             m_deblockingFilterControlPresentFlag;
   bool             m_deblockingFilterOverrideEnabledFlag;
   bool             m_ppsDeblockingFilterDisabledFlag;
   int              m_deblockingFilterBetaOffsetDiv2;    //< beta offset for deblocking filter
   int              m_deblockingFilterTcOffsetDiv2;      //< tc offset for deblocking filter
-#if HEVC_USE_SCALING_LISTS
-  bool             m_scalingListPresentFlag;
-  ScalingList      m_scalingList;                       //!< ScalingList class
-#endif
   bool             m_listsModificationPresentFlag;
-  uint32_t             m_log2ParallelMergeLevelMinus2;
-  int              m_numExtraSliceHeaderBits;
+
+
+  uint32_t         m_picWidthInLumaSamples;
+  uint32_t         m_picHeightInLumaSamples;
+  Window           m_conformanceWindow;
+  Window           m_scalingWindow;
 
   PPSRExt          m_ppsRangeExtension;
 
@@ -1558,17 +1545,21 @@ public:
   int                    getSPSId() const                                                 { return m_SPSId;                               }
   void                   setSPSId(int i)                                                  { m_SPSId = i;                                  }
 
+  void                   setTemporalId( int i )                                           { m_temporalId = i;                             }
+  int                    getTemporalId()                                            const { return m_temporalId;                          }
+  void                   setLayerId( int i )                                              { m_layerId = i;                                }
+  int                    getLayerId()                                               const { return m_layerId;                             }
+
   int                    getPicInitQPMinus26() const                                      { return  m_picInitQPMinus26;                   }
   void                   setPicInitQPMinus26( int i )                                     { m_picInitQPMinus26 = i;                       }
   bool                   getUseDQP() const                                                { return m_useDQP;                              }
   void                   setUseDQP( bool b )                                              { m_useDQP   = b;                               }
-  bool                   getConstrainedIntraPred() const                                  { return  m_bConstrainedIntraPred;              }
-  void                   setConstrainedIntraPred( bool b )                                { m_bConstrainedIntraPred = b;                  }
   bool                   getSliceChromaQpFlag() const                                     { return  m_bSliceChromaQpFlag;                 }
   void                   setSliceChromaQpFlag( bool b )                                   { m_bSliceChromaQpFlag = b;                     }
 
-  void                   setCuQpDeltaSubdiv( uint32_t u )                                 { m_cuQpDeltaSubdiv = u;                         }
-  uint32_t               getCuQpDeltaSubdiv() const                                       { return m_cuQpDeltaSubdiv;                      }
+
+  bool                   getJointCbCrQpOffsetPresentFlag() const                          { return m_chromaJointCbCrQpOffsetPresentFlag;   }
+  void                   setJointCbCrQpOffsetPresentFlag(bool b)                          { m_chromaJointCbCrQpOffsetPresentFlag = b;      } 
 
   void                   setQpOffset(ComponentID compID, int i )
   {
@@ -1580,6 +1571,10 @@ public:
     {
       m_chromaCrQpOffset = i;
     }
+    else if (compID==JOINT_CbCr)
+    {
+      m_chromaCbCrQpOffset = i;
+    }
     else
     {
       THROW( "Invalid chroma QP offset" );
@@ -1587,7 +1582,26 @@ public:
   }
   int                    getQpOffset(ComponentID compID) const
   {
-    return (compID==COMPONENT_Y) ? 0 : (compID==COMPONENT_Cb ? m_chromaCbQpOffset : m_chromaCrQpOffset );
+    return (compID==COMPONENT_Y) ? 0 : (compID==COMPONENT_Cb ? m_chromaCbQpOffset : compID==COMPONENT_Cr ? m_chromaCrQpOffset : m_chromaCbCrQpOffset );
+  }
+
+  bool                   getCuChromaQpOffsetEnabledFlag() const                           { return getChromaQpOffsetListLen()>0;            }
+  int                    getChromaQpOffsetListLen() const                                 { return m_chromaQpOffsetListLen;                 }
+  void                   clearChromaQpOffsetList()                                        { m_chromaQpOffsetListLen = 0;                    }
+
+  const ChromaQpAdj&     getChromaQpOffsetListEntry( int cuChromaQpOffsetIdxPlus1 ) const
+  {
+    CHECK(cuChromaQpOffsetIdxPlus1 < 0 || cuChromaQpOffsetIdxPlus1 >= m_chromaQpOffsetListLen+1, "Invalid chroma QP offset"); // index is signed: reject negatives as well as entries beyond the current list length
+    return m_ChromaQpAdjTableIncludingNullEntry[cuChromaQpOffsetIdxPlus1]; // Array includes entry [0] for the null offset used when cu_chroma_qp_offset_flag=0, and entries [cu_chroma_qp_offset_idx+1...] otherwise
+  }
+
+  void                   setChromaQpOffsetListEntry( int cuChromaQpOffsetIdxPlus1, int cbOffset, int crOffset, int jointCbCrOffset )
+  {
+    CHECK(cuChromaQpOffsetIdxPlus1 <= 0 || cuChromaQpOffsetIdxPlus1 > MAX_QP_OFFSET_LIST_SIZE, "Invalid chroma QP offset"); // entry [0] is the reserved null offset and must not be overwritten; negative indices are out of bounds
+    m_ChromaQpAdjTableIncludingNullEntry[cuChromaQpOffsetIdxPlus1].u.comp.CbOffset = cbOffset; // Array includes entry [0] for the null offset used when cu_chroma_qp_offset_flag=0, and entries [cu_chroma_qp_offset_idx+1...] otherwise
+    m_ChromaQpAdjTableIncludingNullEntry[cuChromaQpOffsetIdxPlus1].u.comp.CrOffset = crOffset;
+    m_ChromaQpAdjTableIncludingNullEntry[cuChromaQpOffsetIdxPlus1].u.comp.JointCbCrOffset = jointCbCrOffset;
+    m_chromaQpOffsetListLen = std::max(m_chromaQpOffsetListLen, cuChromaQpOffsetIdxPlus1); // list length grows to cover the highest index written so far
   }
 
   void                   setNumRefIdxL0DefaultActive(uint32_t ui)                             { m_numRefIdxL0DefaultActive=ui;                }
@@ -1595,6 +1609,9 @@ public:
   void                   setNumRefIdxL1DefaultActive(uint32_t ui)                             { m_numRefIdxL1DefaultActive=ui;                }
   uint32_t                   getNumRefIdxL1DefaultActive() const                              { return m_numRefIdxL1DefaultActive;            }
 
+  void                   setRpl1IdxPresentFlag(bool isPresent)                            { m_rpl1IdxPresentFlag = isPresent;             }
+  bool                   getRpl1IdxPresentFlag() const                                    { return m_rpl1IdxPresentFlag;                  }
+
   bool                   getUseWP() const                                                 { return m_bUseWeightPred;                      }
   bool                   getWPBiPred() const                                              { return m_useWeightedBiPred;                   }
   void                   setUseWP( bool b )                                               { m_bUseWeightPred = b;                         }
@@ -1602,37 +1619,105 @@ public:
 
   void                   setOutputFlagPresentFlag( bool b )                               { m_OutputFlagPresentFlag = b;                  }
   bool                   getOutputFlagPresentFlag() const                                 { return m_OutputFlagPresentFlag;               }
-  void                   setTransquantBypassEnabledFlag( bool b )                         { m_TransquantBypassEnabledFlag = b;            }
-  bool                   getTransquantBypassEnabledFlag() const                           { return m_TransquantBypassEnabledFlag;         }
+  void                   setNumSubPics( uint8_t u )                                       { m_numSubPics = u;                             } //!< set the number of sub-pictures
+  uint8_t                getNumSubPics( ) const                                           { return  m_numSubPics;                         } //!< get the number of sub-pictures
+  void                   setSubPicIdSignallingPresentFlag( bool b )                       { m_subPicIdSignallingPresentFlag = b;          } //!< set the sub-picture ID signalling present flag
+  bool                   getSubPicIdSignallingPresentFlag() const                         { return  m_subPicIdSignallingPresentFlag;      } //!< get the sub-picture ID signalling present flag
+  void                   setSubPicIdLen( uint32_t u )                                     { m_subPicIdLen = u;                            } //!< set the sub-picture ID length (presumably in bits - confirm against the member declaration)
+  uint32_t               getSubPicIdLen() const                                           { return  m_subPicIdLen;                        } //!< get the sub-picture ID length
+  void                   setSubPicId( int i, uint8_t u )                                  { CHECK( i < 0 || i >= MAX_NUM_SUB_PICS, "Sub-picture index exceeds valid range" ); m_subPicId[i] = u;     } //!< set the ID of sub-picture i; i is signed, so negative values are rejected as well
+  uint8_t                getSubPicId( int i ) const                                       { CHECK( i < 0 || i >= MAX_NUM_SUB_PICS, "Sub-picture index exceeds valid range" ); return  m_subPicId[i]; } //!< get the ID of sub-picture i; CHECK fails unless 0 <= i < MAX_NUM_SUB_PICS
+  void                   setNoPicPartitionFlag( bool b )                                  { m_noPicPartitionFlag = b;                     } //!< set the no-picture-partition flag
+  bool                   getNoPicPartitionFlag( ) const                                   { return  m_noPicPartitionFlag;                 } //!< get the no-picture-partition flag
+  void                   setLog2CtuSize( uint8_t u )                                      { m_log2CtuSize = u; m_ctuSize = 1 << m_log2CtuSize; //!< set log2 of the CTU size and derive the CTU size (1 << u) and the picture size in CTUs from it
+                                                                                            m_picWidthInCtu = (m_picWidthInLumaSamples  + m_ctuSize - 1) / m_ctuSize;  // picture width in CTUs, rounded up
+                                                                                            m_picHeightInCtu = (m_picHeightInLumaSamples  + m_ctuSize - 1) / m_ctuSize; } // picture height in CTUs, rounded up; both counts use the luma size stored at call time, so that size has to be set first
+  uint8_t                getLog2CtuSize( ) const                                          { return  m_log2CtuSize;                        } //!< get log2 of the CTU size
+  uint8_t                getCtuSize( ) const                                              { return  m_ctuSize;                            } //!< get the CTU size derived in setLog2CtuSize(); NOTE(review): the uint8_t return cannot represent sizes of 256 or more
+  uint32_t               getPicWidthInCtu( ) const                                        { return  m_picWidthInCtu;                      } //!< get the picture width in CTUs as derived in setLog2CtuSize(); 32-bit return so counts above 255 are not truncated
+  uint32_t               getPicHeightInCtu( ) const                                       { return  m_picHeightInCtu;                     } //!< get the picture height in CTUs as derived in setLog2CtuSize(); 32-bit return so counts above 255 are not truncated
+  void                   setNumExpTileColumns( uint32_t u )                               { m_numExpTileCols = u;                         } //!< set m_numExpTileCols (presumably the number of explicitly specified tile columns - confirm)
+  uint32_t               getNumExpTileColumns( ) const                                    { return  m_numExpTileCols;                     } //!< get m_numExpTileCols
+  void                   setNumExpTileRows( uint32_t u )                                  { m_numExpTileRows = u;                         } //!< set m_numExpTileRows (presumably the number of explicitly specified tile rows - confirm)
+  uint32_t               getNumExpTileRows( ) const                                       { return  m_numExpTileRows;                     } //!< get m_numExpTileRows
+  void                   setNumTileColumns( uint32_t u )                                  { m_numTileCols = u;                            } //!< set the number of tile columns
+  uint32_t               getNumTileColumns( ) const                                       { return  m_numTileCols;                        } //!< get the number of tile columns
+  void                   setNumTileRows( uint32_t u )                                     { m_numTileRows = u;                            } //!< set the number of tile rows
+  uint32_t               getNumTileRows( ) const                                          { return  m_numTileRows;                        } //!< get the number of tile rows
+  uint32_t               getNumTiles( ) const                                             { return  m_numTileCols * m_numTileRows;        } //!< get the total number of tiles (tile columns times tile rows)
+  void                   setTileColumnWidths( const std::vector<uint32_t>& widths )       { m_tileColWidth = widths;                      } //!< replace the whole list of tile column widths; taken by const reference so only the assignment copies
+  void                   setTileRowHeights( const std::vector<uint32_t>& heights )        { m_tileRowHeight = heights;                    } //!< replace the whole list of tile row heights; taken by const reference so only the assignment copies
+  void                   addTileColumnWidth( uint32_t u )                                 { CHECK( m_tileColWidth.size()  >= MAX_TILE_COLS, "Number of tile columns exceeds valid range" ); m_tileColWidth.push_back(u);    } //!< append one tile column width; CHECK fails if the list already holds MAX_TILE_COLS entries
+  void                   addTileRowHeight( uint32_t u )                                   { CHECK( m_tileRowHeight.size() >= MAX_TILE_ROWS, "Number of tile rows exceeds valid range" );    m_tileRowHeight.push_back(u);   } //!< append one tile row height; CHECK fails if the list already holds MAX_TILE_ROWS entries
+  uint32_t               getTileColumnWidth( int idx ) const                              { CHECK( idx >= m_tileColWidth.size(), "Tile column index exceeds valid range" );                 return  m_tileColWidth[idx];    } //!< get the width of tile column idx; CHECK fails if idx is outside m_tileColWidth
+  uint32_t               getTileRowHeight( int idx ) const                                { CHECK( idx >= m_tileRowHeight.size(), "Tile row index exceeds valid range" );                   return  m_tileRowHeight[idx];   }  //!< get the height of tile row idx; CHECK fails if idx is outside m_tileRowHeight
+  uint32_t               getTileColumnBd( int idx ) const                                 { CHECK( idx >= m_tileColBd.size(), "Tile column index exceeds valid range" );                    return  m_tileColBd[idx];       } //!< get entry idx of the tile column boundary list m_tileColBd
+  uint32_t               getTileRowBd( int idx ) const                                    { CHECK( idx >= m_tileRowBd.size(), "Tile row index exceeds valid range" );                       return  m_tileRowBd[idx];       } //!< get entry idx of the tile row boundary list m_tileRowBd
+  uint32_t               ctuToTileCol( int ctuX ) const                                   { CHECK( ctuX >= m_ctuToTileCol.size(), "CTU address index exceeds valid range" ); return  m_ctuToTileCol[ctuX];                  } //!< map CTU column ctuX to its tile column via the m_ctuToTileCol lookup table
+  uint32_t               ctuToTileRow( int ctuY ) const                                   { CHECK( ctuY >= m_ctuToTileRow.size(), "CTU address index exceeds valid range" ); return  m_ctuToTileRow[ctuY];                  } //!< map CTU row ctuY to its tile row via the m_ctuToTileRow lookup table
+  uint32_t               ctuToTileColBd( int ctuX ) const                                 { return  getTileColumnBd(ctuToTileCol( ctuX ));                                                                                  } //!< get the column boundary of the tile that contains CTU column ctuX
+  uint32_t               ctuToTileRowBd( int ctuY ) const                                 { return  getTileRowBd(ctuToTileRow( ctuY ));                                                                                     } //!< get the row boundary of the tile that contains CTU row ctuY
+  bool                   ctuIsTileColBd( int ctuX ) const                                 { return  ctuX == ctuToTileColBd( ctuX );                                                                                         } //!< true if CTU column ctuX equals the column boundary of its own tile
+  bool                   ctuIsTileRowBd( int ctuY ) const                                 { return  ctuY == ctuToTileRowBd( ctuY );                                                                                         } //!< true if CTU row ctuY equals the row boundary of its own tile
+  uint32_t               getTileIdx( uint32_t ctuX, uint32_t ctuY ) const                 { return (ctuToTileRow( ctuY ) * getNumTileColumns()) + ctuToTileCol( ctuX );                                                     } //!< get the tile index (tile row * number of tile columns + tile column) of the CTU at column ctuX, row ctuY
+  uint32_t               getTileIdx( uint32_t ctuRsAddr) const                            { return getTileIdx( ctuRsAddr % m_picWidthInCtu,  ctuRsAddr / m_picWidthInCtu );                                                 } //!< get the tile index of a CTU given by raster-scan address; divides by m_picWidthInCtu, which must be non-zero
+  uint32_t               getTileIdx( const Position& pos ) const                          { return getTileIdx( pos.x / m_ctuSize, pos.y / m_ctuSize );                                                                      } //!< get the tile index of the CTU containing pos; pos is divided by the CTU size, so it is presumably in luma samples - confirm
+  void                   setRectSliceFlag( bool b )                                       { m_rectSliceFlag = b;                                                                                                            } //!< set the rectangular slice flag
+  bool                   getRectSliceFlag( ) const                                        { return  m_rectSliceFlag;                                                                                                        } //!< get the rectangular slice flag
+  void                   setSingleSlicePerSubPicFlag( bool b )                            { m_singleSlicePerSubPicFlag = b;                                                                                                 } //!< set the single-slice-per-sub-picture flag
+  bool                   getSingleSlicePerSubPicFlag( ) const                             { return  m_singleSlicePerSubPicFlag;                                                                                             } //!< get the single-slice-per-sub-picture flag
+  uint32_t               getCtuToSubPicIdx( int idx ) const                               { CHECK( idx >= m_ctuToSubPicIdx.size(), "CTU address index exceeds valid range" ); return  m_ctuToSubPicIdx[idx];                } //!< get the sub-picture index stored for CTU address idx; CHECK fails if idx is outside m_ctuToSubPicIdx
+  void                   setNumSlicesInPic( uint32_t u )                                  { CHECK( u > MAX_SLICES, "Number of slices in picture exceeds valid range" ); m_numSlicesInPic = u;                               } //!< set the number of slices in the picture; CHECK fails if u exceeds MAX_SLICES
+  uint32_t               getNumSlicesInPic( ) const                                       { return  m_numSlicesInPic;                                                                                                       } //!< get the number of slices in the picture
+  void                   setTileIdxDeltaPresentFlag( bool b )                             { m_tileIdxDeltaPresentFlag = b;                                                                                                  } //!< set the tile-index-delta present flag
+  bool                   getTileIdxDeltaPresentFlag( ) const                              { return  m_tileIdxDeltaPresentFlag;                                                                                              } //!< get the tile-index-delta present flag
+  void                   setSliceWidthInTiles( int idx, uint32_t u )                      { CHECK( idx >= m_numSlicesInPic, "Slice index exceeds valid range" );    m_rectSlices[idx].setSliceWidthInTiles( u );            } //!< set the width in tiles of rectangular slice idx; NOTE(review): idx is checked against m_numSlicesInPic, not m_rectSlices.size() - assumes m_rectSlices holds at least that many entries
+  uint32_t               getSliceWidthInTiles( int idx ) const                            { CHECK( idx >= m_numSlicesInPic, "Slice index exceeds valid range" );    return  m_rectSlices[idx].getSliceWidthInTiles( );      } //!< get the width in tiles of rectangular slice idx
+  void                   setSliceHeightInTiles( int idx, uint32_t u )                     { CHECK( idx >= m_numSlicesInPic, "Slice index exceeds valid range" );    m_rectSlices[idx].setSliceHeightInTiles( u );           } //!< set the height in tiles of rectangular slice idx
+  uint32_t               getSliceHeightInTiles( int idx ) const                           { CHECK( idx >= m_numSlicesInPic, "Slice index exceeds valid range" );    return  m_rectSlices[idx].getSliceHeightInTiles( );     } //!< get the height in tiles of rectangular slice idx
+  void                   setNumSlicesInTile( int idx, uint32_t u )                        { CHECK( idx >= m_numSlicesInPic, "Slice index exceeds valid range" );    m_rectSlices[idx].setNumSlicesInTile( u );              } //!< forward u to setNumSlicesInTile() of rectangular slice idx
+  uint32_t               getNumSlicesInTile( int idx ) const                              { CHECK( idx >= m_numSlicesInPic, "Slice index exceeds valid range" );    return  m_rectSlices[idx].getNumSlicesInTile( );        } //!< get the value of getNumSlicesInTile() of rectangular slice idx
+  void                   setSliceHeightInCtu( int idx, uint32_t u )                       { CHECK( idx >= m_numSlicesInPic, "Slice index exceeds valid range" );    m_rectSlices[idx].setSliceHeightInCtu( u );             } //!< set the height in CTUs of rectangular slice idx
+  uint32_t               getSliceHeightInCtu( int idx ) const                             { CHECK( idx >= m_numSlicesInPic, "Slice index exceeds valid range" );    return  m_rectSlices[idx].getSliceHeightInCtu( );       } //!< get the height in CTUs of rectangular slice idx
+  void                   setSliceTileIdx(  int idx, uint32_t u )                          { CHECK( idx >= m_numSlicesInPic, "Slice index exceeds valid range" );    m_rectSlices[idx].setTileIdx( u );                      } //!< set the tile index stored in rectangular slice idx
+  uint32_t               getSliceTileIdx( int idx ) const                                 { CHECK( idx >= m_numSlicesInPic, "Slice index exceeds valid range" );    return  m_rectSlices[idx].getTileIdx( );                } //!< get the tile index stored in rectangular slice idx
+  void                   setRectSlices( const std::vector<RectSlice>& rectSlices )        { m_rectSlices = rectSlices;                                                                                                      } //!< replace the whole list of rectangular slices; taken by const reference so only the assignment copies
+  void                   setLoopFilterAcrossTilesEnabledFlag( bool b )                    { m_loopFilterAcrossTilesEnabledFlag = b;                                                                                         } //!< set the loop-filter-across-tiles enabled flag
+  bool                   getLoopFilterAcrossTilesEnabledFlag( ) const                     { return  m_loopFilterAcrossTilesEnabledFlag;                                                                                     } //!< get the loop-filter-across-tiles enabled flag
+  void                   setLoopFilterAcrossSlicesEnabledFlag( bool b )                   { m_loopFilterAcrossSlicesEnabledFlag = b;                                                                                        } //!< set the loop-filter-across-slices enabled flag
+  bool                   getLoopFilterAcrossSlicesEnabledFlag( ) const                    { return  m_loopFilterAcrossSlicesEnabledFlag;                                                                                    } //!< get the loop-filter-across-slices enabled flag
+  void                   resetTileSliceInfo();
+  void                   initTiles();
+  void                   initRectSlices();
+  void                   initRectSliceMap();
+  void                   initRasterSliceMap( std::vector<uint32_t> sizes );
+  void                   checkSliceMap(); 
+  SliceMap               getSliceMap( int idx ) const                                     { CHECK( idx >= m_numSlicesInPic, "Slice index exceeds valid range" );    return m_sliceMap[idx];                             } //!< get a copy (returned by value) of the slice map of slice idx; CHECK fails if idx >= m_numSlicesInPic
+
+  uint32_t               getLog2MaxTransformSkipBlockSize() const                         { return m_log2MaxTransformSkipBlockSize; } //!< get log2 of the maximum transform-skip block size
+  void                   setLog2MaxTransformSkipBlockSize(uint32_t u)                     { m_log2MaxTransformSkipBlockSize = u; } //!< set log2 of the maximum transform-skip block size
 
-  bool                   getUseTransformSkip() const                                      { return m_useTransformSkip;                    }
-  void                   setUseTransformSkip( bool b )                                    { m_useTransformSkip  = b;                      }
-
-#if HEVC_TILES_WPP
-  void                   setLoopFilterAcrossTilesEnabledFlag(bool b)                      { m_loopFilterAcrossTilesEnabledFlag = b;       }
-  bool                   getLoopFilterAcrossTilesEnabledFlag() const                      { return m_loopFilterAcrossTilesEnabledFlag;    }
-#endif
-#if HEVC_DEPENDENT_SLICES
-  bool                   getDependentSliceSegmentsEnabledFlag() const                     { return m_dependentSliceSegmentsEnabledFlag;   }
-  void                   setDependentSliceSegmentsEnabledFlag(bool val)                   { m_dependentSliceSegmentsEnabledFlag = val;    }
-#endif
-#if HEVC_TILES_WPP
   bool                   getEntropyCodingSyncEnabledFlag() const                          { return m_entropyCodingSyncEnabledFlag;        }
   void                   setEntropyCodingSyncEnabledFlag(bool val)                        { m_entropyCodingSyncEnabledFlag = val;         }
 
-  void                   setTilesEnabledFlag(bool val)                                    { m_tilesEnabledFlag = val;                     }
-  bool                   getTilesEnabledFlag() const                                      { return m_tilesEnabledFlag;                    }
-  void                   setTileUniformSpacingFlag(bool b)                                { m_uniformSpacingFlag = b;                     }
-  bool                   getTileUniformSpacingFlag() const                                { return m_uniformSpacingFlag;                  }
-  void                   setNumTileColumnsMinus1(int i)                                   { m_numTileColumnsMinus1 = i;                   }
-  int                    getNumTileColumnsMinus1() const                                  { return m_numTileColumnsMinus1;                }
-  void                   setTileColumnWidth(const std::vector<int>& columnWidth )         { m_tileColumnWidth = columnWidth;              }
-  uint32_t                   getTileColumnWidth(uint32_t columnIdx) const                         { return  m_tileColumnWidth[columnIdx];         }
-  void                   setNumTileRowsMinus1(int i)                                      { m_numTileRowsMinus1 = i;                      }
-  int                    getNumTileRowsMinus1() const                                     { return m_numTileRowsMinus1;                   }
-  void                   setTileRowHeight(const std::vector<int>& rowHeight)              { m_tileRowHeight = rowHeight;                  }
-  uint32_t                   getTileRowHeight(uint32_t rowIdx) const                              { return m_tileRowHeight[rowIdx];               }
-#endif
+
+  bool                    getConstantSliceHeaderParamsEnabledFlag() const                 { return m_constantSliceHeaderParamsEnabledFlag; } //!< get the constant-slice-header-params enabled flag
+  void                    setConstantSliceHeaderParamsEnabledFlag(bool b)                 { m_constantSliceHeaderParamsEnabledFlag = b;   } //!< set the constant-slice-header-params enabled flag
+  int                     getPPSDepQuantEnabledIdc() const                                { return m_PPSDepQuantEnabledIdc;               } //!< get the PPS dependent-quantization enabled idc
+  void                    setPPSDepQuantEnabledIdc(int u)                                 { m_PPSDepQuantEnabledIdc = u;                  } //!< set the PPS dependent-quantization enabled idc
+  int                     getPPSRefPicListSPSIdc( bool b ) const                          { return b==1 ? m_PPSRefPicListSPSIdc1: m_PPSRefPicListSPSIdc0; } //!< get the ref-pic-list SPS idc for list 1 when b is true, for list 0 otherwise
+  int                     getPPSRefPicListSPSIdc0() const                                 { return m_PPSRefPicListSPSIdc0;                } //!< get the ref-pic-list SPS idc for list 0
+  void                    setPPSRefPicListSPSIdc0(int u)                                  { m_PPSRefPicListSPSIdc0 = u;                   } //!< set the ref-pic-list SPS idc for list 0
+  int                     getPPSRefPicListSPSIdc1() const                                 { return m_PPSRefPicListSPSIdc1;                } //!< get the ref-pic-list SPS idc for list 1
+  void                    setPPSRefPicListSPSIdc1(int u)                                  { m_PPSRefPicListSPSIdc1 = u;                   } //!< set the ref-pic-list SPS idc for list 1
+  int                     getPPSMvdL1ZeroIdc() const                                      { return m_PPSMvdL1ZeroIdc;                     } //!< get the PPS mvd-L1-zero idc
+  void                    setPPSMvdL1ZeroIdc(int u)                                       { m_PPSMvdL1ZeroIdc = u;                        } //!< set the PPS mvd-L1-zero idc
+  int                     getPPSCollocatedFromL0Idc() const                               { return m_PPSCollocatedFromL0Idc;              } //!< get the PPS collocated-from-L0 idc
+  void                    setPPSCollocatedFromL0Idc(int u)                                { m_PPSCollocatedFromL0Idc = u;                 } //!< set the PPS collocated-from-L0 idc
+  uint32_t                getPPSSixMinusMaxNumMergeCandPlus1() const                      { return m_PPSSixMinusMaxNumMergeCandPlus1;     } //!< get m_PPSSixMinusMaxNumMergeCandPlus1 (the value is stored as given, no arithmetic is applied here)
+  void                    setPPSSixMinusMaxNumMergeCandPlus1(uint32_t u)                  { m_PPSSixMinusMaxNumMergeCandPlus1 = u;        } //!< set m_PPSSixMinusMaxNumMergeCandPlus1 (the value is stored as given)
+  uint32_t                getPPSMaxNumMergeCandMinusMaxNumTriangleCandPlus1() const       { return m_PPSMaxNumMergeCandMinusMaxNumTriangleCandPlus1; } //!< get m_PPSMaxNumMergeCandMinusMaxNumTriangleCandPlus1 (the value is stored as given)
+  void                    setPPSMaxNumMergeCandMinusMaxNumTriangleCandPlus1(uint32_t u)   { m_PPSMaxNumMergeCandMinusMaxNumTriangleCandPlus1 = u; } //!< set m_PPSMaxNumMergeCandMinusMaxNumTriangleCandPlus1 (the value is stored as given)
 
   void                   setCabacInitPresentFlag( bool flag )                             { m_cabacInitPresentFlag = flag;                }
   bool                   getCabacInitPresentFlag() const                                  { return m_cabacInitPresentFlag;                }
@@ -1646,32 +1731,41 @@ public:
   int                    getDeblockingFilterBetaOffsetDiv2() const                        { return m_deblockingFilterBetaOffsetDiv2;      } //!< get beta offset for deblocking filter
   void                   setDeblockingFilterTcOffsetDiv2(int val)                         { m_deblockingFilterTcOffsetDiv2 = val;         } //!< set tc offset for deblocking filter
   int                    getDeblockingFilterTcOffsetDiv2() const                          { return m_deblockingFilterTcOffsetDiv2;        } //!< get tc offset for deblocking filter
-#if HEVC_USE_SCALING_LISTS
-  bool                   getScalingListPresentFlag() const                                { return m_scalingListPresentFlag;              }
-  void                   setScalingListPresentFlag( bool b )                              { m_scalingListPresentFlag  = b;                }
-  ScalingList&           getScalingList()                                                 { return m_scalingList;                         }
-  const ScalingList&     getScalingList() const                                           { return m_scalingList;                         }
-#endif
   bool                   getListsModificationPresentFlag() const                          { return m_listsModificationPresentFlag;        }
   void                   setListsModificationPresentFlag( bool b )                        { m_listsModificationPresentFlag = b;           }
-  uint32_t                   getLog2ParallelMergeLevelMinus2() const                          { return m_log2ParallelMergeLevelMinus2;        }
-  void                   setLog2ParallelMergeLevelMinus2(uint32_t mrgLevel)                   { m_log2ParallelMergeLevelMinus2 = mrgLevel;    }
-  int                    getNumExtraSliceHeaderBits() const                               { return m_numExtraSliceHeaderBits;             }
-  void                   setNumExtraSliceHeaderBits(int i)                                { m_numExtraSliceHeaderBits = i;                }
-  void                   setLoopFilterAcrossSlicesEnabledFlag( bool bValue )              { m_loopFilterAcrossSlicesEnabledFlag = bValue; }
-  bool                   getLoopFilterAcrossSlicesEnabledFlag() const                     { return m_loopFilterAcrossSlicesEnabledFlag;   }
+  bool                   getPictureHeaderExtensionPresentFlag() const                     { return m_pictureHeaderExtensionPresentFlag;     } //!< get the picture-header-extension present flag
+  void                   setPictureHeaderExtensionPresentFlag(bool val)                   { m_pictureHeaderExtensionPresentFlag = val;      } //!< set the picture-header-extension present flag
   bool                   getSliceHeaderExtensionPresentFlag() const                       { return m_sliceHeaderExtensionPresentFlag;     }
   void                   setSliceHeaderExtensionPresentFlag(bool val)                     { m_sliceHeaderExtensionPresentFlag = val;      }
 
+
   const PPSRExt&         getPpsRangeExtension() const                                     { return m_ppsRangeExtension;                   }
   PPSRExt&               getPpsRangeExtension()                                           { return m_ppsRangeExtension;                   }
+
+  void                    setPicWidthInLumaSamples( uint32_t u )                          { m_picWidthInLumaSamples = u; } //!< set the picture width in luma samples; only stores the value, m_picWidthInCtu is not recomputed here
+  uint32_t                getPicWidthInLumaSamples() const                                { return  m_picWidthInLumaSamples; } //!< get the picture width in luma samples
+  void                    setPicHeightInLumaSamples( uint32_t u )                         { m_picHeightInLumaSamples = u; } //!< set the picture height in luma samples; only stores the value, m_picHeightInCtu is not recomputed here
+  uint32_t                getPicHeightInLumaSamples() const                               { return  m_picHeightInLumaSamples; } //!< get the picture height in luma samples
+
+  Window&                 getConformanceWindow()                                          { return  m_conformanceWindow; } //!< get mutable access to the conformance window
+  const Window&           getConformanceWindow() const                                    { return  m_conformanceWindow; } //!< get read-only access to the conformance window
+  void                    setConformanceWindow( const Window& conformanceWindow )         { m_conformanceWindow = conformanceWindow; } //!< copy conformanceWindow into the PPS; the argument is only read, so const objects and temporaries are accepted
+
+  Window&                 getScalingWindow()                                              { return  m_scalingWindow; } //!< get mutable access to the scaling window
+  const Window&           getScalingWindow()                                        const { return  m_scalingWindow; } //!< get read-only access to the scaling window
+  void                    setScalingWindow( const Window& scalingWindow )                 { m_scalingWindow = scalingWindow; } //!< copy scalingWindow into the PPS; the argument is only read, so const objects and temporaries are accepted
 };
 
 class APS
 {
 private:
   int                    m_APSId;                    // adaptation_parameter_set_id
-  AlfSliceParam          m_alfAPSParam;
+  int                    m_temporalId;               // temporal ID, accessed through setTemporalId()/getTemporalId()
+  int                    m_layerId;                  // layer ID, accessed through setLayerId()/getLayerId()
+  ApsType                m_APSType;                  // aps_params_type
+  AlfParam               m_alfAPSParam;              // ALF parameters, accessed through setAlfAPSParam()/getAlfAPSParam()
+  SliceReshapeInfo       m_reshapeAPSInfo;           // reshaper information, accessed through setReshaperAPSInfo()/getReshaperAPSInfo()
+  ScalingList            m_scalingListApsInfo;       // scaling list, accessed through setScalingList()/getScalingList()
 
 public:
   APS();
@@ -1680,8 +1774,20 @@ public:
   int                    getAPSId() const                                                 { return m_APSId;                               }
   void                   setAPSId(int i)                                                  { m_APSId = i;                                  }
 
-  void                   setAlfAPSParam(AlfSliceParam& alfAPSParam)                       { m_alfAPSParam = alfAPSParam;                  }
-  const AlfSliceParam&   getAlfAPSParam() const                                           { return m_alfAPSParam;                         }
+  ApsType                getAPSType() const                                               { return m_APSType;                             } //!< get the APS type (aps_params_type)
+  void                   setAPSType( ApsType type )                                       { m_APSType = type;                             } //!< set the APS type (aps_params_type)
+
+  void                   setAlfAPSParam(AlfParam& alfAPSParam)                            { m_alfAPSParam = alfAPSParam;                  } //!< copy alfAPSParam into this APS
+  void                   setTemporalId( int i )                                           { m_temporalId = i;                             } //!< set the temporal ID
+  int                    getTemporalId()                                            const { return m_temporalId;                          } //!< get the temporal ID
+  void                   setLayerId( int i )                                              { m_layerId = i;                                } //!< set the layer ID
+  int                    getLayerId()                                               const { return m_layerId;                             } //!< get the layer ID
+  AlfParam&              getAlfAPSParam()  { return m_alfAPSParam; } //!< get mutable access to the stored ALF parameters (non-const member, so not callable on a const APS)
+
+  void                   setReshaperAPSInfo(SliceReshapeInfo& reshapeAPSInfo)             { m_reshapeAPSInfo = reshapeAPSInfo;            } //!< copy reshapeAPSInfo into this APS
+  SliceReshapeInfo&      getReshaperAPSInfo()                                             { return m_reshapeAPSInfo;                      } //!< get mutable access to the stored reshaper information
+  void                   setScalingList( ScalingList& scalingListAPSInfo )                { m_scalingListApsInfo = scalingListAPSInfo;    } //!< copy scalingListAPSInfo into this APS
+  ScalingList&           getScalingList()                                                 { return m_scalingListApsInfo;                  } //!< get mutable access to the stored scaling list
 };
 struct WPScalingParam
 {
@@ -1706,7 +1812,245 @@ struct WPACDCParam
   int64_t iDC;
 };
 
+// picture header class
+class PicHeader
+{
+private:
+  bool                        m_valid;                                                  //!< picture header is valid yet or not
+  Picture*                    m_pcPic;                                                  //!< pointer to picture structure
+  bool                        m_nonReferencePictureFlag;                                //!< non-reference picture flag
+  bool                        m_gdrPicFlag;                                             //!< gradual decoding refresh picture flag
+  bool                        m_noOutputOfPriorPicsFlag;                                //!< no output of prior pictures flag
+  uint32_t                    m_recoveryPocCnt;                                         //!< recovery POC count
+  int                         m_spsId;                                                  //!< sequence parameter set ID
+  int                         m_ppsId;                                                  //!< picture parameter set ID
+  bool                        m_subPicIdSignallingPresentFlag;                          //!< indicates the presence of sub-picture ID signalling in the SPS
+  uint32_t                    m_subPicIdLen;                                            //!< sub-picture ID length in bits
+  uint8_t                     m_subPicId[MAX_NUM_SUB_PICS];                             //!< sub-picture ID for each sub-picture in the sequence
+  bool                        m_loopFilterAcrossVirtualBoundariesDisabledFlag;          //!< loop filtering across virtual boundaries disabled
+  unsigned                    m_numVerVirtualBoundaries;                                //!< number of vertical virtual boundaries
+  unsigned                    m_numHorVirtualBoundaries;                                //!< number of horizontal virtual boundaries
+  unsigned                    m_virtualBoundariesPosX[3];                               //!< horizontal virtual boundary positions
+  unsigned                    m_virtualBoundariesPosY[3];                               //!< vertical virtual boundary positions
+  unsigned                    m_colourPlaneId;                                          //!< 4:4:4 colour plane ID
+  bool                        m_picOutputFlag;                                          //!< picture output flag
+  bool                        m_picRplPresentFlag;                                      //!< reference lists present in picture header or not
+  const ReferencePictureList* m_pRPL0;                                                  //!< pointer to RPL for L0, either in the SPS or the local RPS in the picture header
+  const ReferencePictureList* m_pRPL1;                                                  //!< pointer to RPL for L1, either in the SPS or the local RPS in the picture header
+  ReferencePictureList        m_localRPL0;                                              //!< RPL for L0 when present in picture header
+  ReferencePictureList        m_localRPL1;                                              //!< RPL for L1 when present in picture header
+  int                         m_rpl0Idx;                                                //!< index of used RPL in the SPS or -1 for local RPL in the picture header
+  int                         m_rpl1Idx;                                                //!< index of used RPL in the SPS or -1 for local RPL in the picture header
+  bool                        m_splitConsOverrideFlag;                                  //!< partitioning constraint override flag  
+  uint32_t                    m_cuQpDeltaSubdivIntra;                                   //!< CU QP delta maximum subdivision for intra slices
+  uint32_t                    m_cuQpDeltaSubdivInter;                                   //!< CU QP delta maximum subdivision for inter slices 
+  uint32_t                    m_cuChromaQpOffsetSubdivIntra;                            //!< CU chroma QP offset maximum subdivision for intra slices 
+  uint32_t                    m_cuChromaQpOffsetSubdivInter;                            //!< CU chroma QP offset maximum subdivision for inter slices 
+  bool                        m_enableTMVPFlag;                                         //!< enable temporal motion vector prediction
+  bool                        m_mvdL1ZeroFlag;                                          //!< L1 MVD set to zero flag  
+  uint32_t                    m_maxNumMergeCand;                                        //!< max number of merge candidates
+  uint32_t                    m_maxNumAffineMergeCand;                                  //!< max number of sub-block merge candidates
+  bool                        m_disFracMMVD;                                            //!< fractional MMVD offsets disabled flag
+  bool                        m_disBdofFlag;                                            //!< picture level BDOF disable flag
+  bool                        m_disDmvrFlag;                                            //!< picture level DMVR disable flag
+  bool                        m_disProfFlag;                                            //!< picture level PROF disable flag
+  uint32_t                    m_maxNumTriangleCand;                                     //!< max number of triangle merge candidates
+  uint32_t                    m_maxNumIBCMergeCand;                                     //!< max number of IBC merge candidates
+  bool                        m_jointCbCrSignFlag;                                      //!< joint Cb/Cr residual sign flag  
+  bool                        m_saoEnabledPresentFlag;                                  //!< sao enabled flags present in the picture header
+  bool                        m_saoEnabledFlag[MAX_NUM_CHANNEL_TYPE];                   //!< sao enabled flags for each channel
+  bool                        m_alfEnabledPresentFlag;                                  //!< alf enabled flags present in the picture header
+  bool                        m_alfEnabledFlag[MAX_NUM_COMPONENT];                      //!< alf enabled flags for each component
+  int                         m_numAlfAps;                                              //!< number of alf aps active for the picture
+  std::vector<int>            m_alfApsId;                                               //!< list of alf aps for the picture
+  int                         m_alfChromaApsId;                                         //!< chroma alf aps ID
+  bool                        m_depQuantEnabledFlag;                                    //!< dependent quantization enabled flag
+  bool                        m_signDataHidingEnabledFlag;                              //!< sign data hiding enabled flag
+  bool                        m_deblockingFilterOverridePresentFlag;                    //!< deblocking filter override controls present in picture header
+  bool                        m_deblockingFilterOverrideFlag;                           //!< deblocking filter override controls enabled
+  bool                        m_deblockingFilterDisable;                                //!< deblocking filter disabled flag
+  int                         m_deblockingFilterBetaOffsetDiv2;                         //!< beta offset for deblocking filter
+  int                         m_deblockingFilterTcOffsetDiv2;                           //!< tc offset for deblocking filter
+  bool                        m_lmcsEnabledFlag;                                        //!< lmcs enabled flag
+  int                         m_lmcsApsId;                                              //!< lmcs APS ID
+  APS*                        m_lmcsAps;                                                //!< lmcs APS
+  bool                        m_lmcsChromaResidualScaleFlag;                            //!< lmcs chroma residual scale flag  
+  bool                        m_scalingListPresentFlag;                                 //!< quantization scaling lists present
+  int                         m_scalingListApsId;                                       //!< quantization scaling list APS ID
+  APS*                        m_scalingListAps;                                         //!< quantization scaling list APS
+  unsigned                    m_minQT[3];                                               //!< minimum quad-tree size  0: I slice luma; 1: P/B slice; 2: I slice chroma
+  unsigned                    m_maxMTTHierarchyDepth[3];                                //!< maximum MTT depth
+  unsigned                    m_maxBTSize[3];                                           //!< maximum BT size
+  unsigned                    m_maxTTSize[3];                                           //!< maximum TT size
 
+public:
+                              PicHeader();
+  virtual                     ~PicHeader();
+  void                        initPicHeader();
+  bool                        isValid() const                                           { return m_valid;                                                                              } //!< returns m_valid; const so it can be queried on a const PicHeader
+  void                        setValid()                                                { m_valid = true;                                                                              }
+  void                        setPic( Picture* p )                                      { m_pcPic = p;                                                                                 }
+  Picture*                    getPic()                                                  { return m_pcPic;                                                                              }
+  const Picture*              getPic() const                                            { return m_pcPic;                                                                              }
+  void                        setNonReferencePictureFlag( bool b )                      { m_nonReferencePictureFlag = b;                                                               }
+  bool                        getNonReferencePictureFlag() const                        { return m_nonReferencePictureFlag;                                                            }
+  void                        setGdrPicFlag( bool b )                                   { m_gdrPicFlag = b;                                                                            }
+  bool                        getGdrPicFlag() const                                     { return m_gdrPicFlag;                                                                         }
+  void                        setNoOutputOfPriorPicsFlag( bool b )                      { m_noOutputOfPriorPicsFlag = b;                                                               }
+  bool                        getNoOutputOfPriorPicsFlag() const                        { return m_noOutputOfPriorPicsFlag;                                                            }
+  void                        setRecoveryPocCnt( uint32_t u )                           { m_recoveryPocCnt = u;                                                                        }
+  uint32_t                    getRecoveryPocCnt() const                                 { return m_recoveryPocCnt;                                                                     } //!< returns the full recovery POC count (uint32_t, matching the member and its setter)
+  void                        setSPSId( uint32_t u )                                    { m_spsId = u;                                                                                 }
+  uint32_t                    getSPSId() const                                          { return m_spsId;                                                                              }
+  void                        setPPSId( uint32_t u )                                    { m_ppsId = u;                                                                                 }
+  uint32_t                    getPPSId() const                                          { return m_ppsId;                                                                              }
+  void                        setSubPicIdSignallingPresentFlag( bool b )                { m_subPicIdSignallingPresentFlag = b;                                                         }
+  bool                        getSubPicIdSignallingPresentFlag() const                  { return  m_subPicIdSignallingPresentFlag;                                                     }
+  void                        setSubPicIdLen( uint32_t u )                              { m_subPicIdLen = u;                                                                           }
+  uint32_t                    getSubPicIdLen() const                                    { return  m_subPicIdLen;                                                                       }
+  void                        setSubPicId( int i, uint8_t u )                           { CHECK( i < 0 || i >= MAX_NUM_SUB_PICS, "Sub-pic index exceeds valid range" ); m_subPicId[i] = u; } //!< stores u as ID of sub-picture i; CHECK rejects negative and too-large indices
+  uint8_t                     getSubPicId( int i ) const                                { CHECK( i < 0 || i >= MAX_NUM_SUB_PICS, "Sub-pic index exceeds valid range" ); return m_subPicId[i]; } //!< returns ID of sub-picture i; CHECK rejects negative and too-large indices
+  void                        setLoopFilterAcrossVirtualBoundariesDisabledFlag(bool b)  { m_loopFilterAcrossVirtualBoundariesDisabledFlag = b;                                         }
+  bool                        getLoopFilterAcrossVirtualBoundariesDisabledFlag() const  { return m_loopFilterAcrossVirtualBoundariesDisabledFlag;                                      }
+  void                        setNumVerVirtualBoundaries(unsigned u)                    { m_numVerVirtualBoundaries = u;                                                               }
+  unsigned                    getNumVerVirtualBoundaries() const                        { return m_numVerVirtualBoundaries;                                                            }
+  void                        setNumHorVirtualBoundaries(unsigned u)                    { m_numHorVirtualBoundaries = u;                                                               }
+  unsigned                    getNumHorVirtualBoundaries() const                        { return m_numHorVirtualBoundaries;                                                            }
+  void                        setVirtualBoundariesPosX(unsigned u, unsigned idx)        { CHECK( idx >= 3, "boundary index exceeds valid range" ); m_virtualBoundariesPosX[idx] = u;   }
+  unsigned                    getVirtualBoundariesPosX(unsigned idx) const              { CHECK( idx >= 3, "boundary index exceeds valid range" ); return m_virtualBoundariesPosX[idx];}
+  void                        setVirtualBoundariesPosY(unsigned u, unsigned idx)        { CHECK( idx >= 3, "boundary index exceeds valid range" ); m_virtualBoundariesPosY[idx] = u;   }
+  unsigned                    getVirtualBoundariesPosY(unsigned idx) const              { CHECK( idx >= 3, "boundary index exceeds valid range" ); return m_virtualBoundariesPosY[idx];}
+  void                        setColourPlaneId(unsigned u)                              { m_colourPlaneId = u;                                                                         }
+  unsigned                    getColourPlaneId() const                                  { return m_colourPlaneId;                                                                      }
+  void                        setPicOutputFlag( bool b )                                { m_picOutputFlag = b;                                                                         }
+  bool                        getPicOutputFlag() const                                  { return m_picOutputFlag;                                                                      }
+  void                        setPicRplPresentFlag( bool b )                            { m_picRplPresentFlag = b;                                                                     }
+  bool                        getPicRplPresentFlag() const                              { return m_picRplPresentFlag;                                                                  }
+  void                        setRPL( bool b, const ReferencePictureList *pcRPL)        { ( b ? m_pRPL1 : m_pRPL0 ) = pcRPL;                                                           } //!< b selects the list: true -> m_pRPL1, false -> m_pRPL0
+  const ReferencePictureList* getRPL( bool b ) const                                    { return b ? m_pRPL1 : m_pRPL0;                                                                } //!< b selects the list: true -> m_pRPL1, false -> m_pRPL0; const since nothing is modified
+  ReferencePictureList*       getLocalRPL( bool b )                                     { return &( b ? m_localRPL1 : m_localRPL0 );                                                   } //!< address of the local list: true -> m_localRPL1, false -> m_localRPL0
+  void                        setRPLIdx( bool b, int rplIdx)                            { ( b ? m_rpl1Idx : m_rpl0Idx ) = rplIdx;                                                      } //!< b selects the index: true -> m_rpl1Idx, false -> m_rpl0Idx
+  int                         getRPLIdx( bool b ) const                                 { if( b ) { return m_rpl1Idx; } return m_rpl0Idx;                                              } //!< b selects the index: true -> m_rpl1Idx, false -> m_rpl0Idx
+  void                        setRPL0(const ReferencePictureList *pcRPL)                { m_pRPL0 = pcRPL;                                                                             }
+  void                        setRPL1(const ReferencePictureList *pcRPL)                { m_pRPL1 = pcRPL;                                                                             }
+  const ReferencePictureList* getRPL0() const                                           { return m_pRPL0;                                                                              } //!< returns the L0 list pointer; const since nothing is modified
+  const ReferencePictureList* getRPL1() const                                           { return m_pRPL1;                                                                              } //!< returns the L1 list pointer; const since nothing is modified
+  ReferencePictureList*       getLocalRPL0()                                            { return &m_localRPL0;                                                                         }
+  ReferencePictureList*       getLocalRPL1()                                            { return &m_localRPL1;                                                                         }
+  void                        setRPL0idx(int rplIdx)                                    { m_rpl0Idx = rplIdx;                                                                          }
+  void                        setRPL1idx(int rplIdx)                                    { m_rpl1Idx = rplIdx;                                                                          }
+  int                         getRPL0idx() const                                        { return m_rpl0Idx;                                                                            }
+  int                         getRPL1idx() const                                        { return m_rpl1Idx;                                                                            }
+  void                        setSplitConsOverrideFlag( bool b )                        { m_splitConsOverrideFlag = b;                                                                 }
+  bool                        getSplitConsOverrideFlag() const                          { return m_splitConsOverrideFlag;                                                              }  
+  void                        setCuQpDeltaSubdivIntra( uint32_t u )                     { m_cuQpDeltaSubdivIntra = u;                                                                  }
+  uint32_t                    getCuQpDeltaSubdivIntra() const                           { return m_cuQpDeltaSubdivIntra;                                                               }
+  void                        setCuQpDeltaSubdivInter( uint32_t u )                     { m_cuQpDeltaSubdivInter = u;                                                                  }
+  uint32_t                    getCuQpDeltaSubdivInter() const                           { return m_cuQpDeltaSubdivInter;                                                               }
+  void                        setCuChromaQpOffsetSubdivIntra( uint32_t u )              { m_cuChromaQpOffsetSubdivIntra = u;                                                           }
+  uint32_t                    getCuChromaQpOffsetSubdivIntra() const                    { return m_cuChromaQpOffsetSubdivIntra;                                                        }
+  void                        setCuChromaQpOffsetSubdivInter( uint32_t u )              { m_cuChromaQpOffsetSubdivInter = u;                                                           }
+  uint32_t                    getCuChromaQpOffsetSubdivInter() const                    { return m_cuChromaQpOffsetSubdivInter;                                                        }
+  void                        setEnableTMVPFlag( bool b )                               { m_enableTMVPFlag = b;                                                                        }
+  bool                        getEnableTMVPFlag() const                                 { return m_enableTMVPFlag;                                                                     }
+  void                        setMvdL1ZeroFlag( bool b )                                { m_mvdL1ZeroFlag = b;                                                                         }
+  bool                        getMvdL1ZeroFlag() const                                  { return m_mvdL1ZeroFlag;                                                                      }  
+  void                        setMaxNumMergeCand(uint32_t val )                         { m_maxNumMergeCand = val;                                                                     }
+  uint32_t                    getMaxNumMergeCand() const                                { return m_maxNumMergeCand;                                                                    }  
+  void                        setMaxNumAffineMergeCand( uint32_t val )                  { m_maxNumAffineMergeCand = val;                                                               }
+  uint32_t                    getMaxNumAffineMergeCand() const                          { return m_maxNumAffineMergeCand;                                                              }
+  void                        setDisFracMMVD( bool val )                                { m_disFracMMVD = val;                                                                         }
+  bool                        getDisFracMMVD() const                                    { return m_disFracMMVD;                                                                        }  
+  void                        setDisBdofFlag( bool val )                                { m_disBdofFlag = val;                                                                         }
+  bool                        getDisBdofFlag() const                                    { return m_disBdofFlag;                                                                        }
+  void                        setDisDmvrFlag( bool val )                                { m_disDmvrFlag = val;                                                                         }
+  bool                        getDisDmvrFlag() const                                    { return m_disDmvrFlag;                                                                        }
+  void                        setDisProfFlag( bool val )                                { m_disProfFlag = val;                                                                         }
+  bool                        getDisProfFlag() const                                    { return m_disProfFlag;                                                                        }
+  void                        setMaxNumTriangleCand(uint32_t b)                         { m_maxNumTriangleCand = b;                                                                    }
+  uint32_t                    getMaxNumTriangleCand() const                             { return m_maxNumTriangleCand;                                                                 }
+  void                        setMaxNumIBCMergeCand( uint32_t b )                       { m_maxNumIBCMergeCand = b;                                                                    }
+  uint32_t                    getMaxNumIBCMergeCand() const                             { return m_maxNumIBCMergeCand;                                                                 } 
+  void                        setJointCbCrSignFlag( bool b )                            { m_jointCbCrSignFlag = b;                                                                     }
+  bool                        getJointCbCrSignFlag() const                              { return m_jointCbCrSignFlag;                                                                  }
+  void                        setSaoEnabledPresentFlag( bool b )                        { m_saoEnabledPresentFlag = b;                                                                 }
+  bool                        getSaoEnabledPresentFlag() const                          { return m_saoEnabledPresentFlag;                                                              }
+  void                        setSaoEnabledFlag(ChannelType chType, bool b)             { m_saoEnabledFlag[chType] = b;                                                                }
+  bool                        getSaoEnabledFlag(ChannelType chType) const               { return m_saoEnabledFlag[chType];                                                             }  
+  void                        setAlfEnabledPresentFlag( bool b )                        { m_alfEnabledPresentFlag = b;                                                                 }
+  bool                        getAlfEnabledPresentFlag() const                          { return m_alfEnabledPresentFlag;                                                              }
+  void                        setAlfEnabledFlag(ComponentID compId, bool b)             { m_alfEnabledFlag[compId] = b;                                                                }
+  bool                        getAlfEnabledFlag(ComponentID compId) const               { return m_alfEnabledFlag[compId];                                                             }
+  void                        setNumAlfAps(int i)                                       { m_numAlfAps = i;                                                                             }
+  int                         getNumAlfAps() const                                      { return m_numAlfAps;                                                                          }
+  void                        setAlfApsIdChroma(int i)                                  { m_alfChromaApsId = i;                                                                        }
+  int                         getAlfApsIdChroma() const                                 { return m_alfChromaApsId;                                                                     }  
+  void                        setDepQuantEnabledFlag( bool b )                          { m_depQuantEnabledFlag = b;                                                                   }
+  bool                        getDepQuantEnabledFlag() const                            { return m_depQuantEnabledFlag;                                                                }  
+  void                        setSignDataHidingEnabledFlag( bool b )                    { m_signDataHidingEnabledFlag = b;                                                             }
+  bool                        getSignDataHidingEnabledFlag() const                      { return m_signDataHidingEnabledFlag;                                                          }  
+  void                        setDeblockingFilterOverridePresentFlag( bool b )          { m_deblockingFilterOverridePresentFlag = b;                                                   }
+  bool                        getDeblockingFilterOverridePresentFlag() const            { return m_deblockingFilterOverridePresentFlag;                                                }
+  void                        setDeblockingFilterOverrideFlag( bool b )                 { m_deblockingFilterOverrideFlag = b;                                                          }
+  bool                        getDeblockingFilterOverrideFlag() const                   { return m_deblockingFilterOverrideFlag;                                                       }    
+  void                        setDeblockingFilterDisable( bool b )                      { m_deblockingFilterDisable= b;                                                                }  
+  bool                        getDeblockingFilterDisable() const                        { return m_deblockingFilterDisable;                                                            }
+  void                        setDeblockingFilterBetaOffsetDiv2( int i )                { m_deblockingFilterBetaOffsetDiv2 = i;                                                        }  
+  int                         getDeblockingFilterBetaOffsetDiv2()const                  { return m_deblockingFilterBetaOffsetDiv2;                                                     }
+  void                        setDeblockingFilterTcOffsetDiv2( int i )                  { m_deblockingFilterTcOffsetDiv2 = i;                                                          }  
+  int                         getDeblockingFilterTcOffsetDiv2() const                   { return m_deblockingFilterTcOffsetDiv2;                                                       }    
+  void                        setLmcsEnabledFlag(bool b)                                { m_lmcsEnabledFlag = b;                                                                       }
+  // A single const accessor serves const and non-const objects; top-level const on a by-value return has no effect.
+  bool                        getLmcsEnabledFlag() const                                { return m_lmcsEnabledFlag;                                                                    }
+  void                        setLmcsAPS(APS* aps)                                      { m_lmcsAps = aps; m_lmcsApsId = (aps) ? aps->getAPSId() : -1;                                 } //!< stores the pointer and also sets m_lmcsApsId: the APS's ID, or -1 when aps is null
+  APS*                        getLmcsAPS() const                                        { return m_lmcsAps;                                                                            }
+  void                        setLmcsAPSId(int id)                                      { m_lmcsApsId = id;                                                                            }
+  int                         getLmcsAPSId() const                                      { return m_lmcsApsId;                                                                          }
+  void                        setLmcsChromaResidualScaleFlag(bool b)                    { m_lmcsChromaResidualScaleFlag = b;                                                           }
+  // A single const accessor serves const and non-const objects; top-level const on a by-value return has no effect.
+  bool                        getLmcsChromaResidualScaleFlag() const                    { return m_lmcsChromaResidualScaleFlag;                                                        }
+  void                        setScalingListAPS( APS* aps )                             { m_scalingListAps = aps; m_scalingListApsId = ( aps ) ? aps->getAPSId() : -1;                 } //!< stores the pointer and also sets m_scalingListApsId: the APS's ID, or -1 when aps is null
+  APS*                        getScalingListAPS() const                                 { return m_scalingListAps;                                                                     }
+  void                        setScalingListAPSId( int id )                             { m_scalingListApsId = id;                                                                     }
+  int                         getScalingListAPSId() const                               { return m_scalingListApsId;                                                                   }
+  void                        setScalingListPresentFlag( bool b )                       { m_scalingListPresentFlag = b;                                                                }
+  /// Returns m_scalingListPresentFlag; the single const accessor is callable on const and non-const objects alike.
+  bool                        getScalingListPresentFlag() const                         { return m_scalingListPresentFlag;                                                             }
+  
+  unsigned*                   getMinQTSizes() const                                     { return const_cast<unsigned*>(m_minQT);                                                       }  // writable pointer to m_minQT, handed out even though the method is const
+  unsigned*                   getMaxMTTHierarchyDepths() const                          { return const_cast<unsigned*>(m_maxMTTHierarchyDepth);                                        }  // writable pointer to m_maxMTTHierarchyDepth, handed out even though the method is const
+  unsigned*                   getMaxBTSizes() const                                     { return const_cast<unsigned*>(m_maxBTSize);                                                   }  // writable pointer to m_maxBTSize, handed out even though the method is const
+  unsigned*                   getMaxTTSizes() const                                     { return const_cast<unsigned*>(m_maxTTSize);                                                   }  // writable pointer to m_maxTTSize, handed out even though the method is const
+  
+  void                        setMinQTSize(unsigned idx, unsigned minQT)                { m_minQT[idx] = minQT;                                                                        }  // idx as read by getMinQTSize(): 0 = I slice + luma, 1 = non-I slice, 2 = I slice + non-luma channel; idx is not range-checked
+  void                        setMaxMTTHierarchyDepth(unsigned idx, unsigned maxMTT)    { m_maxMTTHierarchyDepth[idx] = maxMTT;                                                        }  // idx as read by getMaxMTTHierarchyDepth(): 0 / 1 / 2 as above; not range-checked
+  void                        setMaxBTSize(unsigned idx, unsigned maxBT)                { m_maxBTSize[idx] = maxBT;                                                                    }  // idx as read by getMaxBTSize(): 0 / 1 / 2 as above; not range-checked
+  void                        setMaxTTSize(unsigned idx, unsigned maxTT)                { m_maxTTSize[idx] = maxTT;                                                                    }  // idx as read by getMaxTTSize(): 0 / 1 / 2 as above; not range-checked
+
+  void                        setMinQTSizes(const unsigned* minQT)                      { m_minQT[0] = minQT[0]; m_minQT[1] = minQT[1]; m_minQT[2] = minQT[2];                                                 }  // reads minQT[0..2]; the source array is never written
+  void                        setMaxMTTHierarchyDepths(const unsigned* maxMTT)          { m_maxMTTHierarchyDepth[0] = maxMTT[0]; m_maxMTTHierarchyDepth[1] = maxMTT[1]; m_maxMTTHierarchyDepth[2] = maxMTT[2]; }  // reads maxMTT[0..2]; the source array is never written
+  void                        setMaxBTSizes(const unsigned* maxBT)                      { m_maxBTSize[0] = maxBT[0]; m_maxBTSize[1] = maxBT[1]; m_maxBTSize[2] = maxBT[2];                                     }  // reads maxBT[0..2]; the source array is never written
+  void                        setMaxTTSizes(const unsigned* maxTT)                      { m_maxTTSize[0] = maxTT[0]; m_maxTTSize[1] = maxTT[1]; m_maxTTSize[2] = maxTT[2];                                     }  // reads maxTT[0..2]; the source array is never written
+    
+  unsigned                    getMinQTSize(SliceType   slicetype,                              // non-I slice -> entry 1; I slice -> entry 0 for luma, entry 2 otherwise
+                                       ChannelType chType = CHANNEL_TYPE_LUMA) const    { if( slicetype != I_SLICE ) { return m_minQT[1]; } return m_minQT[ chType == CHANNEL_TYPE_LUMA ? 0 : 2 ]; }
+  unsigned                    getMaxMTTHierarchyDepth(SliceType   slicetype,                   // non-I slice -> entry 1; I slice -> entry 0 for luma, entry 2 otherwise
+                                       ChannelType chType = CHANNEL_TYPE_LUMA) const    { if( slicetype != I_SLICE ) { return m_maxMTTHierarchyDepth[1]; } return m_maxMTTHierarchyDepth[ chType == CHANNEL_TYPE_LUMA ? 0 : 2 ]; }
+  unsigned                    getMaxBTSize(SliceType   slicetype,                              // non-I slice -> entry 1; I slice -> entry 0 for luma, entry 2 otherwise
+                                       ChannelType chType = CHANNEL_TYPE_LUMA) const    { if( slicetype != I_SLICE ) { return m_maxBTSize[1]; } return m_maxBTSize[ chType == CHANNEL_TYPE_LUMA ? 0 : 2 ]; }
+  unsigned                    getMaxTTSize(SliceType   slicetype,                              // non-I slice -> entry 1; I slice -> entry 0 for luma, entry 2 otherwise
+                                       ChannelType chType = CHANNEL_TYPE_LUMA) const    { if( slicetype != I_SLICE ) { return m_maxTTSize[1]; } return m_maxTTSize[ chType == CHANNEL_TYPE_LUMA ? 0 : 2 ]; }
+  
+  void                        setAlfAPSs(const std::vector<int>& apsIDs)                { m_alfApsId.resize(m_numAlfAps);  // m_alfApsId always ends up with m_numAlfAps entries
+                                                                                          for (int i = 0; i < m_numAlfAps && i < static_cast<int>(apsIDs.size()); i++)  // never index past the end of apsIDs; entries beyond it keep what resize() left
+                                                                                          {
+                                                                                            m_alfApsId[i] = apsIDs[i];
+                                                                                          }
+                                                                                        }
+
+  std::vector<int>            getAlfAPSs() const                                        { return m_alfApsId; }  // returns a copy of the stored id list, not a reference to it
+
+};
 
 /// slice header class
 class Slice
@@ -1715,23 +2059,24 @@ class Slice
 private:
   //  Bitstream writing
   bool                       m_saoEnabledFlag[MAX_NUM_CHANNEL_TYPE];
-  int                        m_iPPSId;               ///< picture parameter set ID
-  bool                       m_PicOutputFlag;        ///< pic_output_flag
   int                        m_iPOC;
   int                        m_iLastIDR;
   int                        m_iAssociatedIRAP;
   NalUnitType                m_iAssociatedIRAPType;
-  const ReferencePictureSet* m_pRPS;             //< pointer to RPS, either in the SPS or the local RPS in the same slice header
-  ReferencePictureSet        m_localRPS;             //< RPS when present in slice header
-  int                        m_rpsIdx;               //< index of used RPS in the SPS or -1 for local RPS in the slice header
-  RefPicListModification     m_RefPicListModification;
+  bool                       m_enableDRAPSEI;        //< read/written through getEnableDRAPSEI() / setEnableDRAPSEI()
+  bool                       m_useLTforDRAP;         //< read/written through getUseLTforDRAP() / setUseLTforDRAP()
+  bool                       m_isDRAP;               //< value reported by isDRAP(), set through setDRAP()
+  int                        m_latestDRAPPOC;        //< MAX_INT is the value cvsHasPreviousDRAP() treats as "no previous DRAP"
+  const ReferencePictureList* m_pRPL0;                //< pointer to RPL for L0, either in the SPS or the local RPL in the same slice header
+  const ReferencePictureList* m_pRPL1;                //< pointer to RPL for L1, either in the SPS or the local RPL in the same slice header
+  ReferencePictureList        m_localRPL0;            //< RPL for L0 when present in slice header
+  ReferencePictureList        m_localRPL1;            //< RPL for L1 when present in slice header
+  int                         m_rpl0Idx;              //< index of the L0 RPL used from the SPS, or -1 for the local RPL in the slice header
+  int                         m_rpl1Idx;              //< index of the L1 RPL used from the SPS, or -1 for the local RPL in the slice header
   NalUnitType                m_eNalUnitType;         ///< Nal unit type for the slice
   SliceType                  m_eSliceType;
   int                        m_iSliceQp;
   int                        m_iSliceQpBase;
-#if HEVC_DEPENDENT_SLICES
-  bool                       m_dependentSliceSegmentFlag;
-#endif
   bool                       m_ChromaQpAdjEnabled;
   bool                       m_deblockingFilterDisable;
   bool                       m_deblockingFilterOverrideFlag;      //< offsets for deblocking filter inherit from PPS
@@ -1741,10 +2086,6 @@ private:
   int                        m_aiNumRefIdx   [NUM_REF_PIC_LIST_01];    //  for multiple reference of current slice
   bool                       m_pendingRasInit;
 
-  bool                       m_depQuantEnabledFlag;
-#if HEVC_USE_SIGN_HIDING
-  bool                       m_signDataHidingEnabledFlag;
-#endif
   bool                       m_bCheckLDC;
 
   bool                       m_biDirPred;
@@ -1752,127 +2093,103 @@ private:
 
   //  Data
   int                        m_iSliceQpDelta;
-  int                        m_iSliceChromaQpDelta[MAX_NUM_COMPONENT];
+  int                        m_iSliceChromaQpDelta[MAX_NUM_COMPONENT+1];  // NOTE(review): one slot more than MAX_NUM_COMPONENT - presumably for a joint Cb-Cr offset; confirm
   Picture*                   m_apcRefPicList [NUM_REF_PIC_LIST_01][MAX_NUM_REF+1];
   int                        m_aiRefPOCList  [NUM_REF_PIC_LIST_01][MAX_NUM_REF+1];
   bool                       m_bIsUsedAsLongTerm[NUM_REF_PIC_LIST_01][MAX_NUM_REF+1];
   int                        m_iDepth;
+  Picture*                   m_scaledRefPicList[NUM_REF_PIC_LIST_01][MAX_NUM_REF + 1];  // same dimensions as m_apcRefPicList
+  Picture*                   m_savedRefPicList[NUM_REF_PIC_LIST_01][MAX_NUM_REF + 1];   // same dimensions as m_apcRefPicList
+  std::pair<int, int>        m_scalingRatio[NUM_REF_PIC_LIST_01][MAX_NUM_REF_PICS];     // NOTE(review): second bound is MAX_NUM_REF_PICS, unlike MAX_NUM_REF + 1 on the two arrays above - confirm this is intended
 
 
   // access channel
-#if HEVC_VPS
+  const DPS*                 m_dps;
   const VPS*                 m_pcVPS;
-#endif
   const SPS*                 m_pcSPS;
   const PPS*                 m_pcPPS;
   Picture*                   m_pcPic;
+  const PicHeader*           m_pcPicHeader;    //!< pointer to picture header structure
   bool                       m_colFromL0Flag;  // collocated picture from List0 flag
 
-  bool                       m_noOutputPriorPicsFlag;
-  bool                       m_noRaslOutputFlag;
+  bool                       m_noIncorrectPicOutputFlag;
   bool                       m_handleCraAsCvsStartFlag;
 
   uint32_t                       m_colRefIdx;
-  uint32_t                       m_maxNumMergeCand;
-  uint32_t                   m_maxNumAffineMergeCand;
-  bool                       m_disFracMMVD;
   double                     m_lambdas[MAX_NUM_COMPONENT];
 
   bool                       m_abEqualRef  [NUM_REF_PIC_LIST_01][MAX_NUM_REF][MAX_NUM_REF];
   uint32_t                       m_uiTLayer;
   bool                       m_bTLayerSwitchingFlag;
 
-  SliceConstraint            m_sliceMode;
-  uint32_t                       m_sliceArgument;
-  uint32_t                       m_sliceCurStartCtuTsAddr;
-  uint32_t                       m_sliceCurEndCtuTsAddr;
+  SliceMap                   m_sliceMap;                     //!< list of CTUs in current slice - raster scan CTU addresses
   uint32_t                       m_independentSliceIdx;
-#if HEVC_DEPENDENT_SLICES
-  uint32_t                       m_sliceSegmentIdx;
-  SliceConstraint            m_sliceSegmentMode;
-  uint32_t                       m_sliceSegmentArgument;
-  uint32_t                       m_sliceSegmentCurStartCtuTsAddr;
-  uint32_t                       m_sliceSegmentCurEndCtuTsAddr;
-#endif
   bool                       m_nextSlice;
-#if HEVC_DEPENDENT_SLICES
-  bool                       m_nextSliceSegment;
-#endif
   uint32_t                       m_sliceBits;
-#if HEVC_DEPENDENT_SLICES
-  uint32_t                       m_sliceSegmentBits;
-#endif
   bool                       m_bFinalized;
 
+
   bool                       m_bTestWeightPred;
   bool                       m_bTestWeightBiPred;
   WPScalingParam             m_weightPredTable[NUM_REF_PIC_LIST_01][MAX_NUM_REF][MAX_NUM_COMPONENT]; // [REF_PIC_LIST_0 or REF_PIC_LIST_1][refIdx][0:Y, 1:U, 2:V]
   WPACDCParam                m_weightACDCParam[MAX_NUM_COMPONENT];
   ClpRngs                    m_clpRngs;
   std::vector<uint32_t>          m_substreamSizes;
+  uint32_t                   m_numEntryPoints;
 
   bool                       m_cabacInitFlag;
 
-  bool                       m_bLMvdL1Zero;
-#if !JVET_M0101_HLS
-  bool                       m_temporalLayerNonReferenceFlag;
-#endif
-  bool                       m_LFCrossSliceBoundaryFlag;
+  uint32_t                   m_sliceSubPicId;
 
-  bool                       m_enableTMVPFlag;
+  
+  
 
 
   SliceType                  m_encCABACTableIdx;           // Used to transmit table selection across slices.
 
   clock_t                    m_iProcessingStartTime;
   double                     m_dProcessingTime;
-  bool                       m_splitConsOverrideFlag;
-  uint32_t                   m_uiMinQTSize;
-  uint32_t                   m_uiMaxBTDepth;
-  uint32_t                   m_uiMaxTTSize;
-
-  uint32_t                   m_uiMinQTSizeIChroma;
-  uint32_t                   m_uiMaxBTDepthIChroma;
-  uint32_t                   m_uiMaxBTSizeIChroma;
-  uint32_t                   m_uiMaxTTSizeIChroma;
-  uint32_t                       m_uiMaxBTSize;
-
-  int                        m_apsId;
-  APS*                       m_aps;
-  bool                       m_tileGroupAlfEnabledFlag;
-  SliceReshapeInfo           m_sliceReshapeInfo;
+  
+  int                        m_rpPicOrderCntVal;
+  APS*                       m_alfApss[ALF_CTB_MAX_NUM_APS];
+  bool                       m_tileGroupAlfEnabledFlag[MAX_NUM_COMPONENT];
+  int                        m_tileGroupNumAps;
+  std::vector<int>           m_tileGroupLumaApsId;
+  int                        m_tileGroupChromaApsId;
+  bool                       m_disableSATDForRd;
 public:
                               Slice();
   virtual                     ~Slice();
   void                        initSlice();
+  void                        inheritFromPicHeader( PicHeader *picHeader, const PPS *pps, const SPS *sps );
+  void                        setPicHeader( const PicHeader* pcPicHeader )           { m_pcPicHeader = pcPicHeader;                                  }
+  const PicHeader*            getPicHeader() const                                   { return m_pcPicHeader;                                         }
   int                         getRefIdx4MVPair( RefPicList eCurRefPicList, int nCurRefIdx );
-#if HEVC_VPS
-  void                        setVPS( VPS* pcVPS )                                   { m_pcVPS = pcVPS;                                              }
-  const VPS*                  getVPS() const                                         { return m_pcVPS;                                               }
-#endif
+  void                        setDPS( const DPS* dps )                               { m_dps = dps;                                                  }  // stores the pointer only; the DPS object itself is not copied
+  const DPS*                  getDPS() const                                         { return m_dps;                                                 }
+
   void                        setSPS( const SPS* pcSPS )                             { m_pcSPS = pcSPS;                                              }
   const SPS*                  getSPS() const                                         { return m_pcSPS;                                               }
+  void                        setVPS( const VPS* pcVPS )                             { m_pcVPS = pcVPS;                                              }
+  const VPS*                  getVPS() const                                         { return m_pcVPS;                                               }
 
-  void                        setPPS( const PPS* pcPPS )                             { m_pcPPS = pcPPS; m_iPPSId = (pcPPS) ? pcPPS->getPPSId() : -1; }
+  void                        setPPS( const PPS* pcPPS )                             { m_pcPPS = pcPPS;                                              }
   const PPS*                  getPPS() const                                         { return m_pcPPS;                                               }
 
-  void                        setPPSId( int PPSId )                                  { m_iPPSId = PPSId;                                             }
-  int                         getPPSId() const                                       { return m_iPPSId;                                              }
-  void                        setAPS(APS* aps)                                     { m_aps = aps; m_apsId = (aps) ? aps->getAPSId() : -1; }
-  APS*                        getAPS()                                               { return m_aps;                                               }
-  void                        setAPSId(int apsId)                                    { m_apsId = apsId;                                             }
-  int                         getAPSId() const                                       { return m_apsId;                                              }
-  void                        setPicOutputFlag( bool b   )                           { m_PicOutputFlag = b;                                          }
-  bool                        getPicOutputFlag() const                               { return m_PicOutputFlag;                                       }
+  void                        setAlfAPSs(APS** apss)                                 { memcpy(m_alfApss, apss, sizeof(m_alfApss));                   }  // copies ALF_CTB_MAX_NUM_APS pointers (not the APS objects); apss must be non-null and hold that many entries
+  APS**                       getAlfAPSs()                                           { return m_alfApss;                                             }  // pointer to the internal array: callers can overwrite the stored APS pointers
   void                        setSaoEnabledFlag(ChannelType chType, bool s)          {m_saoEnabledFlag[chType] =s;                                   }
   bool                        getSaoEnabledFlag(ChannelType chType) const            { return m_saoEnabledFlag[chType];                              }
-  void                        setRPS( const ReferencePictureSet *pcRPS )             { m_pRPS = pcRPS;                                               }
-  const ReferencePictureSet*  getRPS()                                               { return m_pRPS;                                                }
-  ReferencePictureSet*        getLocalRPS()                                          { return &m_localRPS;                                           }
-
-  void                        setRPSidx( int rpsIdx )                                { m_rpsIdx = rpsIdx;                                            }
-  int                         getRPSidx() const                                      { return m_rpsIdx;                                              }
-  RefPicListModification*     getRefPicListModification()                            { return &m_RefPicListModification;                             }
+  void                        setRPL0(const ReferencePictureList *pcRPL)             { m_pRPL0 = pcRPL;                                             }
+  void                        setRPL1(const ReferencePictureList *pcRPL)             { m_pRPL1 = pcRPL;                                             }
+  const ReferencePictureList* getRPL0() const                                        { return m_pRPL0;                                              }  // returns the pointer stored in m_pRPL0; usable on a const Slice
+  const ReferencePictureList* getRPL1() const                                        { return m_pRPL1;                                              }  // returns the pointer stored in m_pRPL1; usable on a const Slice
+  ReferencePictureList*       getLocalRPL0()                                         { return &m_localRPL0;                                         }
+  ReferencePictureList*       getLocalRPL1()                                         { return &m_localRPL1;                                         }
+  void                        setRPL0idx(int rplIdx)                                 { m_rpl0Idx = rplIdx;                                          }
+  void                        setRPL1idx(int rplIdx)                                 { m_rpl1Idx = rplIdx;                                          }
+  int                         getRPL0idx() const                                     { return m_rpl0Idx;                                            }
+  int                         getRPL1idx() const                                     { return m_rpl1Idx;                                            }
   void                        setLastIDR(int iIDRPOC)                                { m_iLastIDR = iIDRPOC;                                         }
   int                         getLastIDR() const                                     { return m_iLastIDR;                                            }
   void                        setAssociatedIRAPPOC(int iAssociatedIRAPPOC)           { m_iAssociatedIRAP = iAssociatedIRAPPOC;                       }
@@ -1883,10 +2200,6 @@ public:
   int                         getPOC() const                                         { return m_iPOC;                                                }
   int                         getSliceQp() const                                     { return m_iSliceQp;                                            }
   bool                        getUseWeightedPrediction() const                       { return( (m_eSliceType==P_SLICE && testWeightPred()) || (m_eSliceType==B_SLICE && testWeightBiPred()) ); }
-#if HEVC_DEPENDENT_SLICES
-  bool                        getDependentSliceSegmentFlag() const                   { return m_dependentSliceSegmentFlag;                           }
-  void                        setDependentSliceSegmentFlag(bool val)                 { m_dependentSliceSegmentFlag = val;                            }
-#endif
   int                         getSliceQpDelta() const                                { return m_iSliceQpDelta;                                       }
   int                         getSliceChromaQpDelta(ComponentID compID) const        { return isLuma(compID) ? 0 : m_iSliceChromaQpDelta[compID];    }
   bool                        getUseChromaQpAdj() const                              { return m_ChromaQpAdjEnabled;                                  }
@@ -1909,25 +2222,16 @@ public:
   bool                        getIsUsedAsLongTerm(int i, int j) const                { return m_bIsUsedAsLongTerm[i][j];                             }
   void                        setIsUsedAsLongTerm(int i, int j, bool value)          { m_bIsUsedAsLongTerm[i][j] = value;                            }
   bool                        getCheckLDC() const                                    { return m_bCheckLDC;                                           }
-  bool                        getMvdL1ZeroFlag() const                               { return m_bLMvdL1Zero;                                         }
-  int                         getNumRpsCurrTempList() const;
   int                         getList1IdxToList0Idx( int list1Idx ) const            { return m_list1IdxToList0Idx[list1Idx];                        }
-#if !JVET_M0101_HLS
-  bool                        isReferenceNalu() const                                { return ((getNalUnitType() <= NAL_UNIT_RESERVED_VCL_R15) && (getNalUnitType()%2 != 0)) || ((getNalUnitType() >= NAL_UNIT_CODED_SLICE_BLA_W_LP) && (getNalUnitType() <= NAL_UNIT_RESERVED_IRAP_VCL23) ); }
-#endif
   void                        setPOC( int i )                                        { m_iPOC              = i;                                      }
   void                        setNalUnitType( NalUnitType e )                        { m_eNalUnitType      = e;                                      }
   NalUnitType                 getNalUnitType() const                                 { return m_eNalUnitType;                                        }
   bool                        getRapPicFlag() const;
   bool                        getIdrPicFlag() const                                  { return getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_W_RADL || getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_N_LP; }
-#if !JVET_M0101_HLS
-  bool                        isIRAP() const                                         { return (getNalUnitType() >= 16) && (getNalUnitType() <= 23);  }
-  bool                        isIDRorBLA() const                                      { return (getNalUnitType() >= 16) && (getNalUnitType() <= 20);  }
-#else
-  bool                        isIRAP() const { return (getNalUnitType() >= NAL_UNIT_CODED_SLICE_IDR_W_RADL) && (getNalUnitType() <= NAL_UNIT_RESERVED_IRAP_VCL13); }
-  bool                        isIDRorBLA() const { return (getNalUnitType() >= NAL_UNIT_CODED_SLICE_IDR_W_RADL) && (getNalUnitType() <= NAL_UNIT_CODED_SLICE_IDR_N_LP); }
-#endif
-  void                        checkCRA(const ReferencePictureSet *pReferencePictureSet, int& pocCRA, NalUnitType& associatedIRAPType, PicList& rcListPic);
+  bool                        isIRAP() const                                         { const NalUnitType nut = getNalUnitType(); return nut >= NAL_UNIT_CODED_SLICE_IDR_W_RADL && nut <= NAL_UNIT_CODED_SLICE_CRA; }  // range test: depends on the enum order from IDR_W_RADL up to CRA
+  bool                        isIDRorBLA() const                                     { const NalUnitType nut = getNalUnitType(); return nut == NAL_UNIT_CODED_SLICE_IDR_W_RADL || nut == NAL_UNIT_CODED_SLICE_IDR_N_LP; }  // only the two IDR NAL unit types are tested
+  void                        checkCRA(const ReferencePictureList *pRPL0, const ReferencePictureList *pRPL1, int& pocCRA, NalUnitType& associatedIRAPType, PicList& rcListPic);
+  void                        checkSTSA(PicList& rcListPic);
   void                        decodingRefreshMarking(int& pocCRA, bool& bRefreshPending, PicList& rcListPic, const bool bEfficientFieldIRAPEnabled);
   void                        setSliceType( SliceType e )                            { m_eSliceType        = e;                                      }
   void                        setSliceQp( int i )                                    { m_iSliceQp          = i;                                      }
@@ -1943,13 +2247,12 @@ public:
   void                        setPic( Picture* p )                                   { m_pcPic             = p;                                      }
   void                        setDepth( int iDepth )                                 { m_iDepth            = iDepth;                                 }
 
-  void                        setRefPicList( PicList& rcListPic, bool checkNumPocTotalCurr = false, bool bCopyL0toL1ErrorCase = false );
+  void                        constructRefPicList(PicList& rcListPic);
   void                        setRefPOCList();
 
   void                        setColFromL0Flag( bool colFromL0 )                     { m_colFromL0Flag = colFromL0;                                  }
   void                        setColRefIdx( uint32_t refIdx)                             { m_colRefIdx = refIdx;                                         }
   void                        setCheckLDC( bool b )                                  { m_bCheckLDC = b;                                              }
-  void                        setMvdL1ZeroFlag( bool b)                              { m_bLMvdL1Zero = b;                                            }
 
   void                        setBiDirPred( bool b, int refIdx0, int refIdx1 ) { m_biDirPred = b; m_symRefIdx[0] = refIdx0; m_symRefIdx[1] = refIdx1; }
   bool                        getBiDirPred() const { return m_biDirPred; }
@@ -1959,36 +2262,26 @@ public:
   bool                        isInterB() const                                       { return m_eSliceType == B_SLICE;                               }
   bool                        isInterP() const                                       { return m_eSliceType == P_SLICE;                               }
 
+  bool                        getEnableDRAPSEI () const                              { return m_enableDRAPSEI;                                       }
+  void                        setEnableDRAPSEI ( bool b )                            { m_enableDRAPSEI = b;                                          }
+  bool                        getUseLTforDRAP () const                               { return m_useLTforDRAP;                                        }
+  void                        setUseLTforDRAP ( bool b )                             { m_useLTforDRAP = b;                                           }
+  bool                        isDRAP () const                                        { return m_isDRAP;                                              }
+  void                        setDRAP ( bool b )                                     { m_isDRAP = b;                                                 }
+  void                        setLatestDRAPPOC ( int i )                             { m_latestDRAPPOC = i;                                          }
+  int                         getLatestDRAPPOC () const                              { return m_latestDRAPPOC;                                       }
+  bool                        cvsHasPreviousDRAP() const                             { return m_latestDRAPPOC != MAX_INT;                            }  // MAX_INT in m_latestDRAPPOC is the "none" marker
+  bool                        isPocRestrictedByDRAP( int poc, bool precedingDRAPinDecodingOrder );
+  bool                        isPOCInRefPicList( const ReferencePictureList *rpl, int poc );
+  void                        checkConformanceForDRAP( uint32_t temporalId );
+
   void                        setLambdas( const double lambdas[MAX_NUM_COMPONENT] )  { for (int component = 0; component < MAX_NUM_COMPONENT; component++) m_lambdas[component] = lambdas[component]; }
   const double*               getLambdas() const                                     { return m_lambdas;                                             }
 
-  void                        setSplitConsOverrideFlag(bool b)                       { m_splitConsOverrideFlag = b; }
-  bool                        getSplitConsOverrideFlag() const                       { return m_splitConsOverrideFlag; }
-  void                        setMinQTSize(int i)                                    { m_uiMinQTSize = i; }
-  uint32_t                    getMinQTSize() const                                   { return m_uiMinQTSize; }
-  void                        setMaxBTDepth(int i)                                   { m_uiMaxBTDepth = i; }
-  uint32_t                    getMaxBTDepth() const                                  { return m_uiMaxBTDepth; }
-  void                        setMaxTTSize(int i)                                    { m_uiMaxTTSize = i; }
-  uint32_t                    getMaxTTSize() const                                   { return m_uiMaxTTSize; }
-
-  void                        setMinQTSizeIChroma(int i)                             { m_uiMinQTSizeIChroma = i; }
-  uint32_t                    getMinQTSizeIChroma() const                            { return m_uiMinQTSizeIChroma; }
-  void                        setMaxBTDepthIChroma(int i)                            { m_uiMaxBTDepthIChroma = i; }
-  uint32_t                    getMaxBTDepthIChroma() const                           { return m_uiMaxBTDepthIChroma; }
-  void                        setMaxBTSizeIChroma(int i)                             { m_uiMaxBTSizeIChroma = i; }
-  uint32_t                    getMaxBTSizeIChroma() const                            { return m_uiMaxBTSizeIChroma; }
-  void                        setMaxTTSizeIChroma(int i)                             { m_uiMaxTTSizeIChroma = i; }
-  uint32_t                    getMaxTTSizeIChroma() const                            { return m_uiMaxTTSizeIChroma; }
-  void                        setMaxBTSize(int i)                                    { m_uiMaxBTSize = i; }
-  uint32_t                        getMaxBTSize() const                                   { return m_uiMaxBTSize; }
-
-  void                        setDepQuantEnabledFlag( bool b )                       { m_depQuantEnabledFlag = b; }
-  bool                        getDepQuantEnabledFlag() const                         { return m_depQuantEnabledFlag; }
-#if HEVC_USE_SIGN_HIDING
-  void                        setSignDataHidingEnabledFlag( bool b )                 { m_signDataHidingEnabledFlag = b;              }
-  bool                        getSignDataHidingEnabledFlag() const                   { return m_signDataHidingEnabledFlag;           }
-#endif
-
+  void                        setSliceSubPicId(int i)                                { m_sliceSubPicId = static_cast<uint32_t>(i);                   }  // the int argument is converted to the uint32_t member
+  uint32_t                    getSliceSubPicId() const                              { return m_sliceSubPicId; }
+  uint32_t                    getCuQpDeltaSubdiv() const                             { if( isIntra() ) { return m_pcPicHeader->getCuQpDeltaSubdivIntra(); } return m_pcPicHeader->getCuQpDeltaSubdivInter(); }  // m_pcPicHeader is dereferenced unchecked: it must be set first
+  uint32_t                    getCuChromaQpOffsetSubdiv() const                      { if( isIntra() ) { return m_pcPicHeader->getCuChromaQpOffsetSubdivIntra(); } return m_pcPicHeader->getCuChromaQpOffsetSubdivInter(); }  // m_pcPicHeader is dereferenced unchecked: it must be set first
   void                        initEqualRef();
   bool                        isEqualRef( RefPicList e, int iRefIdx1, int iRefIdx2 )
   {
@@ -2016,61 +2309,34 @@ public:
   void                        setTLayer( uint32_t uiTLayer )                             { m_uiTLayer = uiTLayer;                                        }
 
   void                        checkLeadingPictureRestrictions( PicList& rcListPic )                                         const;
-  void                        applyReferencePictureSet( PicList& rcListPic, const ReferencePictureSet *RPSList)             const;
+  int                         checkThatAllRefPicsAreAvailable(PicList& rcListPic, const ReferencePictureList* pRPL, int rplIdx, bool printErrors, int* refPicIndex) const;
+  void                        applyReferencePictureListBasedMarking( PicList& rcListPic, const ReferencePictureList *pRPL0, const ReferencePictureList *pRPL1, const int layerId )  const;
   bool                        isTemporalLayerSwitchingPoint( PicList& rcListPic )                                           const;
   bool                        isStepwiseTemporalLayerSwitchingPointCandidate( PicList& rcListPic )                          const;
-  int                         checkThatAllRefPicsAreAvailable( PicList& rcListPic, const ReferencePictureSet *pReferencePictureSet, bool printErrors, int pocRandomAccess = 0, bool bUseRecoveryPoint = false) const;
-  void                        createExplicitReferencePictureSetFromReference(PicList& rcListPic, const ReferencePictureSet *pReferencePictureSet, bool isRAP, int pocRandomAccess, bool bUseRecoveryPoint, const bool bEfficientFieldIRAPEnabled
-                              , bool isEncodeLtRef, bool isCompositeRefEnable
-  );
-  void                        setMaxNumMergeCand(uint32_t val )                          { m_maxNumMergeCand = val;                                      }
-  uint32_t                    getMaxNumMergeCand() const                             { return m_maxNumMergeCand;                                     }
-  void                        setMaxNumAffineMergeCand( uint32_t val )               { m_maxNumAffineMergeCand = val;  }
-  uint32_t                    getMaxNumAffineMergeCand() const                       { return m_maxNumAffineMergeCand; }
-  void                        setDisFracMMVD( bool val )                             { m_disFracMMVD = val;                                          }
-  bool                        getDisFracMMVD() const                                 { return m_disFracMMVD;                                         }
-  void                        setNoOutputPriorPicsFlag( bool val )                   { m_noOutputPriorPicsFlag = val;                                }
-  bool                        getNoOutputPriorPicsFlag() const                       { return m_noOutputPriorPicsFlag;                               }
-
-  void                        setNoRaslOutputFlag( bool val )                        { m_noRaslOutputFlag = val;                                     }
-  bool                        getNoRaslOutputFlag() const                            { return m_noRaslOutputFlag;                                    }
+  int                         checkThatAllRefPicsAreAvailable(PicList& rcListPic, const ReferencePictureList *pRPL, int rplIdx, bool printErrors)                const;
+  void                        setNoIncorrectPicOutputFlag(bool val)                  { m_noIncorrectPicOutputFlag = val;                             }
+  bool                        getNoIncorrectPicOutputFlag() const                    { return m_noIncorrectPicOutputFlag;                                    }
 
   void                        setHandleCraAsCvsStartFlag( bool val )                 { m_handleCraAsCvsStartFlag = val;                                   }
   bool                        getHandleCraAsCvsStartFlag() const                     { return m_handleCraAsCvsStartFlag;                                  }
 
-  void                        setSliceMode( SliceConstraint mode )                   { m_sliceMode = mode;                                           }
-  SliceConstraint             getSliceMode() const                                   { return m_sliceMode;                                           }
-  void                        setSliceArgument( uint32_t uiArgument )                    { m_sliceArgument = uiArgument;                                 }
-  uint32_t                        getSliceArgument() const                               { return m_sliceArgument;                                       }
-  void                        setSliceCurStartCtuTsAddr( uint32_t ctuTsAddr )            { m_sliceCurStartCtuTsAddr = ctuTsAddr;                         } // CTU Tile-scan address (as opposed to raster-scan)
-  uint32_t                        getSliceCurStartCtuTsAddr() const                      { return m_sliceCurStartCtuTsAddr;                              } // CTU Tile-scan address (as opposed to raster-scan)
-  void                        setSliceCurEndCtuTsAddr( uint32_t ctuTsAddr )              { m_sliceCurEndCtuTsAddr = ctuTsAddr;                           } // CTU Tile-scan address (as opposed to raster-scan)
-  uint32_t                        getSliceCurEndCtuTsAddr() const                        { return m_sliceCurEndCtuTsAddr;                                } // CTU Tile-scan address (as opposed to raster-scan)
+  void                        setNumTilesInSlice( uint32_t u )                       { m_sliceMap.setNumTilesInSlice( u );                                       }
+  uint32_t                    getNumTilesInSlice() const                             { return m_sliceMap.getNumTilesInSlice();                                   }
+  void                        setSliceMap( SliceMap map )                            { m_sliceMap = map;                                                         }
+  uint32_t                    getFirstCtuRsAddrInSlice() const                       { return m_sliceMap.getCtuAddrInSlice(0);                                   }
+  void                        setSliceID( uint32_t u )                               { m_sliceMap.setSliceID( u );                                               }
+  uint32_t                    getSliceID() const                                     { return m_sliceMap.getSliceID();                                           }
+  uint32_t                    getNumCtuInSlice() const                               { return m_sliceMap.getNumCtuInSlice();                                     }
+  uint32_t                    getCtuAddrInSlice( int idx ) const                     { return m_sliceMap.getCtuAddrInSlice( idx );                               }
+  void                        initSliceMap()                                         { m_sliceMap.initSliceMap();                                                }
+  void                        addCtusToSlice( uint32_t startX, uint32_t stopX, 
+                                              uint32_t startY, uint32_t stopY, 
+                                              uint32_t picWidthInCtbsY )             { m_sliceMap.addCtusToSlice(startX, stopX, startY, stopY, picWidthInCtbsY); }
   void                        setIndependentSliceIdx( uint32_t i)                        { m_independentSliceIdx = i;                                    }
   uint32_t                        getIndependentSliceIdx() const                         { return  m_independentSliceIdx;                                }
-#if HEVC_DEPENDENT_SLICES
-  void                        setSliceSegmentIdx( uint32_t i)                            { m_sliceSegmentIdx = i;                                        }
-  uint32_t                        getSliceSegmentIdx() const                             { return  m_sliceSegmentIdx;                                    }
-#endif
   void                        copySliceInfo(Slice *pcSliceSrc, bool cpyAlmostAll = true);
-#if HEVC_DEPENDENT_SLICES
-  void                        setSliceSegmentMode( SliceConstraint mode )            { m_sliceSegmentMode = mode;                                    }
-  SliceConstraint             getSliceSegmentMode() const                            { return m_sliceSegmentMode;                                    }
-  void                        setSliceSegmentArgument( uint32_t uiArgument )             { m_sliceSegmentArgument = uiArgument;                          }
-  uint32_t                        getSliceSegmentArgument() const                        { return m_sliceSegmentArgument;                                }
-#if HEVC_TILES_WPP
-  void                        setSliceSegmentCurStartCtuTsAddr( uint32_t ctuTsAddr )     { m_sliceSegmentCurStartCtuTsAddr = ctuTsAddr;                  } // CTU Tile-scan address (as opposed to raster-scan)
-  uint32_t                        getSliceSegmentCurStartCtuTsAddr() const               { return m_sliceSegmentCurStartCtuTsAddr;                       } // CTU Tile-scan address (as opposed to raster-scan)
-  void                        setSliceSegmentCurEndCtuTsAddr( uint32_t ctuTsAddr )       { m_sliceSegmentCurEndCtuTsAddr = ctuTsAddr;                    } // CTU Tile-scan address (as opposed to raster-scan)
-  uint32_t                        getSliceSegmentCurEndCtuTsAddr() const                 { return m_sliceSegmentCurEndCtuTsAddr;                         } // CTU Tile-scan address (as opposed to raster-scan)
-#endif
-#endif
   void                        setSliceBits( uint32_t uiVal )                             { m_sliceBits = uiVal;                                          }
   uint32_t                        getSliceBits() const                                   { return m_sliceBits;                                           }
-#if HEVC_DEPENDENT_SLICES
-  void                        setSliceSegmentBits( uint32_t uiVal )                      { m_sliceSegmentBits = uiVal;                                   }
-  uint32_t                        getSliceSegmentBits() const                            { return m_sliceSegmentBits;                                    }
-#endif
   void                        setFinalized( bool uiVal )                             { m_bFinalized = uiVal;                                         }
   bool                        getFinalized() const                                   { return m_bFinalized;                                          }
   bool                        testWeightPred( ) const                                { return m_bTestWeightPred;                                     }
@@ -2099,15 +2365,6 @@ public:
 
   void                        setCabacInitFlag( bool val )                           { m_cabacInitFlag = val;                                        } //!< set CABAC initial flag
   bool                        getCabacInitFlag()                               const { return m_cabacInitFlag;                                       } //!< get CABAC initial flag
-#if !JVET_M0101_HLS
-  bool                        getTemporalLayerNonReferenceFlag()               const { return m_temporalLayerNonReferenceFlag;                       }
-  void                        setTemporalLayerNonReferenceFlag(bool x)               { m_temporalLayerNonReferenceFlag = x;                          }
-#endif
-  void                        setLFCrossSliceBoundaryFlag( bool   val )              { m_LFCrossSliceBoundaryFlag = val;                             }
-  bool                        getLFCrossSliceBoundaryFlag()                    const { return m_LFCrossSliceBoundaryFlag;                            }
-
-  void                        setEnableTMVPFlag( bool   b )                          { m_enableTMVPFlag = b;                                         }
-  bool                        getEnableTMVPFlag() const                              { return m_enableTMVPFlag;                                      }
 
   void                        setEncCABACTableIdx( SliceType idx )                   { m_encCABACTableIdx = idx;                                     }
   SliceType                   getEncCABACTableIdx() const                            { return m_encCABACTableIdx;                                    }
@@ -2126,20 +2383,39 @@ public:
   void resetProcessingTime()       { m_dProcessingTime = m_iProcessingStartTime = 0; }
   double getProcessingTime() const { return m_dProcessingTime; }
 
-  bool                        getTileGroupAlfEnabledFlag() const { return m_tileGroupAlfEnabledFlag; }
-  void                        setTileGroupAlfEnabledFlag(bool b) { m_tileGroupAlfEnabledFlag = b; }
+  void                        resetTileGroupAlfEnabledFlag() { memset(m_tileGroupAlfEnabledFlag, 0, sizeof(m_tileGroupAlfEnabledFlag)); }
+  bool                        getTileGroupAlfEnabledFlag(ComponentID compId) const { return m_tileGroupAlfEnabledFlag[compId]; }
+  void                        setTileGroupAlfEnabledFlag(ComponentID compId, bool b) { m_tileGroupAlfEnabledFlag[compId] = b; }
+  int                         getTileGroupNumAps() const { return m_tileGroupNumAps; }
+  void                        setTileGroupNumAps(int i) { m_tileGroupNumAps = i; }
+  int                         getTileGroupApsIdChroma() const { return m_tileGroupChromaApsId; }
+  void                        setTileGroupApsIdChroma(int i) { m_tileGroupChromaApsId = i; }
+  std::vector<int32_t>        getTileGroupApsIdLuma() const { return m_tileGroupLumaApsId; }
+  void                        setAlfAPSs(std::vector<int> ApsIDs)
+  {
+    m_tileGroupLumaApsId.resize(m_tileGroupNumAps);
+    for (int i = 0; i < m_tileGroupNumAps; i++)
+    {
+      m_tileGroupLumaApsId[i] = ApsIDs[i];
+    }
+  }
+  void                        setDisableSATDForRD(bool b) { m_disableSATDForRd = b; }
+  bool                        getDisableSATDForRD() { return m_disableSATDForRd; }
+  void                        scaleRefPicList( Picture *scaledRefPic[ ], PicHeader *picHeader, APS** apss, APS* lmcsAps, APS* scalingListAps, const bool isDecoder );
+  void                        freeScaledRefPicList( Picture *scaledRefPic[] );
+  bool                        checkRPR();
+  const std::pair<int, int>&  getScalingRatio( const RefPicList refPicList, const int refIdx )  const { CHECK( refIdx < 0, "Invalid reference index" ); return m_scalingRatio[refPicList][refIdx]; }
+  void                        setNumEntryPoints( const PPS *pps );
+  uint32_t                    getNumEntryPoints( ) const { return m_numEntryPoints;  }
 
-  const SliceReshapeInfo&     getReshapeInfo() const { return m_sliceReshapeInfo; }
-        SliceReshapeInfo&     getReshapeInfo()       { return m_sliceReshapeInfo; }
 protected:
-  Picture*              xGetRefPic        (PicList& rcListPic, int poc);
-  Picture*              xGetLongTermRefPic(PicList& rcListPic, int poc, bool pocHasMsb);
+  Picture*              xGetRefPic( PicList& rcListPic, int poc, const int layerId );
+  Picture*              xGetLongTermRefPic( PicList& rcListPic, int poc, bool pocHasMsb, const int layerId );
+public:
+  std::unordered_map< Position, std::unordered_map< Size, double> > m_mapPltCost;
+private:
 };// END CLASS DEFINITION Slice
 
-
-
-
-
 void calculateParameterSetChangedFlag(bool &bChanged, const std::vector<uint8_t> *pOldData, const std::vector<uint8_t> *pNewData);
 
 template <class T> class ParameterSetMap
@@ -2155,9 +2431,10 @@ public:
 
   ParameterSetMap(int maxId)
   :m_maxId (maxId)
-  ,m_activePsId(-1)
   ,m_lastActiveParameterSet(NULL)
-  {}
+  {
+    m_activePsId.clear();
+  }
 
   ~ParameterSetMap()
   {
@@ -2182,6 +2459,11 @@ public:
     return m_paramsetMap[psId].parameterSet;
   }
 
+  void clearMap()
+  {
+    m_paramsetMap.clear();
+  }
+
   void storePS(int psId, T *ps, const std::vector<uint8_t> *pNaluData)
   {
     CHECK( psId >= m_maxId, "Invalid PS id" );
@@ -2199,7 +2481,7 @@ public:
         return;
       }
 
-      if( m_activePsId == psId )
+      if (find(m_activePsId.begin(), m_activePsId.end(), psId) != m_activePsId.end())
       {
         std::swap( m_paramsetMap[psId].parameterSet, m_lastActiveParameterSet );
       }
@@ -2224,6 +2506,39 @@ public:
     }
   }
 
+  void checkAuApsContent( APS *aps, std::vector<int>& accessUnitApsNals )
+  {
+    int apsId = ( aps->getAPSId() << NUM_APS_TYPE_LEN ) + (int)aps->getAPSType();
+
+    if( std::find( accessUnitApsNals.begin(), accessUnitApsNals.end(), apsId ) != accessUnitApsNals.end() )
+    {
+      CHECK( m_paramsetMap.find( apsId ) == m_paramsetMap.end(), "APS does not exist" );
+      APS* existedAPS = m_paramsetMap[apsId].parameterSet;
+
+      if( aps->getAPSType() == LMCS_APS )
+      {
+        CHECK( aps->getReshaperAPSInfo() != existedAPS->getReshaperAPSInfo(), "All APS NAL units with a particular value of adaptation_parameter_set_id and a particular value of aps_params_type within an access unit shall have the same content" );
+      }
+      else if( aps->getAPSType() == ALF_APS )
+      {
+        CHECK( aps->getAlfAPSParam() != existedAPS->getAlfAPSParam(), "All APS NAL units with a particular value of adaptation_parameter_set_id and a particular value of aps_params_type within an access unit shall have the same content" );
+      }
+      else if( aps->getAPSType() == SCALING_LIST_APS )
+      {
+        CHECK( aps->getScalingList() != existedAPS->getScalingList(), "All APS NAL units with a particular value of adaptation_parameter_set_id and a particular value of aps_params_type within an access unit shall have the same content" );
+      }
+      else
+      {
+        CHECK( true, "Wrong APS type" );
+      }
+    }
+    else
+    {
+      accessUnitApsNals.push_back( apsId );
+    }
+  }
+
+
   void setChangedFlag(int psId, bool bChanged=true)
   {
     if ( m_paramsetMap.find(psId) != m_paramsetMap.end() )
@@ -2267,32 +2582,32 @@ public:
     return (m_paramsetMap.begin() == m_paramsetMap.end() ) ? NULL : m_paramsetMap.begin()->second.parameterSet;
   }
 
-  void setActive(int psId ) { m_activePsId = psId;}
+  void setActive(int psId) { m_activePsId.push_back(psId); }
+  void clear() { m_activePsId.clear(); }
 
 private:
   std::map<int,MapData<T> > m_paramsetMap;
   int                       m_maxId;
-  int                       m_activePsId;
+  std::vector<int>          m_activePsId;
   T*                        m_lastActiveParameterSet;
   static void setID(T* parameterSet, const int psId);
 };
 
-
 class ParameterSetManager
 {
 public:
                  ParameterSetManager();
   virtual        ~ParameterSetManager();
 
-#if HEVC_VPS
-  //! store sequence parameter set and take ownership of it
-  void           storeVPS(VPS *vps, const std::vector<uint8_t> &naluData)      { m_vpsMap.storePS( vps->getVPSId(), vps, &naluData); };
+  void           storeVPS(VPS *vps, const std::vector<uint8_t> &naluData)    { m_vpsMap.storePS(vps->getVPSId(), vps, &naluData); }
+  VPS*           getVPS( int vpsId )                                         { return m_vpsMap.getPS( vpsId ); };
+
+  void           storeDPS(DPS *dps, const std::vector<uint8_t> &naluData)    { m_dpsMap.storePS( dps->getDecodingParameterSetId(), dps, &naluData); };
   //! get pointer to existing video parameter set
-  VPS*           getVPS(int vpsId)                                           { return m_vpsMap.getPS(vpsId); };
-  bool           getVPSChangedFlag(int vpsId) const                          { return m_vpsMap.getChangedFlag(vpsId); }
-  void           clearVPSChangedFlag(int vpsId)                              { m_vpsMap.clearChangedFlag(vpsId); }
-  VPS*           getFirstVPS()                                               { return m_vpsMap.getFirstPS(); };
-#endif
+  DPS*           getDPS(int dpsId)                                           { return m_dpsMap.getPS(dpsId); };
+  bool           getDPSChangedFlag(int dpsId) const                          { return m_dpsMap.getChangedFlag(dpsId); }
+  void           clearDPSChangedFlag(int dpsId)                              { m_dpsMap.clearChangedFlag(dpsId); }
+  DPS*           getFirstDPS()                                               { return m_dpsMap.getFirstPS(); };
 
   //! store sequence parameter set and take ownership of it
   void           storeSPS(SPS *sps, const std::vector<uint8_t> &naluData) { m_spsMap.storePS( sps->getSPSId(), sps, &naluData); };
@@ -2314,37 +2629,34 @@ public:
   //! \returns true, if activation is successful
   // bool           activateSPSWithSEI(int SPSId);
 
-#if HEVC_VPS
-  //! activate a PPS and depending on isIDR parameter also SPS and VPS
-#else
   //! activate a PPS and depending on isIDR parameter also SPS
-#endif
   //! \returns true, if activation is successful
   bool           activatePPS(int ppsId, bool isIRAP);
-
-  void           storeAPS(APS *aps, const std::vector<uint8_t> &naluData)    { m_apsMap.storePS(aps->getAPSId(), aps, &naluData); };
-  APS*           getAPS(int apsId)                                           { return m_apsMap.getPS(apsId);                      };
-  bool           getAPSChangedFlag(int apsId) const                          { return m_apsMap.getChangedFlag(apsId);             }
-  void           clearAPSChangedFlag(int apsId)                              { m_apsMap.clearChangedFlag(apsId);                  }
-  APS*           getFirstAPS()                                               { return m_apsMap.getFirstPS();                      };
-  bool           activateAPS(int apsId);
-#if HEVC_VPS
-  const VPS*     getActiveVPS()const                                         { return m_vpsMap.getPS(m_activeVPSId); };
-#endif
+  APS**          getAPSs() { return &m_apss[0]; }
+  ParameterSetMap<APS>* getApsMap() { return &m_apsMap; }
+  void           storeAPS(APS *aps, const std::vector<uint8_t> &naluData)    { m_apsMap.storePS(aps->getAPSId() + (MAX_NUM_APS * aps->getAPSType()), aps, &naluData); };
+  APS*           getAPS(int apsId, int apsType)                              { return m_apsMap.getPS(apsId + (MAX_NUM_APS * apsType)); };
+  bool           getAPSChangedFlag(int apsId, int apsType) const             { return m_apsMap.getChangedFlag(apsId + (MAX_NUM_APS * apsType)); }
+  void           clearAPSChangedFlag(int apsId, int apsType)                 { m_apsMap.clearChangedFlag(apsId + ( MAX_NUM_APS * apsType)); }
+  APS*           getFirstAPS()                                               { return m_apsMap.getFirstPS(); };
+  bool           activateAPS(int apsId, int apsType);
   const SPS*     getActiveSPS()const                                         { return m_spsMap.getPS(m_activeSPSId); };
+  const DPS*     getActiveDPS()const                                         { return m_dpsMap.getPS(m_activeDPSId); };
+
+  void           checkAuApsContent( APS *aps, std::vector<int>& accessUnitApsNals ) { m_apsMap.checkAuApsContent( aps, accessUnitApsNals ); }
 
 protected:
-#if HEVC_VPS
-  ParameterSetMap<VPS> m_vpsMap;
-#endif
   ParameterSetMap<SPS> m_spsMap;
   ParameterSetMap<PPS> m_ppsMap;
   ParameterSetMap<APS> m_apsMap;
+  ParameterSetMap<DPS> m_dpsMap;
+  ParameterSetMap<VPS> m_vpsMap;
 
-#if HEVC_VPS
-  int m_activeVPSId; // -1 for nothing active
-#endif
+  APS* m_apss[ALF_CTB_MAX_NUM_APS];
+
+  int m_activeDPSId; // -1 for nothing active
   int m_activeSPSId; // -1 for nothing active
+  int m_activeVPSId; // -1 for nothing active
 };
 
 class PreCalcValues
@@ -2357,28 +2669,28 @@ public:
     , maxCUHeight         ( sps.getMaxCUHeight() )
     , maxCUWidthMask      ( maxCUWidth  - 1 )
     , maxCUHeightMask     ( maxCUHeight - 1 )
-    , maxCUWidthLog2      ( g_aucLog2[ maxCUWidth  ] )
-    , maxCUHeightLog2     ( g_aucLog2[ maxCUHeight ] )
+    , maxCUWidthLog2      ( floorLog2( maxCUWidth  ) )
+    , maxCUHeightLog2     ( floorLog2( maxCUHeight ) )
     , minCUWidth          ( sps.getMaxCUWidth()  >> sps.getMaxCodingDepth() )
     , minCUHeight         ( sps.getMaxCUHeight() >> sps.getMaxCodingDepth() )
-    , minCUWidthLog2      ( g_aucLog2[ minCUWidth  ] )
-    , minCUHeightLog2     ( g_aucLog2[ minCUHeight ] )
+    , minCUWidthLog2      ( floorLog2( minCUWidth  ) )
+    , minCUHeightLog2     ( floorLog2( minCUHeight ) )
     , partsInCtuWidth     ( 1 << sps.getMaxCodingDepth() )
     , partsInCtuHeight    ( 1 << sps.getMaxCodingDepth() )
     , partsInCtu          ( 1 << (sps.getMaxCodingDepth() << 1) )
-    , widthInCtus         ( (sps.getPicWidthInLumaSamples () + sps.getMaxCUWidth () - 1) / sps.getMaxCUWidth () )
-    , heightInCtus        ( (sps.getPicHeightInLumaSamples() + sps.getMaxCUHeight() - 1) / sps.getMaxCUHeight() )
+    , widthInCtus         ( (pps.getPicWidthInLumaSamples () + sps.getMaxCUWidth () - 1) / sps.getMaxCUWidth () )
+    , heightInCtus        ( (pps.getPicHeightInLumaSamples() + sps.getMaxCUHeight() - 1) / sps.getMaxCUHeight() )
     , sizeInCtus          ( widthInCtus * heightInCtus )
-    , lumaWidth           ( sps.getPicWidthInLumaSamples() )
-    , lumaHeight          ( sps.getPicHeightInLumaSamples() )
+    , lumaWidth           ( pps.getPicWidthInLumaSamples() )
+    , lumaHeight          ( pps.getPicHeightInLumaSamples() )
     , fastDeltaQPCuMaxSize( Clip3(sps.getMaxCUHeight() >> (sps.getLog2DiffMaxMinCodingBlockSize()), sps.getMaxCUHeight(), 32u) )
     , noChroma2x2         (  false )
     , isEncoder           ( _isEncoder )
     , ISingleTree         ( !sps.getUseDualITree() )
-    , maxBtDepth          { sps.getMaxBTDepthI(), sps.getMaxBTDepth(), sps.getMaxBTDepthIChroma() }
-    , minBtSize           { MIN_BT_SIZE, MIN_BT_SIZE_INTER, MIN_BT_SIZE_C }
+    , maxBtDepth          { sps.getMaxMTTHierarchyDepthI(), sps.getMaxMTTHierarchyDepth(), sps.getMaxMTTHierarchyDepthIChroma() }
+    , minBtSize           { 1u << sps.getLog2MinCodingBlockSize(), 1u << sps.getLog2MinCodingBlockSize(), 1u << sps.getLog2MinCodingBlockSize() }
     , maxBtSize           { sps.getMaxBTSizeI(), sps.getMaxBTSize(), sps.getMaxBTSizeIChroma() }
-    , minTtSize           { MIN_TT_SIZE, MIN_TT_SIZE_INTER, MIN_TT_SIZE_C }
+    , minTtSize           { 1u << sps.getLog2MinCodingBlockSize(), 1u << sps.getLog2MinCodingBlockSize(), 1u << sps.getLog2MinCodingBlockSize() }
     , maxTtSize           { sps.getMaxTTSizeI(), sps.getMaxTTSize(), sps.getMaxTTSizeIChroma() }
     , minQtSize           { sps.getMinQTSize(I_SLICE, CHANNEL_TYPE_LUMA), sps.getMinQTSize(B_SLICE, CHANNEL_TYPE_LUMA), sps.getMinQTSize(I_SLICE, CHANNEL_TYPE_CHROMA) }
   {}
@@ -2429,12 +2741,12 @@ public:
 };
 
 #if ENABLE_TRACING
-#if HEVC_VPS
 void xTraceVPSHeader();
-#endif
+void xTraceDPSHeader();
 void xTraceSPSHeader();
 void xTracePPSHeader();
 void xTraceAPSHeader();
+void xTracePictureHeader();
 void xTraceSliceHeader();
 void xTraceAccessUnitDelimiter();
 #endif
diff --git a/source/Lib/CommonLib/TrQuant.cpp b/source/Lib/CommonLib/TrQuant.cpp
index 8c701503e3009991f614e59fe2978e5dec32badd..c4221f13644fe2ea8a3c9d4c2c2e37897cb0bd1e 100644
--- a/source/Lib/CommonLib/TrQuant.cpp
+++ b/source/Lib/CommonLib/TrQuant.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -83,6 +83,78 @@ InvTrans *fastInvTrans[NUM_TRANS_TYPE][g_numTransformMatrixSizes] =
 //! \ingroup CommonLib
 //! \{
 
+static inline int64_t square( const int d ) { return d * (int64_t)d; }   // d*d evaluated in 64 bit: one operand is widened before the multiply
+
+template<int signedMode> std::pair<int64_t,int64_t> fwdTransformCbCr( const PelBuf &resCb, const PelBuf &resCr, PelBuf& resC1, PelBuf& resC2 )   // maps each (Cb, Cr) sample pair to one joint sample (resC1 for |mode| 1,2; resC2 for |mode| 3); returns the squared-error sums (d1, d2)
+{
+  const Pel*  cb  = resCb.buf;
+  const Pel*  cr  = resCr.buf;
+  Pel*        c1  = resC1.buf;
+  Pel*        c2  = resC2.buf;
+  int64_t     d1  = 0;   // modes != 0: squared error between (cb, cr) and the values modelled from the joint sample; mode 0: sum of cb^2
+  int64_t     d2  = 0;   // only accumulated in mode 0 (sum of cr^2); stays 0 otherwise
+  for( SizeType y = 0; y < resCb.height; y++, cb += resCb.stride, cr += resCr.stride, c1 += resC1.stride, c2 += resC2.stride )   // block dimensions are taken from resCb; every buffer advances by its own stride
+  {
+    for( SizeType x = 0; x < resCb.width; x++ )
+    {
+      int cbx = cb[x], crx = cr[x];
+      if      ( signedMode ==  1 )   // joint sample written to resC1; Cb modelled as c1, Cr as c1 >> 1
+      {
+        c1[x] = Pel( ( 4*cbx + 2*crx ) / 5 );
+        d1   += square( cbx - c1[x] ) + square( crx - (c1[x]>>1) );
+      }
+      else if ( signedMode == -1 )   // like mode 1 with the Cr sign flipped: Cr modelled as (-c1) >> 1 (negation happens before the shift)
+      {
+        c1[x] = Pel( ( 4*cbx - 2*crx ) / 5 );
+        d1   += square( cbx - c1[x] ) + square( crx - (-c1[x]>>1) );
+      }
+      else if ( signedMode ==  2 )   // Cb and Cr are both modelled as c1
+      {
+        c1[x] = Pel( ( cbx + crx ) / 2 );
+        d1   += square( cbx - c1[x] ) + square( crx - c1[x] );
+      }
+      else if ( signedMode == -2 )   // Cb modelled as c1, Cr as -c1
+      {
+        c1[x] = Pel( ( cbx - crx ) / 2 );
+        d1   += square( cbx - c1[x] ) + square( crx + c1[x] );
+      }
+      else if ( signedMode ==  3 )   // joint sample written to resC2; Cr modelled as c2, Cb as c2 >> 1
+      {
+        c2[x] = Pel( ( 4*crx + 2*cbx ) / 5 );
+        d1   += square( cbx - (c2[x]>>1) ) + square( crx - c2[x] );
+      }
+      else if ( signedMode == -3 )   // like mode 3 with the Cb sign flipped: Cb modelled as (-c2) >> 1
+      {
+        c2[x] = Pel( ( 4*crx - 2*cbx ) / 5 );
+        d1   += square( cbx - (-c2[x]>>1) ) + square( crx - c2[x] );
+      }
+      else   // any other mode (instantiated with 0): nothing is written, only the energies of Cb and Cr are measured
+      {
+        d1   += square( cbx );
+        d2   += square( crx );
+      }
+    }
+  }
+  return std::make_pair(d1,d2);
+}
+
+template<int signedMode> void invTransformCbCr( PelBuf &resCb, PelBuf &resCr )   // in place: |mode| 1,2 overwrite Cr from Cb, |mode| 3 overwrites Cb from Cr, mode 0 changes nothing
+{
+  Pel*  cb  = resCb.buf;
+  Pel*  cr  = resCr.buf;
+  for( SizeType y = 0; y < resCb.height; y++, cb += resCb.stride, cr += resCr.stride )   // block dimensions are taken from resCb
+  {
+    for( SizeType x = 0; x < resCb.width; x++ )
+    {
+      if      ( signedMode ==  1 )  { cr[x] =  cb[x] >> 1;  }
+      else if ( signedMode == -1 )  { cr[x] = -cb[x] >> 1;  }   // unary minus binds tighter than >>: (-cb) >> 1
+      else if ( signedMode ==  2 )  { cr[x] =  cb[x]; }
+      else if ( signedMode == -2 )  { cr[x] = (cb[x] == -32768 && sizeof(Pel) == 2) ? 32767 : -cb[x]; }   // non-normative clipping to prevent 16-bit overflow
+      else if ( signedMode ==  3 )  { cb[x] =  cr[x] >> 1; }
+      else if ( signedMode == -3 )  { cb[x] = -cr[x] >> 1; }   // (-cr) >> 1, same precedence as above
+    }
+  }
+}
 
 // ====================================================================================================================
 // TrQuant class member functions
@@ -90,11 +162,23 @@ InvTrans *fastInvTrans[NUM_TRANS_TYPE][g_numTransformMatrixSizes] =
 TrQuant::TrQuant() : m_quant( nullptr )
 {
   // allocate temporary buffers
-  m_plTempCoeff   = (TCoeff*) xMalloc( TCoeff, MAX_CU_SIZE * MAX_CU_SIZE );
-  m_mtsCoeffs = new TCoeff*[ NUM_TRAFO_MODES_MTS ];
-  for( int i = 0; i < NUM_TRAFO_MODES_MTS; i++ )
   {
-    m_mtsCoeffs[i] = (TCoeff*) xMalloc( TCoeff, MAX_CU_SIZE * MAX_CU_SIZE );
+    m_invICT      = m_invICTMem + maxAbsIctMode;   // offset base pointer so the table can be indexed with signed modes (-3..3 below); assumes m_invICTMem has room for maxAbsIctMode entries on each side - TODO confirm
+    m_invICT[ 0]  = invTransformCbCr< 0>;
+    m_invICT[ 1]  = invTransformCbCr< 1>;
+    m_invICT[-1]  = invTransformCbCr<-1>;
+    m_invICT[ 2]  = invTransformCbCr< 2>;
+    m_invICT[-2]  = invTransformCbCr<-2>;
+    m_invICT[ 3]  = invTransformCbCr< 3>;
+    m_invICT[-3]  = invTransformCbCr<-3>;
+    m_fwdICT      = m_fwdICTMem + maxAbsIctMode;   // same signed-index layout for the forward functions
+    m_fwdICT[ 0]  = fwdTransformCbCr< 0>;
+    m_fwdICT[ 1]  = fwdTransformCbCr< 1>;
+    m_fwdICT[-1]  = fwdTransformCbCr<-1>;
+    m_fwdICT[ 2]  = fwdTransformCbCr< 2>;
+    m_fwdICT[-2]  = fwdTransformCbCr<-2>;
+    m_fwdICT[ 3]  = fwdTransformCbCr< 3>;
+    m_fwdICT[-3]  = fwdTransformCbCr<-3>;
   }
 }
 
@@ -105,23 +189,6 @@ TrQuant::~TrQuant()
     delete m_quant;
     m_quant = nullptr;
   }
-
-  // delete temporary buffers
-  if ( m_plTempCoeff )
-  {
-    xFree( m_plTempCoeff );
-    m_plTempCoeff = nullptr;
-  }
-  if( m_mtsCoeffs )
-  {
-    for( int i = 0; i < NUM_TRAFO_MODES_MTS; i++ )
-    {
-      xFree( m_mtsCoeffs[i] );
-      m_mtsCoeffs[i] = nullptr;
-    }
-    delete[] m_mtsCoeffs;
-    m_mtsCoeffs = nullptr;
-  }
 }
 
 #if ENABLE_SPLIT_PARALLELISM
@@ -146,23 +213,15 @@ void TrQuant::init( const Quant* otherQuant,
 #if T0196_SELECTIVE_RDOQ
                     const bool useSelectiveRDOQ,
 #endif
-                    const bool bEnc,
-                    const bool useTransformSkipFast
+                    const bool bEnc
 )
 {
-  m_uiMaxTrSize          = uiMaxTrSize;
-  m_bEnc                 = bEnc;
-  m_useTransformSkipFast = useTransformSkipFast;
-
   delete m_quant;
   m_quant = nullptr;
 
-  if( bUseRDOQ || !bEnc )
   {
     m_quant = new DepQuant( otherQuant, bEnc );
   }
-  else
-    m_quant = new Quant( otherQuant );
 
   if( m_quant )
   {
@@ -170,41 +229,288 @@ void TrQuant::init( const Quant* otherQuant,
   }
 }
 
+void TrQuant::fwdLfnstNxN( int* src, int* dst, const uint32_t mode, const uint32_t index, const uint32_t size, int zeroOutSize ) // forward LFNST kernel: writes zeroOutSize matrix-vector products to dst and zeroes the rest up to trSize
+{
+  const int8_t* trMat  = ( size > 4 ) ? g_lfnst8x8[ mode ][ index ][ 0 ] : g_lfnst4x4[ mode ][ index ][ 0 ]; // 8x8 table when size > 4, otherwise the 4x4 table
+  const int     trSize = ( size > 4 ) ? 48 : 16; // number of input samples read from src per output coefficient
+  int           coef;
+  int*          out    = dst;
 
+  assert( index < 3 );
 
-void TrQuant::invTransformNxN( TransformUnit &tu, const ComponentID &compID, PelBuf &pResi, const QpParam &cQP )
+  for( int j = 0; j < zeroOutSize; j++ ) // one kernel row per retained output coefficient
+  {
+    int*          srcPtr   = src;
+    const int8_t* trMatTmp = trMat;
+    coef = 0;
+    for( int i = 0; i < trSize; i++ )
+    {
+      coef += *srcPtr++ * *trMatTmp++;
+    }
+    *out++ = ( coef + 64 ) >> 7; // add 64 then shift right by 7 (rounded scaling by 1/128)
+    trMat += trSize; // advance to the next kernel row
+  }
+
+  ::memset( out, 0, ( trSize - zeroOutSize ) * sizeof( int ) ); // outputs beyond zeroOutSize are forced to zero
+}
+
+void TrQuant::invLfnstNxN( int* src, int* dst, const uint32_t mode, const uint32_t index, const uint32_t size, int zeroOutSize ) // inverse LFNST kernel: reads zeroOutSize inputs from src and writes trSize clipped outputs to dst
 {
-  const CompArea &area    = tu.blocks[compID];
-  const uint32_t uiWidth      = area.width;
-  const uint32_t uiHeight     = area.height;
+  int             maxLog2TrDynamicRange =  15; // literal value; not read from the SPS in this function
+  const TCoeff    outputMinimum         = -( 1 << maxLog2TrDynamicRange );
+  const TCoeff    outputMaximum         =  ( 1 << maxLog2TrDynamicRange ) - 1;
+  const int8_t*   trMat                 =  ( size > 4 ) ? g_lfnst8x8[ mode ][ index ][ 0 ] : g_lfnst4x4[ mode ][ index ][ 0 ]; // same tables as the forward kernel, walked column-wise below
+  const int       trSize                =  ( size > 4 ) ? 48 : 16; // number of output samples written to dst
+  int             resi;
+  int*            out                   =  dst;
 
-#if MAX_TB_SIZE_SIGNALLING
-  CHECK( uiWidth > tu.cs->sps->getMaxTbSize() || uiHeight > tu.cs->sps->getMaxTbSize(), "Maximal allowed transformation size exceeded!" );
-#else
-  CHECK( uiWidth > MAX_TB_SIZEY || uiHeight > MAX_TB_SIZEY, "Maximal allowed transformation size exceeded!" );
-#endif
-  if (tu.cu->transQuantBypass)
-  {
-    // where should this logic go?
-    const bool rotateResidual = TU::isNonTransformedResidualRotated(tu, compID);
-    const CCoeffBuf pCoeff    = tu.getCoeffs(compID);
+  assert( index < 3 );
 
-    for (uint32_t y = 0, coefficientIndex = 0; y < uiHeight; y++)
+  for( int j = 0; j < trSize; j++ ) // one output sample per kernel column
+  {
+    resi = 0;
+    const int8_t* trMatTmp = trMat;
+    int*          srcPtr   = src;
+    for( int i = 0; i < zeroOutSize; i++ ) // only the first zeroOutSize inputs contribute
     {
-      for (uint32_t x = 0; x < uiWidth; x++, coefficientIndex++)
-      {
-        pResi.at(x, y) = rotateResidual ? pCoeff.at(pCoeff.width - x - 1, pCoeff.height - y - 1) : pCoeff.at(x, y);
-      }
+      resi += *srcPtr++ * *trMatTmp;
+      trMatTmp += trSize; // stride of trSize: same column, next kernel row
     }
+    *out++ = Clip3( outputMinimum, outputMaximum, ( int ) ( resi + 64 ) >> 7 ); // add 64, shift right by 7, then clip to [-2^15, 2^15 - 1]
+    trMat++; // move to the next kernel column
+  }
+}
+
+uint32_t TrQuant::getLFNSTIntraMode( int wideAngPredMode ) // maps a signed prediction mode onto an unsigned index by offsetting out-of-range values
+{
+  uint32_t intraMode;
+
+  if( wideAngPredMode < 0 ) // negative modes are shifted up by ( NUM_EXT_LUMA_MODE >> 1 ) + NUM_LUMA_MODE
+  {
+    intraMode = ( uint32_t ) ( wideAngPredMode + ( NUM_EXT_LUMA_MODE >> 1 ) + NUM_LUMA_MODE );
+  }
+  else if( wideAngPredMode >= NUM_LUMA_MODE ) // modes at or above NUM_LUMA_MODE are shifted up by NUM_EXT_LUMA_MODE >> 1
+  {
+    intraMode = ( uint32_t ) ( wideAngPredMode + ( NUM_EXT_LUMA_MODE >> 1 ) );
   }
   else
   {
-    CoeffBuf tempCoeff = CoeffBuf( m_plTempCoeff, area );
+    intraMode = ( uint32_t ) wideAngPredMode; // modes in [0, NUM_LUMA_MODE) pass through unchanged
+  }
+
+  return intraMode;
+}
+
+bool TrQuant::getTransposeFlag( uint32_t intraMode ) // true when the LFNST input/output block is to be accessed transposed for this mode index
+{
+  return ( ( intraMode >= NUM_LUMA_MODE ) && ( intraMode >= ( NUM_LUMA_MODE + ( NUM_EXT_LUMA_MODE >> 1 ) ) ) ) || // modes at or above NUM_LUMA_MODE + ( NUM_EXT_LUMA_MODE >> 1 )
+         ( ( intraMode <  NUM_LUMA_MODE ) && ( intraMode >  DIA_IDX ) ); // modes strictly between DIA_IDX and NUM_LUMA_MODE
+}
+
+void TrQuant::xInvLfnst( const TransformUnit &tu, const ComponentID compID ) // applies the inverse LFNST in place on m_tempCoeff for one component of the TU
+{
+  const CompArea& area     = tu.blocks[ compID ];
+  const uint32_t  width    = area.width;
+  const uint32_t  height   = area.height;
+  const uint32_t  lfnstIdx = tu.cu->lfnstIdx;
+
+  if (lfnstIdx && tu.mtsIdx[compID] != MTS_SKIP && width >= 4 && height >= 4) // nothing to do without an LFNST index, for transform-skip, or for blocks narrower than 4
+  {
+    const bool whge3 = width >= 8 && height >= 8; // true selects the 8x8 kernel (48 samples), false the 4x4 kernel (16 samples)
+    const ScanElement * scan = whge3 ? g_coefTopLeftDiagScan8x8[ gp_sizeIdxInfo->idxFrom( width ) ] : g_scanOrder[ SCAN_GROUPED_4x4 ][ SCAN_DIAG ][ gp_sizeIdxInfo->idxFrom( width ) ][ gp_sizeIdxInfo->idxFrom( height ) ];
+    uint32_t intraMode = PU::getFinalIntraMode( *tu.cs->getPU( area.pos(), toChannelType( compID ) ), toChannelType( compID ) );
+
+    if( PU::isLMCMode( tu.cs->getPU( area.pos(), toChannelType( compID ) )->intraDir[ toChannelType( compID ) ] ) ) // LM chroma modes use the co-located luma mode instead
+    {
+      intraMode = PU::getCoLocatedIntraLumaMode( *tu.cs->getPU( area.pos(), toChannelType( compID ) ) );
+    }
+    if (PU::isMIP(*tu.cs->getPU(area.pos(), toChannelType(compID)), toChannelType(compID))) // MIP blocks are treated as planar for kernel selection
+    {
+      intraMode = PLANAR_IDX;
+    }
+    CHECK( intraMode >= NUM_INTRA_MODE - 1, "Invalid intra mode" );
+
+    if( lfnstIdx < 3 )
+    {
+      intraMode = getLFNSTIntraMode( PU::getWideAngIntraMode( tu, intraMode, compID ) );
+#if RExt__DECODER_DEBUG_TOOL_STATISTICS
+      CodingStatistics::IncrementStatisticTool( CodingStatisticsClassType { STATS__TOOL_LFNST, width, height, compID } );
+#endif
+      bool          transposeFlag   = getTransposeFlag( intraMode );
+      const int     sbSize          = whge3 ? 8 : 4;
+      bool          tu4x4Flag       = ( width == 4 && height == 4 );
+      bool          tu8x8Flag       = ( width == 8 && height == 8 );
+      TCoeff*       lfnstTemp;
+      TCoeff*       coeffTemp;
+          int y;
+          lfnstTemp = m_tempInMatrix; // inverse spectral rearrangement
+          coeffTemp = m_tempCoeff;
+          TCoeff * dst = lfnstTemp;
+          const ScanElement * scanPtr = scan;
+          for( y = 0; y < 16; y++ ) // gather the first 16 coefficients in scan order as kernel input
+          {
+            *dst++ = coeffTemp[ scanPtr->idx ];
+            scanPtr++;
+          }
+
+          invLfnstNxN( m_tempInMatrix, m_tempOutMatrix, g_lfnstLut[ intraMode ], lfnstIdx - 1, sbSize, ( tu4x4Flag || tu8x8Flag ) ? 8 : 16 ); // 8 inputs for exactly-4x4 / exactly-8x8 TUs, 16 otherwise
+
+          lfnstTemp = m_tempOutMatrix; // inverse spectral rearrangement
+
+          if( transposeFlag ) // write the kernel output back transposed into the top-left sbSize x sbSize region
+          {
+            if( sbSize == 4 )
+            {
+              for( y = 0; y < 4; y++ )
+              {
+                coeffTemp[ 0 ] = lfnstTemp[ 0 ];  coeffTemp[ 1 ] = lfnstTemp[  4 ];
+                coeffTemp[ 2 ] = lfnstTemp[ 8 ];  coeffTemp[ 3 ] = lfnstTemp[ 12 ];
+                lfnstTemp++;
+                coeffTemp += width; // next row of the TU coefficient buffer
+              }
+            }
+            else // ( sbSize == 8 )
+            {
+              for( y = 0; y < 8; y++ )
+              {
+                coeffTemp[ 0 ] = lfnstTemp[  0 ];  coeffTemp[ 1 ] = lfnstTemp[  8 ];
+                coeffTemp[ 2 ] = lfnstTemp[ 16 ];  coeffTemp[ 3 ] = lfnstTemp[ 24 ];
+                if( y < 4 ) // columns 4..7 are only written for the first four rows (48-sample layout)
+                {
+                  coeffTemp[ 4 ] = lfnstTemp[ 32 ];  coeffTemp[ 5 ] = lfnstTemp[ 36 ];
+                  coeffTemp[ 6 ] = lfnstTemp[ 40 ];  coeffTemp[ 7 ] = lfnstTemp[ 44 ];
+                }
+                lfnstTemp++;
+                coeffTemp += width;
+              }
+            }
+          }
+          else
+          {
+            for( y = 0; y < sbSize; y++ )
+            {
+              uint32_t uiStride = ( y < 4 ) ? sbSize : 4; // rows 0..3 take sbSize samples, rows 4..7 (8x8 kernel only) take 4
+              ::memcpy( coeffTemp, lfnstTemp, uiStride * sizeof( TCoeff ) );
+              lfnstTemp += uiStride;
+              coeffTemp += width;
+            }
+          }
+    }
+  }
+}
+
+void TrQuant::xFwdLfnst( const TransformUnit &tu, const ComponentID compID, const bool loadTr ) // applies the forward LFNST in place on the selected coefficient buffer for one component of the TU
+{
+  const CompArea& area     = tu.blocks[ compID ];
+  const uint32_t  width    = area.width;
+  const uint32_t  height   = area.height;
+  const uint32_t  lfnstIdx = tu.cu->lfnstIdx;
+
+  if( lfnstIdx && tu.mtsIdx[compID] != MTS_SKIP && width >= 4 && height >= 4) // nothing to do without an LFNST index, for transform-skip, or for blocks narrower than 4
+  {
+    const bool whge3 = width >= 8 && height >= 8; // true selects the 8x8 kernel (48 samples), false the 4x4 kernel (16 samples)
+    const ScanElement * scan = whge3 ? g_coefTopLeftDiagScan8x8[ gp_sizeIdxInfo->idxFrom( width ) ] : g_scanOrder[ SCAN_GROUPED_4x4 ][ SCAN_DIAG ][ gp_sizeIdxInfo->idxFrom( width ) ][ gp_sizeIdxInfo->idxFrom( height ) ];
+    uint32_t intraMode = PU::getFinalIntraMode( *tu.cs->getPU( area.pos(), toChannelType( compID ) ), toChannelType( compID ) );
+
+    if( PU::isLMCMode( tu.cs->getPU( area.pos(), toChannelType( compID ) )->intraDir[ toChannelType( compID ) ] ) ) // LM chroma modes use the co-located luma mode instead
+    {
+      intraMode = PU::getCoLocatedIntraLumaMode( *tu.cs->getPU( area.pos(), toChannelType( compID ) ) );
+    }
+    if (PU::isMIP(*tu.cs->getPU(area.pos(), toChannelType(compID)), toChannelType(compID))) // MIP blocks are treated as planar for kernel selection
+    {
+      intraMode = PLANAR_IDX;
+    }
+    CHECK( intraMode >= NUM_INTRA_MODE - 1, "Invalid intra mode" );
+
+    if( lfnstIdx < 3 )
+    {
+      intraMode = getLFNSTIntraMode( PU::getWideAngIntraMode( tu, intraMode, compID ) );
+
+      bool            transposeFlag   = getTransposeFlag( intraMode );
+      const int       sbSize          = whge3 ? 8 : 4;
+      bool            tu4x4Flag       = ( width == 4 && height == 4 );
+      bool            tu8x8Flag       = ( width == 8 && height == 8 );
+      TCoeff*         lfnstTemp;
+      TCoeff*         coeffTemp;
+      TCoeff *        tempCoeff = loadTr ? m_mtsCoeffs[tu.mtsIdx[compID]] : m_tempCoeff; // loadTr selects the per-MTS-index buffer instead of m_tempCoeff
+
+          int y;
+          lfnstTemp = m_tempInMatrix; // forward low frequency non-separable transform
+          coeffTemp = tempCoeff;
+
+          if( transposeFlag ) // read the top-left sbSize x sbSize region transposed into the kernel input
+          {
+            if( sbSize == 4 )
+            {
+              for( y = 0; y < 4; y++ )
+              {
+                lfnstTemp[ 0 ] = coeffTemp[ 0 ];  lfnstTemp[  4 ] = coeffTemp[ 1 ];
+                lfnstTemp[ 8 ] = coeffTemp[ 2 ];  lfnstTemp[ 12 ] = coeffTemp[ 3 ];
+                lfnstTemp++;
+                coeffTemp += width; // next row of the TU coefficient buffer
+              }
+            }
+            else // ( sbSize == 8 )
+            {
+              for( y = 0; y < 8; y++ )
+              {
+                lfnstTemp[  0 ] = coeffTemp[ 0 ];  lfnstTemp[  8 ] = coeffTemp[ 1 ];
+                lfnstTemp[ 16 ] = coeffTemp[ 2 ];  lfnstTemp[ 24 ] = coeffTemp[ 3 ];
+                if( y < 4 ) // columns 4..7 are only read for the first four rows (48-sample layout)
+                {
+                  lfnstTemp[ 32 ] = coeffTemp[ 4 ];  lfnstTemp[ 36 ] = coeffTemp[ 5 ];
+                  lfnstTemp[ 40 ] = coeffTemp[ 6 ];  lfnstTemp[ 44 ] = coeffTemp[ 7 ];
+                }
+                lfnstTemp++;
+                coeffTemp += width;
+              }
+            }
+          }
+          else
+          {
+            for( y = 0; y < sbSize; y++ )
+            {
+              uint32_t uiStride = ( y < 4 ) ? sbSize : 4; // rows 0..3 contribute sbSize samples, rows 4..7 (8x8 kernel only) contribute 4
+              ::memcpy( lfnstTemp, coeffTemp, uiStride * sizeof( TCoeff ) );
+              lfnstTemp += uiStride;
+              coeffTemp += width;
+            }
+          }
+
+          fwdLfnstNxN( m_tempInMatrix, m_tempOutMatrix, g_lfnstLut[ intraMode ], lfnstIdx - 1, sbSize, ( tu4x4Flag || tu8x8Flag ) ? 8 : 16 ); // keep 8 outputs for exactly-4x4 / exactly-8x8 TUs, 16 otherwise
+
+          lfnstTemp = m_tempOutMatrix; // forward spectral rearrangement
+          coeffTemp = tempCoeff;
+          const ScanElement * scanPtr = scan;
+          int lfnstCoeffNum = ( sbSize == 4 ) ? sbSize * sbSize : 48; // 16 values for the 4x4 kernel, 48 for the 8x8 kernel
+          for( y = 0; y < lfnstCoeffNum; y++ ) // scatter the kernel output (including its zeroed tail) back in scan order
+          {
+            coeffTemp[ scanPtr->idx ] = *lfnstTemp++;
+            scanPtr++;
+          }
+    }
+  }
+}
+
+
+void TrQuant::invTransformNxN( TransformUnit &tu, const ComponentID &compID, PelBuf &pResi, const QpParam &cQP )
+{
+  const CompArea &area    = tu.blocks[compID];
+  const uint32_t uiWidth      = area.width;
+  const uint32_t uiHeight     = area.height;
+
+  CHECK( uiWidth > tu.cs->sps->getMaxTbSize() || uiHeight > tu.cs->sps->getMaxTbSize(), "Maximal allowed transformation size exceeded!" );
+    CoeffBuf tempCoeff = CoeffBuf(m_tempCoeff, area);
     xDeQuant( tu, tempCoeff, compID, cQP );
 
     DTRACE_COEFF_BUF( D_TCOEFF, tempCoeff, tu, tu.cu->predMode, compID );
 
-    if( isLuma(compID) && tu.mtsIdx == 1 )
+    if( tu.cs->sps->getUseLFNST() )
+    {
+      xInvLfnst( tu, compID );
+    }
+
+    if( tu.mtsIdx[compID] == MTS_SKIP )
     {
       xITransformSkip( tempCoeff, pResi, tu, compID );
     }
@@ -212,7 +518,6 @@ void TrQuant::invTransformNxN( TransformUnit &tu, const ComponentID &compID, Pel
     {
       xIT( tu, compID, tempCoeff, pResi );
     }
-  }
 
   //DTRACE_BLOCK_COEFF(tu.getCoeffs(compID), tu, tu.cu->predMode, compID);
   DTRACE_PEL_BUF( D_RESIDUALS, pResi, tu, tu.cu->predMode, compID);
@@ -223,7 +528,7 @@ void TrQuant::invRdpcmNxN(TransformUnit& tu, const ComponentID &compID, PelBuf &
 {
   const CompArea &area    = tu.blocks[compID];
 
-  if (CU::isRDPCMEnabled(*tu.cu) && (tu.mtsIdx==1 || tu.cu->transQuantBypass))
+  if (CU::isRDPCMEnabled(*tu.cu) && (tu.mtsIdx[compID] == MTS_SKIP))
   {
     const uint32_t uiWidth  = area.width;
     const uint32_t uiHeight = area.height;
@@ -277,25 +582,117 @@ void TrQuant::invRdpcmNxN(TransformUnit& tu, const ComponentID &compID, PelBuf &
   }
 }
 
+
+std::pair<int64_t,int64_t> TrQuant::fwdTransformICT( const TransformUnit &tu, const PelBuf &resCb, const PelBuf &resCr, PelBuf &resC1, PelBuf &resC2, int jointCbCr ) // forward inter-component transform of (resCb, resCr) into (resC1, resC2); all four buffers must have the same size
+{
+  CHECK( Size(resCb) != Size(resCr), "resCb and resCr have different sizes" );
+  CHECK( Size(resCb) != Size(resC1), "resCb and resC1 have different sizes" );
+  CHECK( Size(resCb) != Size(resC2), "resCb and resC2 have different sizes" );
+  return (*m_fwdICT[ TU::getICTMode(tu, jointCbCr) ])( resCb, resCr, resC1, resC2 ); // dispatch through the function-pointer table indexed by the ICT mode; the pair is passed through from the callee
+}
+
+void TrQuant::invTransformICT( const TransformUnit &tu, PelBuf &resCb, PelBuf &resCr ) // inverse inter-component transform; both buffers are passed by non-const reference to the selected routine
+{
+  CHECK( Size(resCb) != Size(resCr), "resCb and resCr have different sizes" );
+  (*m_invICT[ TU::getICTMode(tu) ])( resCb, resCr ); // dispatch through the function-pointer table indexed by the ICT mode of the TU
+}
+
+std::vector<int> TrQuant::selectICTCandidates( const TransformUnit &tu, CompStorage* resCb, CompStorage* resCr ) // returns the cbf masks to test; creates and fills resCb[mask] / resCr[mask] via fwdTransformICT from the index-0 buffers
+{
+  CHECK( !resCb[0].valid() || !resCr[0].valid(), "standard components are not valid" );
+
+  if( !CU::isIntra( *tu.cu ) ) // non-intra CUs: only cbfMask 3 is prepared and returned
+  {
+    int cbfMask = 3;
+    resCb[cbfMask].create( tu.blocks[COMPONENT_Cb] );
+    resCr[cbfMask].create( tu.blocks[COMPONENT_Cr] );
+    fwdTransformICT( tu, resCb[0], resCr[0], resCb[cbfMask], resCr[cbfMask], cbfMask );
+    std::vector<int> cbfMasksToTest;
+    cbfMasksToTest.push_back( cbfMask );
+    return cbfMasksToTest;
+  }
+
+  std::pair<int64_t,int64_t> pairDist[4]; // value returned by fwdTransformICT per mask - presumably a distortion pair; TODO confirm meaning of first/second
+  for( int cbfMask = 0; cbfMask < 4; cbfMask++ )
+  {
+    if( cbfMask ) // index 0 holds the already-valid input buffers, so only masks 1..3 are allocated
+    {
+      CHECK( resCb[cbfMask].valid() || resCr[cbfMask].valid(), "target components for cbfMask=" << cbfMask << " are already present" );
+      resCb[cbfMask].create( tu.blocks[COMPONENT_Cb] );
+      resCr[cbfMask].create( tu.blocks[COMPONENT_Cr] );
+    }
+    pairDist[cbfMask] = fwdTransformICT( tu, resCb[0], resCr[0], resCb[cbfMask], resCr[cbfMask], cbfMask );
+  }
+
+  std::vector<int> cbfMasksToTest;
+  int64_t minDist1  = std::min<int64_t>( pairDist[0].first, pairDist[0].second ); // baseline for mask 0: the smaller of its two values
+  int64_t minDist2  = std::numeric_limits<int64_t>::max();
+  int     cbfMask1  = 0; // mask with the smallest value seen so far (0 = baseline still best)
+  int     cbfMask2  = 0; // mask with the second-smallest value (0 = none)
+  for( int cbfMask : { 1, 2, 3 } )
+  {
+    if( pairDist[cbfMask].first < minDist1 ) // new best: previous best becomes runner-up
+    {
+      cbfMask2  = cbfMask1; minDist2  = minDist1;
+      cbfMask1  = cbfMask;  minDist1  = pairDist[cbfMask1].first;
+    }
+    else if( pairDist[cbfMask].first < minDist2 ) // new runner-up
+    {
+      cbfMask2  = cbfMask;  minDist2  = pairDist[cbfMask2].first;
+    }
+  }
+  if( cbfMask1 )
+  {
+    cbfMasksToTest.push_back( cbfMask1 );
+  }
+  if( cbfMask2 && ( ( minDist2 < (9*minDist1)/8 ) || ( !cbfMask1 && minDist2 < (3*minDist1)/2 ) ) ) // runner-up kept only if below 9/8 of the best, or below 3/2 when the baseline (mask 0) is best
+  {
+    cbfMasksToTest.push_back( cbfMask2 );
+  }
+
+  return cbfMasksToTest;
+}
+
+
+
 // ------------------------------------------------------------------------------------------------
 // Logical transform
 // ------------------------------------------------------------------------------------------------
 
-void TrQuant::getTrTypes ( TransformUnit tu, const ComponentID compID, int &trTypeHor, int &trTypeVer )
+void TrQuant::getTrTypes(const TransformUnit tu, const ComponentID compID, int &trTypeHor, int &trTypeVer)
 {
-  bool mtsActivated = CU::isIntra( *tu.cu ) ? tu.cs->sps->getUseIntraMTS() : tu.cs->sps->getUseInterMTS() && CU::isInter( *tu.cu );
-
-  bool mtsImplicit  = CU::isIntra( *tu.cu ) && tu.cs->sps->getUseImplicitMTS() && compID == COMPONENT_Y;
+  const bool isExplicitMTS = (CU::isIntra(*tu.cu) ? tu.cs->sps->getUseIntraMTS() : tu.cs->sps->getUseInterMTS() && CU::isInter(*tu.cu)) && isLuma(compID);
+  const bool isImplicitMTS = CU::isIntra(*tu.cu) && tu.cs->sps->getUseImplicitMTS() && isLuma(compID) && tu.cu->lfnstIdx == 0 && tu.cu->mipFlag == 0;
+  const bool isISP = CU::isIntra(*tu.cu) && tu.cu->ispMode && isLuma(compID);
+  const bool isSBT = CU::isInter(*tu.cu) && tu.cu->sbtInfo && isLuma(compID);
 
   trTypeHor = DCT2;
   trTypeVer = DCT2;
 
-  if (tu.cu->ispMode && isLuma(compID))
+  if (isISP && tu.cu->lfnstIdx)
   {
-    TU::getTransformTypeISP(tu, compID, trTypeHor, trTypeVer);
     return;
-}
-  if( tu.cu->sbtInfo && compID == COMPONENT_Y )
+  }
+
+  if (!tu.cs->sps->getUseMTS())
+    return;
+
+  if (isImplicitMTS || isISP)
+  {
+    int  width = tu.blocks[compID].width;
+    int  height = tu.blocks[compID].height;
+    bool widthDstOk = width >= 4 && width <= 16;
+    bool heightDstOk = height >= 4 && height <= 16;
+
+    if (widthDstOk)
+      trTypeHor = DST7;
+    if (heightDstOk)
+      trTypeVer = DST7;
+    return;
+  }
+
+
+  if (isSBT)
   {
     uint8_t sbtIdx = tu.cu->getSbtIdx();
     uint8_t sbtPos = tu.cu->getSbtPos();
@@ -329,43 +726,27 @@ void TrQuant::getTrTypes ( TransformUnit tu, const ComponentID compID, int &trTy
     return;
   }
 
-  if ( mtsActivated )
+  if (isExplicitMTS)
   {
-    if( compID == COMPONENT_Y )
+    if (tu.mtsIdx[compID] > MTS_SKIP)
     {
-      if ( tu.mtsIdx > 1 )
-      {
-        int indHor = ( tu.mtsIdx - 2 ) &  1;
-        int indVer = ( tu.mtsIdx - 2 ) >> 1;
-
-        trTypeHor = indHor ? DCT8 : DST7;
-        trTypeVer = indVer ? DCT8 : DST7;
-      }
+      int indHor = (tu.mtsIdx[compID] - MTS_DST7_DST7) & 1;
+      int indVer = (tu.mtsIdx[compID] - MTS_DST7_DST7) >> 1;
+      trTypeHor = indHor ? DCT8 : DST7;
+      trTypeVer = indVer ? DCT8 : DST7;
     }
   }
-  else if ( mtsImplicit )
-  {
-    int  width       = tu.blocks[compID].width;
-    int  height      = tu.blocks[compID].height;
-    bool widthDstOk  = width  >= 4 && width  <= 16;
-    bool heightDstOk = height >= 4 && height <= 16;
-
-    if ( width < height && widthDstOk )
-      trTypeHor = DST7;
-    else if ( height < width && heightDstOk )
-      trTypeVer = DST7;
-    else if ( width == height && widthDstOk )
-      trTypeHor = trTypeVer = DST7;
-  }
 }
 
+
+
 void TrQuant::xT( const TransformUnit &tu, const ComponentID &compID, const CPelBuf &resi, CoeffBuf &dstCoeff, const int width, const int height )
 {
   const unsigned maxLog2TrDynamicRange  = tu.cs->sps->getMaxLog2TrDynamicRange( toChannelType( compID ) );
   const unsigned bitDepth               = tu.cs->sps->getBitDepth(              toChannelType( compID ) );
   const int      TRANSFORM_MATRIX_SHIFT = g_transformMatrixShift[TRANSFORM_FORWARD];
-  const uint32_t transformWidthIndex    = g_aucLog2[width ] - 1;  // nLog2WidthMinus1, since transform start from 2-point
-  const uint32_t transformHeightIndex   = g_aucLog2[height] - 1;  // nLog2HeightMinus1, since transform start from 2-point
+  const uint32_t transformWidthIndex    = floorLog2(width ) - 1;  // nLog2WidthMinus1, since transform start from 2-point
+  const uint32_t transformHeightIndex   = floorLog2(height) - 1;  // nLog2HeightMinus1, since transform start from 2-point
 
 
   int trTypeHor = DCT2;
@@ -373,8 +754,21 @@ void TrQuant::xT( const TransformUnit &tu, const ComponentID &compID, const CPel
 
   getTrTypes ( tu, compID, trTypeHor, trTypeVer );
 
-  const int      skipWidth  = ( trTypeHor != DCT2 && width  == 32 ) ? 16 : width  > JVET_C0024_ZERO_OUT_TH ? width  - JVET_C0024_ZERO_OUT_TH : 0;
-  const int      skipHeight = ( trTypeVer != DCT2 && height == 32 ) ? 16 : height > JVET_C0024_ZERO_OUT_TH ? height - JVET_C0024_ZERO_OUT_TH : 0;
+  int  skipWidth  = ( trTypeHor != DCT2 && width  == 32 ) ? 16 : width  > JVET_C0024_ZERO_OUT_TH ? width  - JVET_C0024_ZERO_OUT_TH : 0;
+  int  skipHeight = ( trTypeVer != DCT2 && height == 32 ) ? 16 : height > JVET_C0024_ZERO_OUT_TH ? height - JVET_C0024_ZERO_OUT_TH : 0;
+  if( tu.cs->sps->getUseLFNST() && tu.cu->lfnstIdx )
+  {
+    if( (width == 4 && height > 4) || (width > 4 && height == 4) )
+    {
+      skipWidth  = width  - 4;
+      skipHeight = height - 4;
+    }
+    else if( (width >= 8 && height >= 8) )
+    {
+      skipWidth  = width  - 8;
+      skipHeight = height - 8;
+    }
+  }
 
 #if RExt__DECODER_DEBUG_TOOL_STATISTICS
   if ( trTypeHor != DCT2 )
@@ -398,8 +792,8 @@ void TrQuant::xT( const TransformUnit &tu, const ComponentID &compID, const CPel
 
   if( width > 1 && height > 1 ) // 2-D transform
   {
-    const int      shift_1st              = ((g_aucLog2[width ]) + bitDepth + TRANSFORM_MATRIX_SHIFT) - maxLog2TrDynamicRange + COM16_C806_TRANS_PREC;
-    const int      shift_2nd              =  (g_aucLog2[height])            + TRANSFORM_MATRIX_SHIFT                          + COM16_C806_TRANS_PREC;
+    const int      shift_1st              = ((floorLog2(width )) + bitDepth + TRANSFORM_MATRIX_SHIFT) - maxLog2TrDynamicRange + COM16_C806_TRANS_PREC;
+    const int      shift_2nd              =  (floorLog2(height))            + TRANSFORM_MATRIX_SHIFT                          + COM16_C806_TRANS_PREC;
     CHECK( shift_1st < 0, "Negative shift" );
     CHECK( shift_2nd < 0, "Negative shift" );
   TCoeff *tmp = ( TCoeff * ) alloca( width * height * sizeof( TCoeff ) );
@@ -409,14 +803,14 @@ void TrQuant::xT( const TransformUnit &tu, const ComponentID &compID, const CPel
   }
   else if( height == 1 ) //1-D horizontal transform
   {
-    const int      shift              = ((g_aucLog2[width ]) + bitDepth + TRANSFORM_MATRIX_SHIFT) - maxLog2TrDynamicRange + COM16_C806_TRANS_PREC;
+    const int      shift              = ((floorLog2(width )) + bitDepth + TRANSFORM_MATRIX_SHIFT) - maxLog2TrDynamicRange + COM16_C806_TRANS_PREC;
     CHECK( shift < 0, "Negative shift" );
     CHECKD( ( transformWidthIndex < 0 ), "There is a problem with the width." );
     fastFwdTrans[trTypeHor][transformWidthIndex]( block, dstCoeff.buf, shift, 1, 0, skipWidth );
   }
   else //if (iWidth == 1) //1-D vertical transform
   {
-    int shift = ( ( g_aucLog2[height] ) + bitDepth + TRANSFORM_MATRIX_SHIFT ) - maxLog2TrDynamicRange + COM16_C806_TRANS_PREC;
+    int shift = ( ( floorLog2(height) ) + bitDepth + TRANSFORM_MATRIX_SHIFT ) - maxLog2TrDynamicRange + COM16_C806_TRANS_PREC;
     CHECK( shift < 0, "Negative shift" );
     CHECKD( ( transformHeightIndex < 0 ), "There is a problem with the height." );
     fastFwdTrans[trTypeVer][transformHeightIndex]( block, dstCoeff.buf, shift, 1, 0, skipHeight );
@@ -432,17 +826,29 @@ void TrQuant::xIT( const TransformUnit &tu, const ComponentID &compID, const CCo
   const int      TRANSFORM_MATRIX_SHIFT = g_transformMatrixShift[TRANSFORM_INVERSE];
   const TCoeff   clipMinimum            = -( 1 << maxLog2TrDynamicRange );
   const TCoeff   clipMaximum            =  ( 1 << maxLog2TrDynamicRange ) - 1;
-  const uint32_t transformWidthIndex    = g_aucLog2[width ] - 1;                                // nLog2WidthMinus1, since transform start from 2-point
-  const uint32_t transformHeightIndex   = g_aucLog2[height] - 1;                                // nLog2HeightMinus1, since transform start from 2-point
+  const uint32_t transformWidthIndex    = floorLog2(width ) - 1;                                // nLog2WidthMinus1, since transform start from 2-point
+  const uint32_t transformHeightIndex   = floorLog2(height) - 1;                                // nLog2HeightMinus1, since transform start from 2-point
 
 
   int trTypeHor = DCT2;
   int trTypeVer = DCT2;
 
   getTrTypes ( tu, compID, trTypeHor, trTypeVer );
-
-  const int      skipWidth  = ( trTypeHor != DCT2 && width  == 32 ) ? 16 : width  > JVET_C0024_ZERO_OUT_TH ? width  - JVET_C0024_ZERO_OUT_TH : 0;
-  const int      skipHeight = ( trTypeVer != DCT2 && height == 32 ) ? 16 : height > JVET_C0024_ZERO_OUT_TH ? height - JVET_C0024_ZERO_OUT_TH : 0;
+  int skipWidth  = ( trTypeHor != DCT2 && width  == 32 ) ? 16 : width  > JVET_C0024_ZERO_OUT_TH ? width  - JVET_C0024_ZERO_OUT_TH : 0;
+  int skipHeight = ( trTypeVer != DCT2 && height == 32 ) ? 16 : height > JVET_C0024_ZERO_OUT_TH ? height - JVET_C0024_ZERO_OUT_TH : 0;
+  if( tu.cs->sps->getUseLFNST() && tu.cu->lfnstIdx )
+  {
+    if( (width == 4 && height > 4) || (width > 4 && height == 4) )
+    {
+      skipWidth  = width  - 4;
+      skipHeight = height - 4;
+    }
+    else if( (width >= 8 && height >= 8) )
+    {
+      skipWidth  = width  - 8;
+      skipHeight = height - 8;
+    }
+  }
 
   TCoeff *block = ( TCoeff * ) alloca( width * height * sizeof( TCoeff ) );
 
@@ -493,42 +899,13 @@ void TrQuant::xITransformSkip(const CCoeffBuf     &pCoeff,
   const CompArea &area      = tu.blocks[compID];
   const int width           = area.width;
   const int height          = area.height;
-  const int maxLog2TrDynamicRange = tu.cs->sps->getMaxLog2TrDynamicRange(toChannelType(compID));
-  const int channelBitDepth = tu.cs->sps->getBitDepth(toChannelType(compID));
-
-  int iTransformShift = getTransformShift(channelBitDepth, area.size(), maxLog2TrDynamicRange);
-  if( tu.cs->sps->getSpsRangeExtension().getExtendedPrecisionProcessingFlag() )
-  {
-    iTransformShift = std::max<int>( 0, iTransformShift );
-  }
-
-  int iWHScale = 1;
 
-  const bool rotateResidual = TU::isNonTransformedResidualRotated( tu, compID );
-
-  if( iTransformShift >= 0 )
+  for (uint32_t y = 0; y < height; y++)
   {
-    const TCoeff offset = iTransformShift == 0 ? 0 : ( 1 << ( iTransformShift - 1 ) );
-
-    for( uint32_t y = 0; y < height; y++ )
-    {
-      for( uint32_t x = 0; x < width; x++ )
-      {
-        pResidual.at( x, y ) = Pel( ( ( rotateResidual ? pCoeff.at( pCoeff.width - x - 1, pCoeff.height - y - 1 ) : pCoeff.at( x, y ) ) * iWHScale + offset ) >> iTransformShift );
-      }
-    }
-  }
-  else //for very high bit depths
-  {
-    iTransformShift = -iTransformShift;
-
-    for( uint32_t y = 0; y < height; y++ )
-    {
-      for( uint32_t x = 0; x < width; x++ )
+      for (uint32_t x = 0; x < width; x++)
       {
-        pResidual.at( x, y ) = Pel( ( rotateResidual ? pCoeff.at( pCoeff.width - x - 1, pCoeff.height - y - 1 ) : pCoeff.at( x, y ) )  * iWHScale << iTransformShift );
+          pResidual.at(x, y) = Pel(pCoeff.at(x, y));
       }
-    }
   }
 }
 
@@ -537,7 +914,7 @@ void TrQuant::xQuant(TransformUnit &tu, const ComponentID &compID, const CCoeffB
   m_quant->quant( tu, compID, pSrc, uiAbsSum, cQP, ctx );
 }
 
-void TrQuant::transformNxN( TransformUnit &tu, const ComponentID &compID, const QpParam &cQP, std::vector<TrMode>* trModes, const int maxCand, double* diagRatio, double* horVerRatio )
+void TrQuant::transformNxN( TransformUnit& tu, const ComponentID& compID, const QpParam& cQP, std::vector<TrMode>* trModes, const int maxCand )
 {
         CodingStructure &cs = *tu.cs;
   const CompArea &rect      = tu.blocks[compID];
@@ -546,11 +923,7 @@ void TrQuant::transformNxN( TransformUnit &tu, const ComponentID &compID, const
 
   const CPelBuf  resiBuf    = cs.getResiBuf(rect);
 
-#if MAX_TB_SIZE_SIGNALLING
   CHECK( cs.sps->getMaxTbSize() < width, "Unsupported transformation size" );
-#else
-  CHECK( MAX_TB_SIZEY < width, "Unsupported transformation size" );
-#endif
 
   int pos = 0;
   std::vector<TrCost> trCosts;
@@ -558,8 +931,8 @@ void TrQuant::transformNxN( TransformUnit &tu, const ComponentID &compID, const
   const double facBB[] = { 1.2, 1.3, 1.3, 1.4, 1.5 };
   while( it != trModes->end() )
   {
-    tu.mtsIdx = it->first;
-    CoeffBuf tempCoeff( m_mtsCoeffs[tu.mtsIdx], rect );
+    tu.mtsIdx[compID] = it->first;
+    CoeffBuf tempCoeff( m_mtsCoeffs[tu.mtsIdx[compID]], rect);
     if( tu.noResidual )
     {
       int sumAbs = 0;
@@ -568,7 +941,7 @@ void TrQuant::transformNxN( TransformUnit &tu, const ComponentID &compID, const
       continue;
     }
 
-    if( isLuma(compID) && tu.mtsIdx == 1 )
+    if ( tu.mtsIdx[compID] == MTS_SKIP )
     {
       xTransformSkip( tu, compID, resiBuf, tempCoeff.buf );
     }
@@ -584,20 +957,23 @@ void TrQuant::transformNxN( TransformUnit &tu, const ComponentID &compID, const
     }
 
     double scaleSAD=1.0;
-    if (isLuma(compID) && tu.mtsIdx==1 && ((g_aucLog2[width] + g_aucLog2[height]) & 1) == 1 )
+    if ( tu.mtsIdx[compID] == MTS_SKIP && ((floorLog2(width) + floorLog2(height)) & 1) == 1)
     {
       scaleSAD=1.0/1.414213562; // compensate for not scaling transform skip coefficients by 1/sqrt(2)
     }
+    if (tu.mtsIdx[compID] == MTS_SKIP)
+    {
+        int trShift = getTransformShift(tu.cu->slice->getSPS()->getBitDepth(toChannelType(compID)), rect.size(), tu.cu->slice->getSPS()->getMaxLog2TrDynamicRange(toChannelType(compID)));
+        scaleSAD *= pow(2, trShift);
+    }
+
     trCosts.push_back( TrCost( int(sumAbs*scaleSAD), pos++ ) );
     it++;
   }
 
-  // it gets the distribution of the DCT-II coefficients energy, which will be useful to discard ISP tests
-  CoeffBuf coeffsDCT( m_mtsCoeffs[0], rect );
-  xGetCoeffEnergy( tu, compID, coeffsDCT, diagRatio, horVerRatio );
   int numTests = 0;
   std::vector<TrCost>::iterator itC = trCosts.begin();
-  const double fac   = facBB[g_aucLog2[std::max(width, height)]-2];
+  const double fac   = facBB[std::max(0, floorLog2(std::max(width, height)) - 2)];
   const double thr   = fac * trCosts.begin()->first;
   const double thrTS = trCosts.begin()->first;
   while( itC != trCosts.end() )
@@ -609,15 +985,15 @@ void TrQuant::transformNxN( TransformUnit &tu, const ComponentID &compID, const
   }
 }
 
-void TrQuant::transformNxN( TransformUnit &tu, const ComponentID &compID, const QpParam &cQP, TCoeff &uiAbsSum, const Ctx &ctx, const bool loadTr, double* diagRatio, double* horVerRatio )
+void TrQuant::transformNxN( TransformUnit& tu, const ComponentID& compID, const QpParam& cQP, TCoeff& uiAbsSum, const Ctx& ctx, const bool loadTr )
 {
         CodingStructure &cs = *tu.cs;
+  const SPS &sps            = *cs.sps;
   const CompArea &rect      = tu.blocks[compID];
   const uint32_t uiWidth        = rect.width;
   const uint32_t uiHeight       = rect.height;
 
   const CPelBuf resiBuf     = cs.getResiBuf(rect);
-        CoeffBuf rpcCoeff   = tu.getCoeffs(compID);
 
   if( tu.noResidual )
   {
@@ -629,50 +1005,26 @@ void TrQuant::transformNxN( TransformUnit &tu, const ComponentID &compID, const
   RDPCMMode rdpcmMode = RDPCM_OFF;
   rdpcmNxN(tu, compID, cQP, uiAbsSum, rdpcmMode);
 
+  if ((tu.cu->bdpcmMode && isLuma(compID)) || (!isLuma(compID) && tu.cu->bdpcmModeChroma))
+  {
+    tu.mtsIdx[compID] = MTS_SKIP;
+  }
+
   if (rdpcmMode == RDPCM_OFF)
   {
     uiAbsSum = 0;
 
     // transform and quantize
-    if (CU::isLosslessCoded(*tu.cu))
-    {
-      const bool rotateResidual = TU::isNonTransformedResidualRotated( tu, compID );
-
-      for( uint32_t y = 0; y < uiHeight; y++ )
-      {
-        for( uint32_t x = 0; x < uiWidth; x++ )
-        {
-          const Pel currentSample = resiBuf.at( x, y );
-
-          if( rotateResidual )
-          {
-            rpcCoeff.at( uiWidth - x - 1, uiHeight - y - 1 ) = currentSample;
-          }
-          else
-          {
-            rpcCoeff.at( x, y ) = currentSample;
-          }
-
-          uiAbsSum += TCoeff( abs( currentSample ) );
-        }
-      }
-    }
-    else
-    {
-#if MAX_TB_SIZE_SIGNALLING
       CHECK( cs.sps->getMaxTbSize() < uiWidth, "Unsupported transformation size" );
 
-#else
-      CHECK( MAX_TB_SIZEY < uiWidth, "Unsupported transformation size" );
-#endif
 
-      CoeffBuf tempCoeff( loadTr ? m_mtsCoeffs[tu.mtsIdx] : m_plTempCoeff, rect );
+      CoeffBuf tempCoeff(loadTr ? m_mtsCoeffs[tu.mtsIdx[compID]] : m_tempCoeff, rect);
 
       DTRACE_PEL_BUF( D_RESIDUALS, resiBuf, tu, tu.cu->predMode, compID );
 
       if( !loadTr )
       {
-        if( isLuma(compID) && tu.mtsIdx == 1 )
+        if ( tu.mtsIdx[compID] == MTS_SKIP )
       {
         xTransformSkip( tu, compID, resiBuf, tempCoeff.buf );
       }
@@ -682,64 +1034,26 @@ void TrQuant::transformNxN( TransformUnit &tu, const ComponentID &compID, const
       }
       }
 
-      //we do this only with the DCT-II coefficients
-      if( isLuma(compID) &&
-        !loadTr && tu.mtsIdx == 0
-        )
+
+      if( sps.getUseLFNST() )
       {
-        //it gets the distribution of the coefficients energy, which will be useful to discard ISP tests
-        xGetCoeffEnergy( tu, compID, tempCoeff, diagRatio, horVerRatio );
+        xFwdLfnst( tu, compID, loadTr );
       }
+
       DTRACE_COEFF_BUF( D_TCOEFF, tempCoeff, tu, tu.cu->predMode, compID );
 
       xQuant( tu, compID, tempCoeff, uiAbsSum, cQP, ctx );
 
       DTRACE_COEFF_BUF( D_TCOEFF, tu.getCoeffs( compID ), tu, tu.cu->predMode, compID );
-    }
   }
 
   // set coded block flag (CBF)
   TU::setCbfAtDepth (tu, compID, tu.depth, uiAbsSum > 0);
 }
 
-void TrQuant::xGetCoeffEnergy( TransformUnit &tu, const ComponentID &compID, const CoeffBuf& coeffs, double* diagRatio, double* horVerRatio )
-{
-  if( nullptr == diagRatio || nullptr == horVerRatio ) return;
-
-  if( tu.cu->predMode == MODE_INTRA && !tu.cu->ispMode && isLuma( compID ) && CU::canUseISPSplit( *tu.cu, compID ) != NOT_INTRA_SUBPARTITIONS )
-  {
-    const int width   = tu.cu->blocks[compID].width;
-    const int height  = tu.cu->blocks[compID].height;
-    const int log2Sl  = width <= height ? g_aucLog2[height >> g_aucLog2[width]] : g_aucLog2[width >> g_aucLog2[height]];
-    const int diPos1  = width <= height ? width  : height;
-    const int diPos2  = width <= height ? height : width;
-    const int ofsPos1 = width <= height ? 1 : coeffs.stride;
-    const int ofsPos2 = width <= height ? coeffs.stride : 1;
-
-    int wdtE = 0, hgtE = 0, diaE = 0;
-    int* gtE = width <= height ? &wdtE : &hgtE;
-    int* stE = width <= height ? &hgtE : &wdtE;
-
-    for( int pos1 = 0; pos1 < diPos1; pos1++ )
-    {
-      const int posN = pos1 << log2Sl;
-      for( int pos2 = 0; pos2 < diPos2; pos2++ )
-      {
-        const int blkP = pos1 * ofsPos1 + pos2 * ofsPos2;
-        if( posN  > pos2 ) *gtE += abs( coeffs.buf[ blkP ] );
-        if( posN  < pos2 ) *stE += abs( coeffs.buf[ blkP ] );
-        if( posN == pos2 ) diaE += abs( coeffs.buf[ blkP ] );
-      }
-    }
-
-    *horVerRatio = 0 == wdtE && 0 == hgtE ? 1 : double( wdtE ) / double( hgtE );
-    *diagRatio   = 0 == wdtE && 0 == hgtE && 0 == diaE ? 1 : double( diaE ) / double( wdtE + hgtE );
-  }
-}
 
 void TrQuant::applyForwardRDPCM(TransformUnit &tu, const ComponentID &compID, const QpParam &cQP, TCoeff &uiAbsSum, const RDPCMMode &mode)
 {
-  const bool bLossless      = tu.cu->transQuantBypass;
   const uint32_t uiWidth        = tu.blocks[compID].width;
   const uint32_t uiHeight       = tu.blocks[compID].height;
   const bool rotateResidual = TU::isNonTransformedResidualRotated(tu, compID);
@@ -773,16 +1087,8 @@ void TrQuant::applyForwardRDPCM(TransformUnit &tu, const ComponentID &compID, co
 
       Pel reconstructedDelta;
 
-      if (bLossless)
-      {
-        pcCoeff.buf[coefficientIndex] = encoderSideDelta;
-        reconstructedDelta            = (Pel) encoderSideDelta;
-      }
-      else
-      {
         m_quant->transformSkipQuantOneSample(tu, compID, encoderSideDelta, pcCoeff.buf[coefficientIndex],   coefficientIndex, cQP, bUseHalfRoundingPoint);
         m_quant->invTrSkipDeQuantOneSample  (tu, compID, pcCoeff.buf[coefficientIndex], reconstructedDelta, coefficientIndex, cQP);
-      }
 
       uiAbsSum += abs(pcCoeff.buf[coefficientIndex]);
 
@@ -796,7 +1102,7 @@ void TrQuant::applyForwardRDPCM(TransformUnit &tu, const ComponentID &compID, co
 
 void TrQuant::rdpcmNxN(TransformUnit &tu, const ComponentID &compID, const QpParam &cQP, TCoeff &uiAbsSum, RDPCMMode &rdpcmMode)
 {
-  if (!CU::isRDPCMEnabled(*tu.cu) || (tu.mtsIdx!=1 && !tu.cu->transQuantBypass))
+  if (!CU::isRDPCMEnabled(*tu.cu) || (tu.mtsIdx[compID] != MTS_SKIP))
   {
     rdpcmMode = RDPCM_OFF;
   }
@@ -860,47 +1166,16 @@ void TrQuant::rdpcmNxN(TransformUnit &tu, const ComponentID &compID, const QpPar
 
 void TrQuant::xTransformSkip(const TransformUnit &tu, const ComponentID &compID, const CPelBuf &resi, TCoeff* psCoeff)
 {
-  const SPS &sps            = *tu.cs->sps;
-  const CompArea &rect      = tu.blocks[compID];
-  const uint32_t width          = rect.width;
-  const uint32_t height         = rect.height;
-  const ChannelType chType  = toChannelType(compID);
-  const int channelBitDepth = sps.getBitDepth(chType);
-  const int maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange(chType);
-  int iTransformShift       = getTransformShift(channelBitDepth, rect.size(), maxLog2TrDynamicRange);
+  const CompArea &rect = tu.blocks[compID];
+  const uint32_t width = rect.width;
+  const uint32_t height = rect.height;
 
-  if( sps.getSpsRangeExtension().getExtendedPrecisionProcessingFlag() )
+  for (uint32_t y = 0, coefficientIndex = 0; y < height; y++)
   {
-    iTransformShift = std::max<int>( 0, iTransformShift );
-  }
-
-  int iWHScale = 1;
-
-  const bool rotateResidual = TU::isNonTransformedResidualRotated( tu, compID );
-  const uint32_t uiSizeMinus1 = ( width * height ) - 1;
-
-  if( iTransformShift >= 0 )
-  {
-    for( uint32_t y = 0, coefficientIndex = 0; y < height; y++ )
-    {
-      for( uint32_t x = 0; x < width; x++, coefficientIndex++ )
+      for (uint32_t x = 0; x < width; x++, coefficientIndex++)
       {
-        psCoeff[rotateResidual ? uiSizeMinus1 - coefficientIndex : coefficientIndex] = ( TCoeff( resi.at( x, y ) ) * iWHScale ) << iTransformShift;
+          psCoeff[ coefficientIndex ] = TCoeff(resi.at(x, y));
       }
-    }
-  }
-  else //for very high bit depths
-  {
-    iTransformShift = -iTransformShift;
-    const TCoeff offset = 1 << ( iTransformShift - 1 );
-
-    for( uint32_t y = 0, coefficientIndex = 0; y < height; y++ )
-    {
-      for( uint32_t x = 0; x < width; x++, coefficientIndex++ )
-      {
-        psCoeff[rotateResidual ? uiSizeMinus1 - coefficientIndex : coefficientIndex] = ( TCoeff( resi.at( x, y ) ) * iWHScale + offset ) >> iTransformShift;
-      }
-    }
   }
 }
 
diff --git a/source/Lib/CommonLib/TrQuant.h b/source/Lib/CommonLib/TrQuant.h
index 85964c1c8efa973db6ab9f8a842ba46918dca4b9..50f893da847821036c47ab93c832c6707a2e7ad5 100644
--- a/source/Lib/CommonLib/TrQuant.h
+++ b/source/Lib/CommonLib/TrQuant.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -47,6 +47,7 @@
 #include "UnitPartitioner.h"
 #include "Quant.h"
 
+#include "DepQuant.h"
 //! \ingroup CommonLib
 //! \{
 
@@ -69,32 +70,41 @@ public:
   void init      (
                     const Quant* otherQuant,
                     const uint32_t uiMaxTrSize,
-                    const bool bUseRDOQ             = false,
-                    const bool bUseRDOQTS           = false,
+                    const bool bUseRDOQ,
+                    const bool bUseRDOQTS,
 #if T0196_SELECTIVE_RDOQ
-                    const bool useSelectiveRDOQ     = false,
+                    const bool useSelectiveRDOQ,
 #endif
-                    const bool bEnc                 = false,
-                    const bool useTransformSkipFast = false
+                    const bool bEnc
   );
+  void getTrTypes(const TransformUnit tu, const ComponentID compID, int &trTypeHor, int &trTypeVer);
 
-  void getTrTypes( TransformUnit tu, const ComponentID compID, int &trTypeHor, int &trTypeVer );
+  void fwdLfnstNxN( int* src, int* dst, const uint32_t mode, const uint32_t index, const uint32_t size, int zeroOutSize );
+  void invLfnstNxN( int* src, int* dst, const uint32_t mode, const uint32_t index, const uint32_t size, int zeroOutSize );
 
+  uint32_t getLFNSTIntraMode( int wideAngPredMode );
+  bool     getTransposeFlag ( uint32_t intraMode  );
 
 protected:
 
+  void xFwdLfnst( const TransformUnit &tu, const ComponentID compID, const bool loadTr = false );
+  void xInvLfnst( const TransformUnit &tu, const ComponentID compID );
+
 public:
 
   void invTransformNxN  (TransformUnit &tu, const ComponentID &compID, PelBuf &pResi, const QpParam &cQPs);
-
-  void transformNxN     ( TransformUnit &tu, const ComponentID &compID, const QpParam &cQP, std::vector<TrMode>* trModes, const int maxCand, double* diagRatio = nullptr, double* horVerRatio = nullptr );
-  void transformNxN     ( TransformUnit &tu, const ComponentID &compID, const QpParam &cQP, TCoeff &uiAbsSum, const Ctx &ctx, const bool loadTr = false, double* diagRatio = nullptr, double* horVerRatio = nullptr );
+  void transformNxN     ( TransformUnit& tu, const ComponentID& compID, const QpParam& cQP, std::vector<TrMode>* trModes, const int maxCand );
+  void transformNxN     ( TransformUnit& tu, const ComponentID& compID, const QpParam& cQP, TCoeff& uiAbsSum, const Ctx& ctx, const bool loadTr = false );
   void rdpcmNxN         (TransformUnit &tu, const ComponentID &compID, const QpParam &cQP, TCoeff &uiAbsSum,       RDPCMMode &rdpcmMode);
   void applyForwardRDPCM(TransformUnit &tu, const ComponentID &compID, const QpParam &cQP, TCoeff &uiAbsSum, const RDPCMMode &rdpcmMode);
 
   void transformSkipQuantOneSample(TransformUnit &tu, const ComponentID &compID, const TCoeff &resiDiff, TCoeff &coeff,    const uint32_t &uiPos, const QpParam &cQP, const bool bUseHalfRoundingPoint);
   void invTrSkipDeQuantOneSample  (TransformUnit &tu, const ComponentID &compID, const TCoeff &pcCoeff,  Pel &reconSample, const uint32_t &uiPos, const QpParam &cQP);
 
+  void                        invTransformICT     ( const TransformUnit &tu, PelBuf &resCb, PelBuf &resCr );
+  std::pair<int64_t,int64_t>  fwdTransformICT     ( const TransformUnit &tu, const PelBuf &resCb, const PelBuf &resCr, PelBuf& resC1, PelBuf& resC2, int jointCbCr = -1 );
+  std::vector<int>            selectICTCandidates ( const TransformUnit &tu, CompStorage* resCb, CompStorage* resCr );
+
   void invRdpcmNxN(TransformUnit& tu, const ComponentID &compID, PelBuf &pcResidual);
 #if RDOQ_CHROMA_LAMBDA
   void   setLambdas  ( const double lambdas[MAX_NUM_COMPONENT] )   { m_quant->setLambdas( lambdas ); }
@@ -104,24 +114,27 @@ public:
   void   setLambda   ( const double dLambda )                      { m_quant->setLambda( dLambda ); }
   double getLambda   () const                                      { return m_quant->getLambda(); }
 
-  Quant* getQuant() { return m_quant;  }
-
+  DepQuant* getQuant() { return m_quant; }
+  void   lambdaAdjustColorTrans(bool forward) { m_quant->lambdaAdjustColorTrans(forward); }
+  void   resetStore() { m_quant->resetStore(); }
 
 #if ENABLE_SPLIT_PARALLELISM
   void    copyState( const TrQuant& other );
 #endif
 
 protected:
-  TCoeff*  m_plTempCoeff;
-  uint32_t     m_uiMaxTrSize;
-  bool     m_bEnc;
-  bool     m_useTransformSkipFast;
-
-  bool     m_scalingListEnabledFlag;
+  TCoeff   m_tempCoeff[MAX_TB_SIZEY * MAX_TB_SIZEY];
 
 private:
-  Quant    *m_quant;          //!< Quantizer
-  TCoeff** m_mtsCoeffs;
+  DepQuant *m_quant;          //!< Quantizer
+  TCoeff    m_mtsCoeffs[NUM_TRAFO_MODES_MTS][MAX_TB_SIZEY * MAX_TB_SIZEY];
+  TCoeff   m_tempInMatrix [ 48 ];
+  TCoeff   m_tempOutMatrix[ 48 ];
+  static const int maxAbsIctMode = 3;
+  void                      (*m_invICTMem[1+2*maxAbsIctMode])(PelBuf&,PelBuf&);
+  std::pair<int64_t,int64_t>(*m_fwdICTMem[1+2*maxAbsIctMode])(const PelBuf&,const PelBuf&,PelBuf&,PelBuf&);
+  void                      (**m_invICT)(PelBuf&,PelBuf&);
+  std::pair<int64_t,int64_t>(**m_fwdICT)(const PelBuf&,const PelBuf&,PelBuf&,PelBuf&);
 
 
   // forward Transform
diff --git a/source/Lib/CommonLib/TrQuant_EMT.cpp b/source/Lib/CommonLib/TrQuant_EMT.cpp
index 78412a87a27187790b7fdf9362da51d229d4ba10..b21ede257072798dca77ec1513bc525815ff7bb8 100644
--- a/source/Lib/CommonLib/TrQuant_EMT.cpp
+++ b/source/Lib/CommonLib/TrQuant_EMT.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/TrQuant_EMT.h b/source/Lib/CommonLib/TrQuant_EMT.h
index f636da5c60eec2ac8a94414e25d1d2bd7a919577..d6e6a2a2d207bfc3efebb9fed1c1214239ff13fb 100644
--- a/source/Lib/CommonLib/TrQuant_EMT.h
+++ b/source/Lib/CommonLib/TrQuant_EMT.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h
index ced9c082fbb010645b11783caf2bfd0a6eb17157..2a5138da3c867ead0b38a4e52fa462b27471d583 100644
--- a/source/Lib/CommonLib/TypeDef.h
+++ b/source/Lib/CommonLib/TypeDef.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -50,46 +50,32 @@
 #include <assert.h>
 #include <cassert>
 
-#define MMVD_LTRP                                         1 // MVD scaling for MMVD considering LTRP from JVET-N0332
 
-#define JCTVC_Y0038_PARAMS                                1
+#define JVET_Q0055_MTS_SIGNALLING                         1 // JVET-Q0055: Check for transform coefficients outside the 16x16 area
+#define JVET_Q0480_RASTER_RECT_SLICES                     1 // JVET-Q0480: Eliminate redundant slice height syntax when in raster rectangular slice mode (tile_idx_delta_present_flag == 0)
 
-#define JVET_MMVD_OFF_MACRO                               0
+#define JVET_Q0433_MODIFIED_CHROMA_DIST_WEIGHT            1 // modification of chroma distortion weight (as agreed during presentation of JVET-Q0433)
 
-#define FIX_DB_MAX_TRANSFORM_SIZE                         1
+#define JVET_Q0487_SCALING_WINDOW_ISSUES                  1 // JVET-Q0487: Fix scaling window issues when scaling ratio is 1:1
 
-#define MRG_SHARELIST_SHARSIZE                            32
+#define JVET_AHG14_LOSSLESS                               1
+#define JVET_AHG14_LOSSLESS_ENC_QP_FIX                  ( 1 && JVET_AHG14_LOSSLESS ) // parenthesized so the macro expands as a single operand (e.g. under '!'); evaluates to 0 whenever JVET_AHG14_LOSSLESS is 0
 
 #define JVET_M0497_MATRIX_MULT                            0 // 0: Fast method; 1: Matrix multiplication
 
 #define APPLY_SBT_SL_ON_MTS                               1 // apply save & load fast algorithm on inter MTS when SBT is on
-#define FIX_PCM                                           1 // Fix PCM bugs in VTM3
-
 #define MAX_TB_SIZE_SIGNALLING                            0
+#define HEVC_SEI                                          0 // SEI messages that are defined in HEVC, but not in VVC
 
 typedef std::pair<int, bool> TrMode;
 typedef std::pair<int, int>  TrCost;
 
 // clang-format off
-#define ENABLE_JVET_L0283_MRL                             1 // 1: Enable MRL, 0: Disable MRL
-#define JVET_L0090_PAIR_AVG                               1 // Add pairwise average candidates, replace HEVC combined candidates
-#define REUSE_CU_RESULTS                                  1
+#define REUSE_CU_RESULTS                                  1 
 #if REUSE_CU_RESULTS
 #define REUSE_CU_RESULTS_WITH_MULTIPLE_TUS                1
-#define MAX_NUM_TUS                                       4
 #endif
 // clang-format on
-
-#ifndef JVET_B0051_NON_MPM_MODE
-#define JVET_B0051_NON_MPM_MODE                         ( 1 && JEM_TOOLS )
-#endif
-#ifndef QTBT_AS_IN_JEM
-#define QTBT_AS_IN_JEM                                    1
-#endif
-#ifndef HEVC_TOOLS
-#define HEVC_TOOLS                                        0
-#endif
-
 #ifndef JVET_J0090_MEMORY_BANDWITH_MEASURE
 #define JVET_J0090_MEMORY_BANDWITH_MEASURE                0
 #endif
@@ -98,15 +84,13 @@ typedef std::pair<int, int>  TrCost;
 #define EXTENSION_360_VIDEO                               0   ///< extension for 360/spherical video coding support; this macro should be controlled by makefile, as it would be used to control whether the library is built and linked
 #endif
 
-#ifndef ENABLE_WPP_PARALLELISM
-#define ENABLE_WPP_PARALLELISM                            0
-#endif
-#if ENABLE_WPP_PARALLELISM
-#ifndef ENABLE_WPP_STATIC_LINK
-#define ENABLE_WPP_STATIC_LINK                            0 // bug fix static link
+#ifndef EXTENSION_HDRTOOLS
+#define EXTENSION_HDRTOOLS                                0 ///< extension for HDRTools/Metrics support; this macro should be controlled by makefile, as it would be used to control whether the library is built and linked
 #endif
-#define PARL_WPP_MAX_NUM_THREADS                         16
 
+#define JVET_O0756_CONFIG_HDRMETRICS                      1
+#if EXTENSION_HDRTOOLS
+#define JVET_O0756_CALCULATE_HDRMETRICS                   1
 #endif
 #ifndef ENABLE_SPLIT_PARALLELISM
 #define ENABLE_SPLIT_PARALLELISM                          0
@@ -121,9 +105,8 @@ typedef std::pair<int, int>  TrCost;
 
 
 // ====================================================================================================================
-// NEXT software switches
+// General settings
 // ====================================================================================================================
-#define K0238_SAO_GREEDY_MERGE_ENCODING                   1
 
 #ifndef ENABLE_TRACING
 #define ENABLE_TRACING                                    0 // DISABLE by default (enable only when debugging, requires 15% run-time in decoding) -- see documentation in 'doc/DTrace for NextSoftware.pdf'
@@ -139,49 +122,7 @@ typedef std::pair<int, int>  TrCost;
 #define WCG_EXT                                           1
 #define WCG_WPSNR                                         WCG_EXT
 
-#if HEVC_TOOLS
-#define HEVC_USE_INTRA_SMOOTHING_T32                      1
-#define HEVC_USE_INTRA_SMOOTHING_T64                      1
-#define HEVC_USE_DC_PREDFILTERING                         1
-#define HEVC_USE_HOR_VER_PREDFILTERING                    1
-#define HEVC_USE_MDCS                                     1
-#define HEVC_USE_SIGN_HIDING                              1
-#define HEVC_USE_SCALING_LISTS                            1
-#define HEVC_VPS                                          1
-#define HEVC_DEPENDENT_SLICES                             1
-#define HEVC_TILES_WPP                                    1
-#else
-#define HEVC_USE_SIGN_HIDING                              1
-#define HEVC_TILES_WPP                                    1
-#endif
-
-#ifndef HEVC_TILES_WPP
-#define HEVC_TILES_WPP                                    1
-#endif
-#if !HEVC_TILES_WPP
-#error JVET_M0445_MCTS_NEEDS_TILES_ENABLED
-#endif
-
-#define JVET_M0101_HLS                                    1  // joint HLS syntax
-
 #define KEEP_PRED_AND_RESI_SIGNALS                        0
-
-
-#if QTBT_AS_IN_JEM // macros which will cause changes in the decoder behavior ara marked with *** - keep them on to retain compatibility with JEM-toolcheck
-#define HM_QTBT_AS_IN_JEM                                 1   // ***
-#if     HM_QTBT_AS_IN_JEM
-#define HM_QTBT_AS_IN_JEM_QUANT                           1   // ***
-#define HM_QTBT_REPRODUCE_FAST_LCTU_BUG                   1
-#endif
-#define HM_CODED_CU_INFO                                  1   // like in JEM, when related CU is skipped, it stays like this even if a non skip mode wins...
-#define HM_4TAPIF_AS_IN_JEM                               1   // *** - PM: condition not well suited for 4-tap interpolation filters
-#define HM_MDIS_AS_IN_JEM                                 1   // *** - PM: not filtering ref. samples for 64xn case and using Planar MDIS condition at encoder
-#define HM_JEM_CLIP_PEL                                   1   // ***
-#define HM_JEM_MERGE_CANDS                                0   // ***
-
-
-#endif//JEM_COMP
-
 // ====================================================================================================================
 // Debugging
 // ====================================================================================================================
@@ -198,6 +139,13 @@ typedef std::pair<int, int>  TrCost;
 #define RExt__DECODER_DEBUG_BIT_STATISTICS                0 ///< 0 (default) = decoder reports as normal, 1 = decoder produces bit usage statistics (will impact decoder run time by up to ~10%)
 #endif
 
+#ifndef RExt__DECODER_DEBUG_TOOL_MAX_FRAME_STATS
+#define RExt__DECODER_DEBUG_TOOL_MAX_FRAME_STATS         (1 && RExt__DECODER_DEBUG_BIT_STATISTICS )   ///< 0 (default) = decoder reports as normal, 1 = decoder produces max frame bit usage statistics
+#endif
+
+#define TR_ONLY_COEFF_STATS                              (1 && RExt__DECODER_DEBUG_BIT_STATISTICS )   ///< 0 = combine TS and non-TS decoder debug statistics. 1 = separate TS and non-TS decoder debug statistics.
+#define EPBINCOUNT_FIX                                   (1 && RExt__DECODER_DEBUG_BIT_STATISTICS )   ///< 0 = use count to represent the number of calls to decodeBins. 1 = count and bins for EP bins are the same.
+
 #ifndef RExt__DECODER_DEBUG_TOOL_STATISTICS
 #define RExt__DECODER_DEBUG_TOOL_STATISTICS               0 ///< 0 (default) = decoder reports as normal, 1 = decoder produces tool usage statistics
 #endif
@@ -236,7 +184,7 @@ typedef std::pair<int, int>  TrCost;
 #define ENABLE_SIMD_OPT_AFFINE_ME                       ( 1 && ENABLE_SIMD_OPT )                            ///< SIMD optimization for affine ME, no impact on RD performance
 #define ENABLE_SIMD_OPT_ALF                             ( 1 && ENABLE_SIMD_OPT )                            ///< SIMD optimization for ALF
 #if ENABLE_SIMD_OPT_BUFFER
-#define ENABLE_SIMD_OPT_GBI                               1                                                 ///< SIMD optimization for GBi
+#define ENABLE_SIMD_OPT_BCW                               1                                                 ///< SIMD optimization for Bcw
 #endif
 
 // End of SIMD optimizations
@@ -312,12 +260,28 @@ typedef       uint32_t            Intermediate_UInt; ///< used as intermediate v
 #endif
 
 typedef       uint64_t          SplitSeries;       ///< used to encoded the splits that caused a particular CU size
+typedef       uint64_t          ModeTypeSeries;    ///< used to encode the ModeType at different split depths
 
 typedef       uint64_t        Distortion;        ///< distortion measurement
 
 // ====================================================================================================================
 // Enumeration
 // ====================================================================================================================
+
+enum BDPCMControl
+{
+  BDPCM_INACTIVE = 0,
+  BDPCM_LUMAONLY = 1,
+  BDPCM_LUMACHROMA = 2,
+};
+
+enum ApsType
+{
+  ALF_APS = 0,
+  LMCS_APS = 1,
+  SCALING_LIST_APS = 2,
+};
+
 enum QuantFlags
 {
   Q_INIT           = 0x0,
@@ -336,13 +300,23 @@ enum TransType
   DCT2_EMT = 4
 };
 
+enum MTSIdx
+{
+  MTS_DCT2_DCT2 = 0,
+  MTS_SKIP = 1,
+  MTS_DST7_DST7 = 2,
+  MTS_DCT8_DST7 = 3,
+  MTS_DST7_DCT8 = 4,
+  MTS_DCT8_DCT8 = 5
+};
+
 enum ISPType
 {
   NOT_INTRA_SUBPARTITIONS       = 0,
   HOR_INTRA_SUBPARTITIONS       = 1,
   VER_INTRA_SUBPARTITIONS       = 2,
-  NUM_INTRA_SUBPARTITIONS_MODES = 3,
-  CAN_USE_VER_AND_HORL_SPLITS   = 4
+  NUM_INTRA_SUBPARTITIONS_MODES = 3,  
+  INTRA_SUBPARTITIONS_RESERVED  = 4
 };
 
 enum SbtIdx
@@ -417,6 +391,20 @@ enum ChannelType
   MAX_NUM_CHANNEL_TYPE = 2
 };
 
+enum TreeType
+{
+  TREE_D = 0, //default tree status (for single-tree slice, TREE_D means joint tree; for dual-tree I slice, TREE_D means TREE_L for luma and TREE_C for chroma)
+  TREE_L = 1, //separate tree only contains luma (may split)
+  TREE_C = 2, //separate tree only contains chroma (not split), to avoid small chroma block
+};
+
+enum ModeType
+{
+  MODE_TYPE_ALL = 0, //all modes can be tried
+  MODE_TYPE_INTER = 1, //inter can be tried
+  MODE_TYPE_INTRA = 2, //intra, ibc and palette can be tried
+};
+
 #define CH_L CHANNEL_TYPE_LUMA
 #define CH_C CHANNEL_TYPE_CHROMA
 
@@ -426,6 +414,7 @@ enum ComponentID
   COMPONENT_Cb        = 1,
   COMPONENT_Cr        = 2,
   MAX_NUM_COMPONENT   = 3,
+  JOINT_CbCr          = MAX_NUM_COMPONENT,
   MAX_NUM_TBLOCKS     = MAX_NUM_COMPONENT
 };
 
@@ -468,7 +457,8 @@ enum PredMode
   MODE_INTER                 = 0,     ///< inter-prediction mode
   MODE_INTRA                 = 1,     ///< intra-prediction mode
   MODE_IBC                   = 2,     ///< ibc-prediction mode
-  NUMBER_OF_PREDICTION_MODES = 3,
+  MODE_PLT                   = 3,     ///< plt-prediction mode
+  NUMBER_OF_PREDICTION_MODES = 4,
 };
 
 /// reference list index
@@ -573,13 +563,6 @@ enum MvpDir
   MD_ABOVE_LEFT         ///< MVP of above left block
 };
 
-enum StoredResidualType
-{
-  RESIDUAL_RECONSTRUCTED          = 0,
-  RESIDUAL_ENCODER_SIDE           = 1,
-  NUMBER_OF_STORED_RESIDUAL_TYPES = 2
-};
-
 enum TransformDirection
 {
   TRANSFORM_FORWARD              = 0,
@@ -601,10 +584,8 @@ enum MESearchMethod
 enum CoeffScanType
 {
   SCAN_DIAG = 0,        ///< up-right diagonal scan
-#if HEVC_USE_MDCS
-  SCAN_HOR  = 1,        ///< horizontal first scan
-  SCAN_VER  = 2,        ///< vertical first scan
-#endif
+  SCAN_TRAV_HOR = 1,
+  SCAN_TRAV_VER = 2,
   SCAN_NUMBER_OF_TYPES
 };
 
@@ -615,16 +596,6 @@ enum CoeffScanGroupType
   SCAN_NUMBER_OF_GROUP_TYPES = 2
 };
 
-enum SignificanceMapContextType
-{
-  CONTEXT_TYPE_4x4    = 0,
-  CONTEXT_TYPE_8x8    = 1,
-  CONTEXT_TYPE_NxN    = 2,
-  CONTEXT_TYPE_SINGLE = 3,
-  CONTEXT_NUMBER_OF_TYPES = 4
-};
-
-#if HEVC_USE_SCALING_LISTS
 enum ScalingListMode
 {
   SCALING_LIST_OFF,
@@ -634,7 +605,8 @@ enum ScalingListMode
 
 enum ScalingListSize
 {
-  SCALING_LIST_2x2 = 0,
+  SCALING_LIST_1x1 = 0,
+  SCALING_LIST_2x2,
   SCALING_LIST_4x4,
   SCALING_LIST_8x8,
   SCALING_LIST_16x16,
@@ -642,23 +614,18 @@ enum ScalingListSize
   SCALING_LIST_64x64,
   SCALING_LIST_128x128,
   SCALING_LIST_SIZE_NUM,
-  SCALING_LIST_FIRST_CODED = SCALING_LIST_4x4, // smallest scaling coded as High Level Parameter
-  SCALING_LIST_LAST_CODED  = SCALING_LIST_32x32
+  //for user define matrix
+  SCALING_LIST_FIRST_CODED = SCALING_LIST_2x2,
+  SCALING_LIST_LAST_CODED = SCALING_LIST_64x64
 };
-#endif
-
-// Slice / Slice segment encoding modes
-enum SliceConstraint
+enum ScalingList1dStartIdx
 {
-  NO_SLICES              = 0,          ///< don't use slices / slice segments
-  FIXED_NUMBER_OF_CTU    = 1,          ///< Limit maximum number of largest coding tree units in a slice / slice segments
-  FIXED_NUMBER_OF_BYTES  = 2,          ///< Limit maximum number of bytes in a slice / slice segment
-#if HEVC_TILES_WPP
-  FIXED_NUMBER_OF_TILES  = 3,          ///< slices / slice segments span an integer number of tiles
-  NUMBER_OF_SLICE_CONSTRAINT_MODES = 4
-#else
-  NUMBER_OF_SLICE_CONSTRAINT_MODES = 3
-#endif
+  SCALING_LIST_1D_START_2x2    = 0,
+  SCALING_LIST_1D_START_4x4    = 2,
+  SCALING_LIST_1D_START_8x8    = 8,
+  SCALING_LIST_1D_START_16x16  = 14,
+  SCALING_LIST_1D_START_32x32  = 20,
+  SCALING_LIST_1D_START_64x64  = 26,
 };
 
 // For use with decoded picture hash SEI messages, generated by encoder.
@@ -719,13 +686,9 @@ namespace Profile
 {
   enum Name
   {
-    NONE = 0,
-    MAIN = 1,
-    MAIN10 = 2,
-    MAINSTILLPICTURE = 3,
-    MAINREXT = 4,
-    HIGHTHROUGHPUTREXT = 5,
-    NEXT = 6
+    NONE        = 0,
+    MAIN_10     = 1,
+    MAIN_444_10 = 2
   };
 }
 
@@ -735,6 +698,7 @@ namespace Level
   {
     MAIN = 0,
     HIGH = 1,
+    NUMBER_OF_TIERS=2
   };
 
   enum Name
@@ -805,135 +769,45 @@ enum PPSExtensionFlagIndex
 //       effort can be done without use of macros to alter the names used to indicate the different NAL unit types.
 enum NalUnitType
 {
-#if JVET_M0101_HLS
-  NAL_UNIT_CODED_SLICE_TRAIL = 0, // 0
-  NAL_UNIT_CODED_SLICE_STSA,      // 1
+  NAL_UNIT_CODED_SLICE_TRAIL = 0,   // 0
+  NAL_UNIT_CODED_SLICE_STSA,        // 1
+  NAL_UNIT_CODED_SLICE_RADL,        // 2
+  NAL_UNIT_CODED_SLICE_RASL,        // 3
 
-  //KJS: keep RADL/RASL since there is no real decision on these types yet
-  NAL_UNIT_CODED_SLICE_RADL,      // 2   should be NAL_UNIT_RESERVED_VCL_2,
-  NAL_UNIT_CODED_SLICE_RASL,      // 3   should be NAL_UNIT_RESERVED_VCL_3,
-  
   NAL_UNIT_RESERVED_VCL_4,
   NAL_UNIT_RESERVED_VCL_5,
   NAL_UNIT_RESERVED_VCL_6,
-  NAL_UNIT_RESERVED_VCL_7,
-  
-  NAL_UNIT_CODED_SLICE_IDR_W_RADL,  // 8
-  NAL_UNIT_CODED_SLICE_IDR_N_LP,    // 9
-  NAL_UNIT_CODED_SLICE_CRA,         // 10
-  
-  NAL_UNIT_RESERVED_IRAP_VCL11,
-  NAL_UNIT_RESERVED_IRAP_VCL12,
-  NAL_UNIT_RESERVED_IRAP_VCL13,
-
-  NAL_UNIT_RESERVED_VCL14,
-
-#if HEVC_VPS
-  NAL_UNIT_VPS,                     // probably not coming back
-#else
-  NAL_UNIT_RESERVED_VCL15,
-#endif
-
-  NAL_UNIT_RESERVED_NVCL16,         // probably DPS
-
-  NAL_UNIT_SPS,                     // 17
-  NAL_UNIT_PPS,                     // 18
-  NAL_UNIT_APS,                     // 19 NAL unit type number needs to be reaaranged.
+  NAL_UNIT_CODED_SLICE_IDR_W_RADL,  // 7
+  NAL_UNIT_CODED_SLICE_IDR_N_LP,    // 8
+  NAL_UNIT_CODED_SLICE_CRA,         // 9
+  NAL_UNIT_CODED_SLICE_GDR,         // 10
+
+  NAL_UNIT_RESERVED_IRAP_VCL_11,
+  NAL_UNIT_RESERVED_IRAP_VCL_12,
+
+  NAL_UNIT_DPS,                     // 13
+  NAL_UNIT_VPS,                     // 14
+  NAL_UNIT_SPS,                     // 15
+  NAL_UNIT_PPS,                     // 16
+  NAL_UNIT_PREFIX_APS,              // 17
+  NAL_UNIT_SUFFIX_APS,              // 18
+  NAL_UNIT_PH,                      // 19
   NAL_UNIT_ACCESS_UNIT_DELIMITER,   // 20
   NAL_UNIT_EOS,                     // 21
   NAL_UNIT_EOB,                     // 22
   NAL_UNIT_PREFIX_SEI,              // 23
   NAL_UNIT_SUFFIX_SEI,              // 24
-  NAL_UNIT_FILLER_DATA,             // 25  keep: may be added with HRD 
 
-  NAL_UNIT_RESERVED_NVCL26,
-  NAL_UNIT_RESERVED_NVCL27,
+  NAL_UNIT_FD,                      // 25
+
+  NAL_UNIT_RESERVED_NVCL_26,
+  NAL_UNIT_RESERVED_NVCL_27,
+
   NAL_UNIT_UNSPECIFIED_28,
   NAL_UNIT_UNSPECIFIED_29,
   NAL_UNIT_UNSPECIFIED_30,
   NAL_UNIT_UNSPECIFIED_31,
-  NAL_UNIT_INVALID,
-#else
-  NAL_UNIT_CODED_SLICE_TRAIL_N = 0, // 0
-  NAL_UNIT_CODED_SLICE_TRAIL_R,     // 1
-
-  NAL_UNIT_CODED_SLICE_TSA_N,       // 2
-  NAL_UNIT_CODED_SLICE_TSA_R,       // 3
-
-  NAL_UNIT_CODED_SLICE_STSA_N,      // 4
-  NAL_UNIT_CODED_SLICE_STSA_R,      // 5
-
-  NAL_UNIT_CODED_SLICE_RADL_N,      // 6
-  NAL_UNIT_CODED_SLICE_RADL_R,      // 7
-
-  NAL_UNIT_CODED_SLICE_RASL_N,      // 8
-  NAL_UNIT_CODED_SLICE_RASL_R,      // 9
-
-  NAL_UNIT_RESERVED_VCL_N10,
-  NAL_UNIT_RESERVED_VCL_R11,
-  NAL_UNIT_RESERVED_VCL_N12,
-  NAL_UNIT_RESERVED_VCL_R13,
-  NAL_UNIT_RESERVED_VCL_N14,
-  NAL_UNIT_RESERVED_VCL_R15,
-
-  NAL_UNIT_CODED_SLICE_BLA_W_LP,    // 16
-  NAL_UNIT_CODED_SLICE_BLA_W_RADL,  // 17
-  NAL_UNIT_CODED_SLICE_BLA_N_LP,    // 18
-  NAL_UNIT_CODED_SLICE_IDR_W_RADL,  // 19
-  NAL_UNIT_CODED_SLICE_IDR_N_LP,    // 20
-  NAL_UNIT_CODED_SLICE_CRA,         // 21
-  NAL_UNIT_RESERVED_IRAP_VCL22,
-  NAL_UNIT_RESERVED_IRAP_VCL23,
-
-  NAL_UNIT_RESERVED_VCL24,
-  NAL_UNIT_RESERVED_VCL25,
-  NAL_UNIT_RESERVED_VCL26,
-  NAL_UNIT_RESERVED_VCL27,
-  NAL_UNIT_RESERVED_VCL28,
-  NAL_UNIT_RESERVED_VCL29,
-  NAL_UNIT_RESERVED_VCL30,
-  NAL_UNIT_RESERVED_VCL31,
-
-#if HEVC_VPS
-  NAL_UNIT_VPS,                     // 32
-#else
-  NAL_UNIT_RESERVED_32,
-#endif
-  NAL_UNIT_SPS,                     // 33
-  NAL_UNIT_PPS,                     // 34
-  NAL_UNIT_APS,                     //NAL unit type number needs to be reaaranged.
-  NAL_UNIT_ACCESS_UNIT_DELIMITER,   // 35
-  NAL_UNIT_EOS,                     // 36
-  NAL_UNIT_EOB,                     // 37
-  NAL_UNIT_FILLER_DATA,             // 38
-  NAL_UNIT_PREFIX_SEI,              // 39
-  NAL_UNIT_SUFFIX_SEI,              // 40
-
-  NAL_UNIT_RESERVED_NVCL41,
-  NAL_UNIT_RESERVED_NVCL42,
-  NAL_UNIT_RESERVED_NVCL43,
-  NAL_UNIT_RESERVED_NVCL44,
-  NAL_UNIT_RESERVED_NVCL45,
-  NAL_UNIT_RESERVED_NVCL46,
-  NAL_UNIT_RESERVED_NVCL47,
-  NAL_UNIT_UNSPECIFIED_48,
-  NAL_UNIT_UNSPECIFIED_49,
-  NAL_UNIT_UNSPECIFIED_50,
-  NAL_UNIT_UNSPECIFIED_51,
-  NAL_UNIT_UNSPECIFIED_52,
-  NAL_UNIT_UNSPECIFIED_53,
-  NAL_UNIT_UNSPECIFIED_54,
-  NAL_UNIT_UNSPECIFIED_55,
-  NAL_UNIT_UNSPECIFIED_56,
-  NAL_UNIT_UNSPECIFIED_57,
-  NAL_UNIT_UNSPECIFIED_58,
-  NAL_UNIT_UNSPECIFIED_59,
-  NAL_UNIT_UNSPECIFIED_60,
-  NAL_UNIT_UNSPECIFIED_61,
-  NAL_UNIT_UNSPECIFIED_62,
-  NAL_UNIT_UNSPECIFIED_63,
-  NAL_UNIT_INVALID,
-#endif
+  NAL_UNIT_INVALID
 };
 
 #if SHARP_LUMA_DELTA_QP
@@ -945,13 +819,6 @@ enum LumaLevelToDQPMode
 };
 #endif
 
-enum SaveLoadTag
-{
-  SAVE_LOAD_INIT = 0,
-  SAVE_ENC_INFO  = 1,
-  LOAD_ENC_INFO  = 2
-};
-
 enum MergeType
 {
   MRG_TYPE_DEFAULT_N        = 0, // 0
@@ -967,12 +834,6 @@ enum TriangleSplit
   TRIANGLE_DIR_NUM
 };
 
-enum SharedMrgState
-{
-  NO_SHARE            = 0,
-  GEN_ON_SHARED_BOUND = 1,
-  SHARING             = 2
-};
 //////////////////////////////////////////////////////////////////////////
 // Encoder modes to try out
 //////////////////////////////////////////////////////////////////////////
@@ -991,8 +852,9 @@ enum EncModeFeature
 enum ImvMode
 {
   IMV_OFF = 0,
-  IMV_DEFAULT,
+  IMV_FPEL,
   IMV_4PEL,
+  IMV_HPEL,
   NUM_IMV_MODES
 };
 
@@ -1041,6 +903,12 @@ struct BitDepths
   int recon[MAX_NUM_CHANNEL_TYPE]; ///< the bit depth as indicated in the SPS
 };
 
+enum PLTRunMode // NOTE(review): presumably the run types used by palette (PLT) coding - confirm against the palette code
+{
+  PLT_RUN_INDEX = 0,
+  PLT_RUN_COPY  = 1,
+  NUM_PLT_RUN   = 2 // number of run modes listed above
+};
 /// parameters for deblocking filter
 struct LFCUParam
 {
@@ -1357,13 +1225,13 @@ template<typename T>
 class dynamic_cache
 {
   std::vector<T*> m_cache;
-#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM
+#if ENABLE_SPLIT_PARALLELISM
   int64_t         m_cacheId;
 #endif
 
 public:
 
-#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM
+#if ENABLE_SPLIT_PARALLELISM
   dynamic_cache()
   {
     static int cacheId = 0;
@@ -1395,7 +1263,7 @@ public:
     {
       ret = m_cache.back();
       m_cache.pop_back();
-#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM
+#if ENABLE_SPLIT_PARALLELISM
       CHECK( ret->cacheId != m_cacheId, "Putting item into wrong cache!" );
       CHECK( !ret->cacheUsed,           "Fetched an element that should've been in cache!!" );
 #endif
@@ -1405,7 +1273,7 @@ public:
       ret = new T;
     }
 
-#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM
+#if ENABLE_SPLIT_PARALLELISM
     ret->cacheId   = m_cacheId;
     ret->cacheUsed = false;
 
@@ -1415,7 +1283,7 @@ public:
 
   void cache( T* el )
   {
-#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM
+#if ENABLE_SPLIT_PARALLELISM
     CHECK( el->cacheId != m_cacheId, "Putting item into wrong cache!" );
     CHECK( el->cacheUsed,            "Putting cached item back into cache!" );
 
@@ -1427,7 +1295,7 @@ public:
 
   void cache( std::vector<T*>& vel )
   {
-#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM
+#if ENABLE_SPLIT_PARALLELISM
     for( auto el : vel )
     {
       CHECK( el->cacheId != m_cacheId, "Putting item into wrong cache!" );
@@ -1455,137 +1323,6 @@ struct XUCache
 
 #define SIGN(x) ( (x) >= 0 ? 1 : -1 )
 
-#define MAX_NUM_ALF_CLASSES             25
-#define MAX_NUM_ALF_LUMA_COEFF          13
-#define MAX_NUM_ALF_CHROMA_COEFF        7
-#define MAX_ALF_FILTER_LENGTH           7
-#define MAX_NUM_ALF_COEFF               (MAX_ALF_FILTER_LENGTH * MAX_ALF_FILTER_LENGTH / 2 + 1)
-
-enum AlfFilterType
-{
-  ALF_FILTER_5,
-  ALF_FILTER_7,
-  ALF_NUM_OF_FILTER_TYPES
-};
-
-struct AlfFilterShape
-{
-  AlfFilterShape( int size )
-    : filterLength( size ),
-    numCoeff( size * size / 4 + 1 ),
-    filterSize( size * size / 2 + 1 )
-  {
-    if( size == 5 )
-    {
-      pattern = {
-                 0,
-             1,  2,  3,
-         4,  5,  6,  5,  4,
-             3,  2,  1,
-                 0
-      };
-
-      weights = {
-                 2,
-              2, 2, 2,
-           2, 2, 1, 1
-      };
-
-      golombIdx = {
-                 0,
-              0, 1, 0,
-           0, 1, 2, 2
-      };
-
-      filterType = ALF_FILTER_5;
-    }
-    else if( size == 7 )
-    {
-      pattern = {
-                     0,
-                 1,  2,  3,
-             4,  5,  6,  7,  8,
-         9, 10, 11, 12, 11, 10, 9,
-             8,  7,  6,  5,  4,
-                 3,  2,  1,
-                     0
-      };
-
-      weights = {
-                    2,
-                2,  2,  2,
-            2,  2,  2,  2,  2,
-        2,  2,  2,  1,  1
-      };
-
-      golombIdx = {
-                    0,
-                 0, 1, 0,
-              0, 1, 2, 1, 0,
-           0, 1, 2, 3, 3
-      };
-
-      filterType = ALF_FILTER_7;
-    }
-    else
-    {
-      filterType = ALF_NUM_OF_FILTER_TYPES;
-      CHECK( 0, "Wrong ALF filter shape" );
-    }
-  }
-
-  AlfFilterType filterType;
-  int filterLength;
-  int numCoeff;      //TO DO: check whether we need both numCoeff and filterSize
-  int filterSize;
-  std::vector<int> pattern;
-  std::vector<int> weights;
-  std::vector<int> golombIdx;
-};
-
-struct AlfSliceParam
-{
-  bool                         enabledFlag[MAX_NUM_COMPONENT];                          // alf_slice_enable_flag, alf_chroma_idc
-  short                        lumaCoeff[MAX_NUM_ALF_CLASSES * MAX_NUM_ALF_LUMA_COEFF]; // alf_coeff_luma_delta[i][j]
-  short                        chromaCoeff[MAX_NUM_ALF_CHROMA_COEFF];                   // alf_coeff_chroma[i]
-  short                        filterCoeffDeltaIdx[MAX_NUM_ALF_CLASSES];                // filter_coeff_delta[i]
-  bool                         alfLumaCoeffFlag[MAX_NUM_ALF_CLASSES];                   // alf_luma_coeff_flag[i]
-  int                          numLumaFilters;                                          // number_of_filters_minus1 + 1
-  bool                         alfLumaCoeffDeltaFlag;                                   // alf_luma_coeff_delta_flag
-  bool                         alfLumaCoeffDeltaPredictionFlag;                         // alf_luma_coeff_delta_prediction_flag
-  std::vector<AlfFilterShape>* filterShapes;
-
-  AlfSliceParam()
-  {
-    reset();
-  }
-
-  void reset()
-  {
-    std::memset( enabledFlag, false, sizeof( enabledFlag ) );
-    std::memset( lumaCoeff, 0, sizeof( lumaCoeff ) );
-    std::memset( chromaCoeff, 0, sizeof( chromaCoeff ) );
-    std::memset( filterCoeffDeltaIdx, 0, sizeof( filterCoeffDeltaIdx ) );
-    std::memset( alfLumaCoeffFlag, true, sizeof( alfLumaCoeffFlag ) );
-    numLumaFilters = 1;
-    alfLumaCoeffDeltaFlag = false;
-    alfLumaCoeffDeltaPredictionFlag = false;
-  }
-
-  const AlfSliceParam& operator = ( const AlfSliceParam& src )
-  {
-    std::memcpy( enabledFlag, src.enabledFlag, sizeof( enabledFlag ) );
-    std::memcpy( lumaCoeff, src.lumaCoeff, sizeof( lumaCoeff ) );
-    std::memcpy( chromaCoeff, src.chromaCoeff, sizeof( chromaCoeff ) );
-    std::memcpy( filterCoeffDeltaIdx, src.filterCoeffDeltaIdx, sizeof( filterCoeffDeltaIdx ) );
-    std::memcpy( alfLumaCoeffFlag, src.alfLumaCoeffFlag, sizeof( alfLumaCoeffFlag ) );
-    numLumaFilters = src.numLumaFilters;
-    alfLumaCoeffDeltaFlag = src.alfLumaCoeffDeltaFlag;
-    alfLumaCoeffDeltaPredictionFlag = src.alfLumaCoeffDeltaPredictionFlag;
-    filterShapes = src.filterShapes;
-    return *this;
-  }
-};
 
 //! \}
 
diff --git a/source/Lib/CommonLib/Unit.cpp b/source/Lib/CommonLib/Unit.cpp
index 7ca06ebc69661b066d2539bef29f550becdedc50..041b241b8e494577d48561d1af13d8cc07b6e6bb 100644
--- a/source/Lib/CommonLib/Unit.cpp
+++ b/source/Lib/CommonLib/Unit.cpp
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2019, ITU/ISO/IEC
+* Copyright (c) 2010-2020, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -265,26 +265,51 @@ CodingUnit& CodingUnit::operator=( const CodingUnit& other )
   mmvdSkip = other.mmvdSkip;
   affine            = other.affine;
   affineType        = other.affineType;
+  colorTransform = other.colorTransform;
   triangle          = other.triangle;
-  transQuantBypass  = other.transQuantBypass;
-  ipcm              = other.ipcm;
+  bdpcmMode         = other.bdpcmMode;
+  bdpcmModeChroma   = other.bdpcmModeChroma;
   qp                = other.qp;
   chromaQpAdj       = other.chromaQpAdj;
   rootCbf           = other.rootCbf;
   sbtInfo           = other.sbtInfo;
-#if HEVC_TILES_WPP
+  mtsFlag           = other.mtsFlag;
+  lfnstIdx          = other.lfnstIdx;
   tileIdx           = other.tileIdx;
-#endif
   imv               = other.imv;
   imvNumCand        = other.imvNumCand;
-  GBiIdx            = other.GBiIdx;
+  BcwIdx            = other.BcwIdx;
   for (int i = 0; i<2; i++)
     refIdxBi[i] = other.refIdxBi[i];
 
-  shareParentPos    = other.shareParentPos;
-  shareParentSize   = other.shareParentSize;
   smvdMode        = other.smvdMode;
   ispMode           = other.ispMode;
+  mipFlag           = other.mipFlag;
+
+  for (int idx = 0; idx < MAX_NUM_CHANNEL_TYPE; idx++)
+  {
+    curPLTSize[idx]   = other.curPLTSize[idx];
+    useEscape[idx]    = other.useEscape[idx];
+    useRotation[idx]  = other.useRotation[idx];
+    reusePLTSize[idx] = other.reusePLTSize[idx];
+    lastPLTSize[idx]  = other.lastPLTSize[idx];
+    if (slice->getSPS()->getPLTMode())
+    {
+      memcpy(reuseflag[idx], other.reuseflag[idx], MAXPLTPREDSIZE * sizeof(bool));
+    }
+  }
+
+  if (slice->getSPS()->getPLTMode())
+  {
+    for (int idx = 0; idx < MAX_NUM_COMPONENT; idx++)
+    {
+      memcpy(curPLT[idx], other.curPLT[idx], MAXPLTSIZE * sizeof(Pel));
+    }
+  }
+
+  treeType          = other.treeType;
+  modeType          = other.modeType;
+  modeTypeSeries    = other.modeTypeSeries;
   return *this;
 }
 
@@ -300,26 +325,121 @@ void CodingUnit::initData()
   mmvdSkip = false;
   affine            = false;
   affineType        = 0;
+  colorTransform = false;
   triangle          = false;
-  transQuantBypass  = false;
-  ipcm              = false;
+  bdpcmMode         = 0;
+  bdpcmModeChroma   = 0;
   qp                = 0;
   chromaQpAdj       = 0;
   rootCbf           = true;
   sbtInfo           = 0;
-#if HEVC_TILES_WPP
+  mtsFlag           = 0;
+  lfnstIdx          = 0;
   tileIdx           = 0;
-#endif
   imv               = 0;
   imvNumCand        = 0;
-  GBiIdx            = GBI_DEFAULT;
+  BcwIdx            = BCW_DEFAULT;
   for (int i = 0; i < 2; i++)
     refIdxBi[i] = -1;
-  shareParentPos = Position(-1, -1);
-  shareParentSize.width = -1;
-  shareParentSize.height = -1;
   smvdMode        = 0;
   ispMode           = 0;
+  mipFlag           = false;
+
+  for (int idx = 0; idx < MAX_NUM_CHANNEL_TYPE; idx++)
+  {
+    curPLTSize[idx]   = 0;
+    reusePLTSize[idx] = 0;
+    lastPLTSize[idx]  = 0;
+    useEscape[idx]    = false;
+    useRotation[idx]  = false;
+    memset(reuseflag[idx], false, MAXPLTPREDSIZE * sizeof(bool));
+  }
+
+  for (int idx = 0; idx < MAX_NUM_COMPONENT; idx++)
+  {
+    memset(curPLT[idx], 0, MAXPLTSIZE * sizeof(Pel));
+  }
+
+  treeType          = TREE_D;
+  modeType          = MODE_TYPE_ALL;
+  modeTypeSeries    = 0;
+}
+
+const bool CodingUnit::isSepTree() const
+{ // True when this CU's treeType is anything other than TREE_D, or when CS::isDualITree() reports true for its coding structure.
+  return treeType != TREE_D || CS::isDualITree( *cs );
+}
+
+const bool CodingUnit::checkCCLMAllowed() const
+{ // Decides whether CCLM may be used for this CU; the decision is accumulated in allowCCLM and returned at the end.
+  bool allowCCLM = false; // stays false in the dual-tree branch when none of the split patterns below match
+
+  if( !CS::isDualITree( *cs ) ) //single tree I slice or non-I slice (Note: judging chType is no longer equivalent to checking dual-tree I slice since the local dual-tree is introduced)
+  {
+    allowCCLM = true;
+  }
+  else if( slice->getSPS()->getCTUSize() <= 32 ) //dual tree, CTU size <= 32
+  {
+    allowCCLM = true;
+  }
+  else //dual tree, CTU size 64 or 128
+  {
+    int depthFor64x64Node = slice->getSPS()->getCTUSize() == 128 ? 1 : 0; // split depth that is inspected below: 1 for a CTU size of 128, otherwise 0
+    const PartSplit cuSplitTypeDepth1 = CU::getSplitAtDepth( *this, depthFor64x64Node );
+    const PartSplit cuSplitTypeDepth2 = CU::getSplitAtDepth( *this, depthFor64x64Node + 1 );
+
+    //allow CCLM if 64x64 chroma tree node uses QT split or HBT+VBT split combination
+    if( cuSplitTypeDepth1 == CU_QUAD_SPLIT || (cuSplitTypeDepth1 == CU_HORZ_SPLIT && cuSplitTypeDepth2 == CU_VERT_SPLIT) )
+    {
+      if( chromaFormat == CHROMA_420 )
+      {
+        CHECK( !(blocks[COMPONENT_Cb].width <= 16 && blocks[COMPONENT_Cb].height <= 16), "chroma cu size shall be <= 16x16 for YUV420 format" );
+      }
+      allowCCLM = true;
+    }
+    //allow CCLM if 64x64 chroma tree node uses NS (No Split) and becomes a chroma CU containing 32x32 chroma blocks
+    else if( cuSplitTypeDepth1 == CU_DONT_SPLIT )
+    {
+      if( chromaFormat == CHROMA_420 )
+      {
+        CHECK( !(blocks[COMPONENT_Cb].width == 32 && blocks[COMPONENT_Cb].height == 32), "chroma cu size shall be 32x32 for YUV420 format" );
+      }
+      allowCCLM = true;
+    }
+    //allow CCLM if 64x32 chroma tree node uses NS and becomes a chroma CU containing 32x16 chroma blocks
+    else if( cuSplitTypeDepth1 == CU_HORZ_SPLIT && cuSplitTypeDepth2 == CU_DONT_SPLIT )
+    {
+      if( chromaFormat == CHROMA_420 )
+      {
+        CHECK( !(blocks[COMPONENT_Cb].width == 32 && blocks[COMPONENT_Cb].height == 16), "chroma cu size shall be 32x16 for YUV420 format" );
+      }
+      allowCCLM = true;
+    }
+
+    //further check luma conditions
+    if( allowCCLM )
+    {
+      //disallow CCLM if luma 64x64 block uses BT or TT or NS with ISP
+      const Position lumaRefPos( chromaPos().x << getComponentScaleX( COMPONENT_Cb, chromaFormat ), chromaPos().y << getComponentScaleY( COMPONENT_Cb, chromaFormat ) );
+      const CodingUnit* colLumaCu = cs->picture->cs->getCU( lumaRefPos, CHANNEL_TYPE_LUMA ); // NOTE(review): dereferenced below without a null check - confirm getCU() cannot return null here
+
+      if( colLumaCu->lwidth() < 64 || colLumaCu->lheight() < 64 ) //further split at 64x64 luma node
+      {
+        const PartSplit cuSplitTypeDepth1Luma = CU::getSplitAtDepth( *colLumaCu, depthFor64x64Node );
+        CHECK( !(cuSplitTypeDepth1Luma >= CU_QUAD_SPLIT && cuSplitTypeDepth1Luma <= CU_TRIV_SPLIT), "split mode shall be BT, TT or QT" );
+        if( cuSplitTypeDepth1Luma != CU_QUAD_SPLIT )
+        {
+          allowCCLM = false;
+        }
+      }
+      else if( colLumaCu->lwidth() == 64 && colLumaCu->lheight() == 64 && colLumaCu->ispMode ) //not split at 64x64 luma node and use ISP mode
+      {
+        allowCCLM = false;
+      }
+    }
+  }
+
+  return allowCCLM;
 }
 
 const uint8_t CodingUnit::checkAllowedSbt() const
@@ -330,11 +450,15 @@ const uint8_t CodingUnit::checkAllowedSbt() const
   }
 
   //check on prediction mode
-  if( predMode == MODE_INTRA || predMode == MODE_IBC ) //intra or IBC
+  if (predMode == MODE_INTRA || predMode == MODE_IBC || predMode == MODE_PLT ) //intra, palette or IBC
+  {
+    return 0;
+  }
+  if( firstPU->ciipFlag )
   {
     return 0;
   }
-  if( firstPU->mhIntraFlag )
+  if( triangle )
   {
     return 0;
   }
@@ -346,7 +470,7 @@ const uint8_t CodingUnit::checkAllowedSbt() const
   memset( allow_type, false, NUMBER_SBT_IDX * sizeof( bool ) );
 
   //parameter
-  int maxSbtCUSize = cs->sps->getMaxSbtSize();
+  int maxSbtCUSize = cs->sps->getMaxTbSize();
   int minSbtCUSize = 1 << ( MIN_CU_LOG2 + 1 );
 
   //check on size
@@ -397,10 +521,12 @@ void PredictionUnit::initData()
   // intra data - need this default initialization for PCM
   intraDir[0] = DC_IDX;
   intraDir[1] = PLANAR_IDX;
+  mipTransposedFlag = false;
   multiRefIdx = 0;
 
   // inter data
   mergeFlag   = false;
+  regularMergeFlag = false;
   mergeIdx    = MAX_UCHAR;
   triangleSplitDir  = MAX_UCHAR;
   triangleMergeIdx0 = MAX_UCHAR;
@@ -432,10 +558,7 @@ void PredictionUnit::initData()
       mvAffi[i][j].setZero();
     }
   }
-  mhIntraFlag = false;
-  shareParentPos = Position(-1, -1);
-  shareParentSize.width = -1;
-  shareParentSize.height = -1;
+  ciipFlag = false;
   mmvdEncOptMode = 0;
 }
 
@@ -445,6 +568,7 @@ PredictionUnit& PredictionUnit::operator=(const IntraPredictionData& predData)
   {
     intraDir[i] = predData.intraDir[i];
   }
+  mipTransposedFlag = predData.mipTransposedFlag;
   multiRefIdx = predData.multiRefIdx;
 
   return *this;
@@ -453,6 +577,7 @@ PredictionUnit& PredictionUnit::operator=(const IntraPredictionData& predData)
 PredictionUnit& PredictionUnit::operator=(const InterPredictionData& predData)
 {
   mergeFlag   = predData.mergeFlag;
+  regularMergeFlag = predData.regularMergeFlag;
   mergeIdx    = predData.mergeIdx;
   triangleSplitDir  = predData.triangleSplitDir  ;
   triangleMergeIdx0 = predData.triangleMergeIdx0 ;
@@ -484,9 +609,7 @@ PredictionUnit& PredictionUnit::operator=(const InterPredictionData& predData)
       mvAffi[i][j] = predData.mvAffi[i][j];
     }
   }
-  mhIntraFlag = predData.mhIntraFlag;
-  shareParentPos = predData.shareParentPos;
-  shareParentSize = predData.shareParentSize;
+  ciipFlag = predData.ciipFlag;
   return *this;
 }
 
@@ -496,9 +619,11 @@ PredictionUnit& PredictionUnit::operator=( const PredictionUnit& other )
   {
     intraDir[ i ] = other.intraDir[ i ];
   }
+  mipTransposedFlag = other.mipTransposedFlag;
   multiRefIdx = other.multiRefIdx;
 
   mergeFlag   = other.mergeFlag;
+  regularMergeFlag = other.regularMergeFlag;
   mergeIdx    = other.mergeIdx;
   triangleSplitDir  = other.triangleSplitDir  ;
   triangleMergeIdx0 = other.triangleMergeIdx0 ;
@@ -530,9 +655,7 @@ PredictionUnit& PredictionUnit::operator=( const PredictionUnit& other )
       mvAffi[i][j] = other.mvAffi[i][j];
     }
   }
-  mhIntraFlag = other.mhIntraFlag;
-  shareParentPos = other.shareParentPos;
-  shareParentSize = other.shareParentSize;
+  ciipFlag = other.ciipFlag;
   return *this;
 }
 
@@ -583,6 +706,11 @@ TransformUnit::TransformUnit(const UnitArea& unit) : UnitArea(unit), cu(nullptr)
     m_pcmbuf[i] = nullptr;
   }
 
+  for (unsigned i = 0; i < MAX_NUM_TBLOCKS - 1; i++)
+  {
+    m_runType[i] = nullptr;
+  }
+
   initData();
 }
 
@@ -594,6 +722,11 @@ TransformUnit::TransformUnit(const ChromaFormat _chromaFormat, const Area &_area
     m_pcmbuf[i] = nullptr;
   }
 
+  for (unsigned i = 0; i < MAX_NUM_TBLOCKS - 1; i++)
+  {
+    m_runType[i] = nullptr;
+  }
+
   initData();
 }
 
@@ -604,14 +737,14 @@ void TransformUnit::initData()
     cbf[i]           = 0;
     rdpcm[i]         = NUMBER_OF_RDPCM_MODES;
     compAlpha[i]     = 0;
+    mtsIdx[i]        = MTS_DCT2_DCT2;
   }
   depth              = 0;
-  mtsIdx             = 0;
   noResidual         = false;
+  jointCbCr          = 0;
   m_chromaResScaleInv = 0;
 }
-
-void TransformUnit::init(TCoeff **coeffs, Pel **pcmbuf)
+void TransformUnit::init(TCoeff **coeffs, Pel **pcmbuf, bool **runType)
 {
   uint32_t numBlocks = getNumberValidTBlocks(*cs->pcv);
 
@@ -620,6 +753,11 @@ void TransformUnit::init(TCoeff **coeffs, Pel **pcmbuf)
     m_coeffs[i] = coeffs[i];
     m_pcmbuf[i] = pcmbuf[i];
   }
+
+  for (uint32_t i = 0; i < numBlocks - 1; i++)
+  {
+    m_runType[i] = runType[i];
+  }
 }
 
 TransformUnit& TransformUnit::operator=(const TransformUnit& other)
@@ -635,14 +773,18 @@ TransformUnit& TransformUnit::operator=(const TransformUnit& other)
 
     if (m_coeffs[i] && other.m_coeffs[i] && m_coeffs[i] != other.m_coeffs[i]) memcpy(m_coeffs[i], other.m_coeffs[i], sizeof(TCoeff) * area);
     if (m_pcmbuf[i] && other.m_pcmbuf[i] && m_pcmbuf[i] != other.m_pcmbuf[i]) memcpy(m_pcmbuf[i], other.m_pcmbuf[i], sizeof(Pel   ) * area);
-
+    if (cu->slice->getSPS()->getPLTMode() && i < 2)
+    {
+      if (m_runType[i]   && other.m_runType[i]   && m_runType[i]   != other.m_runType[i]  ) memcpy(m_runType[i],   other.m_runType[i],   sizeof(bool) * area);
+    }
     cbf[i]           = other.cbf[i];
     rdpcm[i]         = other.rdpcm[i];
     compAlpha[i]     = other.compAlpha[i];
+    mtsIdx[i] = other.mtsIdx[i];
   }
   depth              = other.depth;
-  mtsIdx             = other.mtsIdx;
   noResidual         = other.noResidual;
+  jointCbCr          = other.jointCbCr;
   return *this;
 }
 
@@ -656,14 +798,19 @@ void TransformUnit::copyComponentFrom(const TransformUnit& other, const Componen
 
   if (m_coeffs[i] && other.m_coeffs[i] && m_coeffs[i] != other.m_coeffs[i]) memcpy(m_coeffs[i], other.m_coeffs[i], sizeof(TCoeff) * area);
   if (m_pcmbuf[i] && other.m_pcmbuf[i] && m_pcmbuf[i] != other.m_pcmbuf[i]) memcpy(m_pcmbuf[i], other.m_pcmbuf[i], sizeof(Pel   ) * area);
+  if ((i == COMPONENT_Y || i == COMPONENT_Cb))
+  {
+    if (m_runType[i] && other.m_runType[i] && m_runType[i] != other.m_runType[i])   memcpy(m_runType[i], other.m_runType[i], sizeof(bool) * area);
+  }
 
   cbf[i]           = other.cbf[i];
   rdpcm[i]         = other.rdpcm[i];
   compAlpha[i]     = other.compAlpha[i];
 
   depth            = other.depth;
-  mtsIdx           = isLuma( i ) ? other.mtsIdx : mtsIdx;
+  mtsIdx[i]        = other.mtsIdx[i];
   noResidual       = other.noResidual;
+  jointCbCr        = isChroma( i ) ? other.jointCbCr : jointCbCr;
 }
 
        CoeffBuf TransformUnit::getCoeffs(const ComponentID id)       { return  CoeffBuf(m_coeffs[id], blocks[id]); }
@@ -672,6 +819,18 @@ const CCoeffBuf TransformUnit::getCoeffs(const ComponentID id) const { return CC
        PelBuf   TransformUnit::getPcmbuf(const ComponentID id)       { return  PelBuf  (m_pcmbuf[id], blocks[id]); }
 const CPelBuf   TransformUnit::getPcmbuf(const ComponentID id) const { return CPelBuf  (m_pcmbuf[id], blocks[id]); }
 
+       PelBuf       TransformUnit::getcurPLTIdx(const ComponentID id)         { return        PelBuf(m_pcmbuf[id], blocks[id]); } // wraps m_pcmbuf[id], the same storage getPcmbuf() exposes
+const CPelBuf       TransformUnit::getcurPLTIdx(const ComponentID id)   const { return       CPelBuf(m_pcmbuf[id], blocks[id]); }
+
+       PLTtypeBuf   TransformUnit::getrunType  (const ComponentID id)         { return   PLTtypeBuf(m_runType[id], blocks[id]); } // NOTE(review): the constructors only reset MAX_NUM_TBLOCKS - 1 entries of m_runType - confirm id always stays below that
+const CPLTtypeBuf   TransformUnit::getrunType  (const ComponentID id)   const { return  CPLTtypeBuf(m_runType[id], blocks[id]); }
+
+       PLTescapeBuf TransformUnit::getescapeValue(const ComponentID id)       { return  PLTescapeBuf(m_coeffs[id], blocks[id]); } // wraps m_coeffs[id], the same storage getCoeffs() exposes
+const CPLTescapeBuf TransformUnit::getescapeValue(const ComponentID id) const { return CPLTescapeBuf(m_coeffs[id], blocks[id]); }
+
+      Pel*          TransformUnit::getPLTIndex   (const ComponentID id)       { return  m_pcmbuf[id];    } // raw pointer to the buffer viewed by getcurPLTIdx()
+      bool*         TransformUnit::getRunTypes   (const ComponentID id)       { return  m_runType[id];   } // raw pointer to the buffer viewed by getrunType()
+
 void TransformUnit::checkTuNoResidual( unsigned idx )
 {
   if( CU::getSbtIdx( cu->sbtInfo ) == SBT_OFF_DCT )
@@ -684,5 +843,23 @@ void TransformUnit::checkTuNoResidual( unsigned idx )
     noResidual = true;
   }
 }
+
+int TransformUnit::getTbAreaAfterCoefZeroOut(ComponentID compID) const
+{ // Returns width * height of the compID block after each dimension has been limited as done below.
+  int tbArea = blocks[compID].width * blocks[compID].height; // NOTE(review): this initial value is always overwritten before the return
+  int tbZeroOutWidth = blocks[compID].width;
+  int tbZeroOutHeight = blocks[compID].height;
+
+  if ( cs->sps->getUseMTS() && cu->sbtInfo != 0 && blocks[compID].width <= 32 && blocks[compID].height <= 32 && compID == COMPONENT_Y ) // luma only, SPS MTS flag set, non-zero sbtInfo, block at most 32x32
+  {
+    tbZeroOutWidth = (blocks[compID].width == 32) ? 16 : tbZeroOutWidth; // a dimension of exactly 32 is reduced to 16
+    tbZeroOutHeight = (blocks[compID].height == 32) ? 16 : tbZeroOutHeight;
+  }
+  tbZeroOutWidth = std::min<int>(JVET_C0024_ZERO_OUT_TH, tbZeroOutWidth); // both dimensions are additionally capped at JVET_C0024_ZERO_OUT_TH
+  tbZeroOutHeight = std::min<int>(JVET_C0024_ZERO_OUT_TH, tbZeroOutHeight);
+  tbArea = tbZeroOutWidth * tbZeroOutHeight;
+  return tbArea;
+}
+
 int          TransformUnit::getChromaAdj()                     const { return m_chromaResScaleInv; }
 void         TransformUnit::setChromaAdj(int i)                      { m_chromaResScaleInv = i;    }
diff --git a/source/Lib/CommonLib/Unit.h b/source/Lib/CommonLib/Unit.h
index 69542a605ebba55693780746e2b43ec17b4dc603..91130a7221d62eb12ddbc15dbdbfa5d5caccd706 100644
--- a/source/Lib/CommonLib/Unit.h
+++ b/source/Lib/CommonLib/Unit.h
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2019, ITU/ISO/IEC
+* Copyright (c) 2010-2020, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -48,7 +48,10 @@
 // ---------------------------------------------------------------------------
 // tools
 // ---------------------------------------------------------------------------
-
+struct PLTBuf { // holds a per-channel-type size and per-component Pel entries, mirroring the curPLTSize/curPLT members of CodingUnit
+  uint8_t        curPLTSize[MAX_NUM_CHANNEL_TYPE];
+  Pel            curPLT[MAX_NUM_COMPONENT][MAXPLTPREDSIZE]; // NOTE(review): sized with MAXPLTPREDSIZE, while CodingUnit::curPLT uses MAXPLTSIZE - confirm this is intended
+};
 inline Position recalcPosition(const ChromaFormat _cf, const ComponentID srcCId, const ComponentID dstCId, const Position &pos)
 {
   if( toChannelType( srcCId ) == toChannelType( dstCId ) )
@@ -267,9 +270,8 @@ struct UnitAreaRelative : public UnitArea
 };
 
 class SPS;
-#if HEVC_VPS
 class VPS;
-#endif
+class DPS;
 class PPS;
 class Slice;
 
@@ -299,27 +301,38 @@ struct CodingUnit : public UnitArea
   int8_t          chromaQpAdj;
   int8_t          qp;
   SplitSeries    splitSeries;
+  TreeType       treeType;
+  ModeType       modeType;
+  ModeTypeSeries modeTypeSeries;
   bool           skip;
   bool           mmvdSkip;
   bool           affine;
   int            affineType;
+  bool           colorTransform;
   bool           triangle;
-  bool           transQuantBypass;
-  bool           ipcm;
+  int            bdpcmMode;
+  int            bdpcmModeChroma;
   uint8_t          imv;
   bool           rootCbf;
   uint8_t        sbtInfo;
-#if HEVC_TILES_WPP
   uint32_t           tileIdx;
-#endif
-  uint8_t         GBiIdx;
+  uint8_t         mtsFlag;
+  uint32_t        lfnstIdx;
+  uint8_t         BcwIdx;
   int             refIdxBi[2];
+  bool           mipFlag;
+
   // needed for fast imv mode decisions
   int8_t          imvNumCand;
-  Position       shareParentPos;
-  Size           shareParentSize;
   uint8_t          smvdMode;
   uint8_t        ispMode;
+  bool           useEscape[MAX_NUM_CHANNEL_TYPE];
+  bool           useRotation[MAX_NUM_CHANNEL_TYPE];
+  bool           reuseflag[MAX_NUM_CHANNEL_TYPE][MAXPLTPREDSIZE];
+  uint8_t        lastPLTSize[MAX_NUM_CHANNEL_TYPE];
+  uint8_t        reusePLTSize[MAX_NUM_CHANNEL_TYPE];
+  uint8_t        curPLTSize[MAX_NUM_CHANNEL_TYPE];
+  Pel            curPLT[MAX_NUM_COMPONENT][MAXPLTSIZE];
 
   CodingUnit() : chType( CH_L ) { }
   CodingUnit(const UnitArea &unit);
@@ -337,7 +350,7 @@ struct CodingUnit : public UnitArea
 
   TransformUnit *firstTU;
   TransformUnit *lastTU;
-#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM
+#if ENABLE_SPLIT_PARALLELISM
 
   int64_t cacheId;
   bool    cacheUsed;
@@ -348,6 +361,10 @@ struct CodingUnit : public UnitArea
   void              setSbtPos( uint8_t pos ) { CHECK( pos >= 4, "sbt_pos wrong" ); sbtInfo = ( pos << 4 ) + ( sbtInfo & 0xcf ); }
   uint8_t           getSbtTuSplit() const;
   const uint8_t     checkAllowedSbt() const;
+  const bool        checkCCLMAllowed() const;
+  const bool        isSepTree() const;
+  const bool        isConsInter() const { return modeType == MODE_TYPE_INTER; }
+  const bool        isConsIntra() const { return modeType == MODE_TYPE_INTRA; }
 };
 
 // ---------------------------------------------------------------------------
@@ -357,12 +374,14 @@ struct CodingUnit : public UnitArea
 struct IntraPredictionData
 {
   uint32_t  intraDir[MAX_NUM_CHANNEL_TYPE];
+  bool      mipTransposedFlag;
   int       multiRefIdx;
 };
 
 struct InterPredictionData
 {
   bool      mergeFlag;
+  bool      regularMergeFlag;
   uint8_t     mergeIdx;
   uint8_t     triangleSplitDir;
   uint8_t     triangleMergeIdx0;
@@ -380,10 +399,8 @@ struct InterPredictionData
   Mv        mvdL0SubPu[MAX_NUM_SUBCU_DMVR];
   Mv        mvdAffi [NUM_REF_PIC_LIST_01][3];
   Mv        mvAffi[NUM_REF_PIC_LIST_01][3];
-  bool      mhIntraFlag;
+  bool      ciipFlag;
 
-  Position  shareParentPos;
-  Size      shareParentSize;
   Mv        bv;                             // block vector for IBC
   Mv        bvd;                            // block vector difference for IBC
   uint8_t   mmvdEncOptMode;                  // 0: no action 1: skip chroma MC for MMVD candidate pre-selection 2: skip chroma MC and BIO for MMVD candidate pre-selection
@@ -408,8 +425,6 @@ struct PredictionUnit : public UnitArea, public IntraPredictionData, public Inte
   PredictionUnit& operator=(const MotionInfo& mi);
 
   unsigned        idx;
-  Position shareParentPos;
-  Size     shareParentSize;
 
   PredictionUnit *next;
 
@@ -419,7 +434,7 @@ struct PredictionUnit : public UnitArea, public IntraPredictionData, public Inte
   MotionBuf         getMotionBuf();
   CMotionBuf        getMotionBuf() const;
 
-#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM
+#if ENABLE_SPLIT_PARALLELISM
 
   int64_t cacheId;
   bool    cacheUsed;
@@ -438,8 +453,9 @@ struct TransformUnit : public UnitArea
   int              m_chromaResScaleInv;
 
   uint8_t        depth;
-  uint8_t        mtsIdx;
+  uint8_t        mtsIdx     [ MAX_NUM_TBLOCKS ];
   bool           noResidual;
+  uint8_t        jointCbCr;
   uint8_t        cbf        [ MAX_NUM_TBLOCKS ];
   RDPCMMode    rdpcm        [ MAX_NUM_TBLOCKS ];
   int8_t        compAlpha   [ MAX_NUM_TBLOCKS ];
@@ -453,12 +469,12 @@ struct TransformUnit : public UnitArea
   unsigned       idx;
   TransformUnit *next;
   TransformUnit *prev;
-
-  void init(TCoeff **coeffs, Pel **pcmbuf);
+  void init(TCoeff **coeffs, Pel **pcmbuf, bool **runType);
 
   TransformUnit& operator=(const TransformUnit& other);
   void copyComponentFrom  (const TransformUnit& other, const ComponentID compID);
   void checkTuNoResidual( unsigned idx );
+  int  getTbAreaAfterCoefZeroOut(ComponentID compID) const;
 
          CoeffBuf getCoeffs(const ComponentID id);
   const CCoeffBuf getCoeffs(const ComponentID id) const;
@@ -466,8 +482,16 @@ struct TransformUnit : public UnitArea
   const CPelBuf   getPcmbuf(const ComponentID id) const;
         int       getChromaAdj( )                 const;
         void      setChromaAdj(int i);
-
-#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM
+         PelBuf   getcurPLTIdx(const ComponentID id);
+  const CPelBuf   getcurPLTIdx(const ComponentID id) const;
+         PLTtypeBuf   getrunType(const ComponentID id);
+  const CPLTtypeBuf   getrunType(const ComponentID id) const;
+         PLTescapeBuf getescapeValue(const ComponentID id);
+  const CPLTescapeBuf getescapeValue(const ComponentID id) const;
+        Pel*      getPLTIndex(const ComponentID id);
+        bool*     getRunTypes(const ComponentID id);
+
+#if ENABLE_SPLIT_PARALLELISM
   int64_t cacheId;
   bool    cacheUsed;
 
@@ -475,6 +499,7 @@ struct TransformUnit : public UnitArea
 private:
   TCoeff *m_coeffs[ MAX_NUM_TBLOCKS ];
   Pel    *m_pcmbuf[ MAX_NUM_TBLOCKS ];
+  bool   *m_runType[ MAX_NUM_TBLOCKS - 1 ];
 };
 
 // ---------------------------------------------------------------------------
diff --git a/source/Lib/CommonLib/UnitPartitioner.cpp b/source/Lib/CommonLib/UnitPartitioner.cpp
index 36c65850bbee50ca31c0d0974a9c94f984ee8f53..71b35c0b274b80d1913d2f3705fb67d415048ae5 100644
--- a/source/Lib/CommonLib/UnitPartitioner.cpp
+++ b/source/Lib/CommonLib/UnitPartitioner.cpp
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2019, ITU/ISO/IEC
+* Copyright (c) 2010-2020, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -55,6 +55,7 @@ PartLevel::PartLevel()
 , canQtSplit          ( true          )
 , qgEnable            ( true          )
 , qgChromaEnable      ( true          )
+, modeType            ( MODE_TYPE_ALL )
 {
 }
 
@@ -69,6 +70,7 @@ PartLevel::PartLevel( const PartSplit _split, const Partitioning& _parts )
 , canQtSplit          ( true          )
 , qgEnable            ( true          )
 , qgChromaEnable      ( true          )
+, modeType            ( MODE_TYPE_ALL )
 {
 }
 
@@ -83,6 +85,7 @@ PartLevel::PartLevel( const PartSplit _split, Partitioning&& _parts )
 , canQtSplit          ( true                                 )
 , qgEnable            ( true                                 )
 , qgChromaEnable      ( true                                 )
+, modeType            ( MODE_TYPE_ALL )
 {
 }
 
@@ -106,6 +109,27 @@ SplitSeries Partitioner::getSplitSeries() const
   return splitSeries;
 }
 
+ModeTypeSeries Partitioner::getModeTypeSeries() const
+{
+  // Pack the mode type of every split level below the CTU into 3-bit fields; the first stack entry lands in the lowest bits.
+  ModeTypeSeries modeTypeSeries = 0;
+  int depth = 0;
+  for( const auto &level : m_partStack )
+  {
+    if( level.split == CTU_LEVEL ) continue; // the CTU root entry does not occupy a field
+    // Widen before shifting: an int operand cannot hold fields once depth * 3 approaches the width of int.
+    modeTypeSeries += static_cast<ModeTypeSeries>(level.modeType) << (depth * 3);
+    depth++;
+  }
+
+  return modeTypeSeries;
+}
+
+bool Partitioner::isSepTree( const CodingStructure &cs ) // true when the partitioner's treeType is not TREE_D or CS::isDualITree( cs ) holds
+{
+  return treeType != TREE_D || CS::isDualITree( cs );
+}
+
 void Partitioner::setCUData( CodingUnit& cu )
 {
   cu.depth       = currDepth;
@@ -113,6 +137,7 @@ void Partitioner::setCUData( CodingUnit& cu )
   cu.mtDepth     = currMtDepth;
   cu.qtDepth     = currQtDepth;
   cu.splitSeries = getSplitSeries();
+  cu.modeTypeSeries = getModeTypeSeries();
 }
 
 void Partitioner::copyState( const Partitioner& other )
@@ -141,22 +166,15 @@ void Partitioner::copyState( const Partitioner& other )
 void AdaptiveDepthPartitioner::setMaxMinDepth( unsigned& minDepth, unsigned& maxDepth, const CodingStructure& cs ) const
 {
   unsigned          stdMinDepth = 0;
-  unsigned          stdMaxDepth = ( g_aucLog2[cs.sps->getCTUSize()] - g_aucLog2[cs.sps->getMinQTSize( cs.slice->getSliceType(), chType )]);
+  unsigned          stdMaxDepth = ( floorLog2(cs.sps->getCTUSize()) - floorLog2(cs.sps->getMinQTSize( cs.slice->getSliceType(), chType )));
   const Position    pos         = currArea().blocks[chType].pos();
   const unsigned    curSliceIdx = cs.slice->getIndependentSliceIdx();
-#if HEVC_TILES_WPP
-  const unsigned    curTileIdx  = cs.picture->tileMap->getTileIdxMap( currArea().lumaPos() );
-
-  const CodingUnit* cuLeft        = cs.getCURestricted( pos.offset( -1,                               0 ), curSliceIdx, curTileIdx, chType );
-  const CodingUnit* cuBelowLeft   = cs.getCURestricted( pos.offset( -1, currArea().blocks[chType].height), curSliceIdx, curTileIdx, chType );
-  const CodingUnit* cuAbove       = cs.getCURestricted( pos.offset(  0,                              -1 ), curSliceIdx, curTileIdx, chType );
-  const CodingUnit* cuAboveRight  = cs.getCURestricted( pos.offset( currArea().blocks[chType].width, -1 ), curSliceIdx, curTileIdx, chType );
-#else
-  const CodingUnit* cuLeft        = cs.getCURestricted( pos.offset( -1,                               0 ), curSliceIdx, chType );
-  const CodingUnit* cuBelowLeft   = cs.getCURestricted( pos.offset( -1, currArea().blocks[chType].height), curSliceIdx, chType );
-  const CodingUnit* cuAbove       = cs.getCURestricted( pos.offset(  0,                              -1 ), curSliceIdx, chType );
-  const CodingUnit* cuAboveRight  = cs.getCURestricted( pos.offset( currArea().blocks[chType].width, -1 ), curSliceIdx, chType );
-#endif
+  const unsigned    curTileIdx  = cs.pps->getTileIdx( currArea().lumaPos() );
+
+  const CodingUnit* cuLeft        = cs.getCURestricted( pos.offset( -1,                               0 ), pos, curSliceIdx, curTileIdx, chType );
+  const CodingUnit* cuBelowLeft   = cs.getCURestricted( pos.offset( -1, currArea().blocks[chType].height), pos, curSliceIdx, curTileIdx, chType );
+  const CodingUnit* cuAbove       = cs.getCURestricted( pos.offset(  0,                              -1 ), pos, curSliceIdx, curTileIdx, chType );
+  const CodingUnit* cuAboveRight  = cs.getCURestricted( pos.offset( currArea().blocks[chType].width, -1 ), pos, curSliceIdx, curTileIdx, chType );
 
   minDepth = stdMaxDepth;
   maxDepth = stdMinDepth;
@@ -241,6 +259,8 @@ void QTBTPartitioner::initCtu( const UnitArea& ctuArea, const ChannelType _chTyp
 
   m_partStack.clear();
   m_partStack.push_back( PartLevel( CTU_LEVEL, Partitioning{ ctuArea } ) );
+  treeType = TREE_D;
+  modeType = MODE_TYPE_ALL;
 }
 
 void QTBTPartitioner::splitCurrArea( const PartSplit split, const CodingStructure& cs )
@@ -256,14 +276,17 @@ void QTBTPartitioner::splitCurrArea( const PartSplit split, const CodingStructur
   {
   case CU_QUAD_SPLIT:
     m_partStack.push_back( PartLevel( split, PartitionerImpl::getCUSubPartitions( currArea(), cs ) ) );
+    m_partStack.back().modeType = modeType;
     break;
   case CU_HORZ_SPLIT:
   case CU_VERT_SPLIT:
     m_partStack.push_back( PartLevel( split, PartitionerImpl::getCUSubPartitions( currArea(), cs, split ) ) );
+    m_partStack.back().modeType = modeType;
     break;
   case CU_TRIH_SPLIT:
   case CU_TRIV_SPLIT:
     m_partStack.push_back( PartLevel( split, PartitionerImpl::getCUSubPartitions( currArea(), cs, split ) ) );
+    m_partStack.back().modeType = modeType;
     break;
   case TU_MAX_TR_SPLIT:
     m_partStack.push_back( PartLevel( split, PartitionerImpl::getMaxTuTiling( currArea(), cs ) ) );
@@ -325,8 +348,8 @@ void QTBTPartitioner::splitCurrArea( const PartSplit split, const CodingStructur
     currQtDepth++;
     currSubdiv++;
   }
-  qgEnable       &= (currSubdiv <= cs.pps->getCuQpDeltaSubdiv());
-  qgChromaEnable &= (currSubdiv <= cs.pps->getPpsRangeExtension().getCuChromaQpOffsetSubdiv());
+  qgEnable       &= (currSubdiv <= cs.slice->getCuQpDeltaSubdiv());
+  qgChromaEnable &= (currSubdiv <= cs.slice->getCuChromaQpOffsetSubdiv());
   m_partStack.back().qgEnable       = qgEnable;
   m_partStack.back().qgChromaEnable = qgChromaEnable;
   if (qgEnable)
@@ -351,6 +374,7 @@ void QTBTPartitioner::canSplit( const CodingStructure &cs, bool& canNo, bool& ca
 
   // the minimal and maximal sizes are given in luma samples
   const CompArea&  area  = currArea().Y();
+  const CompArea&  areaC = currArea().Cb();
         PartLevel& level = m_partStack.back();
 
   const PartSplit lastSplit = level.split;
@@ -359,14 +383,19 @@ void QTBTPartitioner::canSplit( const CodingStructure &cs, bool& canNo, bool& ca
   // don't allow QT-splitting below a BT split
   if( lastSplit != CTU_LEVEL && lastSplit != CU_QUAD_SPLIT ) canQt = false;
   if( area.width <= minQtSize )                              canQt = false;
-
+  if( chType == CHANNEL_TYPE_CHROMA && areaC.width <= MIN_DUALTREE_CHROMA_WIDTH ) canQt = false;
+  if( treeType == TREE_C )
+  {
+    canQt = canBh = canTh = canBv = canTv = false;
+    return;
+  }
   if( implicitSplit != CU_DONT_SPLIT )
   {
     canNo = canTh = canTv = false;
 
     canBh = implicitSplit == CU_HORZ_SPLIT;
     canBv = implicitSplit == CU_VERT_SPLIT;
-
+    if (chType == CHANNEL_TYPE_CHROMA && areaC.width == 4) canBv = false;
     return;
   }
 
@@ -394,30 +423,34 @@ void QTBTPartitioner::canSplit( const CodingStructure &cs, bool& canNo, bool& ca
     return;
   }
 
+  if( area.width > maxBtSize || area.height > maxBtSize )
+  {
+    canBh = canBv = false;
+  }
+
   // specific check for BT splits
-  if( area.height <= minBtSize || area.height > maxBtSize )                            canBh = false;
+  if( area.height <= minBtSize )                            canBh = false;
   if( area.width > MAX_TB_SIZEY && area.height <= MAX_TB_SIZEY ) canBh = false;
-
-  if( area.width <= minBtSize || area.width > maxBtSize )                              canBv = false;
+  if( chType == CHANNEL_TYPE_CHROMA && areaC.width * areaC.height <= MIN_DUALTREE_CHROMA_SIZE )     canBh = false;
+  if( area.width <= minBtSize )                              canBv = false;
   if( area.width <= MAX_TB_SIZEY && area.height > MAX_TB_SIZEY ) canBv = false;
-
+  if (chType == CHANNEL_TYPE_CHROMA && (areaC.width * areaC.height <= MIN_DUALTREE_CHROMA_SIZE || areaC.width == 4))     canBv = false;
+  if( modeType == MODE_TYPE_INTER && area.width * area.height == 32 )  canBv = canBh = false;
   if( area.height <= 2 * minTtSize || area.height > maxTtSize || area.width > maxTtSize )
                                                                                        canTh = false;
   if( area.width > MAX_TB_SIZEY || area.height > MAX_TB_SIZEY )  canTh = false;
-
+  if( chType == CHANNEL_TYPE_CHROMA && areaC.width * areaC.height <= MIN_DUALTREE_CHROMA_SIZE*2 )     canTh = false;
   if( area.width <= 2 * minTtSize || area.width > maxTtSize || area.height > maxTtSize )
                                                                                        canTv = false;
   if( area.width > MAX_TB_SIZEY || area.height > MAX_TB_SIZEY )  canTv = false;
+  if (chType == CHANNEL_TYPE_CHROMA && (areaC.width * areaC.height <= MIN_DUALTREE_CHROMA_SIZE * 2 || areaC.width == 8))     canTv = false;
+  if( modeType == MODE_TYPE_INTER && area.width * area.height == 64 )  canTv = canTh = false;
 }
 
 bool QTBTPartitioner::canSplit( const PartSplit split, const CodingStructure &cs )
 {
   const CompArea area       = currArea().Y();
-#if MAX_TB_SIZE_SIGNALLING
   const unsigned maxTrSize  = cs.sps->getMaxTbSize();
-#else
-  const unsigned maxTrSize  = MAX_TB_SIZEY;
-#endif
 
   bool canNo, canQt, canBh, canTh, canBv, canTv;
 
@@ -723,16 +756,6 @@ bool TUIntraSubPartitioner::canSplit( const PartSplit split, const CodingStructu
   }
 }
 
-
-//////////////////////////////////////////////////////////////////////////
-// PartitionerFactory
-//////////////////////////////////////////////////////////////////////////
-
-Partitioner* PartitionerFactory::get( const Slice& slice )
-{
-  return new QTBTPartitioner;
-}
-
 //////////////////////////////////////////////////////////////////////////
 // Partitioner methods describing the actual partitioning logic
 //////////////////////////////////////////////////////////////////////////
@@ -935,11 +958,11 @@ void PartitionerImpl::getTUIntraSubPartitions( Partitioning &sub, const UnitArea
   uint32_t nPartitions;
   uint32_t splitDimensionSize = CU::getISPSplitDim( tuArea.lumaSize().width, tuArea.lumaSize().height, splitType );
 
-  bool isDualTree = CS::isDualITree( cs );
+  bool isDualTree = CS::isDualITree( cs ) || cs.treeType != TREE_D;
 
   if( splitType == TU_1D_HORZ_SPLIT )
   {
-    nPartitions = tuArea.lumaSize().height >> g_aucLog2[splitDimensionSize];
+    nPartitions = tuArea.lumaSize().height >> floorLog2(splitDimensionSize);
 
     sub.resize( nPartitions );
 
@@ -956,7 +979,7 @@ void PartitionerImpl::getTUIntraSubPartitions( Partitioning &sub, const UnitArea
   }
   else if( splitType == TU_1D_VERT_SPLIT )
   {
-    nPartitions = tuArea.lumaSize().width >> g_aucLog2[splitDimensionSize];
+    nPartitions = tuArea.lumaSize().width >> floorLog2(splitDimensionSize);
 
     sub.resize( nPartitions );
 
@@ -1005,9 +1028,9 @@ static const int g_zScanToY[1 << ( g_maxRtGridSize << 1 )] =
    0,  0,  1,  1,  0,  0,  1,  1,
    2,  2,  3,  3,  2,  2,  3,  3,
    4,  4,  5,  5,  4,  4,  5,  5,
-   6,  6,  7,  7,  6,  5,  7,  7,
+   6,  6,  7,  7,  6,  6,  7,  7,
    4,  4,  5,  5,  4,  4,  5,  5,
-   6,  6,  7,  7,  6,  5,  7,  7,
+   6,  6,  7,  7,  6,  6,  7,  7,
 };
 static const int g_rsScanToZ[1 << ( g_maxRtGridSize << 1 )] =
 {
@@ -1025,12 +1048,8 @@ Partitioning PartitionerImpl::getMaxTuTiling( const UnitArea &cuArea, const Codi
 {
   static_assert( MAX_LOG2_DIFF_CU_TR_SIZE <= g_maxRtGridSize, "Z-scan tables are only provided for MAX_LOG2_DIFF_CU_TR_SIZE for up to 3 (8x8 tiling)!" );
 
-  const CompArea area = cuArea.Y().valid() ? cuArea.Y() : cuArea.Cb();
-#if MAX_TB_SIZE_SIGNALLING
-  const int maxTrSize = cs.sps->getMaxTbSize() >> ( isLuma( area.compID ) ? 0 : 1 );
-#else
-  const int maxTrSize = MAX_TB_SIZEY >> ( isLuma( area.compID ) ? 0 : 1 );
-#endif
+  const Size area     = cuArea.lumaSize();
+  const int maxTrSize = (area.width>64 || area.height>64) ? 64 : cs.sps->getMaxTbSize();
   const int numTilesH = std::max<int>( 1, area.width  / maxTrSize );
   const int numTilesV = std::max<int>( 1, area.height / maxTrSize );
   const int numTiles  = numTilesH * numTilesV;
diff --git a/source/Lib/CommonLib/UnitPartitioner.h b/source/Lib/CommonLib/UnitPartitioner.h
index 4fbe68f312f0629ac25932646445a26e74cc0a09..590947759772b858bdbf5e3a047afbd3021aca77 100644
--- a/source/Lib/CommonLib/UnitPartitioner.h
+++ b/source/Lib/CommonLib/UnitPartitioner.h
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2019, ITU/ISO/IEC
+* Copyright (c) 2010-2020, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -94,6 +94,7 @@ struct PartLevel
   bool         canQtSplit;
   bool         qgEnable;
   bool         qgChromaEnable;
+  int          modeType;
 
   PartLevel();
   PartLevel( const PartSplit _split, const Partitioning&  _parts );
@@ -123,6 +124,8 @@ public:
 
   unsigned currImplicitBtDepth;
   ChannelType chType;
+  TreeType treeType;
+  ModeType modeType;
 
   virtual ~Partitioner                    () { }
 
@@ -134,6 +137,7 @@ public:
   const bool currQgChromaEnable           () const { return currPartLevel().qgChromaEnable; }
 
   SplitSeries getSplitSeries              () const;
+  ModeTypeSeries getModeTypeSeries        () const;
 
   virtual void initCtu                    ( const UnitArea& ctuArea, const ChannelType _chType, const Slice& slice )    = 0;
   virtual void splitCurrArea              ( const PartSplit split, const CodingStructure &cs )                          = 0;
@@ -150,6 +154,9 @@ public:
   virtual bool canSplit                   ( const PartSplit split,                          const CodingStructure &cs ) = 0;
   virtual bool isSplitImplicit            ( const PartSplit split,                          const CodingStructure &cs ) = 0;
   virtual PartSplit getImplicitSplit      (                                                 const CodingStructure &cs ) = 0;
+  bool isSepTree                          ( const CodingStructure &cs );
+  bool isConsInter                        () const { return modeType == MODE_TYPE_INTER; } // current mode type is MODE_TYPE_INTER
+  bool isConsIntra                        () const { return modeType == MODE_TYPE_INTRA; } // current mode type is MODE_TYPE_INTRA
 };
 
 class AdaptiveDepthPartitioner : public Partitioner
@@ -190,6 +197,8 @@ public:
 #if _DEBUG
     m_currArea   = _initialState.currArea();
 #endif
+    treeType     = _initialState.treeType;
+    modeType     = _initialState.modeType;
   }
 
   void initCtu               (const UnitArea& ctuArea, const ChannelType chType, const Slice& slice) {}; // not needed
@@ -203,14 +212,6 @@ public:
   PartSplit getImplicitSplit (const CodingStructure &cs) { return CU_DONT_SPLIT; }; //not needed
 };
 
-
-
-
-namespace PartitionerFactory
-{
-  Partitioner* get( const Slice& slice );
-};
-
 //////////////////////////////////////////////////////////////////////////
 // Partitioner namespace - contains methods calculating the actual splits
 //////////////////////////////////////////////////////////////////////////
diff --git a/source/Lib/CommonLib/UnitTools.cpp b/source/Lib/CommonLib/UnitTools.cpp
index 534ec8017a984c5543336823cc3209d91520d3f8..b11d68861538736236a89972506bbaba0f5aa877 100644
--- a/source/Lib/CommonLib/UnitTools.cpp
+++ b/source/Lib/CommonLib/UnitTools.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -58,12 +58,12 @@ uint64_t CS::getEstBits(const CodingStructure &cs)
 
 bool CS::isDualITree( const CodingStructure &cs )
 {
-  return cs.slice->isIRAP() && !cs.pcv->ISingleTree;
+  return cs.slice->isIntra() && !cs.pcv->ISingleTree;
 }
 
 UnitArea CS::getArea( const CodingStructure &cs, const UnitArea &area, const ChannelType chType )
 {
-  return isDualITree( cs ) ? area.singleChan( chType ) : area;
+  return isDualITree( cs ) || cs.treeType != TREE_D ? area.singleChan( chType ) : area;
 }
 void CS::setRefinedMotionField(CodingStructure &cs)
 {
@@ -87,6 +87,8 @@ void CS::setRefinedMotionField(CodingStructure &cs)
             subPu.mv[1] = pu.mv[1];
             subPu.mv[REF_PIC_LIST_0] += pu.mvdL0SubPu[num];
             subPu.mv[REF_PIC_LIST_1] -= pu.mvdL0SubPu[num];
+            subPu.mv[REF_PIC_LIST_0].clipToStorageBitDepth();
+            subPu.mv[REF_PIC_LIST_1].clipToStorageBitDepth();
             pu.mvdL0SubPu[num].setZero();
             num++;
             PU::spanMotionInfo(subPu);
@@ -98,6 +100,36 @@ void CS::setRefinedMotionField(CodingStructure &cs)
 }
 // CU tools
 
+bool CU::getRprScaling( const SPS* sps, const PPS* curPPS, Picture* refPic, int& xScale, int& yScale ) // writes the reference/current scaling-window ratios to xScale and yScale; returns whether the reference is scaled
+{
+  const Window& curScalingWindow = curPPS->getScalingWindow();
+#if JVET_Q0487_SCALING_WINDOW_ISSUES
+  int curPicWidth = curPPS->getPicWidthInLumaSamples()   - SPS::getWinUnitX( sps->getChromaFormatIdc() ) * (curScalingWindow.getWindowLeftOffset() + curScalingWindow.getWindowRightOffset()); // window offsets are multiplied by the chroma-format window unit
+  int curPicHeight = curPPS->getPicHeightInLumaSamples() - SPS::getWinUnitY( sps->getChromaFormatIdc() ) * (curScalingWindow.getWindowTopOffset()  + curScalingWindow.getWindowBottomOffset());
+#else
+  int curPicWidth = curPPS->getPicWidthInLumaSamples() - curScalingWindow.getWindowLeftOffset() - curScalingWindow.getWindowRightOffset(); // window offsets are subtracted unscaled
+  int curPicHeight = curPPS->getPicHeightInLumaSamples() - curScalingWindow.getWindowTopOffset() - curScalingWindow.getWindowBottomOffset();
+#endif
+
+  const Window& refScalingWindow = refPic->getScalingWindow();
+#if JVET_Q0487_SCALING_WINDOW_ISSUES
+  int refPicWidth = refPic->getPicWidthInLumaSamples()   - SPS::getWinUnitX( sps->getChromaFormatIdc() ) * (refScalingWindow.getWindowLeftOffset() + refScalingWindow.getWindowRightOffset()); // same computation as for the current picture, using the reference picture's window
+  int refPicHeight = refPic->getPicHeightInLumaSamples() - SPS::getWinUnitY( sps->getChromaFormatIdc() ) * (refScalingWindow.getWindowTopOffset()  + refScalingWindow.getWindowBottomOffset());
+#else
+  int refPicWidth = refPic->getPicWidthInLumaSamples() - refScalingWindow.getWindowLeftOffset() - refScalingWindow.getWindowRightOffset();
+  int refPicHeight = refPic->getPicHeightInLumaSamples() - refScalingWindow.getWindowTopOffset() - refScalingWindow.getWindowBottomOffset();
+#endif
+
+  xScale = ( ( refPicWidth << SCALE_RATIO_BITS ) + ( curPicWidth >> 1 ) ) / curPicWidth; // rounded fixed-point ratio ref/cur with SCALE_RATIO_BITS fractional bits; divides by curPicWidth, which is not checked for zero here
+  yScale = ( ( refPicHeight << SCALE_RATIO_BITS ) + ( curPicHeight >> 1 ) ) / curPicHeight; // same rounding for the vertical ratio
+
+#if JVET_Q0487_SCALING_WINDOW_ISSUES
+  return refPic->isRefScaled( curPPS ); // the scaled/not-scaled decision is delegated to the reference picture
+#else
+  return refPicWidth != curPicWidth || refPicHeight != curPicHeight; // scaled when either window dimension differs
+#endif
+}
+
 bool CU::isIntra(const CodingUnit &cu)
 {
   return cu.predMode == MODE_INTRA;
@@ -113,36 +145,36 @@ bool CU::isIBC(const CodingUnit &cu)
   return cu.predMode == MODE_IBC;
 }
 
-bool CU::isRDPCMEnabled(const CodingUnit& cu)
+bool CU::isPLT(const CodingUnit &cu)
 {
-  return cu.cs->sps->getSpsRangeExtension().getRdpcmEnabledFlag(cu.predMode == MODE_INTRA ? RDPCM_SIGNAL_IMPLICIT : RDPCM_SIGNAL_EXPLICIT);
+  return cu.predMode == MODE_PLT;
 }
 
-bool CU::isLosslessCoded(const CodingUnit &cu)
+bool CU::isRDPCMEnabled(const CodingUnit& cu)
 {
-  return cu.cs->pps->getTransquantBypassEnabledFlag() && cu.transQuantBypass;
+  return cu.cs->sps->getSpsRangeExtension().getRdpcmEnabledFlag(cu.predMode == MODE_INTRA ? RDPCM_SIGNAL_IMPLICIT : RDPCM_SIGNAL_EXPLICIT);
 }
 
+
 bool CU::isSameSlice(const CodingUnit& cu, const CodingUnit& cu2)
 {
   return cu.slice->getIndependentSliceIdx() == cu2.slice->getIndependentSliceIdx();
 }
 
-#if HEVC_TILES_WPP
 bool CU::isSameTile(const CodingUnit& cu, const CodingUnit& cu2)
 {
   return cu.tileIdx == cu2.tileIdx;
 }
 
+
 bool CU::isSameSliceAndTile(const CodingUnit& cu, const CodingUnit& cu2)
 {
   return ( cu.slice->getIndependentSliceIdx() == cu2.slice->getIndependentSliceIdx() ) && ( cu.tileIdx == cu2.tileIdx );
 }
-#endif
 
 bool CU::isSameCtu(const CodingUnit& cu, const CodingUnit& cu2)
 {
-  uint32_t ctuSizeBit = g_aucLog2[cu.cs->sps->getMaxCUWidth()];
+  uint32_t ctuSizeBit = floorLog2(cu.cs->sps->getMaxCUWidth());
 
   Position pos1Ctu(cu.lumaPos().x  >> ctuSizeBit, cu.lumaPos().y  >> ctuSizeBit);
   Position pos2Ctu(cu2.lumaPos().x >> ctuSizeBit, cu2.lumaPos().y >> ctuSizeBit);
@@ -150,30 +182,13 @@ bool CU::isSameCtu(const CodingUnit& cu, const CodingUnit& cu2)
   return pos1Ctu.x == pos2Ctu.x && pos1Ctu.y == pos2Ctu.y;
 }
 
-uint32_t CU::getIntraSizeIdx(const CodingUnit &cu)
-{
-  uint8_t uiWidth = cu.lumaSize().width;
-
-  uint32_t  uiCnt   = 0;
-
-  while (uiWidth)
-  {
-    uiCnt++;
-    uiWidth >>= 1;
-  }
-
-  uiCnt -= 2;
-
-  return uiCnt > 6 ? 6 : uiCnt;
-}
-
 bool CU::isLastSubCUOfCtu( const CodingUnit &cu )
 {
-  const SPS &sps      = *cu.cs->sps;
-  const Area cuAreaY = CS::isDualITree( *cu.cs ) ? Area( recalcPosition( cu.chromaFormat, cu.chType, CHANNEL_TYPE_LUMA, cu.blocks[cu.chType].pos() ), recalcSize( cu.chromaFormat, cu.chType, CHANNEL_TYPE_LUMA, cu.blocks[cu.chType].size() ) ) : ( const Area& ) cu.Y();
+  const Area cuAreaY = cu.isSepTree() ? Area( recalcPosition( cu.chromaFormat, cu.chType, CHANNEL_TYPE_LUMA, cu.blocks[cu.chType].pos() ), recalcSize( cu.chromaFormat, cu.chType, CHANNEL_TYPE_LUMA, cu.blocks[cu.chType].size() ) ) : (const Area&)cu.Y();
 
-  return ( ( ( ( cuAreaY.x + cuAreaY.width  ) & cu.cs->pcv->maxCUWidthMask  ) == 0 || cuAreaY.x + cuAreaY.width  == sps.getPicWidthInLumaSamples()  ) &&
-           ( ( ( cuAreaY.y + cuAreaY.height ) & cu.cs->pcv->maxCUHeightMask ) == 0 || cuAreaY.y + cuAreaY.height == sps.getPicHeightInLumaSamples() ) );
+
+  return ( ( ( ( cuAreaY.x + cuAreaY.width  ) & cu.cs->pcv->maxCUWidthMask  ) == 0 || cuAreaY.x + cuAreaY.width  == cu.cs->pps->getPicWidthInLumaSamples()  ) &&
+           ( ( ( cuAreaY.y + cuAreaY.height ) & cu.cs->pcv->maxCUHeightMask ) == 0 || cuAreaY.y + cuAreaY.height == cu.cs->pps->getPicHeightInLumaSamples() ) );
 }
 
 uint32_t CU::getCtuAddr( const CodingUnit &cu )
@@ -185,7 +200,15 @@ int CU::predictQP( const CodingUnit& cu, const int prevQP )
 {
   const CodingStructure &cs = *cu.cs;
 
-  if ( !cu.blocks[cu.chType].x && !( cu.blocks[cu.chType].y & ( cs.pcv->maxCUHeightMask >> getChannelTypeScaleY( cu.chType, cu.chromaFormat ) ) ) && ( cs.getCU( cu.blocks[cu.chType].pos().offset( 0, -1 ), cu.chType) != NULL ) )
+  uint32_t  ctuRsAddr       = getCtuAddr( cu );
+  uint32_t  ctuXPosInCtus   = ctuRsAddr % cs.pcv->widthInCtus;
+  uint32_t  tileColIdx      = cu.slice->getPPS()->ctuToTileCol( ctuXPosInCtus );
+  uint32_t  tileXPosInCtus  = cu.slice->getPPS()->getTileColumnBd( tileColIdx );
+  if( ctuXPosInCtus == tileXPosInCtus &&
+      !( cu.blocks[cu.chType].x & ( cs.pcv->maxCUWidthMask  >> getChannelTypeScaleX( cu.chType, cu.chromaFormat ) ) ) &&
+      !( cu.blocks[cu.chType].y & ( cs.pcv->maxCUHeightMask >> getChannelTypeScaleY( cu.chType, cu.chromaFormat ) ) ) && 
+      ( cs.getCU( cu.blocks[cu.chType].pos().offset( 0, -1 ), cu.chType) != NULL ) && 
+      CU::isSameSliceAndTile( *cs.getCU( cu.blocks[cu.chType].pos().offset( 0, -1 ), cu.chType), cu ) )
   {
     return ( ( cs.getCU( cu.blocks[cu.chType].pos().offset( 0, -1 ), cu.chType ) )->qp );
   }
@@ -217,6 +240,19 @@ void CU::addPUs( CodingUnit& cu )
   cu.cs->addPU( CS::getArea( *cu.cs, cu, cu.chType ), cu.chType );
 }
 
+void CU::saveMotionInHMVP( const CodingUnit& cu, const bool isToBeDone ) // adds the first PU's motion info to a motion LUT; NOTE: nothing is stored when isToBeDone is true
+{
+  const PredictionUnit& pu = *cu.firstPU;
+
+  if (!cu.triangle && !cu.affine && !isToBeDone ) // triangle and affine CUs are never stored
+  {
+    MotionInfo mi = pu.getMotionInfo();
+
+    mi.BcwIdx = (mi.interDir == 3) ? cu.BcwIdx : BCW_DEFAULT; // the CU's BcwIdx is kept only for interDir == 3 (presumably bi-prediction -- confirm)
+
+    cu.cs->addMiToLut(CU::isIBC(cu) ? cu.cs->motionLut.lutIbc : cu.cs->motionLut.lut, mi); // IBC CUs go to lutIbc, all others to lut
+  }
+}
 
 PartSplit CU::getSplitAtDepth( const CodingUnit& cu, const unsigned depth )
 {
@@ -235,31 +271,14 @@ PartSplit CU::getSplitAtDepth( const CodingUnit& cu, const unsigned depth )
   else   { THROW( "Unknown split mode"    ); return CU_QUAD_SPLIT; }
 }
 
-bool CU::hasNonTsCodedBlock( const CodingUnit& cu )
+ModeType CU::getModeTypeAtDepth( const CodingUnit& cu, const unsigned depth ) // returns the 3-bit mode-type field stored for the given depth in cu.modeTypeSeries
 {
-  bool hasAnyNonTSCoded = false;
-
-  for( auto &currTU : traverseTUs( cu ) )
-  {
-    for( uint32_t i = 0; i < ::getNumberValidTBlocks( *cu.cs->pcv ); i++ )
-    {
-      hasAnyNonTSCoded |= ( currTU.blocks[i].valid() && ( isLuma(ComponentID(i)) ? currTU.mtsIdx != 1 : true ) && TU::getCbf( currTU, ComponentID( i ) ) );
-    }
-  }
-
-  return hasAnyNonTSCoded;
+  ModeType modeType = ModeType( (cu.modeTypeSeries >> (depth * 3)) & 0x07 ); // each depth occupies three bits, depth 0 in the lowest bits
+  CHECK( depth > cu.depth, " depth is wrong" ); // NOTE(review): the range check runs after the shift above has already been evaluated
+  return modeType;
 }
 
-uint32_t CU::getNumNonZeroCoeffNonTs( const CodingUnit& cu )
-{
-  uint32_t count = 0;
-  for( auto &currTU : traverseTUs( cu ) )
-  {
-    count += TU::getNumNonZeroCoeffsNonTS( currTU );
-  }
 
-  return count;
-}
 
 bool CU::divideTuInRows( const CodingUnit &cu )
 {
@@ -267,75 +286,6 @@ bool CU::divideTuInRows( const CodingUnit &cu )
   return cu.ispMode == HOR_INTRA_SUBPARTITIONS ? true : false;
 }
 
-bool CU::firstTestISPHorSplit( const int width, const int height, const ComponentID compID, const CodingUnit *cuLeft, const CodingUnit *cuAbove )
-{
-  //this function decides which split mode (horizontal or vertical) is tested first (encoder only)
-  //we check the logarithmic aspect ratios of the block
-  int aspectRatio = g_aucLog2[width] - g_aucLog2[height];
-  if( aspectRatio > 0 )
-  {
-    return true;
-  }
-  else if( aspectRatio < 0 )
-  {
-    return false;
-  }
-  else //if (aspectRatio == 0)
-  {
-    //we gather data from the neighboring CUs
-    const int cuLeftWidth    = cuLeft  != nullptr                                    ? cuLeft->blocks[compID].width   : -1;
-    const int cuLeftHeight   = cuLeft  != nullptr                                    ? cuLeft->blocks[compID].height  : -1;
-    const int cuAboveWidth   = cuAbove != nullptr                                    ? cuAbove->blocks[compID].width  : -1;
-    const int cuAboveHeight  = cuAbove != nullptr                                    ? cuAbove->blocks[compID].height : -1;
-    const int cuLeft1dSplit  = cuLeft  != nullptr &&  cuLeft->predMode == MODE_INTRA ? cuLeft->ispMode                :  0;
-    const int cuAbove1dSplit = cuAbove != nullptr && cuAbove->predMode == MODE_INTRA ? cuAbove->ispMode               :  0;
-    if( cuLeftWidth != -1 && cuAboveWidth == -1 )
-    {
-      int cuLeftAspectRatio = g_aucLog2[cuLeftWidth] - g_aucLog2[cuLeftHeight];
-      return cuLeftAspectRatio < 0 ? false : cuLeftAspectRatio > 0 ? true : cuLeft1dSplit == VER_INTRA_SUBPARTITIONS ? false : true;
-    }
-    else if( cuLeftWidth == -1 && cuAboveWidth != -1 )
-    {
-      int cuAboveAspectRatio = g_aucLog2[cuAboveWidth] - g_aucLog2[cuAboveHeight];
-      return cuAboveAspectRatio < 0 ? false : cuAboveAspectRatio > 0 ? true : cuAbove1dSplit == VER_INTRA_SUBPARTITIONS ? false : true;
-    }
-    else if( cuLeftWidth != -1 && cuAboveWidth != -1 )
-    {
-      int cuLeftAspectRatio = g_aucLog2[cuLeftWidth] - g_aucLog2[cuLeftHeight];
-      int cuAboveAspectRatio = g_aucLog2[cuAboveWidth] - g_aucLog2[cuAboveHeight];
-      if( cuLeftAspectRatio < 0 && cuAboveAspectRatio < 0 )
-      {
-        return false;
-      }
-      else if( cuLeftAspectRatio > 0 && cuAboveAspectRatio > 0 )
-      {
-        return true;
-      }
-      else if( cuLeftAspectRatio == 0 && cuAboveAspectRatio == 0 )
-      {
-        if( cuLeft1dSplit != 0 && cuAbove1dSplit != 0 )
-        {
-          return cuLeft1dSplit == VER_INTRA_SUBPARTITIONS && cuAbove1dSplit == VER_INTRA_SUBPARTITIONS ? false : true;
-        }
-        else if( cuLeft1dSplit != 0 && cuAbove1dSplit == 0 )
-        {
-          return cuLeft1dSplit == VER_INTRA_SUBPARTITIONS ? false : true;
-        }
-        else if( cuLeft1dSplit == 0 && cuAbove1dSplit != 0 )
-        {
-          return cuAbove1dSplit == VER_INTRA_SUBPARTITIONS ? false : true;
-        }
-        return true;
-      }
-      else
-      {
-        return cuLeftAspectRatio > cuAboveAspectRatio ? cuLeftAspectRatio > 0 : cuAboveAspectRatio > 0;
-      }
-      //return true;
-    }
-    return true;
-  }
-}
 
 PartSplit CU::getISPType( const CodingUnit &cu, const ComponentID compID )
 {
@@ -370,43 +320,45 @@ bool CU::isISPFirst( const CodingUnit &cu, const CompArea &tuArea, const Compone
   return tuArea == cu.firstTU->blocks[compID];
 }
 
-ISPType CU::canUseISPSplit( const CodingUnit &cu, const ComponentID compID )
+bool CU::canUseISP( const CodingUnit &cu, const ComponentID compID ) // CU-level wrapper: reads the block size for compID and the SPS max TB size, then defers to the size-based overload
 {
   const int width     = cu.blocks[compID].width;
   const int height    = cu.blocks[compID].height;
-#if MAX_TB_SIZE_SIGNALLING
   const int maxTrSize = cu.cs->sps->getMaxTbSize();
-#else
-  const int maxTrSize = MAX_TB_SIZEY;
-#endif
-  return CU::canUseISPSplit( width, height, maxTrSize );
+  return CU::canUseISP( width, height, maxTrSize ); // forwards to the ( width, height, maxTrSize ) overload
 }
 
-ISPType CU::canUseISPSplit( const int width, const int height, const int maxTrSize )
+bool CU::canUseISP( const int width, const int height, const int maxTrSize ) // size-only check: returns false when the block is too small to split or exceeds maxTrSize, true otherwise
 {
-  bool widthCannotBeUsed = false, heightCannotBeUsed = false;
-
-  const uint32_t minTuSizeForISP = MIN_TB_SIZEY;
-  bool  notEnoughSamplesToSplit = ( g_aucLog2[width] + g_aucLog2[height] <= ( g_aucLog2[minTuSizeForISP] << 1 ) );
-  widthCannotBeUsed  = width  > maxTrSize || notEnoughSamplesToSplit;
-  heightCannotBeUsed = height > maxTrSize || notEnoughSamplesToSplit;
-
-  if( !widthCannotBeUsed && !heightCannotBeUsed )
-  {
-    return CAN_USE_VER_AND_HORL_SPLITS; //both splits can be used
-  }
-  else if( widthCannotBeUsed && !heightCannotBeUsed )
+  bool  notEnoughSamplesToSplit = ( floorLog2(width) + floorLog2(height) <= ( floorLog2(MIN_TB_SIZEY) << 1 ) ); // true when floorLog2(width) + floorLog2(height) does not exceed 2 * floorLog2(MIN_TB_SIZEY)
+  bool  cuSizeLargerThanMaxTrSize = width > maxTrSize || height > maxTrSize; // true when either dimension exceeds maxTrSize
+  if ( notEnoughSamplesToSplit || cuSizeLargerThanMaxTrSize ) // either condition alone rules ISP out
   {
-    return VER_INTRA_SUBPARTITIONS; //only the vertical split can be performed
+    return false;
   }
-  else if( !widthCannotBeUsed && heightCannotBeUsed )
+  return true;
+}
+
+bool CU::canUseLfnstWithISP( const CompArea& cuArea, const ISPType ispSplitType ) // true only when an ISP split is selected and the resulting sub-partition is at least MIN_TB_SIZEY in both dimensions
+{
+  if( ispSplitType == NOT_INTRA_SUBPARTITIONS ) // no ISP split selected
   {
-    return HOR_INTRA_SUBPARTITIONS; //only the horizontal split can be performed
+    return false;
   }
-  else
+  Size tuSize = ( ispSplitType == HOR_INTRA_SUBPARTITIONS ) ? Size( cuArea.width, CU::getISPSplitDim( cuArea.width, cuArea.height, TU_1D_HORZ_SPLIT ) ) : // horizontal split: full width, height taken from getISPSplitDim
+    Size( CU::getISPSplitDim( cuArea.width, cuArea.height, TU_1D_VERT_SPLIT ), cuArea.height ); // any other split type: width taken from getISPSplitDim, full height
+
+  if( !( tuSize.width >= MIN_TB_SIZEY && tuSize.height >= MIN_TB_SIZEY ) ) // sub-partition is narrower or shorter than MIN_TB_SIZEY
   {
-    return NOT_INTRA_SUBPARTITIONS; //neither of the splits can be used
+    return false;
   }
+  return true;
+}
+
+bool CU::canUseLfnstWithISP( const CodingUnit& cu, const ChannelType chType ) // CU-level wrapper around the ( CompArea, ISPType ) overload, using cu.ispMode as the split type
+{
+  CHECK( !isLuma( chType ), "Wrong ISP mode!" ); // CHECK receives the non-luma condition; presumably it fails when that condition is true - confirm against the CHECK macro
+  return CU::canUseLfnstWithISP( cu.blocks[chType == CHANNEL_TYPE_LUMA ? 0 : 1], (ISPType)cu.ispMode ); // selects block 0 for luma, block 1 otherwise
 }
 
 uint32_t CU::getISPSplitDim( const int width, const int height, const PartSplit ispType )
@@ -425,14 +377,36 @@ uint32_t CU::getISPSplitDim( const int width, const int height, const PartSplit
     nonSplitDimensionSize = height;
   }
 
-  const int minNumberOfSamplesPerCu = 1 << ( ( g_aucLog2[MIN_TB_SIZEY] << 1 ) );
-  const int factorToMinSamples = nonSplitDimensionSize < minNumberOfSamplesPerCu ? minNumberOfSamplesPerCu >> g_aucLog2[nonSplitDimensionSize] : 1;
+  const int minNumberOfSamplesPerCu = 1 << ( ( floorLog2(MIN_TB_SIZEY) << 1 ) );
+  const int factorToMinSamples = nonSplitDimensionSize < minNumberOfSamplesPerCu ? minNumberOfSamplesPerCu >> floorLog2(nonSplitDimensionSize) : 1;
   partitionSize = ( splitDimensionSize >> divShift ) < factorToMinSamples ? factorToMinSamples : ( splitDimensionSize >> divShift );
 
-  CHECK( g_aucLog2[partitionSize] + g_aucLog2[nonSplitDimensionSize] < g_aucLog2[minNumberOfSamplesPerCu], "A partition has less than the minimum amount of samples!" );
+  CHECK( floorLog2(partitionSize) + floorLog2(nonSplitDimensionSize) < floorLog2(minNumberOfSamplesPerCu), "A partition has less than the minimum amount of samples!" );
   return partitionSize;
 }
 
+bool CU::allLumaCBFsAreZero(const CodingUnit& cu) // returns true when none of the inspected TUs of cu has its COMPONENT_Y CBF set
+{
+  if (!cu.ispMode) // ispMode == 0: only cu.firstTU is inspected
+  {
+    return TU::getCbf(*cu.firstTU, COMPONENT_Y) == false;
+  }
+  else
+  {
+    int numTotalTUs = cu.ispMode == HOR_INTRA_SUBPARTITIONS ? cu.lheight() >> floorLog2(cu.firstTU->lheight()) : cu.lwidth() >> floorLog2(cu.firstTU->lwidth()); // TU count: CU height (horizontal split) or CU width (otherwise), shifted right by floorLog2 of the first TU's same dimension
+    TransformUnit* tuPtr = cu.firstTU;
+    for (int tuIdx = 0; tuIdx < numTotalTUs; tuIdx++) // visits numTotalTUs TUs by following the ->next chain from firstTU
+    {
+      if (TU::getCbf(*tuPtr, COMPONENT_Y) == true)
+      {
+        return false; // one set luma CBF is enough to answer false
+      }
+      tuPtr = tuPtr->next;
+    }
+    return true;
+  }
+}
+
 
 PUTraverser CU::traversePUs( CodingUnit& cu )
 {
@@ -459,10 +433,8 @@ cTUTraverser CU::traverseTUs( const CodingUnit& cu )
 int PU::getIntraMPMs( const PredictionUnit &pu, unsigned* mpm, const ChannelType &channelType /*= CHANNEL_TYPE_LUMA*/ )
 {
   const int numMPMs = NUM_MOST_PROBABLE_MODES;
-  const int extendRefLine = (channelType == CHANNEL_TYPE_LUMA) ? pu.multiRefIdx : 0;
-  const ISPType ispType = isLuma( channelType ) ? ISPType( pu.cu->ispMode ) : NOT_INTRA_SUBPARTITIONS;
-  const bool isHorSplit = ispType == HOR_INTRA_SUBPARTITIONS;
   {
+    CHECK(channelType != CHANNEL_TYPE_LUMA, "Not harmonized yet");
     int numCand      = -1;
     int leftIntraDir = PLANAR_IDX, aboveIntraDir = PLANAR_IDX;
 
@@ -474,14 +446,14 @@ int PU::getIntraMPMs( const PredictionUnit &pu, unsigned* mpm, const ChannelType
     const PredictionUnit *puLeft = pu.cs->getPURestricted(posLB.offset(-1, 0), pu, channelType);
     if (puLeft && CU::isIntra(*puLeft->cu))
     {
-      leftIntraDir = puLeft->intraDir[channelType];
+      leftIntraDir = PU::getIntraDirLuma( *puLeft );
     }
 
     // Get intra direction of above PU
     const PredictionUnit *puAbove = pu.cs->getPURestricted(posRT.offset(0, -1), pu, channelType);
     if (puAbove && CU::isIntra(*puAbove->cu) && CU::isSameCtu(*pu.cu, *puAbove->cu))
     {
-      aboveIntraDir = puAbove->intraDir[channelType];
+      aboveIntraDir = PU::getIntraDirLuma( *puAbove );
     }
 
     CHECK(2 >= numMPMs, "Invalid number of most probable modes");
@@ -489,185 +461,9 @@ int PU::getIntraMPMs( const PredictionUnit &pu, unsigned* mpm, const ChannelType
     const int offset = (int)NUM_LUMA_MODE - 6;
     const int mod = offset + 3;
 
-    if (extendRefLine)
-    {
-      int modeIdx = 0;
-      int angularMode[2] = { 0, 0 };
-
-      if (leftIntraDir > DC_IDX)
-      {
-        angularMode[modeIdx++] = leftIntraDir;
-      }
-      if (aboveIntraDir > DC_IDX && aboveIntraDir != leftIntraDir)
-      {
-        angularMode[modeIdx++] = aboveIntraDir;
-      }
-      if (modeIdx == 0)
-      {
-        mpm[0] = VER_IDX;
-        mpm[1] = HOR_IDX;
-        mpm[2] = 2;
-        mpm[3] = DIA_IDX;
-        mpm[4] = VDIA_IDX;
-        mpm[5] = 26;
-      }
-      else if (modeIdx == 1)
-      {
-        mpm[0] = angularMode[0];
-        mpm[1] = ((angularMode[0] + offset) % mod) + 2;
-        mpm[2] = ((angularMode[0] - 1) % mod) + 2;
-        mpm[3] = ((angularMode[0] + offset - 1) % mod) + 2;
-        mpm[4] = (angularMode[0] % mod) + 2;
-        mpm[5] = ((angularMode[0] + offset - 2) % mod) + 2;
-      }
-      else
-      {
-        mpm[0] = angularMode[0];
-        mpm[1] = angularMode[1];
-        int maxCandModeIdx = mpm[0] > mpm[1] ? 0 : 1;
-        int minCandModeIdx = 1 - maxCandModeIdx;
-        if (mpm[maxCandModeIdx] - mpm[minCandModeIdx] == 1)
-        {
-          mpm[2] = ((angularMode[minCandModeIdx] + offset) % mod) + 2;
-          mpm[3] = ((angularMode[maxCandModeIdx] - 1) % mod) + 2;
-          mpm[4] = ((angularMode[minCandModeIdx] + offset - 1) % mod) + 2;
-          mpm[5] = ( angularMode[maxCandModeIdx] % mod) + 2;
-        }
-        else if (mpm[maxCandModeIdx] - mpm[minCandModeIdx] >= 62)
-        {
-          mpm[2] = ((angularMode[minCandModeIdx] - 1) % mod) + 2;
-          mpm[3] = ((angularMode[maxCandModeIdx] + offset) % mod) + 2;
-          mpm[4] = ((angularMode[minCandModeIdx]) % mod) + 2;
-          mpm[5] = ((angularMode[maxCandModeIdx] + offset - 1) % mod) + 2;
-        }
-        else if (mpm[maxCandModeIdx] - mpm[minCandModeIdx] == 2)
-        {
-          mpm[2] = ((angularMode[minCandModeIdx] - 1) % mod) + 2;
-          mpm[3] = ((angularMode[minCandModeIdx] + offset) % mod) + 2;
-          mpm[4] = ((angularMode[maxCandModeIdx] - 1) % mod) + 2;
-          mpm[5] = ((angularMode[minCandModeIdx] + offset - 1) % mod) + 2;
-        }
-        else
-        {
-          mpm[2] = ((angularMode[minCandModeIdx] + offset) % mod) + 2;
-          mpm[3] = ((angularMode[minCandModeIdx] - 1) % mod) + 2;
-          mpm[4] = ((angularMode[maxCandModeIdx] + offset) % mod) + 2;
-          mpm[5] = ((angularMode[maxCandModeIdx] - 1) % mod) + 2;
-        }
-      }
-    }
-    else if( ispType != NOT_INTRA_SUBPARTITIONS )
     {
-      //default case
       mpm[0] = PLANAR_IDX;
-      if( isHorSplit )
-      {
-        mpm[1] = HOR_IDX;
-        mpm[2] = 25;
-        mpm[3] = 10;
-        mpm[4] = 65;
-        mpm[5] = VER_IDX;
-      }
-      else
-      {
-        mpm[1] = VER_IDX;
-        mpm[2] = 43;
-        mpm[3] = 60;
-        mpm[4] = 3;
-        mpm[5] = HOR_IDX;
-      }
-      int canonicalMode = mpm[1];
-      if( leftIntraDir == aboveIntraDir ) //L=A
-      {
-        numCand = 1;
-        if( leftIntraDir > DC_IDX )
-        {
-          mpm[0] =     leftIntraDir;
-          mpm[1] = ( ( leftIntraDir + offset ) % mod ) + 2;
-          mpm[2] = ( ( leftIntraDir - 1 ) % mod ) + 2;
-          if( ( isHorSplit && leftIntraDir < DIA_IDX ) || ( !isHorSplit && leftIntraDir >= DIA_IDX ) )
-          {
-            mpm[3] = ( ( leftIntraDir + offset - 1 ) % mod ) + 2;
-            mpm[4] =   ( leftIntraDir                % mod ) + 2;
-            mpm[5] = ( ( leftIntraDir + offset - 2 ) % mod ) + 2;;
-          }
-          else
-          {
-            if( isHorSplit )
-            {
-              mpm[3] = HOR_IDX;
-              mpm[4] = 5;
-            }
-            else
-            {
-              mpm[3] = VER_IDX;
-              mpm[4] = VDIA_IDX - 3;
-            }
-            mpm[5] = PLANAR_IDX;
-          }
-        }
-      }
-      else //L!=A
-      {
-        numCand = 2;
-        if( ( leftIntraDir > DC_IDX ) && ( aboveIntraDir > DC_IDX ) )
-        {
-          int distLeftToCanonicalMode  = abs( leftIntraDir - canonicalMode );
-          int distAboveToCanonicalMode = abs( aboveIntraDir - canonicalMode );
-          mpm[0] = aboveIntraDir;
-          mpm[1] = leftIntraDir;
-          if( distLeftToCanonicalMode <= distAboveToCanonicalMode )
-          {
-            mpm[0] = leftIntraDir;
-            mpm[1] = aboveIntraDir;
-          }
-          int maxCandModeIdx = mpm[0] > mpm[1] ? 0 : 1;
-          int minCandModeIdx = 1 - maxCandModeIdx;
-          if( mpm[maxCandModeIdx] - mpm[minCandModeIdx] == 1 )
-          {
-            mpm[2] = ( ( mpm[minCandModeIdx] + offset )     % mod ) + 2;
-            mpm[3] = ( ( mpm[maxCandModeIdx] - 1 )          % mod ) + 2;
-            mpm[4] = ( ( mpm[minCandModeIdx] + offset - 1 ) % mod ) + 2;
-            mpm[5] =   ( mpm[maxCandModeIdx]                % mod ) + 2;
-          }
-          else if( mpm[maxCandModeIdx] - mpm[minCandModeIdx] >= 62 )
-          {
-            mpm[2] = ( ( mpm[minCandModeIdx] - 1 )          % mod ) + 2;
-            mpm[3] = ( ( mpm[maxCandModeIdx] + offset )     % mod ) + 2;
-            mpm[4] = ( ( mpm[minCandModeIdx] )              % mod ) + 2;
-            mpm[5] = ( ( mpm[maxCandModeIdx] + offset - 1 ) % mod ) + 2;
-          }
-          else if( mpm[maxCandModeIdx] - mpm[minCandModeIdx] == 2 )
-          {
-            mpm[2] = ( ( mpm[minCandModeIdx] - 1 )          % mod ) + 2;
-            mpm[3] = ( ( mpm[minCandModeIdx] + offset )     % mod ) + 2;
-            mpm[4] = ( ( mpm[maxCandModeIdx] - 1 )          % mod ) + 2;
-            mpm[5] = ( ( mpm[minCandModeIdx] + offset - 1 ) % mod ) + 2;
-          }
-          else
-          {
-            mpm[2] = ( ( mpm[minCandModeIdx] + offset )     % mod ) + 2;
-            mpm[3] = ( ( mpm[minCandModeIdx] - 1 )          % mod ) + 2;
-            mpm[4] = ( ( mpm[maxCandModeIdx] + offset )     % mod ) + 2;
-            mpm[5] = ( ( mpm[maxCandModeIdx] - 1 )          % mod ) + 2;
-          }
-        }
-        else if( leftIntraDir + aboveIntraDir > 2 )
-        {
-          //mpm[0] = PLANAR_IDX;
-          int angMode = leftIntraDir > DC_IDX ? leftIntraDir : aboveIntraDir;
-          mpm[1] = angMode;
-          mpm[2] = ( ( angMode + offset )     % mod ) + 2;
-          mpm[3] = ( ( angMode - 1 )          % mod ) + 2;
-          mpm[4] = ( ( angMode + offset - 1 ) % mod ) + 2;
-          mpm[5] = ( ( angMode )              % mod ) + 2;
-        }
-      }
-    }
-    else
-    {
-      mpm[0] = leftIntraDir;
-      mpm[1] = (mpm[0] == PLANAR_IDX) ? DC_IDX : PLANAR_IDX;
+      mpm[1] = DC_IDX;
       mpm[2] = VER_IDX;
       mpm[3] = HOR_IDX;
       mpm[4] = VER_IDX - 4;
@@ -678,42 +474,60 @@ int PU::getIntraMPMs( const PredictionUnit &pu, unsigned* mpm, const ChannelType
         numCand = 1;
         if (leftIntraDir > DC_IDX)
         {
-          mpm[0] = leftIntraDir;
-          mpm[1] = PLANAR_IDX;
-          mpm[2] = DC_IDX;
-          mpm[3] = ((leftIntraDir + offset) % mod) + 2;
-          mpm[4] = ((leftIntraDir - 1) % mod) + 2;
-          mpm[5] = ((leftIntraDir + offset - 1) % mod) + 2;
+          mpm[0] = PLANAR_IDX;
+          mpm[1] = leftIntraDir;
+          mpm[2] = ((leftIntraDir + offset) % mod) + 2;
+          mpm[3] = ((leftIntraDir - 1) % mod) + 2;
+          mpm[4] = ((leftIntraDir + offset - 1) % mod) + 2;
+          mpm[5] = ( leftIntraDir               % mod) + 2;
         }
       }
       else //L!=A
       {
         numCand = 2;
-        mpm[0] = leftIntraDir;
-        mpm[1] = aboveIntraDir;
-        bool maxCandModeIdx = mpm[0] > mpm[1] ? 0 : 1;
+        int  maxCandModeIdx = mpm[0] > mpm[1] ? 0 : 1;
 
         if ((leftIntraDir > DC_IDX) && (aboveIntraDir > DC_IDX))
         {
-          mpm[2] = PLANAR_IDX;
-          mpm[3] = DC_IDX;
-          if ((mpm[maxCandModeIdx] - mpm[!maxCandModeIdx] < 63) && (mpm[maxCandModeIdx] - mpm[!maxCandModeIdx] > 1))
+          mpm[0] = PLANAR_IDX;
+          mpm[1] = leftIntraDir;
+          mpm[2] = aboveIntraDir;
+          maxCandModeIdx = mpm[1] > mpm[2] ? 1 : 2;
+          int minCandModeIdx = mpm[1] > mpm[2] ? 2 : 1;
+          if (mpm[maxCandModeIdx] - mpm[minCandModeIdx] == 1)
           {
+            mpm[3] = ((mpm[minCandModeIdx] + offset)     % mod) + 2;
+            mpm[4] = ((mpm[maxCandModeIdx] - 1)          % mod) + 2;
+            mpm[5] = ((mpm[minCandModeIdx] + offset - 1) % mod) + 2;
+          }
+          else if (mpm[maxCandModeIdx] - mpm[minCandModeIdx] >= 62)
+          {
+            mpm[3] = ((mpm[minCandModeIdx] - 1)      % mod) + 2;
             mpm[4] = ((mpm[maxCandModeIdx] + offset) % mod) + 2;
-            mpm[5] = ((mpm[maxCandModeIdx] - 1) % mod) + 2;
+            mpm[5] = ( mpm[minCandModeIdx]           % mod) + 2;
+          }
+          else if (mpm[maxCandModeIdx] - mpm[minCandModeIdx] == 2)
+          {
+            mpm[3] = ((mpm[minCandModeIdx] - 1)      % mod) + 2;
+            mpm[4] = ((mpm[minCandModeIdx] + offset) % mod) + 2;
+            mpm[5] = ((mpm[maxCandModeIdx] - 1)      % mod) + 2;
           }
           else
           {
-            mpm[4] = ((mpm[maxCandModeIdx] + offset - 1) % mod) + 2;
-            mpm[5] = ((mpm[maxCandModeIdx]) % mod) + 2;
+            mpm[3] = ((mpm[minCandModeIdx] + offset) % mod) + 2;
+            mpm[4] = ((mpm[minCandModeIdx] - 1)      % mod) + 2;
+            mpm[5] = ((mpm[maxCandModeIdx] + offset) % mod) + 2;
           }
         }
         else if (leftIntraDir + aboveIntraDir >= 2)
         {
-          mpm[2] = (mpm[!maxCandModeIdx] == PLANAR_IDX) ? DC_IDX : PLANAR_IDX;
-          mpm[3] = ((mpm[maxCandModeIdx] + offset) % mod) + 2;
-          mpm[4] = ((mpm[maxCandModeIdx] - 1) % mod) + 2;
-          mpm[5] = ((mpm[maxCandModeIdx] + offset - 1) % mod) + 2;
+          mpm[0] = PLANAR_IDX;
+          mpm[1] = (leftIntraDir < aboveIntraDir) ? aboveIntraDir : leftIntraDir;
+          maxCandModeIdx = 1;
+          mpm[2] = ((mpm[maxCandModeIdx] + offset)     % mod) + 2;
+          mpm[3] = ((mpm[maxCandModeIdx] - 1)          % mod) + 2;
+          mpm[4] = ((mpm[maxCandModeIdx] + offset - 1) % mod) + 2;
+          mpm[5] = ( mpm[maxCandModeIdx]               % mod) + 2;
         }
       }
     }
@@ -726,6 +540,24 @@ int PU::getIntraMPMs( const PredictionUnit &pu, unsigned* mpm, const ChannelType
   }
 }
 
+bool PU::isMIP(const PredictionUnit &pu, const ChannelType &chType) // true when chType is CHANNEL_TYPE_LUMA and the owning CU has mipFlag set
+{
+  return (chType == CHANNEL_TYPE_LUMA && pu.cu->mipFlag);
+}
+
+
+uint32_t PU::getIntraDirLuma( const PredictionUnit &pu ) // luma intra direction of pu, with PLANAR_IDX substituted when isMIP(pu) holds
+{
+  if (isMIP(pu)) // called without chType, so the declaration (not visible here) presumably supplies a default - confirm in the header
+  {
+    return PLANAR_IDX;
+  }
+  else
+  {
+    return pu.intraDir[CHANNEL_TYPE_LUMA];
+  }
+}
+
 
 void PU::getIntraChromaCandModes( const PredictionUnit &pu, unsigned modeList[NUM_CHROMA_MODE] )
 {
@@ -739,10 +571,7 @@ void PU::getIntraChromaCandModes( const PredictionUnit &pu, unsigned modeList[NU
     modeList[6] = MDLM_T_IDX;
     modeList[7] = DM_CHROMA_IDX;
 
-    Position topLeftPos = pu.blocks[pu.chType].lumaPos();
-    Position refPos = topLeftPos.offset( pu.blocks[pu.chType].lumaSize().width >> 1, pu.blocks[pu.chType].lumaSize().height >> 1 );
-    const PredictionUnit *lumaPU = CS::isDualITree( *pu.cs ) ? pu.cs->picture->cs->getPU( refPos, CHANNEL_TYPE_LUMA ) : &pu;
-    const uint32_t lumaMode = lumaPU->intraDir[CHANNEL_TYPE_LUMA];
+    const uint32_t lumaMode = getCoLocatedIntraLumaMode(pu);
     for( int i = 0; i < 4; i++ )
     {
       if( lumaMode == modeList[i] )
@@ -754,180 +583,33 @@ void PU::getIntraChromaCandModes( const PredictionUnit &pu, unsigned modeList[NU
   }
 }
 
-
 bool PU::isLMCMode(unsigned mode)
 {
   return (mode >= LM_CHROMA_IDX && mode <= MDLM_T_IDX);
 }
+
 bool PU::isLMCModeEnabled(const PredictionUnit &pu, unsigned mode)
 {
-  if ( pu.cs->sps->getUseLMChroma() )
+  if ( pu.cs->sps->getUseLMChroma() && pu.cu->checkCCLMAllowed() ) // requires both the SPS-level getUseLMChroma() and the CU-level checkCCLMAllowed(); the 'mode' argument is not consulted
   {
     return true;
   }
   return false;
 }
 
-int PU::getLMSymbolList(const PredictionUnit &pu, int *pModeList)
+int PU::getLMSymbolList(const PredictionUnit &pu, int *modeList) // writes LM_CHROMA_IDX, MDLM_L_IDX, MDLM_T_IDX into modeList and returns the number of entries written; pu is not read
 {
-  int iIdx = 0;
+  int idx = 0; // next free slot in modeList
 
-  pModeList[ iIdx++ ] = LM_CHROMA_IDX;
-    pModeList[ iIdx++ ] = -1;
-  pModeList[iIdx++] = MDLM_L_IDX;
-  pModeList[iIdx++] = MDLM_T_IDX;
-  return iIdx;
+  modeList[idx++] = LM_CHROMA_IDX;
+  modeList[idx++] = MDLM_L_IDX;
+  modeList[idx++] = MDLM_T_IDX;
+  return idx; // always 3 on this path
 }
 
-
-
 bool PU::isChromaIntraModeCrossCheckMode( const PredictionUnit &pu )
 {
-  return pu.intraDir[CHANNEL_TYPE_CHROMA] == DM_CHROMA_IDX;
-}
-
-int PU::getMHIntraMPMs(const PredictionUnit &pu, unsigned* mpm, const ChannelType &channelType /*= CHANNEL_TYPE_LUMA*/, const bool isChromaMDMS /*= false*/, const unsigned startIdx /*= 0*/)
-{
-  const int numMPMs = 3; // Multi-hypothesis intra uses only 3 MPM
-  {
-    int numCand = -1;
-    uint32_t leftIntraDir = DC_IDX, aboveIntraDir = DC_IDX;
-
-    const CompArea& area = pu.block(getFirstComponentOfChannel(channelType));
-    const Position& pos = area.pos();
-
-    // Get intra direction of left PU
-    const PredictionUnit *puLeft = pu.cs->getPURestricted(pos.offset(-1, 0), pu, channelType);
-
-    if (puLeft && (CU::isIntra(*puLeft->cu) || puLeft->mhIntraFlag))
-    {
-      leftIntraDir = puLeft->intraDir[channelType];
-
-      if (isChroma(channelType) && leftIntraDir == DM_CHROMA_IDX)
-      {
-        leftIntraDir = puLeft->intraDir[0];
-      }
-    }
-
-    // Get intra direction of above PU
-    const PredictionUnit* puAbove = pu.cs->getPURestricted(pos.offset(0, -1), pu, channelType);
-
-    if (puAbove && (CU::isIntra(*puAbove->cu) || puAbove->mhIntraFlag) && CU::isSameCtu(*pu.cu, *puAbove->cu))
-    {
-      aboveIntraDir = puAbove->intraDir[channelType];
-
-      if (isChroma(channelType) && aboveIntraDir == DM_CHROMA_IDX)
-      {
-        aboveIntraDir = puAbove->intraDir[0];
-      }
-    }
-
-    CHECK(2 >= numMPMs, "Invalid number of most probable modes");
-
-    uint32_t leftIntraDir2 = leftIntraDir;
-    uint32_t aboveIntraDir2 = aboveIntraDir;
-
-    leftIntraDir2 = (leftIntraDir2 > DC_IDX) ? ((leftIntraDir2 <= DIA_IDX) ? HOR_IDX : VER_IDX) : leftIntraDir2;
-    aboveIntraDir2 = (aboveIntraDir2 > DC_IDX) ? ((aboveIntraDir2 <= DIA_IDX) ? HOR_IDX : VER_IDX) : aboveIntraDir2;
-
-    if (leftIntraDir2 == aboveIntraDir2)
-    {
-      numCand = 1;
-
-      if (leftIntraDir2 > DC_IDX) // angular modes
-      {
-        mpm[0] = leftIntraDir2;
-        mpm[1] = PLANAR_IDX;
-        mpm[2] = DC_IDX;
-      }
-      else //non-angular
-      {
-        mpm[0] = PLANAR_IDX;
-        mpm[1] = DC_IDX;
-        mpm[2] = VER_IDX;
-      }
-    }
-    else
-    {
-      numCand = 2;
-
-      mpm[0] = leftIntraDir2;
-      mpm[1] = aboveIntraDir2;
-
-      if (leftIntraDir2 && aboveIntraDir2) //both modes are non-planar
-      {
-        mpm[2] = PLANAR_IDX;
-      }
-      else
-      {
-        mpm[2] = (leftIntraDir2 + aboveIntraDir2) < 2 ? VER_IDX : DC_IDX;
-      }
-    }
-    int narrowCase = getNarrowShape(pu.lwidth(), pu.lheight());
-    if (narrowCase > 0)
-    {
-      bool isMPM[NUM_LUMA_MODE];
-      for (int idx = 0; idx < NUM_LUMA_MODE; idx++)
-      {
-        isMPM[idx] = false;
-      }
-      for (int idx = 0; idx < numMPMs; idx++)
-      {
-        isMPM[mpm[idx]] = true;
-      }
-      if (narrowCase == 1 && isMPM[HOR_IDX])
-      {
-        for (int idx = 0; idx < numMPMs; idx++)
-        {
-          if (mpm[idx] == HOR_IDX)
-          {
-            if (!isMPM[PLANAR_IDX])
-              mpm[idx] = PLANAR_IDX;
-            else if (!isMPM[DC_IDX])
-              mpm[idx] = DC_IDX;
-            else if (!isMPM[VER_IDX])
-              mpm[idx] = VER_IDX;
-            break;
-          }
-        }
-      }
-      if (narrowCase == 2 && isMPM[VER_IDX])
-      {
-        for (int idx = 0; idx < numMPMs; idx++)
-        {
-          if (mpm[idx] == VER_IDX)
-          {
-            if (!isMPM[PLANAR_IDX])
-              mpm[idx] = PLANAR_IDX;
-            else if (!isMPM[DC_IDX])
-              mpm[idx] = DC_IDX;
-            else if (!isMPM[HOR_IDX])
-              mpm[idx] = HOR_IDX;
-            break;
-          }
-        }
-      }
-    }
-    CHECK(numCand == 0, "No candidates found");
-    CHECK(mpm[0] == mpm[1] || mpm[0] == mpm[2] || mpm[2] == mpm[1], "redundant MPM");
-    return numCand;
-  }
-}
-int PU::getNarrowShape(const int width, const int height)
-{
-  int longSide = (width > height) ? width : height;
-  int shortSide = (width > height) ? height : width;
-  if (longSide > (2 * shortSide))
-  {
-    if (longSide == width)
-      return 1;
-    else
-      return 2;
-  }
-  else
-  {
-    return 0;
-  }
+  return !pu.cu->bdpcmModeChroma && pu.intraDir[CHANNEL_TYPE_CHROMA] == DM_CHROMA_IDX;
 }
 
 uint32_t PU::getFinalIntraMode( const PredictionUnit &pu, const ChannelType &chType )
@@ -936,111 +618,100 @@ uint32_t PU::getFinalIntraMode( const PredictionUnit &pu, const ChannelType &chT
 
   if( uiIntraMode == DM_CHROMA_IDX && !isLuma( chType ) )
   {
-    Position topLeftPos = pu.blocks[pu.chType].lumaPos();
-    Position refPos = topLeftPos.offset( pu.blocks[pu.chType].lumaSize().width >> 1, pu.blocks[pu.chType].lumaSize().height >> 1 );
-    const PredictionUnit &lumaPU = CS::isDualITree( *pu.cs ) ? *pu.cs->picture->cs->getPU( refPos, CHANNEL_TYPE_LUMA ) : *pu.cs->getPU( topLeftPos, CHANNEL_TYPE_LUMA );
-
-    uiIntraMode = lumaPU.intraDir[0];
+    uiIntraMode = getCoLocatedIntraLumaMode(pu);
   }
-  if( pu.chromaFormat == CHROMA_422 && !isLuma( chType ) )
+  if( pu.chromaFormat == CHROMA_422 && !isLuma( chType ) && uiIntraMode < NUM_LUMA_MODE ) // map directional, planar and dc
   {
     uiIntraMode = g_chroma422IntraAngleMappingTable[uiIntraMode];
   }
   return uiIntraMode;
 }
 
-bool PU::xCheckSimilarMotion(const int mergeCandIndex, const int prevCnt, const MergeCtx mergeCandList, bool hasPruned[MRG_MAX_NUM_CANDS])
+uint32_t PU::getCoLocatedIntraLumaMode( const PredictionUnit &pu ) // returns getIntraDirLuma() of the luma PU located at the luma position of pu's block
+{
+  Position topLeftPos = pu.blocks[pu.chType].lumaPos();
+  Position refPos = topLeftPos.offset( pu.blocks[pu.chType].lumaSize().width >> 1, pu.blocks[pu.chType].lumaSize().height >> 1 ); // top-left position moved by half the luma width and half the luma height
+  const PredictionUnit &lumaPU = pu.cu->isSepTree() ? *pu.cs->picture->cs->getPU( refPos, CHANNEL_TYPE_LUMA ) : *pu.cs->getPU( topLeftPos, CHANNEL_TYPE_LUMA ); // isSepTree(): look up refPos in the picture-level CS; otherwise look up topLeftPos in pu.cs
+
+  return PU::getIntraDirLuma( lumaPU );
+}
+
+int PU::getWideAngIntraMode( const TransformUnit &tu, const uint32_t dirMode, const ComponentID compID ) // remaps dirMode depending on the width/height relation of the prediction buffer for compID; returns the (possibly shifted) mode
 {
-  for (uint32_t ui = 0; ui < prevCnt; ui++)
+  if( dirMode < 2 ) // modes 0 and 1 are returned unchanged
   {
-    if (hasPruned[ui])
-    {
-      continue;
-    }
-    if (mergeCandList.interDirNeighbours[ui] == mergeCandList.interDirNeighbours[mergeCandIndex])
-    {
-      if (mergeCandList.interDirNeighbours[ui] == 3)
-      {
-        int offset0 = (ui * 2);
-        int offset1 = (mergeCandIndex * 2);
-        if (mergeCandList.mvFieldNeighbours[offset0].refIdx == mergeCandList.mvFieldNeighbours[offset1].refIdx &&
-            mergeCandList.mvFieldNeighbours[offset0 + 1].refIdx == mergeCandList.mvFieldNeighbours[offset1 + 1].refIdx &&
-            mergeCandList.mvFieldNeighbours[offset0].mv == mergeCandList.mvFieldNeighbours[offset1].mv &&
-            mergeCandList.mvFieldNeighbours[offset0 + 1].mv == mergeCandList.mvFieldNeighbours[offset1 + 1].mv
-          )
-        {
-          hasPruned[ui] = true;
-          return true;
-        }
-      }
-      else
-      {
-        int offset0 = (ui * 2) + mergeCandList.interDirNeighbours[ui] - 1;
-        int offset1 = (mergeCandIndex * 2) + mergeCandList.interDirNeighbours[ui] - 1;
-        if (mergeCandList.mvFieldNeighbours[offset0].refIdx == mergeCandList.mvFieldNeighbours[offset1].refIdx &&
-            mergeCandList.mvFieldNeighbours[offset0].mv == mergeCandList.mvFieldNeighbours[offset1].mv
-          )
-        {
-          hasPruned[ui] = true;
-          return true;
-        }
-      }
-    }
+    return ( int ) dirMode;
   }
 
-  return false;
-}
+  CodingStructure& cs           = *tu.cs;
+  const CompArea&  area         = tu.blocks[ compID ];
+  PelBuf           pred         = cs.getPredBuf( area ); // only the buffer's width and height are used below
+  int              width        = int( pred.width );
+  int              height       = int( pred.height );
+  int              modeShift[ ] = { 0, 6, 10, 12, 14, 15 }; // threshold offsets, indexed by deltaSize
+  int              deltaSize    = abs( floorLog2( width ) - floorLog2( height ) ); // NOTE(review): used as an index into the 6-entry modeShift without a bounds check - assumes deltaSize <= 5, confirm
+  int              predMode     = dirMode;
 
-#if JVET_L0090_PAIR_AVG
+  if( width > height && dirMode < 2 + modeShift[ deltaSize ] ) // wider than tall and dirMode below 2 + modeShift[deltaSize]: shift up by VDIA_IDX - 1
+  {
+    predMode += ( VDIA_IDX - 1 );
+  }
+  else if( height > width && predMode > VDIA_IDX - modeShift[ deltaSize ] ) // taller than wide and mode above VDIA_IDX - modeShift[deltaSize]: shift down by VDIA_IDX + 1
+  {
+    predMode -= ( VDIA_IDX + 1 );
+  }
 
-bool PU::addMergeHMVPCand(const CodingStructure &cs, MergeCtx& mrgCtx, bool canFastExit, const int& mrgCandIdx, const uint32_t maxNumMergeCandMin1, int &cnt, const int prevCnt, bool isAvailableSubPu, unsigned subPuMvpPos
-  , bool ibcFlag
-  , bool isShared
-)
-#else
+  return predMode; // unchanged dirMode when neither branch applied (including width == height)
+}
 
-bool PU::addMergeHMVPCand(const CodingStructure &cs, MergeCtx& mrgCtx, bool isCandInter[MRG_MAX_NUM_CANDS], bool canFastExit, const int& mrgCandIdx, const uint32_t maxNumMergeCandMin1, int &cnt, const int prevCnt, bool isAvailableSubPu, unsigned subPuMvpPos
-  , int mmvdList
-)
-#endif
+
+bool PU::addMergeHMVPCand(const CodingStructure &cs, MergeCtx& mrgCtx, const int& mrgCandIdx, const uint32_t maxNumMergeCandMin1, int &cnt // appends entries of cs.motionLut to mrgCtx starting at slot cnt; returns true as soon as slot mrgCandIdx has been written, false otherwise
+  , const bool isAvailableA1, const MotionInfo miLeft, const bool isAvailableB1, const MotionInfo miAbove // NOTE(review): MotionInfo is taken by value; a const reference would avoid the copies if the declaration is changed as well
+  , const bool ibcFlag // selects lutIbc instead of lut and alters the acceptance test below
+  , const bool isGt4x4 // only read together with ibcFlag in the acceptance test below
+  )
 {
   const Slice& slice = *cs.slice;
   MotionInfo miNeighbor;
-  bool hasPruned[MRG_MAX_NUM_CANDS];
-  memset(hasPruned, 0, MRG_MAX_NUM_CANDS * sizeof(bool));
-  if (isAvailableSubPu)
-  {
-    hasPruned[subPuMvpPos] = true;
-  }
-  auto &lut = ibcFlag ? ( isShared ? cs.motionLut.lutShareIbc : cs.motionLut.lutIbc ) : ( isShared ? cs.motionLut.lutShare : cs.motionLut.lut );
-  int num_avai_candInLUT = (int) lut.size();
+
+  auto &lut = ibcFlag ? cs.motionLut.lutIbc : cs.motionLut.lut; // the loop below reads this table from its last entry backwards
+  int num_avai_candInLUT = (int)lut.size();
 
   for (int mrgIdx = 1; mrgIdx <= num_avai_candInLUT; mrgIdx++)
   {
     miNeighbor = lut[num_avai_candInLUT - mrgIdx];
-    mrgCtx.interDirNeighbours[cnt] = miNeighbor.interDir;
-    mrgCtx.mvFieldNeighbours[cnt << 1].setMvField(miNeighbor.mv[0], miNeighbor.refIdx[0]);
-    if (slice.isInterB())
-    {
-      mrgCtx.mvFieldNeighbours[(cnt << 1) + 1].setMvField(miNeighbor.mv[1], miNeighbor.refIdx[1]);
-    }
-    if (mrgIdx > 2 || !xCheckSimilarMotion(cnt, prevCnt, mrgCtx, hasPruned))
+
+    if ( mrgIdx > 2 || ((mrgIdx > 1 || !isGt4x4) && ibcFlag) // accepted without comparison when mrgIdx > 2, or with ibcFlag when mrgIdx > 1 or !isGt4x4
+      || ((!isAvailableA1 || (miLeft != miNeighbor)) && (!isAvailableB1 || (miAbove != miNeighbor))) ) // otherwise accepted only if it differs from miLeft and miAbove wherever those are available
     {
-#if !JVET_L0090_PAIR_AVG
-      isCandInter[cnt] = true;
-#endif
-      mrgCtx.GBiIdx[cnt] = (mrgCtx.interDirNeighbours[cnt] == 3) ? miNeighbor.GBiIdx : GBI_DEFAULT;
-      if (mrgCandIdx == cnt && canFastExit)
+      mrgCtx.interDirNeighbours[cnt] = miNeighbor.interDir;
+      mrgCtx.useAltHpelIf      [cnt] = !ibcFlag && miNeighbor.useAltHpelIf; // always false when ibcFlag is set
+      mrgCtx.BcwIdx            [cnt] = (miNeighbor.interDir == 3) ? miNeighbor.BcwIdx : BCW_DEFAULT; // the LUT entry's BcwIdx is kept only when interDir == 3
+
+      mrgCtx.mvFieldNeighbours[cnt << 1].setMvField(miNeighbor.mv[0], miNeighbor.refIdx[0]);
+      if (slice.isInterB()) // the second MV field is written only when slice.isInterB()
+      {
+        mrgCtx.mvFieldNeighbours[(cnt << 1) + 1].setMvField(miNeighbor.mv[1], miNeighbor.refIdx[1]);
+      }
+
+      if (mrgCandIdx == cnt) // requested candidate has just been filled in
       {
         return true;
       }
       cnt ++;
+
       if (cnt  == maxNumMergeCandMin1)
       {
         break;
       }
     }
   }
+
+  if (cnt < maxNumMergeCandMin1) // bound reached not yet: clear the flag of the next free slot
+  {
+    mrgCtx.useAltHpelIf[cnt] = false;
+  }
+
   return false;
 }
 
@@ -1048,16 +719,16 @@ void PU::getIBCMergeCandidates(const PredictionUnit &pu, MergeCtx& mrgCtx, const
 {
   const CodingStructure &cs = *pu.cs;
   const Slice &slice = *pu.cs->slice;
-  const uint32_t maxNumMergeCand = slice.getMaxNumMergeCand();
-  const bool canFastExit = pu.cs->pps->getLog2ParallelMergeLevelMinus2() == 0;
+  const uint32_t maxNumMergeCand = slice.getPicHeader()->getMaxNumIBCMergeCand();
 
   for (uint32_t ui = 0; ui < maxNumMergeCand; ++ui)
   {
-    mrgCtx.GBiIdx[ui] = GBI_DEFAULT;
+    mrgCtx.BcwIdx[ui] = BCW_DEFAULT;
     mrgCtx.interDirNeighbours[ui] = 0;
     mrgCtx.mrgTypeNeighbours[ui] = MRG_TYPE_IBC;
     mrgCtx.mvFieldNeighbours[ui * 2].refIdx = NOT_VALID;
     mrgCtx.mvFieldNeighbours[ui * 2 + 1].refIdx = NOT_VALID;
+    mrgCtx.useAltHpelIf[ui] = false;
   }
 
   mrgCtx.numValidMergeCand = maxNumMergeCand;
@@ -1065,16 +736,16 @@ void PU::getIBCMergeCandidates(const PredictionUnit &pu, MergeCtx& mrgCtx, const
 
   int cnt = 0;
 
-  const Position posLT = pu.shareParentPos;
-  const Position posRT = pu.shareParentPos.offset(pu.shareParentSize.width - 1, 0);
-  const Position posLB = pu.shareParentPos.offset(0, pu.shareParentSize.height - 1);
+  const Position posRT = pu.Y().topRight();
+  const Position posLB = pu.Y().bottomLeft();
 
   MotionInfo miAbove, miLeft, miAboveLeft, miAboveRight, miBelowLeft;
 
   //left
   const PredictionUnit* puLeft = cs.getPURestricted(posLB.offset(-1, 0), pu, pu.chType);
+  bool isGt4x4 = pu.lwidth() * pu.lheight() > 16;
   const bool isAvailableA1 = puLeft && isDiffMER(pu, *puLeft) && pu.cu != puLeft->cu && CU::isIBC(*puLeft->cu);
-  if (isAvailableA1)
+  if (isGt4x4 && isAvailableA1)
   {
     miLeft = puLeft->getMotionInfo(posLB.offset(-1, 0));
 
@@ -1082,7 +753,7 @@ void PU::getIBCMergeCandidates(const PredictionUnit &pu, MergeCtx& mrgCtx, const
     mrgCtx.interDirNeighbours[cnt] = miLeft.interDir;
     // get Mv from Left
     mrgCtx.mvFieldNeighbours[cnt << 1].setMvField(miLeft.mv[0], miLeft.refIdx[0]);
-    if (mrgCandIdx == cnt && canFastExit)
+    if (mrgCandIdx == cnt)
     {
       return;
     }
@@ -1095,11 +766,10 @@ void PU::getIBCMergeCandidates(const PredictionUnit &pu, MergeCtx& mrgCtx, const
     return;
   }
 
-
   // above
   const PredictionUnit *puAbove = cs.getPURestricted(posRT.offset(0, -1), pu, pu.chType);
   bool isAvailableB1 = puAbove && isDiffMER(pu, *puAbove) && pu.cu != puAbove->cu && CU::isIBC(*puAbove->cu);
-  if (isAvailableB1)
+  if (isGt4x4 && isAvailableB1)
   {
     miAbove = puAbove->getMotionInfo(posRT.offset(0, -1));
 
@@ -1109,72 +779,7 @@ void PU::getIBCMergeCandidates(const PredictionUnit &pu, MergeCtx& mrgCtx, const
       mrgCtx.interDirNeighbours[cnt] = miAbove.interDir;
       // get Mv from Above
       mrgCtx.mvFieldNeighbours[cnt << 1].setMvField(miAbove.mv[0], miAbove.refIdx[0]);
-      if (mrgCandIdx == cnt && canFastExit)
-      {
-        return;
-      }
-
-      cnt++;
-    }
-  }
-
-  // early termination
-  if (cnt == maxNumMergeCand)
-  {
-    return;
-  }
-
-  int spatialCandPos = cnt;
-
-  // above right
-  const PredictionUnit *puAboveRight = cs.getPURestricted(posRT.offset(1, -1), pu, pu.chType);
-  bool isAvailableB0 = puAboveRight && isDiffMER(pu, *puAboveRight) && CU::isIBC(*puAboveRight->cu);
-  if (isAvailableB0)
-  {
-    miAboveRight = puAboveRight->getMotionInfo(posRT.offset(1, -1));
-
-#if HM_JEM_MERGE_CANDS
-    if ((!isAvailableB1 || (miAbove != miAboveRight)) && (!isAvailableA1 || (miLeft != miAboveRight)))
-#else
-    if (!isAvailableB1 || (miAbove != miAboveRight))
-#endif
-    {
-      // get Inter Dir
-      mrgCtx.interDirNeighbours[cnt] = miAboveRight.interDir;
-      // get Mv from Above-right
-      mrgCtx.mvFieldNeighbours[cnt << 1].setMvField(miAboveRight.mv[0], miAboveRight.refIdx[0]);
-
-      if (mrgCandIdx == cnt && canFastExit)
-      {
-        return;
-      }
-
-      cnt++;
-    }
-  }
-  // early termination
-  if (cnt == maxNumMergeCand)
-  {
-    return;
-  }
-
-  //left bottom
-  const PredictionUnit *puLeftBottom = cs.getPURestricted(posLB.offset(-1, 1), pu, pu.chType);
-  bool isAvailableA0 = puLeftBottom && isDiffMER(pu, *puLeftBottom) && CU::isIBC(*puLeftBottom->cu);
-  if (isAvailableA0)
-  {
-    miBelowLeft = puLeftBottom->getMotionInfo(posLB.offset(-1, 1));
-
-#if HM_JEM_MERGE_CANDS
-    if ((!isAvailableA1 || (miBelowLeft != miLeft)) && (!isAvailableB1 || (miBelowLeft != miAbove)) && (!isAvailableB0 || (miBelowLeft != miAboveRight)))
-#else
-    if (!isAvailableA1 || (miBelowLeft != miLeft))
-#endif
-    {
-      // get Inter Dir
-      mrgCtx.interDirNeighbours[cnt] = miBelowLeft.interDir;
-      mrgCtx.mvFieldNeighbours[cnt << 1].setMvField(miBelowLeft.mv[0], miBelowLeft.refIdx[0]);
-      if (mrgCandIdx == cnt && canFastExit)
+      if (mrgCandIdx == cnt)
       {
         return;
       }
@@ -1182,104 +787,39 @@ void PU::getIBCMergeCandidates(const PredictionUnit &pu, MergeCtx& mrgCtx, const
       cnt++;
     }
   }
-  // early termination
-  if (cnt == maxNumMergeCand)
-  {
-    return;
-  }
-
-  // above left
-  if (cnt < 4)
-  {
-    const PredictionUnit *puAboveLeft = cs.getPURestricted(posLT.offset(-1, -1), pu, pu.chType);
-    bool isAvailableB2 = puAboveLeft && isDiffMER(pu, *puAboveLeft) && CU::isIBC(*puAboveLeft->cu);
-    if (isAvailableB2)
-    {
-      miAboveLeft = puAboveLeft->getMotionInfo(posLT.offset(-1, -1));
-
-#if HM_JEM_MERGE_CANDS
-      if ((!isAvailableA1 || (miLeft != miAboveLeft)) && (!isAvailableB1 || (miAbove != miAboveLeft)) && (!isAvailableA0 || (miBelowLeft != miAboveLeft)) && (!isAvailableB0 || (miAboveRight != miAboveLeft)))
-#else
-      if ((!isAvailableA1 || (miLeft != miAboveLeft)) && (!isAvailableB1 || (miAbove != miAboveLeft)))
-#endif
-      {
-        // get Inter Dir
-        mrgCtx.interDirNeighbours[cnt] = miAboveLeft.interDir;
-        mrgCtx.mvFieldNeighbours[cnt << 1].setMvField(miAboveLeft.mv[0], miAboveLeft.refIdx[0]);
-        if (mrgCandIdx == cnt && canFastExit)
-        {
-          return;
-        }
 
-        cnt++;
-      }
-    }
-  }
   // early termination
   if (cnt == maxNumMergeCand)
   {
     return;
   }
 
-  int maxNumMergeCandMin1 = maxNumMergeCand - 1;
-  if (cnt != maxNumMergeCandMin1)
+  if (cnt != maxNumMergeCand)
   {
-    bool isAvailableSubPu = false;
-    unsigned subPuMvpPos = 0;
-
-    bool  isShared = ((pu.Y().lumaSize().width != pu.shareParentSize.width) || (pu.Y().lumaSize().height != pu.shareParentSize.height));
-
-#if JVET_L0090_PAIR_AVG
-    bool bFound = addMergeHMVPCand(cs, mrgCtx, canFastExit
-      , mrgCandIdx
-      , maxNumMergeCandMin1, cnt
-      , spatialCandPos
-      , isAvailableSubPu, subPuMvpPos
+    bool bFound = addMergeHMVPCand(cs, mrgCtx, mrgCandIdx, maxNumMergeCand, cnt
+      , isAvailableA1, miLeft, isAvailableB1, miAbove
       , true
-      , isShared
-    );
-#else
-    bool bFound = addMergeHMVPCand(slice, mrgCtx, isCandInter, canFastExit
-      , mrgCandIdx
-      , maxNumMergeCandMin1, cnt, cnt, isAvailableSubPu, subPuMvpPos
-    );
-#endif
+      , isGt4x4
+      );
+
     if (bFound)
     {
       return;
     }
   }
 
-#if JVET_L0090_PAIR_AVG
-  // pairwise-average candidates
-    if (cnt>1 && cnt <maxNumMergeCand)
+    while (cnt < maxNumMergeCand)
     {
-       mrgCtx.mvFieldNeighbours[cnt * 2    ].setMvField(Mv(0, 0), NOT_VALID);
-       mrgCtx.mvFieldNeighbours[cnt * 2 + 1].setMvField(Mv(0, 0), NOT_VALID);
-
-       const Mv& MvI = mrgCtx.mvFieldNeighbours[0 * 2].mv;
-       const Mv& MvJ = mrgCtx.mvFieldNeighbours[1 * 2].mv;
-       // average two MVs
-       Mv avgMv = MvI;
-
-       avgMv += MvJ;
-       mrgCtx.mrgTypeNeighbours[cnt] = MRG_TYPE_IBC;
-       roundAffineMv(avgMv.hor, avgMv.ver, 1);
-       avgMv.roundToPrecision(MV_PRECISION_INTERNAL, MV_PRECISION_INT);
-      mrgCtx.mvFieldNeighbours[cnt * 2 ].setMvField(avgMv, MAX_NUM_REF);
+      mrgCtx.mvFieldNeighbours[cnt * 2].setMvField(Mv(0, 0), MAX_NUM_REF);
       mrgCtx.interDirNeighbours[cnt] = 1;
+      if (mrgCandIdx == cnt)
+      {
+        return;
+      }
       cnt++;
     }
 
-    // early termination
-    if (cnt == maxNumMergeCand)
-    {
-      return;
-    }
-#endif
-
   mrgCtx.numValidMergeCand = cnt;
-
 }
 
 void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx,
@@ -1288,24 +828,16 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx,
 {
   const CodingStructure &cs  = *pu.cs;
   const Slice &slice         = *pu.cs->slice;
-  const uint32_t maxNumMergeCand = slice.getMaxNumMergeCand();
-  const bool canFastExit     = pu.cs->pps->getLog2ParallelMergeLevelMinus2() == 0;
-
-#if !JVET_L0090_PAIR_AVG
-  // this variable is unused if remove HEVC combined candidates
-  bool isCandInter[MRG_MAX_NUM_CANDS];
-#endif
+  const uint32_t maxNumMergeCand = slice.getPicHeader()->getMaxNumMergeCand();
 
   for (uint32_t ui = 0; ui < maxNumMergeCand; ++ui)
   {
-#if !JVET_L0090_PAIR_AVG
-    isCandInter[ui] = false;
-#endif
-    mrgCtx.GBiIdx[ui] = GBI_DEFAULT;
+    mrgCtx.BcwIdx[ui] = BCW_DEFAULT;
     mrgCtx.interDirNeighbours[ui] = 0;
     mrgCtx.mrgTypeNeighbours [ui] = MRG_TYPE_DEFAULT_N;
     mrgCtx.mvFieldNeighbours[(ui << 1)    ].refIdx = NOT_VALID;
     mrgCtx.mvFieldNeighbours[(ui << 1) + 1].refIdx = NOT_VALID;
+    mrgCtx.useAltHpelIf[ui] = false;
   }
 
   mrgCtx.numValidMergeCand = maxNumMergeCand;
@@ -1313,36 +845,32 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx,
 
   int cnt = 0;
 
-
-  const Position posLT = pu.shareParentPos;
-  const Position posRT = pu.shareParentPos.offset(pu.shareParentSize.width - 1, 0);
-  const Position posLB = pu.shareParentPos.offset(0, pu.shareParentSize.height - 1);
+  const Position posLT = pu.Y().topLeft();
+  const Position posRT = pu.Y().topRight();
+  const Position posLB = pu.Y().bottomLeft();
   MotionInfo miAbove, miLeft, miAboveLeft, miAboveRight, miBelowLeft;
 
-  //left
-  const PredictionUnit* puLeft = cs.getPURestricted( posLB.offset( -1, 0 ), pu, pu.chType );
+  // above
+  const PredictionUnit *puAbove = cs.getPURestricted(posRT.offset(0, -1), pu, pu.chType);
 
-  const bool isAvailableA1 = puLeft && isDiffMER( pu, *puLeft ) && pu.cu != puLeft->cu && CU::isInter( *puLeft->cu );
+  bool isAvailableB1 = puAbove && isDiffMER(pu, *puAbove) && pu.cu != puAbove->cu && CU::isInter(*puAbove->cu);
 
-  if( isAvailableA1 )
+  if (isAvailableB1)
   {
-    miLeft = puLeft->getMotionInfo( posLB.offset(-1, 0) );
-
-#if !JVET_L0090_PAIR_AVG
-    isCandInter[cnt] = true;
-#endif
+    miAbove = puAbove->getMotionInfo(posRT.offset(0, -1));
 
     // get Inter Dir
-    mrgCtx.interDirNeighbours[cnt] = miLeft.interDir;
-    mrgCtx.GBiIdx[cnt] = (mrgCtx.interDirNeighbours[cnt] == 3) ? puLeft->cu->GBiIdx : GBI_DEFAULT;
-    // get Mv from Left
-    mrgCtx.mvFieldNeighbours[cnt << 1].setMvField(miLeft.mv[0], miLeft.refIdx[0]);
+    mrgCtx.interDirNeighbours[cnt] = miAbove.interDir;
+    mrgCtx.useAltHpelIf[cnt] = miAbove.useAltHpelIf;
+    // get Mv from Above
+    mrgCtx.BcwIdx[cnt] = (mrgCtx.interDirNeighbours[cnt] == 3) ? puAbove->cu->BcwIdx : BCW_DEFAULT;
+    mrgCtx.mvFieldNeighbours[cnt << 1].setMvField(miAbove.mv[0], miAbove.refIdx[0]);
 
     if (slice.isInterB())
     {
-      mrgCtx.mvFieldNeighbours[(cnt << 1) + 1].setMvField(miLeft.mv[1], miLeft.refIdx[1]);
+      mrgCtx.mvFieldNeighbours[(cnt << 1) + 1].setMvField(miAbove.mv[1], miAbove.refIdx[1]);
     }
-    if (mrgCandIdx == cnt && canFastExit)
+    if (mrgCandIdx == cnt)
     {
       return;
     }
@@ -1356,33 +884,29 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx,
     return;
   }
 
+  //left
+  const PredictionUnit* puLeft = cs.getPURestricted(posLB.offset(-1, 0), pu, pu.chType);
 
-  // above
-  const PredictionUnit *puAbove = cs.getPURestricted( posRT.offset( 0, -1 ), pu, pu.chType );
-
-  bool isAvailableB1 = puAbove && isDiffMER( pu, *puAbove ) && pu.cu != puAbove->cu && CU::isInter( *puAbove->cu );
+  const bool isAvailableA1 = puLeft && isDiffMER(pu, *puLeft) && pu.cu != puLeft->cu && CU::isInter(*puLeft->cu);
 
-  if( isAvailableB1 )
+  if (isAvailableA1)
   {
-    miAbove = puAbove->getMotionInfo( posRT.offset( 0, -1 ) );
+    miLeft = puLeft->getMotionInfo(posLB.offset(-1, 0));
 
-    if( !isAvailableA1 || ( miAbove != miLeft ) )
+    if (!isAvailableB1 || (miAbove != miLeft))
     {
-#if !JVET_L0090_PAIR_AVG
-      isCandInter[cnt] = true;
-#endif
-
       // get Inter Dir
-      mrgCtx.interDirNeighbours[cnt] = miAbove.interDir;
-      // get Mv from Above
-      mrgCtx.GBiIdx[cnt] = (mrgCtx.interDirNeighbours[cnt] == 3) ? puAbove->cu->GBiIdx : GBI_DEFAULT;
-      mrgCtx.mvFieldNeighbours[cnt << 1].setMvField( miAbove.mv[0], miAbove.refIdx[0] );
+      mrgCtx.interDirNeighbours[cnt] = miLeft.interDir;
+      mrgCtx.useAltHpelIf[cnt] = miLeft.useAltHpelIf;
+      mrgCtx.BcwIdx[cnt] = (mrgCtx.interDirNeighbours[cnt] == 3) ? puLeft->cu->BcwIdx : BCW_DEFAULT;
+      // get Mv from Left
+      mrgCtx.mvFieldNeighbours[cnt << 1].setMvField(miLeft.mv[0], miLeft.refIdx[0]);
 
-      if( slice.isInterB() )
+      if (slice.isInterB())
       {
-        mrgCtx.mvFieldNeighbours[( cnt << 1 ) + 1].setMvField( miAbove.mv[1], miAbove.refIdx[1] );
+        mrgCtx.mvFieldNeighbours[(cnt << 1) + 1].setMvField(miLeft.mv[1], miLeft.refIdx[1]);
       }
-      if (mrgCandIdx == cnt && canFastExit)
+      if (mrgCandIdx == cnt)
       {
         return;
       }
@@ -1397,8 +921,6 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx,
     return;
   }
 
-  int spatialCandPos = cnt;
-
   // above right
   const PredictionUnit *puAboveRight = cs.getPURestricted( posRT.offset( 1, -1 ), pu, pu.chType );
 
@@ -1408,20 +930,14 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx,
   {
     miAboveRight = puAboveRight->getMotionInfo( posRT.offset( 1, -1 ) );
 
-#if HM_JEM_MERGE_CANDS
-    if( ( !isAvailableB1 || ( miAbove != miAboveRight ) ) && ( !isAvailableA1 || ( miLeft != miAboveRight ) ) )
-#else
     if( !isAvailableB1 || ( miAbove != miAboveRight ) )
-#endif
     {
-#if !JVET_L0090_PAIR_AVG
-      isCandInter[cnt] = true;
-#endif
 
       // get Inter Dir
       mrgCtx.interDirNeighbours[cnt] = miAboveRight.interDir;
+      mrgCtx.useAltHpelIf[cnt] = miAboveRight.useAltHpelIf;
       // get Mv from Above-right
-      mrgCtx.GBiIdx[cnt] = (mrgCtx.interDirNeighbours[cnt] == 3) ? puAboveRight->cu->GBiIdx : GBI_DEFAULT;
+      mrgCtx.BcwIdx[cnt] = (mrgCtx.interDirNeighbours[cnt] == 3) ? puAboveRight->cu->BcwIdx : BCW_DEFAULT;
       mrgCtx.mvFieldNeighbours[cnt << 1].setMvField( miAboveRight.mv[0], miAboveRight.refIdx[0] );
 
       if( slice.isInterB() )
@@ -1429,7 +945,7 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx,
         mrgCtx.mvFieldNeighbours[( cnt << 1 ) + 1].setMvField( miAboveRight.mv[1], miAboveRight.refIdx[1] );
       }
 
-      if (mrgCandIdx == cnt && canFastExit)
+      if (mrgCandIdx == cnt)
       {
         return;
       }
@@ -1452,19 +968,13 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx,
   {
     miBelowLeft = puLeftBottom->getMotionInfo( posLB.offset( -1, 1 ) );
 
-#if HM_JEM_MERGE_CANDS
-    if( ( !isAvailableA1 || ( miBelowLeft != miLeft ) ) && ( !isAvailableB1 || ( miBelowLeft != miAbove ) ) && ( !isAvailableB0 || ( miBelowLeft != miAboveRight ) ) )
-#else
     if( !isAvailableA1 || ( miBelowLeft != miLeft ) )
-#endif
     {
-#if !JVET_L0090_PAIR_AVG
-      isCandInter[cnt] = true;
-#endif
 
       // get Inter Dir
       mrgCtx.interDirNeighbours[cnt] = miBelowLeft.interDir;
-      mrgCtx.GBiIdx[cnt] = (mrgCtx.interDirNeighbours[cnt] == 3) ? puLeftBottom->cu->GBiIdx : GBI_DEFAULT;
+      mrgCtx.useAltHpelIf[cnt] = miBelowLeft.useAltHpelIf;
+      mrgCtx.BcwIdx[cnt] = (mrgCtx.interDirNeighbours[cnt] == 3) ? puLeftBottom->cu->BcwIdx : BCW_DEFAULT;
       // get Mv from Bottom-Left
       mrgCtx.mvFieldNeighbours[cnt << 1].setMvField( miBelowLeft.mv[0], miBelowLeft.refIdx[0] );
 
@@ -1473,7 +983,7 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx,
         mrgCtx.mvFieldNeighbours[( cnt << 1 ) + 1].setMvField( miBelowLeft.mv[1], miBelowLeft.refIdx[1] );
       }
 
-      if (mrgCandIdx == cnt && canFastExit)
+      if (mrgCandIdx == cnt)
       {
         return;
       }
@@ -1499,19 +1009,13 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx,
     {
       miAboveLeft = puAboveLeft->getMotionInfo( posLT.offset( -1, -1 ) );
 
-#if HM_JEM_MERGE_CANDS
-      if( ( !isAvailableA1 || ( miLeft != miAboveLeft ) ) && ( !isAvailableB1 || ( miAbove != miAboveLeft ) ) && ( !isAvailableA0 || ( miBelowLeft != miAboveLeft ) ) && ( !isAvailableB0 || ( miAboveRight != miAboveLeft ) ) )
-#else
       if( ( !isAvailableA1 || ( miLeft != miAboveLeft ) ) && ( !isAvailableB1 || ( miAbove != miAboveLeft ) ) )
-#endif
       {
-#if !JVET_L0090_PAIR_AVG
-        isCandInter[cnt] = true;
-#endif
 
         // get Inter Dir
         mrgCtx.interDirNeighbours[cnt] = miAboveLeft.interDir;
-        mrgCtx.GBiIdx[cnt] = (mrgCtx.interDirNeighbours[cnt] == 3) ? puAboveLeft->cu->GBiIdx : GBI_DEFAULT;
+        mrgCtx.useAltHpelIf[cnt] = miAboveLeft.useAltHpelIf;
+        mrgCtx.BcwIdx[cnt] = (mrgCtx.interDirNeighbours[cnt] == 3) ? puAboveLeft->cu->BcwIdx : BCW_DEFAULT;
         // get Mv from Above-Left
         mrgCtx.mvFieldNeighbours[cnt << 1].setMvField( miAboveLeft.mv[0], miAboveLeft.refIdx[0] );
 
@@ -1520,7 +1024,7 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx,
           mrgCtx.mvFieldNeighbours[( cnt << 1 ) + 1].setMvField( miAboveLeft.mv[1], miAboveLeft.refIdx[1] );
         }
 
-        if (mrgCandIdx == cnt && canFastExit)
+        if (mrgCandIdx == cnt)
         {
           return;
         }
@@ -1535,45 +1039,23 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx,
     return;
   }
 
-  if (slice.getEnableTMVPFlag())
+  if (slice.getPicHeader()->getEnableTMVPFlag() && (pu.lumaSize().width + pu.lumaSize().height > 12))
   {
     //>> MTK colocated-RightBottom
     // offset the pos to be sure to "point" to the same position the uiAbsPartIdx would've pointed to
-    Position posRB = pu.shareParentPos.offset(pu.shareParentSize.width-3, pu.shareParentSize.height - 3);
+    Position posRB = pu.Y().bottomRight().offset( -3, -3 );
     const PreCalcValues& pcv = *cs.pcv;
 
     Position posC0;
-    Position posC1 = pu.shareParentPos.offset((pu.shareParentSize.width/2), (pu.shareParentSize.height/2));
-
+    Position posC1 = pu.Y().center();
     bool C0Avail = false;
-    bool C1Avail = (posC1.x < pcv.lumaWidth) && (posC1.y  < pcv.lumaHeight);
-
     if (((posRB.x + pcv.minCUWidth) < pcv.lumaWidth) && ((posRB.y + pcv.minCUHeight) < pcv.lumaHeight))
     {
+      int posYInCtu = posRB.y & pcv.maxCUHeightMask;
+      if (posYInCtu + 4 < pcv.maxCUHeight)
       {
-        Position posInCtu( posRB.x & pcv.maxCUWidthMask, posRB.y & pcv.maxCUHeightMask );
-
-        if( ( posInCtu.x + 4 < pcv.maxCUWidth ) &&           // is not at the last column of CTU
-            ( posInCtu.y + 4 < pcv.maxCUHeight ) )           // is not at the last row    of CTU
-        {
-          posC0 = posRB.offset( 4, 4 );
-          C0Avail = true;
-        }
-        else if( posInCtu.x + 4 < pcv.maxCUWidth )           // is not at the last column of CTU But is last row of CTU
-        {
-          posC0 = posRB.offset( 4, 4 );
-          // in the reference the CTU address is not set - thus probably resulting in no using this C0 possibility
-        }
-        else if( posInCtu.y + 4 < pcv.maxCUHeight )          // is not at the last row of CTU But is last column of CTU
-        {
-          posC0 = posRB.offset( 4, 4 );
-          C0Avail = true;
-        }
-        else //is the right bottom corner of CTU
-        {
-          posC0 = posRB.offset( 4, 4 );
-          // same as for last column but not last row
-        }
+        posC0 = posRB.offset(4, 4);
+        C0Avail = true;
       }
     }
 
@@ -1581,9 +1063,8 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx,
     int       iRefIdx     = 0;
     int       dir         = 0;
     unsigned  uiArrayAddr = cnt;
-    bool      bExistMV    = ( C0Avail && getColocatedMVP(pu, REF_PIC_LIST_0, posC0, cColMv, iRefIdx ) )
-                                      || ( C1Avail && getColocatedMVP(pu, REF_PIC_LIST_0, posC1, cColMv, iRefIdx ));
-
+    bool      bExistMV    = ( C0Avail && getColocatedMVP(pu, REF_PIC_LIST_0, posC0, cColMv, iRefIdx, false ) )
+                              || getColocatedMVP( pu, REF_PIC_LIST_0, posC1, cColMv, iRefIdx, false );
     if (bExistMV)
     {
       dir     |= 1;
@@ -1592,8 +1073,8 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx,
 
     if (slice.isInterB())
     {
-      bExistMV = ( C0Avail && getColocatedMVP(pu, REF_PIC_LIST_1, posC0, cColMv, iRefIdx ) )
-                           || (C1Avail &&  getColocatedMVP(pu, REF_PIC_LIST_1, posC1, cColMv, iRefIdx ) );
+      bExistMV = ( C0Avail && getColocatedMVP(pu, REF_PIC_LIST_1, posC0, cColMv, iRefIdx, false ) )
+                   || getColocatedMVP( pu, REF_PIC_LIST_1, posC1, cColMv, iRefIdx, false );
       if (bExistMV)
       {
         dir     |= 2;
@@ -1604,26 +1085,12 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx,
     if( dir != 0 )
     {
       bool addTMvp = true;
-#if HM_JEM_MERGE_CANDS
-      int iSpanCand = cnt;
-      for( int i = 0; i < iSpanCand; i++ )
-      {
-        if( mrgCtx.interDirNeighbours[  i           ] == dir &&
-            mrgCtx.mvFieldNeighbours [  i << 1      ] == mrgCtx.mvFieldNeighbours[  uiArrayAddr << 1      ] &&
-            mrgCtx.mvFieldNeighbours [( i << 1 ) + 1] == mrgCtx.mvFieldNeighbours[( uiArrayAddr << 1 ) + 1] )
-        {
-          addTMvp = false;
-        }
-      }
-#endif
       if( addTMvp )
       {
         mrgCtx.interDirNeighbours[uiArrayAddr] = dir;
-#if !JVET_L0090_PAIR_AVG
-        isCandInter              [uiArrayAddr] = true;
-#endif
-        mrgCtx.GBiIdx[uiArrayAddr] = GBI_DEFAULT;
-        if (mrgCandIdx == cnt && canFastExit)
+        mrgCtx.BcwIdx[uiArrayAddr] = BCW_DEFAULT;
+        mrgCtx.useAltHpelIf[uiArrayAddr] = false;
+        if (mrgCandIdx == cnt)
         {
           return;
         }
@@ -1642,32 +1109,19 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx,
   int maxNumMergeCandMin1 = maxNumMergeCand - 1;
   if (cnt != maxNumMergeCandMin1)
   {
-    bool isAvailableSubPu = false;
-    unsigned subPuMvpPos = 0;
-#if JVET_L0090_PAIR_AVG
-    bool  isShared = ((pu.Y().lumaSize().width != pu.shareParentSize.width) || (pu.Y().lumaSize().height != pu.shareParentSize.height));
-    bool bFound = addMergeHMVPCand(cs, mrgCtx, canFastExit
-      , mrgCandIdx
-      , maxNumMergeCandMin1, cnt
-      , spatialCandPos
-      , isAvailableSubPu, subPuMvpPos
+    bool isGt4x4 = true;
+    bool bFound = addMergeHMVPCand(cs, mrgCtx, mrgCandIdx, maxNumMergeCandMin1, cnt
+      , isAvailableA1, miLeft, isAvailableB1, miAbove
       , CU::isIBC(*pu.cu)
-      , isShared
-    );
-#else
-    bool bFound = addMergeHMVPCand(slice, mrgCtx, isCandInter, canFastExit
-      , (mmvdList != 0 && mrgCandIdx != -1) ? (const int)mrgCandIdxIBC : mrgCandIdx
-      , maxNumMergeCandMin1, cnt, cnt, isAvailableSubPu, subPuMvpPos
-      , mmvdList
-    );
-#endif
+      , isGt4x4
+      );
+
     if (bFound)
     {
       return;
     }
   }
 
-#if JVET_L0090_PAIR_AVG
   // pairwise-average candidates
   {
 
@@ -1680,6 +1134,7 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx,
       unsigned char interDir = 0;
 
 
+      mrgCtx.useAltHpelIf[cnt] = (mrgCtx.useAltHpelIf[0] == mrgCtx.useAltHpelIf[1]) ? mrgCtx.useAltHpelIf[0] : false;
       for( int refListId = 0; refListId < (slice.isInterB() ? 2 : 1); refListId++ )
       {
         const short refIdxI = mrgCtx.mvFieldNeighbours[0 * 2 + refListId].refIdx;
@@ -1731,53 +1186,8 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx,
       return;
     }
   }
-#endif
 
   uint32_t uiArrayAddr = cnt;
-#if !JVET_L0090_PAIR_AVG
-  uint32_t uiCutoff    = std::min( uiArrayAddr, 3u );
-  if (slice.isInterB())
-  {
-    static const uint32_t NUM_PRIORITY_LIST = 12;
-    static const uint32_t uiPriorityList0[NUM_PRIORITY_LIST] = { 0 , 1, 0, 2, 1, 2, 0, 3, 1, 3, 2, 3 };
-    static const uint32_t uiPriorityList1[NUM_PRIORITY_LIST] = { 1 , 0, 2, 0, 2, 1, 3, 0, 3, 1, 3, 2 };
-
-    for (int idx = 0; idx < uiCutoff * (uiCutoff - 1) && uiArrayAddr != maxNumMergeCand; idx++)
-    {
-      CHECK( idx >= NUM_PRIORITY_LIST, "Invalid priority list number" );
-      int i = uiPriorityList0[idx];
-      int j = uiPriorityList1[idx];
-      if (isCandInter[i] && isCandInter[j] && (mrgCtx.interDirNeighbours[i] & 0x1) && (mrgCtx.interDirNeighbours[j] & 0x2))
-      {
-        isCandInter[uiArrayAddr] = true;
-        mrgCtx.interDirNeighbours[uiArrayAddr] = 3;
-        mrgCtx.GBiIdx[uiArrayAddr] = ((mrgCtx.interDirNeighbours[uiArrayAddr] == 3)) ? CU::deriveGbiIdx(mrgCtx.GBiIdx[i], mrgCtx.GBiIdx[j]) : GBI_DEFAULT;
-
-        // get Mv from cand[i] and cand[j]
-        mrgCtx.mvFieldNeighbours[ uiArrayAddr << 1     ].setMvField(mrgCtx.mvFieldNeighbours[ i << 1     ].mv, mrgCtx.mvFieldNeighbours[ i << 1     ].refIdx);
-        mrgCtx.mvFieldNeighbours[(uiArrayAddr << 1) + 1].setMvField(mrgCtx.mvFieldNeighbours[(j << 1) + 1].mv, mrgCtx.mvFieldNeighbours[(j << 1) + 1].refIdx);
-
-        int iRefPOCL0 = slice.getRefPOC(REF_PIC_LIST_0, mrgCtx.mvFieldNeighbours[(uiArrayAddr << 1)    ].refIdx);
-        int iRefPOCL1 = slice.getRefPOC(REF_PIC_LIST_1, mrgCtx.mvFieldNeighbours[(uiArrayAddr << 1) + 1].refIdx);
-
-        if( iRefPOCL0 == iRefPOCL1 && mrgCtx.mvFieldNeighbours[( uiArrayAddr << 1 )].mv == mrgCtx.mvFieldNeighbours[( uiArrayAddr << 1 ) + 1].mv )
-        {
-          isCandInter[uiArrayAddr] = false;
-        }
-        else
-        {
-          uiArrayAddr++;
-        }
-      }
-    }
-  }
-
-  // early termination
-  if (uiArrayAddr == maxNumMergeCand)
-  {
-    return;
-  }
-#endif
 
   int iNumRefIdx = slice.isInterB() ? std::min(slice.getNumRefIdx(REF_PIC_LIST_0), slice.getNumRefIdx(REF_PIC_LIST_1)) : slice.getNumRefIdx(REF_PIC_LIST_0);
 
@@ -1785,12 +1195,10 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx,
   int refcnt = 0;
   while (uiArrayAddr < maxNumMergeCand)
   {
-#if !JVET_L0090_PAIR_AVG
-    isCandInter               [uiArrayAddr     ] = true;
-#endif
     mrgCtx.interDirNeighbours [uiArrayAddr     ] = 1;
-    mrgCtx.GBiIdx             [uiArrayAddr     ] = GBI_DEFAULT;
+    mrgCtx.BcwIdx             [uiArrayAddr     ] = BCW_DEFAULT;
     mrgCtx.mvFieldNeighbours  [uiArrayAddr << 1].setMvField(Mv(0, 0), r);
+    mrgCtx.useAltHpelIf[uiArrayAddr] = false;
 
     if (slice.isInterB())
     {
@@ -1817,18 +1225,35 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx,
   }
   mrgCtx.numValidMergeCand = uiArrayAddr;
 }
+
 bool PU::checkDMVRCondition(const PredictionUnit& pu)
 {
-  if (pu.cs->sps->getUseDMVR())
+  WPScalingParam *wp0;
+  WPScalingParam *wp1;
+  int refIdx0 = pu.refIdx[REF_PIC_LIST_0];
+  int refIdx1 = pu.refIdx[REF_PIC_LIST_1];
+  pu.cu->slice->getWpScaling(REF_PIC_LIST_0, refIdx0, wp0);
+  pu.cu->slice->getWpScaling(REF_PIC_LIST_1, refIdx1, wp1);
+  if (pu.cs->sps->getUseDMVR() && (!pu.cs->picHeader->getDisDmvrFlag()))
   {
     return pu.mergeFlag
       && pu.mergeType == MRG_TYPE_DEFAULT_N
+      && !pu.ciipFlag
       && !pu.cu->affine
       && !pu.mmvdMergeFlag
       && !pu.cu->mmvdSkip
       && PU::isBiPredFromDifferentDirEqDistPoc(pu)
       && (pu.lheight() >= 8)
-      && ((pu.lheight() * pu.lwidth()) >= 64)
+      && (pu.lwidth() >= 8)
+      && ((pu.lheight() * pu.lwidth()) >= 128)
+      && (pu.cu->BcwIdx == BCW_DEFAULT)
+      && ((!wp0[COMPONENT_Y].bPresentFlag) && (!wp1[COMPONENT_Y].bPresentFlag))
+#if JVET_Q0487_SCALING_WINDOW_ISSUES
+      && ( refIdx0 < 0 ? true : (pu.cu->slice->getRefPic( REF_PIC_LIST_0, refIdx0 )->isRefScaled( pu.cs->pps ) == false) )
+      && ( refIdx1 < 0 ? true : (pu.cu->slice->getRefPic( REF_PIC_LIST_1, refIdx1 )->isRefScaled( pu.cs->pps ) == false) )
+#else
+      && ( refIdx0 < 0 ? true : pu.cu->slice->getScalingRatio( REF_PIC_LIST_0, refIdx0 ) == SCALE_1X ) && ( refIdx1 < 0 ? true : pu.cu->slice->getScalingRatio( REF_PIC_LIST_1, refIdx1 ) == SCALE_1X )
+#endif
       ;
   }
   else
@@ -1836,80 +1261,7 @@ bool PU::checkDMVRCondition(const PredictionUnit& pu)
     return false;
   }
 }
-// for ibc pu validation
-bool PU::isBlockVectorValid(PredictionUnit& pu, int xPos, int yPos, int width, int height, int picWidth, int picHeight, int xStartInCU, int yStartInCU, int xBv, int yBv, int ctuSize)
-{
-  const int ctuSizeLog2 = g_aucLog2[ctuSize];
-
-  int refRightX = xPos + xBv + width - 1;
-  int refBottomY = yPos + yBv + height - 1;
-
-  int refLeftX = xPos + xBv;
-  int refTopY = yPos + yBv;
-
-  if ((xPos + xBv) < 0)
-  {
-    return false;
-  }
-  if (refRightX >= picWidth)
-  {
-    return false;
-  }
-
-  if ((yPos + yBv) < 0)
-  {
-    return false;
-  }
-  if (refBottomY >= picHeight)
-  {
-    return false;
-  }
-  if ((xBv + width) > 0 && (yBv + height) > 0)
-  {
-    return false;
-  }
 
-  // cannot be in the above CTU row
-  if (refTopY >> ctuSizeLog2 < yPos >> ctuSizeLog2)
-    return false;
-
-  // cannot be in the below CTU row
-  if (refBottomY >> ctuSizeLog2 > yPos >> ctuSizeLog2)
-  {
-    return false;
-  }
-
-  // in the same CTU line
-  if ((refRightX >> ctuSizeLog2 <= xPos >> ctuSizeLog2) && (refLeftX >> ctuSizeLog2 >= (xPos >> ctuSizeLog2) - 1))
-  {
-
-    // in the same CTU, or left CTU
-    // if part of ref block is in the left CTU, some area can be referred from the not-yet updated local CTU buffer
-    if ((refLeftX >> ctuSizeLog2) == ((xPos >> ctuSizeLog2) - 1))
-    {
-      // ref block's collocated block in current CTU
-      const Position refPosCol = pu.Y().topLeft().offset(xBv + ctuSize, yBv);
-      int offset64x = (refPosCol.x >> (ctuSizeLog2 - 1)) << (ctuSizeLog2 - 1);
-      int offset64y = (refPosCol.y >> (ctuSizeLog2 - 1)) << (ctuSizeLog2 - 1);
-      const Position refPosCol64x64 = {offset64x, offset64y};
-      if (pu.cs->isDecomp(refPosCol64x64, toChannelType(COMPONENT_Y)))
-        return false;
-    }
-  }
-  else
-    return false;
-
-  // in the same CTU, or valid area from left CTU. Check if the reference block is already coded
-  const Position refPosLT = pu.Y().topLeft().offset(xBv, yBv);
-  const Position refPosBR = pu.Y().bottomRight().offset(xBv, yBv);
-  const ChannelType      chType = toChannelType(COMPONENT_Y);
-  if (!pu.cs->isDecomp(refPosBR, chType))
-    return false;
-  if (!pu.cs->isDecomp(refPosLT, chType))
-    return false;
-  return true;
-
-}// for ibc pu validation
 
 static int xGetDistScaleFactor(const int &iCurrPOC, const int &iCurrRefPOC, const int &iColPOC, const int &iColRefPOC)
 {
@@ -1999,6 +1351,7 @@ void PU::getInterMMVDMergeCandidates(const PredictionUnit &pu, MergeCtx& mrgCtx,
         mrgCtx.mmvdBaseMv[currBaseNum][0] = MvField(Mv(0, 0), -1);
         mrgCtx.mmvdBaseMv[currBaseNum][1] = mrgCtx.mvFieldNeighbours[(k << 1) + 1];
       }
+      mrgCtx.mmvdUseAltHpelIf[currBaseNum] = mrgCtx.useAltHpelIf[k];
 
       currBaseNum++;
 
@@ -2006,20 +1359,8 @@ void PU::getInterMMVDMergeCandidates(const PredictionUnit &pu, MergeCtx& mrgCtx,
         break;
     }
   }
-
-  if (currBaseNum < MMVD_BASE_MV_NUM)
-  {
-    for (k = currBaseNum; k < MMVD_BASE_MV_NUM; k++)
-    {
-      mrgCtx.mmvdBaseMv[k][0] = MvField(Mv(0, 0), 0);
-      const Slice &slice = *pu.cs->slice;
-      mrgCtx.mmvdBaseMv[k][1] = MvField(Mv(0, 0), (slice.isInterB() ? 0 : -1));
-      mrgCtx.GBiIdx[k] = GBI_DEFAULT;
-      mrgCtx.interDirNeighbours[k] = (mrgCtx.mmvdBaseMv[k][0].refIdx >= 0) + (mrgCtx.mmvdBaseMv[k][1].refIdx >= 0) * 2;
-    }
-  }
 }
-bool PU::getColocatedMVP(const PredictionUnit &pu, const RefPicList &eRefPicList, const Position &_pos, Mv& rcMv, const int &refIdx )
+bool PU::getColocatedMVP(const PredictionUnit &pu, const RefPicList &eRefPicList, const Position &_pos, Mv& rcMv, const int &refIdx, bool sbFlag)
 {
   // don't perform MV compression when generally disabled or subPuMvp is used
   const unsigned scale = 4 * std::max<int>(1, 4 * AMVP_DECIMATION_FACTOR / 4);
@@ -2055,14 +1396,26 @@ bool PU::getColocatedMVP(const PredictionUnit &pu, const RefPicList &eRefPicList
   }
   int iColRefIdx = mi.refIdx[eColRefPicList];
 
-  if (iColRefIdx < 0)
+  if (sbFlag && !slice.getCheckLDC())
+  {
+    eColRefPicList = eRefPicList;
+    iColRefIdx = mi.refIdx[eColRefPicList];
+    if (iColRefIdx < 0)
+    {
+      return false;
+    }
+  }
+  else
   {
-    eColRefPicList = RefPicList(1 - eColRefPicList);
-    iColRefIdx = mi.refIdx[eColRefPicList];
-
     if (iColRefIdx < 0)
     {
-      return false;
+      eColRefPicList = RefPicList(1 - eColRefPicList);
+      iColRefIdx = mi.refIdx[eColRefPicList];
+
+      if (iColRefIdx < 0)
+      {
+        return false;
+      }
     }
   }
 
@@ -2127,90 +1480,124 @@ bool PU::isDiffMER(const PredictionUnit &pu1, const PredictionUnit &pu2)
   const unsigned xP = pu2.lumaPos().x;
   const unsigned yP = pu2.lumaPos().y;
 
-  unsigned plevel = pu1.cs->pps->getLog2ParallelMergeLevelMinus2() + 2;
-
-  if ((xN >> plevel) != (xP >> plevel))
+  if ((xN >> 2) != (xP >> 2))
   {
     return true;
   }
 
-  if ((yN >> plevel) != (yP >> plevel))
+  if ((yN >> 2) != (yP >> 2))
   {
     return true;
   }
 
   return false;
 }
-void PU::getIbcMVPsEncOnly(PredictionUnit &pu, Mv* MvPred, int& nbPred)
+
+bool PU::isAddNeighborMv(const Mv& currMv, Mv* neighborMvs, int numNeighborMv)
 {
+  bool existed = false;
+  for (uint32_t cand = 0; cand < numNeighborMv && !existed; cand++)
+  {
+    if (currMv == neighborMvs[cand])
+    {
+      existed = true;
+    }
+  }
 
-  //-- Get Spatial MV
-  Position posLT = pu.Y().topLeft();
-  Position posRT = pu.Y().topRight();
-  Position posLB = pu.Y().bottomLeft();
+  if (!existed)
+  {
+    return true;
+  }
+  else
+  {
+    return false;
+  }
+}
 
-  unsigned int left = 0, above = 0;
+void PU::getIbcMVPsEncOnly(PredictionUnit &pu, Mv* mvPred, int& nbPred)
+{
+  const PreCalcValues   &pcv = *pu.cs->pcv;
+  const int  cuWidth = pu.blocks[COMPONENT_Y].width;
+  const int  cuHeight = pu.blocks[COMPONENT_Y].height;
+  const int  log2UnitWidth = floorLog2(pcv.minCUWidth);
+  const int  log2UnitHeight = floorLog2(pcv.minCUHeight);
+  const int  totalAboveUnits = (cuWidth >> log2UnitWidth) + 1;
+  const int  totalLeftUnits = (cuHeight >> log2UnitHeight) + 1;
 
-  //left
-  const PredictionUnit *neibLeftPU = NULL;
-  neibLeftPU = pu.cs->getPURestricted(posLB.offset(-1, 0), pu, pu.cs->chType);
-  left = (neibLeftPU) ? CU::isIBC(*neibLeftPU->cu) : 0;
+  nbPred = 0;
+  Position posLT = pu.Y().topLeft();
 
-  if (left)
+  // above-left
+  const PredictionUnit *aboveLeftPU = pu.cs->getPURestricted(posLT.offset(-1, -1), pu, CHANNEL_TYPE_LUMA);
+  if (aboveLeftPU && CU::isIBC(*aboveLeftPU->cu))
   {
-    MvPred[nbPred++] = neibLeftPU->bv;
-    if (getDerivedBV(pu, MvPred[nbPred - 1], MvPred[nbPred]))
-      nbPred++;
+    if (isAddNeighborMv(aboveLeftPU->bv, mvPred, nbPred))
+    {
+      mvPred[nbPred++] = aboveLeftPU->bv;
+    }
   }
 
-  //above
-  const PredictionUnit *neibAbovePU = NULL;
-  neibAbovePU = pu.cs->getPURestricted(posRT.offset(0, -1), pu, pu.cs->chType);
-  above = (neibAbovePU) ? CU::isIBC(*neibAbovePU->cu) : 0;
-
-  if (above)
+  // above neighbors
+  for (uint32_t dx = 0; dx < totalAboveUnits && nbPred < IBC_NUM_CANDIDATES; dx++)
   {
-    MvPred[nbPred++] = neibAbovePU->bv;
-    if (getDerivedBV(pu, MvPred[nbPred - 1], MvPred[nbPred]))
-      nbPred++;
+    const PredictionUnit* tmpPU = pu.cs->getPURestricted(posLT.offset((dx << log2UnitWidth), -1), pu, CHANNEL_TYPE_LUMA);
+    if (tmpPU && CU::isIBC(*tmpPU->cu))
+    {
+      if (isAddNeighborMv(tmpPU->bv, mvPred, nbPred))
+      {
+        mvPred[nbPred++] = tmpPU->bv;
+      }
+    }
   }
 
-  // Below Left predictor search
-  const PredictionUnit *neibBelowLeftPU = NULL;
-  neibBelowLeftPU = pu.cs->getPURestricted(posLB.offset(-1, 1), pu, pu.cs->chType);
-  unsigned int belowLeft = (neibBelowLeftPU) ? CU::isIBC(*neibBelowLeftPU->cu) : 0;
-
-  if (belowLeft)
+  // left neighbors
+  for (uint32_t dy = 0; dy < totalLeftUnits && nbPred < IBC_NUM_CANDIDATES; dy++)
   {
-    MvPred[nbPred++] = neibBelowLeftPU->bv;
-    if (getDerivedBV(pu, MvPred[nbPred - 1], MvPred[nbPred]))
-      nbPred++;
+    const PredictionUnit* tmpPU = pu.cs->getPURestricted(posLT.offset(-1, (dy << log2UnitHeight)), pu, CHANNEL_TYPE_LUMA);
+    if (tmpPU && CU::isIBC(*tmpPU->cu))
+    {
+      if (isAddNeighborMv(tmpPU->bv, mvPred, nbPred))
+      {
+        mvPred[nbPred++] = tmpPU->bv;
+      }
+    }
   }
 
-
-  // Above Right predictor search
-  const PredictionUnit *neibAboveRightPU = NULL;
-  neibAboveRightPU = pu.cs->getPURestricted(posRT.offset(1, -1), pu, pu.cs->chType);
-  unsigned int aboveRight = (neibAboveRightPU) ? CU::isIBC(*neibAboveRightPU->cu) : 0;
-
-  if (aboveRight)
+  size_t numAvaiCandInLUT = pu.cs->motionLut.lutIbc.size();
+  for (uint32_t cand = 0; cand < numAvaiCandInLUT && nbPred < IBC_NUM_CANDIDATES; cand++)
   {
-    MvPred[nbPred++] = neibAboveRightPU->bv;
-    if (getDerivedBV(pu, MvPred[nbPred - 1], MvPred[nbPred]))
-      nbPred++;
+    MotionInfo neibMi = pu.cs->motionLut.lutIbc[cand];
+    if (isAddNeighborMv(neibMi.bv, mvPred, nbPred))
+    {
+      mvPred[nbPred++] = neibMi.bv;
+    }
   }
 
+  bool isBvCandDerived[IBC_NUM_CANDIDATES];
+  ::memset(isBvCandDerived, false, IBC_NUM_CANDIDATES);
 
-  // Above Left predictor search
-  const PredictionUnit *neibAboveLeftPU = NULL;
-  neibAboveLeftPU = pu.cs->getPURestricted(posLT.offset(-1, -1), pu, pu.cs->chType);
-  unsigned int aboveLeft = (neibAboveLeftPU) ? CU::isIBC(*neibAboveLeftPU->cu) : 0;
-
-  if (aboveLeft)
+  int curNbPred = nbPred;
+  if (curNbPred < IBC_NUM_CANDIDATES)
   {
-    MvPred[nbPred++] = neibAboveLeftPU->bv;
-    if (getDerivedBV(pu, MvPred[nbPred - 1], MvPred[nbPred]))
-      nbPred++;
+    do
+    {
+      curNbPred = nbPred;
+      for (uint32_t idx = 0; idx < curNbPred && nbPred < IBC_NUM_CANDIDATES; idx++)
+      {
+        if (!isBvCandDerived[idx])
+        {
+          Mv derivedBv;
+          if (getDerivedBV(pu, mvPred[idx], derivedBv))
+          {
+            if (isAddNeighborMv(derivedBv, mvPred, nbPred))
+            {
+              mvPred[nbPred++] = derivedBv;
+            }
+          }
+          isBvCandDerived[idx] = true;
+        }
+      }
+    } while (nbPred > curNbPred && nbPred < IBC_NUM_CANDIDATES);
   }
 }
 
@@ -2224,13 +1611,13 @@ bool PU::getDerivedBV(PredictionUnit &pu, const Mv& currentMv, Mv& derivedMv)
   int offsetY = currentMv.getVer();
 
 
-  if (rX < 0 || rY < 0 || rX >= pu.cs->slice->getSPS()->getPicWidthInLumaSamples() || rY >= pu.cs->slice->getSPS()->getPicHeightInLumaSamples())
+  if( rX < 0 || rY < 0 || rX >= pu.cs->slice->getPPS()->getPicWidthInLumaSamples() || rY >= pu.cs->slice->getPPS()->getPicHeightInLumaSamples() )
   {
     return false;
   }
 
   const PredictionUnit *neibRefPU = NULL;
-  neibRefPU = pu.cs->getPURestricted(pu.lumaPos().offset(offsetX, offsetY), pu, pu.cs->chType);
+  neibRefPU = pu.cs->getPURestricted(pu.lumaPos().offset(offsetX, offsetY), pu, CHANNEL_TYPE_LUMA);
 
   bool isIBC = (neibRefPU) ? CU::isIBC(*neibRefPU->cu) : 0;
   if (isIBC)
@@ -2246,83 +1633,25 @@ bool PU::getDerivedBV(PredictionUnit &pu, const Mv& currentMv, Mv& derivedMv)
  */
 void PU::fillIBCMvpCand(PredictionUnit &pu, AMVPInfo &amvpInfo)
 {
-  CodingStructure &cs = *pu.cs;
 
   AMVPInfo *pInfo = &amvpInfo;
 
   pInfo->numCand = 0;
 
-  //-- Get Spatial MV
-  Position posLT = pu.Y().topLeft();
-  Position posRT = pu.Y().topRight();
-  Position posLB = pu.Y().bottomLeft();
-
-  bool isScaledFlagLX = false; /// variable name from specification; true when the PUs below left or left are available (availableA0 || availableA1).
-
-  const PredictionUnit* tmpPU = cs.getPURestricted(posLB.offset(-1, 1), pu, pu.chType); // getPUBelowLeft(idx, partIdxLB);
-  isScaledFlagLX = tmpPU != NULL && CU::isIBC(*tmpPU->cu);
-  if (!isScaledFlagLX)
-  {
-    tmpPU = cs.getPURestricted(posLB.offset(-1, 0), pu, pu.chType);
-    isScaledFlagLX = tmpPU != NULL && CU::isIBC(*tmpPU->cu);
-  }
-
-  // Left predictor search
-  if (isScaledFlagLX)
-  {
-    bool isAdded = addIBCMVPCand(pu, posLB, MD_BELOW_LEFT, *pInfo);
-
-    if (!isAdded)
-    {
-      isAdded = addIBCMVPCand(pu, posLB, MD_LEFT, *pInfo);
-    }
-  }
-
-  // Above predictor search
-  bool isAdded = addIBCMVPCand(pu, posRT, MD_ABOVE_RIGHT, *pInfo);
-
-  if (!isAdded)
-  {
-    isAdded = addIBCMVPCand(pu, posRT, MD_ABOVE, *pInfo);
-
-    if (!isAdded)
-    {
-      addIBCMVPCand(pu, posLT, MD_ABOVE_LEFT, *pInfo);
-    }
-  }
-
-  for( int i = 0; i < pInfo->numCand; i++ )
-  {
-    pInfo->mvCand[i].roundToAmvrSignalPrecision(MV_PRECISION_INTERNAL, pu.cu->imv);
-  }
-
-  if (pInfo->numCand == 2)
-  {
-    if (pInfo->mvCand[0] == pInfo->mvCand[1])
-    {
-      pInfo->numCand = 1;
-    }
-  }
-
-  if (pInfo->numCand < AMVP_MAX_NUM_CANDS)
-  {
-    addAMVPHMVPCand(pu, REF_PIC_LIST_0, REF_PIC_LIST_1, cs.slice->getPOC(), *pInfo, pu.cu->imv);
-  }
-
-  if (pInfo->numCand > AMVP_MAX_NUM_CANDS)
-  {
-    pInfo->numCand = AMVP_MAX_NUM_CANDS;
-  }
 
+  MergeCtx mergeCtx;
+  PU::getIBCMergeCandidates(pu, mergeCtx, AMVP_MAX_NUM_CANDS - 1);
+  int candIdx = 0;
   while (pInfo->numCand < AMVP_MAX_NUM_CANDS)
   {
-    pInfo->mvCand[pInfo->numCand] = Mv(0, 0);
+    pInfo->mvCand[pInfo->numCand] = mergeCtx.mvFieldNeighbours[(candIdx << 1) + 0].mv;;
     pInfo->numCand++;
+    candIdx++;
   }
 
   for (Mv &mv : pInfo->mvCand)
   {
-    mv.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
+    mv.roundIbcPrecInternal2Amvr(pu.cu->imv);
   }
 }
 
@@ -2352,21 +1681,6 @@ void PU::fillMvpCand(PredictionUnit &pu, const RefPicList &eRefPicList, const in
   Position posRT = pu.Y().topRight();
   Position posLB = pu.Y().bottomLeft();
 
-  bool isScaledFlagLX = false; /// variable name from specification; true when the PUs below left or left are available (availableA0 || availableA1).
-
-  {
-    const PredictionUnit* tmpPU = cs.getPURestricted( posLB.offset( -1, 1 ), pu, pu.chType ); // getPUBelowLeft(idx, partIdxLB);
-    isScaledFlagLX = tmpPU != NULL && CU::isInter( *tmpPU->cu );
-
-    if( !isScaledFlagLX )
-    {
-      tmpPU = cs.getPURestricted( posLB.offset( -1, 0 ), pu, pu.chType );
-      isScaledFlagLX = tmpPU != NULL && CU::isInter( *tmpPU->cu );
-    }
-  }
-
-  // Left predictor search
-  if( isScaledFlagLX )
   {
     bool bAdded = addMVPCandUnscaled( pu, eRefPicList, refIdx, posLB, MD_BELOW_LEFT, *pInfo );
 
@@ -2374,15 +1688,6 @@ void PU::fillMvpCand(PredictionUnit &pu, const RefPicList &eRefPicList, const in
     {
       bAdded = addMVPCandUnscaled( pu, eRefPicList, refIdx, posLB, MD_LEFT, *pInfo );
 
-      if( !bAdded )
-      {
-        bAdded = addMVPCandWithScaling( pu, eRefPicList, refIdx, posLB, MD_BELOW_LEFT, *pInfo );
-
-        if( !bAdded )
-        {
-          addMVPCandWithScaling( pu, eRefPicList, refIdx, posLB, MD_LEFT, *pInfo );
-        }
-      }
     }
   }
 
@@ -2401,24 +1706,10 @@ void PU::fillMvpCand(PredictionUnit &pu, const RefPicList &eRefPicList, const in
     }
   }
 
-  if( !isScaledFlagLX )
-  {
-    bool bAdded = addMVPCandWithScaling( pu, eRefPicList, refIdx, posRT, MD_ABOVE_RIGHT, *pInfo );
-
-    if( !bAdded )
-    {
-      bAdded = addMVPCandWithScaling( pu, eRefPicList, refIdx, posRT, MD_ABOVE, *pInfo );
-
-      if( !bAdded )
-      {
-        addMVPCandWithScaling( pu, eRefPicList, refIdx, posLT, MD_ABOVE_LEFT, *pInfo );
-      }
-    }
-  }
 
   for( int i = 0; i < pInfo->numCand; i++ )
   {
-    pInfo->mvCand[i].roundToAmvrSignalPrecision(MV_PRECISION_INTERNAL, pu.cu->imv);
+    pInfo->mvCand[i].roundTransPrecInternal2Amvr(pu.cu->imv);
   }
 
   if( pInfo->numCand == 2 )
@@ -2429,7 +1720,7 @@ void PU::fillMvpCand(PredictionUnit &pu, const RefPicList &eRefPicList, const in
     }
   }
 
-  if( cs.slice->getEnableTMVPFlag() && pInfo->numCand < AMVP_MAX_NUM_CANDS )
+  if (cs.picHeader->getEnableTMVPFlag() && pInfo->numCand < AMVP_MAX_NUM_CANDS && (pu.lumaSize().width + pu.lumaSize().height > 12))
   {
     // Get Temporal Motion Predictor
     const int refIdx_Col = refIdx;
@@ -2441,49 +1732,28 @@ void PU::fillMvpCand(PredictionUnit &pu, const RefPicList &eRefPicList, const in
     Position posC0;
     bool C0Avail = false;
     Position posC1 = pu.Y().center();
-    bool C1Avail =  ( posC1.x  < pcv.lumaWidth ) && ( posC1.y < pcv.lumaHeight ) ;
-
     Mv cColMv;
 
     if( ( ( posRB.x + pcv.minCUWidth ) < pcv.lumaWidth ) && ( ( posRB.y + pcv.minCUHeight ) < pcv.lumaHeight ) )
     {
-      Position posInCtu( posRB.x & pcv.maxCUWidthMask, posRB.y & pcv.maxCUHeightMask );
-
-      if ((posInCtu.x + 4 < pcv.maxCUWidth) &&           // is not at the last column of CTU
-          (posInCtu.y + 4 < pcv.maxCUHeight))             // is not at the last row    of CTU
-      {
-        posC0 = posRB.offset(4, 4);
-        C0Avail = true;
-      }
-      else if (posInCtu.x + 4 < pcv.maxCUWidth)           // is not at the last column of CTU But is last row of CTU
-      {
-        // in the reference the CTU address is not set - thus probably resulting in no using this C0 possibility
-        posC0 = posRB.offset(4, 4);
-      }
-      else if (posInCtu.y + 4 < pcv.maxCUHeight)          // is not at the last row of CTU But is last column of CTU
+      int posYInCtu = posRB.y & pcv.maxCUHeightMask;
+      if (posYInCtu + 4 < pcv.maxCUHeight)
       {
         posC0 = posRB.offset(4, 4);
         C0Avail = true;
       }
-      else //is the right bottom corner of CTU
-      {
-        // same as for last column but not last row
-        posC0 = posRB.offset(4, 4);
-      }
     }
-
-    if ((C0Avail && getColocatedMVP(pu, eRefPicList, posC0, cColMv, refIdx_Col)) || (C1Avail && getColocatedMVP(pu, eRefPicList, posC1, cColMv, refIdx_Col)))
+    if ( ( C0Avail && getColocatedMVP( pu, eRefPicList, posC0, cColMv, refIdx_Col, false ) ) || getColocatedMVP( pu, eRefPicList, posC1, cColMv, refIdx_Col, false ) )
     {
-      cColMv.roundToAmvrSignalPrecision(MV_PRECISION_INTERNAL, pu.cu->imv);
+      cColMv.roundTransPrecInternal2Amvr(pu.cu->imv);
       pInfo->mvCand[pInfo->numCand++] = cColMv;
     }
   }
 
   if (pInfo->numCand < AMVP_MAX_NUM_CANDS)
   {
-    const int        currRefPOC = cs.slice->getRefPic(eRefPicList, refIdx)->getPOC();
-    const RefPicList eRefPicList2nd = (eRefPicList == REF_PIC_LIST_0) ? REF_PIC_LIST_1 : REF_PIC_LIST_0;
-    addAMVPHMVPCand(pu, eRefPicList, eRefPicList2nd, currRefPOC, *pInfo, pu.cu->imv);
+    const int currRefPOC = cs.slice->getRefPic(eRefPicList, refIdx)->getPOC();
+    addAMVPHMVPCand(pu, eRefPicList, currRefPOC, *pInfo);
   }
 
   if (pInfo->numCand > AMVP_MAX_NUM_CANDS)
@@ -2499,7 +1769,7 @@ void PU::fillMvpCand(PredictionUnit &pu, const RefPicList &eRefPicList, const in
 
   for (Mv &mv : pInfo->mvCand)
   {
-    mv.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
+    mv.roundTransPrecInternal2Amvr(pu.cu->imv);
   }
 }
 
@@ -2556,28 +1826,13 @@ bool PU::addAffineMVPCandUnscaled( const PredictionUnit &pu, const RefPicList &r
     }
 
     xInheritedAffineMv( pu, neibPU, eRefPicListIndex, outputAffineMv );
-    if ( pu.cu->imv == 0 )
-    {
-      outputAffineMv[0].roundToPrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
-      outputAffineMv[1].roundToPrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
-    }
-    else if ( pu.cu->imv == 2 )
-    {
-      outputAffineMv[0].roundToPrecision( MV_PRECISION_INTERNAL, MV_PRECISION_INT );
-      outputAffineMv[1].roundToPrecision( MV_PRECISION_INTERNAL, MV_PRECISION_INT );
-    }
+    outputAffineMv[0].roundAffinePrecInternal2Amvr(pu.cu->imv);
+    outputAffineMv[1].roundAffinePrecInternal2Amvr(pu.cu->imv);
     affiAMVPInfo.mvCandLT[affiAMVPInfo.numCand] = outputAffineMv[0];
     affiAMVPInfo.mvCandRT[affiAMVPInfo.numCand] = outputAffineMv[1];
     if ( pu.cu->affineType == AFFINEMODEL_6PARAM )
     {
-      if ( pu.cu->imv == 0 )
-      {
-        outputAffineMv[2].roundToPrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
-      }
-      else if ( pu.cu->imv == 2 )
-      {
-        outputAffineMv[2].roundToPrecision( MV_PRECISION_INTERNAL, MV_PRECISION_INT );
-      }
+      outputAffineMv[2].roundAffinePrecInternal2Amvr(pu.cu->imv);
       affiAMVPInfo.mvCandLB[affiAMVPInfo.numCand] = outputAffineMv[2];
     }
     affiAMVPInfo.numCand++;
@@ -2619,12 +1874,12 @@ void PU::xInheritedAffineMv( const PredictionUnit &pu, const PredictionUnit* puN
   int shift = MAX_CU_DEPTH;
   int iDMvHorX, iDMvHorY, iDMvVerX, iDMvVerY;
 
-  iDMvHorX = (mvRT - mvLT).getHor() << (shift - g_aucLog2[neiW]);
-  iDMvHorY = (mvRT - mvLT).getVer() << (shift - g_aucLog2[neiW]);
+  iDMvHorX = (mvRT - mvLT).getHor() << (shift - floorLog2(neiW));
+  iDMvHorY = (mvRT - mvLT).getVer() << (shift - floorLog2(neiW));
   if ( puNeighbour->cu->affineType == AFFINEMODEL_6PARAM && !isTopCtuBoundary )
   {
-    iDMvVerX = (mvLB - mvLT).getHor() << (shift - g_aucLog2[neiH]);
-    iDMvVerY = (mvLB - mvLT).getVer() << (shift - g_aucLog2[neiH]);
+    iDMvVerX = (mvLB - mvLT).getHor() << (shift - floorLog2(neiH));
+    iDMvVerY = (mvLB - mvLT).getVer() << (shift - floorLog2(neiH));
   }
   else
   {
@@ -2700,12 +1955,9 @@ void PU::fillAffineMvpCand(PredictionUnit &pu, const RefPicList &eRefPicList, co
   {
     for (int i = 0; i < affiAMVPInfo.numCand; i++)
     {
-      if ( pu.cu->imv != 1 )
-      {
-        affiAMVPInfo.mvCandLT[i].changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
-        affiAMVPInfo.mvCandRT[i].changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
-        affiAMVPInfo.mvCandLB[i].changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
-      }
+      affiAMVPInfo.mvCandLT[i].roundAffinePrecInternal2Amvr(pu.cu->imv);
+      affiAMVPInfo.mvCandRT[i].roundAffinePrecInternal2Amvr(pu.cu->imv);
+      affiAMVPInfo.mvCandLB[i].roundAffinePrecInternal2Amvr(pu.cu->imv);
     }
     return;
   }
@@ -2757,18 +2009,9 @@ void PU::fillAffineMvpCand(PredictionUnit &pu, const RefPicList &eRefPicList, co
   outputAffineMv[1] = amvpInfo1.mvCand[0];
   outputAffineMv[2] = amvpInfo2.mvCand[0];
 
-  if ( pu.cu->imv == 0 )
-  {
-    outputAffineMv[0].roundToPrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
-    outputAffineMv[1].roundToPrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
-    outputAffineMv[2].roundToPrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
-  }
-  else if ( pu.cu->imv == 2 )
-  {
-    outputAffineMv[0].roundToPrecision( MV_PRECISION_INTERNAL, MV_PRECISION_INT );
-    outputAffineMv[1].roundToPrecision( MV_PRECISION_INTERNAL, MV_PRECISION_INT );
-    outputAffineMv[2].roundToPrecision( MV_PRECISION_INTERNAL, MV_PRECISION_INT );
-  }
+  outputAffineMv[0].roundAffinePrecInternal2Amvr(pu.cu->imv);
+  outputAffineMv[1].roundAffinePrecInternal2Amvr(pu.cu->imv);
+  outputAffineMv[2].roundAffinePrecInternal2Amvr(pu.cu->imv);
 
   if ( cornerMVPattern == 7 || (cornerMVPattern == 3 && pu.cu->affineType == AFFINEMODEL_4PARAM) )
   {
@@ -2794,7 +2037,7 @@ void PU::fillAffineMvpCand(PredictionUnit &pu, const RefPicList &eRefPicList, co
     }
 
     // Get Temporal Motion Predictor
-    if ( affiAMVPInfo.numCand < 2 && pu.cs->slice->getEnableTMVPFlag() )
+    if ( affiAMVPInfo.numCand < 2 && pu.cs->picHeader->getEnableTMVPFlag() )
     {
       const int refIdxCol = refIdx;
 
@@ -2805,46 +2048,19 @@ void PU::fillAffineMvpCand(PredictionUnit &pu, const RefPicList &eRefPicList, co
       Position posC0;
       bool C0Avail = false;
       Position posC1 = pu.Y().center();
-      bool C1Avail =  ( posC1.x  < pcv.lumaWidth ) && ( posC1.y < pcv.lumaHeight ) ;
-
       Mv cColMv;
       if ( ((posRB.x + pcv.minCUWidth) < pcv.lumaWidth) && ((posRB.y + pcv.minCUHeight) < pcv.lumaHeight) )
       {
-        Position posInCtu( posRB.x & pcv.maxCUWidthMask, posRB.y & pcv.maxCUHeightMask );
-
-        if ( (posInCtu.x + 4 < pcv.maxCUWidth) &&           // is not at the last column of CTU
-          (posInCtu.y + 4 < pcv.maxCUHeight) )             // is not at the last row    of CTU
-        {
-          posC0 = posRB.offset( 4, 4 );
-          C0Avail = true;
-        }
-        else if ( posInCtu.x + 4 < pcv.maxCUWidth )           // is not at the last column of CTU But is last row of CTU
-        {
-          // in the reference the CTU address is not set - thus probably resulting in no using this C0 possibility
-          posC0 = posRB.offset( 4, 4 );
-        }
-        else if ( posInCtu.y + 4 < pcv.maxCUHeight )          // is not at the last row of CTU But is last column of CTU
+        int posYInCtu = posRB.y & pcv.maxCUHeightMask;
+        if (posYInCtu + 4 < pcv.maxCUHeight)
         {
-          posC0 = posRB.offset( 4, 4 );
+          posC0 = posRB.offset(4, 4);
           C0Avail = true;
         }
-        else //is the right bottom corner of CTU
-        {
-          // same as for last column but not last row
-          posC0 = posRB.offset( 4, 4 );
-        }
       }
-
-      if ( (C0Avail && getColocatedMVP( pu, eRefPicList, posC0, cColMv, refIdxCol )) || (C1Avail && getColocatedMVP( pu, eRefPicList, posC1, cColMv, refIdxCol ) ) )
+      if ( ( C0Avail && getColocatedMVP( pu, eRefPicList, posC0, cColMv, refIdxCol, false ) ) || getColocatedMVP( pu, eRefPicList, posC1, cColMv, refIdxCol, false ) )
       {
-        if ( pu.cu->imv == 0 )
-        {
-          cColMv.roundToPrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
-        }
-        else if ( pu.cu->imv == 2 )
-        {
-          cColMv.roundToPrecision( MV_PRECISION_INTERNAL, MV_PRECISION_INT );
-        }
+        cColMv.roundAffinePrecInternal2Amvr(pu.cu->imv);
         affiAMVPInfo.mvCandLT[affiAMVPInfo.numCand] = cColMv;
         affiAMVPInfo.mvCandRT[affiAMVPInfo.numCand] = cColMv;
         affiAMVPInfo.mvCandLB[affiAMVPInfo.numCand] = cColMv;
@@ -2866,127 +2082,20 @@ void PU::fillAffineMvpCand(PredictionUnit &pu, const RefPicList &eRefPicList, co
   }
 
   for (int i = 0; i < affiAMVPInfo.numCand; i++)
-  {
-    if ( pu.cu->imv != 1 )
-    {
-      affiAMVPInfo.mvCandLT[i].changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
-      affiAMVPInfo.mvCandRT[i].changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
-      affiAMVPInfo.mvCandLB[i].changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
-    }
-  }
-
-
-}
-
-bool PU::addIBCMVPCand(const PredictionUnit &pu, const Position &pos, const MvpDir &eDir, AMVPInfo &info)
-{
-  CodingStructure &cs = *pu.cs;
-  const PredictionUnit *neibPU = NULL;
-  Position neibPos;
-
-  switch (eDir)
-  {
-  case MD_LEFT:
-    neibPos = pos.offset(-1, 0);
-    break;
-  case MD_ABOVE:
-    neibPos = pos.offset(0, -1);
-    break;
-  case MD_ABOVE_RIGHT:
-    neibPos = pos.offset(1, -1);
-    break;
-  case MD_BELOW_LEFT:
-    neibPos = pos.offset(-1, 1);
-    break;
-  case MD_ABOVE_LEFT:
-    neibPos = pos.offset(-1, -1);
-    break;
-  default:
-    break;
-  }
-
-  neibPU = cs.getPURestricted(neibPos, pu, pu.chType);
-
-  if (neibPU == NULL || CU::isIBC(*neibPU->cu)==false)
-  {
-    return false;
-  }
-
-  const MotionInfo& neibMi = neibPU->getMotionInfo(neibPos);
-  info.mvCand[info.numCand++] = neibMi.mv[REF_PIC_LIST_0];
-  return true;
-}
-
-bool PU::addMVPCandUnscaled( const PredictionUnit &pu, const RefPicList &eRefPicList, const int &iRefIdx, const Position &pos, const MvpDir &eDir, AMVPInfo &info )
-{
-        CodingStructure &cs    = *pu.cs;
-  const PredictionUnit *neibPU = NULL;
-        Position neibPos;
-
-  switch (eDir)
-  {
-  case MD_LEFT:
-    neibPos = pos.offset( -1,  0 );
-    break;
-  case MD_ABOVE:
-    neibPos = pos.offset(  0, -1 );
-    break;
-  case MD_ABOVE_RIGHT:
-    neibPos = pos.offset(  1, -1 );
-    break;
-  case MD_BELOW_LEFT:
-    neibPos = pos.offset( -1,  1 );
-    break;
-  case MD_ABOVE_LEFT:
-    neibPos = pos.offset( -1, -1 );
-    break;
-  default:
-    break;
-  }
-
-  neibPU = cs.getPURestricted( neibPos, pu, pu.chType );
-
-  if( neibPU == NULL || !CU::isInter( *neibPU->cu ) )
-  {
-    return false;
-  }
-
-  const MotionInfo& neibMi        = neibPU->getMotionInfo( neibPos );
-
-  const int        currRefPOC     = cs.slice->getRefPic( eRefPicList, iRefIdx )->getPOC();
-  const RefPicList eRefPicList2nd = ( eRefPicList == REF_PIC_LIST_0 ) ? REF_PIC_LIST_1 : REF_PIC_LIST_0;
-
-  for( int predictorSource = 0; predictorSource < 2; predictorSource++ ) // examine the indicated reference picture list, then if not available, examine the other list.
-  {
-    const RefPicList eRefPicListIndex = ( predictorSource == 0 ) ? eRefPicList : eRefPicList2nd;
-    const int        neibRefIdx       = neibMi.refIdx[eRefPicListIndex];
-
-    if( neibRefIdx >= 0 && currRefPOC == cs.slice->getRefPOC( eRefPicListIndex, neibRefIdx ) )
-    {
-      info.mvCand[info.numCand++] = neibMi.mv[eRefPicListIndex];
-      return true;
-    }
-  }
-
-  return false;
-}
-
-/**
-* \param pInfo
-* \param eRefPicList
-* \param iRefIdx
-* \param uiPartUnitIdx
-* \param eDir
-* \returns bool
-*/
-bool PU::addMVPCandWithScaling( const PredictionUnit &pu, const RefPicList &eRefPicList, const int &iRefIdx, const Position &pos, const MvpDir &eDir, AMVPInfo &info )
+  {
+    affiAMVPInfo.mvCandLT[i].roundAffinePrecInternal2Amvr(pu.cu->imv);
+    affiAMVPInfo.mvCandRT[i].roundAffinePrecInternal2Amvr(pu.cu->imv);
+    affiAMVPInfo.mvCandLB[i].roundAffinePrecInternal2Amvr(pu.cu->imv);
+  }
+}
+
+bool PU::addMVPCandUnscaled( const PredictionUnit &pu, const RefPicList &eRefPicList, const int &iRefIdx, const Position &pos, const MvpDir &eDir, AMVPInfo &info )
 {
         CodingStructure &cs    = *pu.cs;
-  const Slice &slice           = *cs.slice;
   const PredictionUnit *neibPU = NULL;
         Position neibPos;
 
-  switch( eDir )
+  switch (eDir)
   {
   case MD_LEFT:
     neibPos = pos.offset( -1,  0 );
@@ -3009,53 +2118,33 @@ bool PU::addMVPCandWithScaling( const PredictionUnit &pu, const RefPicList &eRef
 
   neibPU = cs.getPURestricted( neibPos, pu, pu.chType );
 
-  if (neibPU == NULL || !CU::isInter(*neibPU->cu) || !CU::isInter(*pu.cu))
+  if( neibPU == NULL || !CU::isInter( *neibPU->cu ) )
   {
     return false;
   }
 
   const MotionInfo& neibMi        = neibPU->getMotionInfo( neibPos );
 
+  const int        currRefPOC     = cs.slice->getRefPic( eRefPicList, iRefIdx )->getPOC();
   const RefPicList eRefPicList2nd = ( eRefPicList == REF_PIC_LIST_0 ) ? REF_PIC_LIST_1 : REF_PIC_LIST_0;
 
-  const int  currPOC            = slice.getPOC();
-  const int  currRefPOC         = slice.getRefPic( eRefPicList, iRefIdx )->poc;
-  const bool bIsCurrRefLongTerm = slice.getRefPic( eRefPicList, iRefIdx )->longTerm;
-  const int  neibPOC            = currPOC;
-
   for( int predictorSource = 0; predictorSource < 2; predictorSource++ ) // examine the indicated reference picture list, then if not available, examine the other list.
   {
-    const RefPicList eRefPicListIndex = (predictorSource == 0) ? eRefPicList : eRefPicList2nd;
+    const RefPicList eRefPicListIndex = ( predictorSource == 0 ) ? eRefPicList : eRefPicList2nd;
     const int        neibRefIdx       = neibMi.refIdx[eRefPicListIndex];
-    if( neibRefIdx >= 0 )
-    {
-      const bool bIsNeibRefLongTerm = slice.getRefPic(eRefPicListIndex, neibRefIdx)->longTerm;
-
-      if (bIsCurrRefLongTerm == bIsNeibRefLongTerm)
-      {
-        Mv cMv = neibMi.mv[eRefPicListIndex];
-
-        if( !( bIsCurrRefLongTerm /* || bIsNeibRefLongTerm*/) )
-        {
-          const int neibRefPOC = slice.getRefPOC( eRefPicListIndex, neibRefIdx );
-          const int scale      = xGetDistScaleFactor( currPOC, currRefPOC, neibPOC, neibRefPOC );
-
-          if( scale != 4096 )
-          {
-            cMv = cMv.scaleMv( scale );
-          }
-        }
 
-        info.mvCand[info.numCand++] = cMv;
-        return true;
-      }
+    if( neibRefIdx >= 0 && currRefPOC == cs.slice->getRefPOC( eRefPicListIndex, neibRefIdx ) )
+    {
+      info.mvCand[info.numCand++] = neibMi.mv[eRefPicListIndex];
+      return true;
     }
   }
 
   return false;
 }
 
-void PU::addAMVPHMVPCand(const PredictionUnit &pu, const RefPicList eRefPicList, const RefPicList eRefPicList2nd, const int currRefPOC, AMVPInfo &info, uint8_t imv)
+
+void PU::addAMVPHMVPCand(const PredictionUnit &pu, const RefPicList eRefPicList, const int currRefPOC, AMVPInfo &info)
 {
   const Slice &slice = *(*pu.cs).slice;
 
@@ -3063,6 +2152,7 @@ void PU::addAMVPHMVPCand(const PredictionUnit &pu, const RefPicList eRefPicList,
   auto &lut = CU::isIBC(*pu.cu) ? pu.cs->motionLut.lutIbc : pu.cs->motionLut.lut;
   int num_avai_candInLUT = (int) lut.size();
   int num_allowedCand = std::min(MAX_NUM_HMVP_AVMPCANDS, num_avai_candInLUT);
+  const RefPicList eRefPicList2nd = (eRefPicList == REF_PIC_LIST_0) ? REF_PIC_LIST_1 : REF_PIC_LIST_0;
 
   for (int mrgIdx = 1; mrgIdx <= num_allowedCand; mrgIdx++)
   {
@@ -3080,7 +2170,7 @@ void PU::addAMVPHMVPCand(const PredictionUnit &pu, const RefPicList eRefPicList,
       if (neibRefIdx >= 0 && (CU::isIBC(*pu.cu) || (currRefPOC == slice.getRefPOC(eRefPicListIndex, neibRefIdx))))
       {
         Mv pmv = neibMi.mv[eRefPicListIndex];
-        pmv.roundToAmvrSignalPrecision(MV_PRECISION_INTERNAL, pu.cu->imv);
+        pmv.roundTransPrecInternal2Amvr(pu.cu->imv);
 
         info.mvCand[info.numCand++] = pmv;
         if (info.numCand >= AMVP_MAX_NUM_CANDS)
@@ -3098,16 +2188,21 @@ bool PU::isBipredRestriction(const PredictionUnit &pu)
   {
     return true;
   }
+  /* disable bi-prediction for 4x8/8x4 */
+  if ( pu.cu->lumaSize().width + pu.cu->lumaSize().height == 12 )
+  {
+    return true;
+  }
   return false;
 }
 
-void PU::getAffineControlPointCand( const PredictionUnit &pu, MotionInfo mi[4], bool isAvailable[4], int verIdx[4], int modelIdx, int verNum, AffineMergeCtx& affMrgType )
+void PU::getAffineControlPointCand(const PredictionUnit &pu, MotionInfo mi[4], bool isAvailable[4], int verIdx[4], int8_t bcwIdx, int modelIdx, int verNum, AffineMergeCtx& affMrgType)
 {
   int cuW = pu.Y().width;
   int cuH = pu.Y().height;
   int vx, vy;
   int shift = MAX_CU_DEPTH;
-  int shiftHtoW = shift + g_aucLog2[cuW] - g_aucLog2[cuH];
+  int shiftHtoW = shift + floorLog2(cuW) - floorLog2(cuH);
 
   // motion info
   Mv cMv[2][4];
@@ -3135,6 +2230,7 @@ void PU::getAffineControlPointCand( const PredictionUnit &pu, MotionInfo mi[4],
         }
       }
     }
+
   }
   else if ( verNum == 3 )
   {
@@ -3156,6 +2252,7 @@ void PU::getAffineControlPointCand( const PredictionUnit &pu, MotionInfo mi[4],
         }
       }
     }
+
   }
 
   if ( dir == 0 )
@@ -3182,16 +2279,19 @@ void PU::getAffineControlPointCand( const PredictionUnit &pu, MotionInfo mi[4],
       case 1: // 1 : LT, RT, RB
         cMv[l][2].hor = cMv[l][3].hor + cMv[l][0].hor - cMv[l][1].hor;
         cMv[l][2].ver = cMv[l][3].ver + cMv[l][0].ver - cMv[l][1].ver;
+        cMv[l][2].clipToStorageBitDepth();
         break;
 
       case 2: // 2 : LT, LB, RB
         cMv[l][1].hor = cMv[l][3].hor + cMv[l][0].hor - cMv[l][2].hor;
         cMv[l][1].ver = cMv[l][3].ver + cMv[l][0].ver - cMv[l][2].ver;
+        cMv[l][1].clipToStorageBitDepth();
         break;
 
       case 3: // 3 : RT, LB, RB
         cMv[l][0].hor = cMv[l][1].hor + cMv[l][2].hor - cMv[l][3].hor;
         cMv[l][0].ver = cMv[l][1].ver + cMv[l][2].ver - cMv[l][3].ver;
+        cMv[l][0].clipToStorageBitDepth();
         break;
 
       case 4: // 4 : LT, RT
@@ -3202,6 +2302,7 @@ void PU::getAffineControlPointCand( const PredictionUnit &pu, MotionInfo mi[4],
         vy = (cMv[l][0].ver << shift) - ((cMv[l][2].hor - cMv[l][0].hor) << shiftHtoW);
         roundAffineMv( vx, vy, shift );
         cMv[l][1].set( vx, vy );
+        cMv[l][1].clipToStorageBitDepth();
         break;
 
       default:
@@ -3229,6 +2330,7 @@ void PU::getAffineControlPointCand( const PredictionUnit &pu, MotionInfo mi[4],
   }
   affMrgType.interDirNeighbours[affMrgType.numValidMergeCand] = dir;
   affMrgType.affineType[affMrgType.numValidMergeCand] = curType;
+  affMrgType.BcwIdx[affMrgType.numValidMergeCand] = (dir == 3) ? bcwIdx : BCW_DEFAULT;
   affMrgType.numValidMergeCand++;
 
 
@@ -3301,7 +2403,7 @@ void PU::getAffineMergeCand( const PredictionUnit &pu, AffineMergeCtx& affMrgCtx
 {
   const CodingStructure &cs = *pu.cs;
   const Slice &slice = *pu.cs->slice;
-  const uint32_t maxNumAffineMergeCand = slice.getMaxNumAffineMergeCand();
+  const uint32_t maxNumAffineMergeCand = slice.getPicHeader()->getMaxNumAffineMergeCand();
 
   for ( int i = 0; i < maxNumAffineMergeCand; i++ )
   {
@@ -3313,7 +2415,7 @@ void PU::getAffineMergeCand( const PredictionUnit &pu, AffineMergeCtx& affMrgCtx
     affMrgCtx.interDirNeighbours[i] = 0;
     affMrgCtx.affineType[i] = AFFINEMODEL_4PARAM;
     affMrgCtx.mergeType[i] = MRG_TYPE_DEFAULT_N;
-    affMrgCtx.GBiIdx[i] = GBI_DEFAULT;
+    affMrgCtx.BcwIdx[i] = BCW_DEFAULT;
   }
 
   affMrgCtx.numValidMergeCand = 0;
@@ -3321,7 +2423,7 @@ void PU::getAffineMergeCand( const PredictionUnit &pu, AffineMergeCtx& affMrgCtx
 
   bool enableSubPuMvp = slice.getSPS()->getSBTMVPEnabledFlag() && !(slice.getPOC() == slice.getRefPic(REF_PIC_LIST_0, 0)->getPOC() && slice.isIRAP());
   bool isAvailableSubPu = false;
-  if ( enableSubPuMvp && slice.getEnableTMVPFlag() )
+  if ( enableSubPuMvp && slice.getPicHeader()->getEnableTMVPFlag() )
   {
     MergeCtx mrgCtx = *affMrgCtx.mrgCtx;
     bool tmpLICFlag = false;
@@ -3415,7 +2517,7 @@ void PU::getAffineMergeCand( const PredictionUnit &pu, AffineMergeCtx& affMrgCtx
       }
       affMrgCtx.interDirNeighbours[affMrgCtx.numValidMergeCand] = puNeigh->interDir;
       affMrgCtx.affineType[affMrgCtx.numValidMergeCand] = (EAffineModel)(puNeigh->cu->affineType);
-      affMrgCtx.GBiIdx[affMrgCtx.numValidMergeCand] = puNeigh->cu->GBiIdx;
+      affMrgCtx.BcwIdx[affMrgCtx.numValidMergeCand] = puNeigh->cu->BcwIdx;
 
       if ( affMrgCtx.numValidMergeCand == mrgCandIdx )
       {
@@ -3436,6 +2538,7 @@ void PU::getAffineMergeCand( const PredictionUnit &pu, AffineMergeCtx& affMrgCtx
       MotionInfo mi[4];
       bool isAvailable[4] = { false };
 
+      int8_t neighBcw[2] = { BCW_DEFAULT, BCW_DEFAULT };
       // control point: LT B2->B3->A2
       const Position posLT[3] = { pu.Y().topLeft().offset( -1, -1 ), pu.Y().topLeft().offset( 0, -1 ), pu.Y().topLeft().offset( -1, 0 ) };
       for ( int i = 0; i < 3; i++ )
@@ -3448,6 +2551,7 @@ void PU::getAffineMergeCand( const PredictionUnit &pu, AffineMergeCtx& affMrgCtx
         {
           isAvailable[0] = true;
           mi[0] = puNeigh->getMotionInfo( pos );
+          neighBcw[0] = puNeigh->cu->BcwIdx;
           break;
         }
       }
@@ -3465,6 +2569,7 @@ void PU::getAffineMergeCand( const PredictionUnit &pu, AffineMergeCtx& affMrgCtx
         {
           isAvailable[1] = true;
           mi[1] = puNeigh->getMotionInfo( pos );
+          neighBcw[1] = puNeigh->cu->BcwIdx;
           break;
         }
       }
@@ -3487,7 +2592,7 @@ void PU::getAffineMergeCand( const PredictionUnit &pu, AffineMergeCtx& affMrgCtx
       }
 
       // control point: RB
-      if ( slice.getEnableTMVPFlag() )
+      if ( slice.getPicHeader()->getEnableTMVPFlag() )
       {
         //>> MTK colocated-RightBottom
         // offset the pos to be sure to "point" to the same position the uiAbsPartIdx would've pointed to
@@ -3499,34 +2604,17 @@ void PU::getAffineMergeCand( const PredictionUnit &pu, AffineMergeCtx& affMrgCtx
 
         if ( ((posRB.x + pcv.minCUWidth) < pcv.lumaWidth) && ((posRB.y + pcv.minCUHeight) < pcv.lumaHeight) )
         {
-          Position posInCtu( posRB.x & pcv.maxCUWidthMask, posRB.y & pcv.maxCUHeightMask );
-
-          if ( (posInCtu.x + 4 < pcv.maxCUWidth) &&  // is not at the last column of CTU
-            (posInCtu.y + 4 < pcv.maxCUHeight) )     // is not at the last row    of CTU
+          int posYInCtu = posRB.y & pcv.maxCUHeightMask;
+          if (posYInCtu + 4 < pcv.maxCUHeight)
           {
-            posC0 = posRB.offset( 4, 4 );
+            posC0 = posRB.offset(4, 4);
             C0Avail = true;
           }
-          else if ( posInCtu.x + 4 < pcv.maxCUWidth ) // is not at the last column of CTU But is last row of CTU
-          {
-            posC0 = posRB.offset( 4, 4 );
-            // in the reference the CTU address is not set - thus probably resulting in no using this C0 possibility
-          }
-          else if ( posInCtu.y + 4 < pcv.maxCUHeight ) // is not at the last row of CTU But is last column of CTU
-          {
-            posC0 = posRB.offset( 4, 4 );
-            C0Avail = true;
-          }
-          else //is the right bottom corner of CTU
-          {
-            posC0 = posRB.offset( 4, 4 );
-            // same as for last column but not last row
-          }
         }
 
         Mv        cColMv;
         int       refIdx = 0;
-        bool      bExistMV = C0Avail && getColocatedMVP( pu, REF_PIC_LIST_0, posC0, cColMv, refIdx );
+        bool      bExistMV = C0Avail && getColocatedMVP( pu, REF_PIC_LIST_0, posC0, cColMv, refIdx, false );
         if ( bExistMV )
         {
           mi[3].mv[0] = cColMv;
@@ -3537,7 +2625,7 @@ void PU::getAffineMergeCand( const PredictionUnit &pu, AffineMergeCtx& affMrgCtx
 
         if ( slice.isInterB() )
         {
-          bExistMV = C0Avail && getColocatedMVP( pu, REF_PIC_LIST_1, posC0, cColMv, refIdx );
+          bExistMV = C0Avail && getColocatedMVP( pu, REF_PIC_LIST_1, posC0, cColMv, refIdx, false );
           if ( bExistMV )
           {
             mi[3].mv[1] = cColMv;
@@ -3565,7 +2653,7 @@ void PU::getAffineMergeCand( const PredictionUnit &pu, AffineMergeCtx& affMrgCtx
       for ( int idx = startIdx; idx < modelNum; idx++ )
       {
         int modelIdx = order[idx];
-        getAffineControlPointCand( pu, mi, isAvailable, model[modelIdx], modelIdx, verNum[modelIdx], affMrgCtx );
+        getAffineControlPointCand(pu, mi, isAvailable, model[modelIdx], ((modelIdx == 3) ? neighBcw[1] : neighBcw[0]), modelIdx, verNum[modelIdx], affMrgCtx);
         if ( affMrgCtx.numValidMergeCand != 0 && affMrgCtx.numValidMergeCand - 1 == mrgCandIdx )
         {
           return;
@@ -3625,24 +2713,27 @@ void PU::setAllAffineMvField( PredictionUnit &pu, MvField *mvField, RefPicList e
   pu.refIdx[eRefList] = mvField[0].refIdx;
 }
 
-void PU::setAllAffineMv( PredictionUnit& pu, Mv affLT, Mv affRT, Mv affLB, RefPicList eRefList, bool setHighPrec)
+void PU::setAllAffineMv(PredictionUnit& pu, Mv affLT, Mv affRT, Mv affLB, RefPicList eRefList, bool clipCPMVs)
 {
   int width  = pu.Y().width;
   int shift = MAX_CU_DEPTH;
-  if (setHighPrec)
+  if (clipCPMVs)
   {
-    affLT.changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);
-    affRT.changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);
-    affLB.changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);
+    affLT.mvCliptoStorageBitDepth();
+    affRT.mvCliptoStorageBitDepth();
+    if (pu.cu->affineType == AFFINEMODEL_6PARAM)
+    {
+      affLB.mvCliptoStorageBitDepth();
+    }
   }
   int deltaMvHorX, deltaMvHorY, deltaMvVerX, deltaMvVerY;
-  deltaMvHorX = (affRT - affLT).getHor() << (shift - g_aucLog2[width]);
-  deltaMvHorY = (affRT - affLT).getVer() << (shift - g_aucLog2[width]);
+  deltaMvHorX = (affRT - affLT).getHor() << (shift - floorLog2(width));
+  deltaMvHorY = (affRT - affLT).getVer() << (shift - floorLog2(width));
   int height = pu.Y().height;
   if ( pu.cu->affineType == AFFINEMODEL_6PARAM )
   {
-    deltaMvVerX = (affLB - affLT).getHor() << (shift - g_aucLog2[height]);
-    deltaMvVerY = (affLB - affLT).getVer() << (shift - g_aucLog2[height]);
+    deltaMvVerX = (affLB - affLT).getHor() << (shift - floorLog2(height));
+    deltaMvVerY = (affLB - affLT).getVer() << (shift - floorLog2(height));
   }
   else
   {
@@ -3660,12 +2751,22 @@ void PU::setAllAffineMv( PredictionUnit& pu, Mv affLT, Mv affRT, Mv affLB, RefPi
 
   MotionBuf mb = pu.getMotionBuf();
   int mvScaleTmpHor, mvScaleTmpVer;
+  const bool subblkMVSpreadOverLimit = InterPrediction::isSubblockVectorSpreadOverLimit( deltaMvHorX, deltaMvHorY, deltaMvVerX, deltaMvVerY, pu.interDir );
   for ( int h = 0; h < pu.Y().height; h += blockHeight )
   {
     for ( int w = 0; w < pu.Y().width; w += blockWidth )
     {
-      mvScaleTmpHor = mvScaleHor + deltaMvHorX * (halfBW + w) + deltaMvVerX * (halfBH + h);
-      mvScaleTmpVer = mvScaleVer + deltaMvHorY * (halfBW + w) + deltaMvVerY * (halfBH + h);
+      if ( !subblkMVSpreadOverLimit )
+      {
+        mvScaleTmpHor = mvScaleHor + deltaMvHorX * (halfBW + w) + deltaMvVerX * (halfBH + h);
+        mvScaleTmpVer = mvScaleVer + deltaMvHorY * (halfBW + w) + deltaMvVerY * (halfBH + h);
+
+      }
+      else
+      {
+        mvScaleTmpHor = mvScaleHor + deltaMvHorX * ( pu.Y().width >> 1 ) + deltaMvVerX * ( pu.Y().height >> 1 );
+        mvScaleTmpVer = mvScaleVer + deltaMvHorY * ( pu.Y().width >> 1 ) + deltaMvVerY * ( pu.Y().height >> 1 );
+      }
       roundAffineMv( mvScaleTmpHor, mvScaleTmpVer, shift );
       Mv curMv(mvScaleTmpHor, mvScaleTmpVer);
       curMv.clipToStorageBitDepth();
@@ -3685,93 +2786,15 @@ void PU::setAllAffineMv( PredictionUnit& pu, Mv affLT, Mv affRT, Mv affLB, RefPi
   pu.mvAffi[eRefList][2] = affLB;
 }
 
-static bool deriveScaledMotionTemporal( const Slice&      slice,
-                                        const Position&   colPos,
-                                        const Picture*    pColPic,
-                                        const RefPicList  eCurrRefPicList,
-                                        Mv&         cColMv,
-                                        const RefPicList  eFetchRefPicList)
-{
-  const MotionInfo &mi = pColPic->cs->getMotionInfo(colPos);
-  const Slice *pColSlice = nullptr;
-
-  for (const auto &pSlice : pColPic->slices)
-  {
-    if (pSlice->getIndependentSliceIdx() == mi.sliceIdx)
-    {
-      pColSlice = pSlice;
-      break;
-    }
-  }
-
-  CHECK(pColSlice == nullptr, "Couldn't find the colocated slice");
-
-  int iColPOC, iColRefPOC, iCurrPOC, iCurrRefPOC, iScale;
-  bool bAllowMirrorMV = true;
-  RefPicList eColRefPicList = slice.getCheckLDC() ? eCurrRefPicList : RefPicList(1 - eFetchRefPicList);
-  if (pColPic == slice.getRefPic(RefPicList(slice.isInterB() ? 1 - slice.getColFromL0Flag() : 0), slice.getColRefIdx()))
-  {
-    eColRefPicList = eCurrRefPicList;   //67 -> disable, 64 -> enable
-    bAllowMirrorMV = false;
-  }
-
-  // Although it might make sense to keep the unavailable motion field per direction still be unavailable, I made the MV prediction the same way as in TMVP
-  // So there is an interaction between MV0 and MV1 of the corresponding blocks identified by TV.
-
-  // Grab motion and do necessary scaling.{{
-  iCurrPOC = slice.getPOC();
-
-  int iColRefIdx = mi.refIdx[eColRefPicList];
-
-  if (iColRefIdx < 0 && (slice.getCheckLDC() || bAllowMirrorMV))
-  {
-    eColRefPicList = RefPicList(1 - eColRefPicList);
-    iColRefIdx = mi.refIdx[eColRefPicList];
-
-    if (iColRefIdx < 0)
-    {
-      return false;
-    }
-  }
-
-  if (iColRefIdx >= 0 && slice.getNumRefIdx(eCurrRefPicList) > 0)
-  {
-    iColPOC = pColSlice->getPOC();
-    iColRefPOC = pColSlice->getRefPOC(eColRefPicList, iColRefIdx);
-    if (iColPOC == iColRefPOC)
-      return false;
-    ///////////////////////////////////////////////////////////////
-    // Set the target reference index to 0, may be changed later //
-    ///////////////////////////////////////////////////////////////
-    iCurrRefPOC = slice.getRefPic(eCurrRefPicList, 0)->getPOC();
-    // Scale the vector.
-    cColMv = mi.mv[eColRefPicList];
-    cColMv.setHor(roundMvComp(cColMv.getHor()));
-    cColMv.setVer(roundMvComp(cColMv.getVer()));
-    //pcMvFieldSP[2*iPartition + eCurrRefPicList].getMv();
-    // Assume always short-term for now
-    iScale = xGetDistScaleFactor(iCurrPOC, iCurrRefPOC, iColPOC, iColRefPOC);
-
-    if (iScale != 4096)
-    {
-
-      cColMv = cColMv.scaleMv(iScale);
-    }
-
-    return true;
-  }
-  return false;
-}
-
 void clipColPos(int& posX, int& posY, const PredictionUnit& pu)
 {
   Position puPos = pu.lumaPos();
-  int log2CtuSize = g_aucLog2[pu.cs->sps->getCTUSize()];
+  int log2CtuSize = floorLog2(pu.cs->sps->getCTUSize());
   int ctuX = ((puPos.x >> log2CtuSize) << log2CtuSize);
   int ctuY = ((puPos.y >> log2CtuSize) << log2CtuSize);
-  int horMax = std::min((int)pu.cs->sps->getPicWidthInLumaSamples() - 1, ctuX + (int)pu.cs->sps->getCTUSize() + 3);
+  int horMax = std::min( (int)pu.cs->pps->getPicWidthInLumaSamples() - 1, ctuX + (int)pu.cs->sps->getCTUSize() + 3 );
   int horMin = std::max((int)0, ctuX);
-  int verMax = std::min((int)pu.cs->sps->getPicHeightInLumaSamples() - 1, ctuY + (int)pu.cs->sps->getCTUSize() - 1);
+  int verMax = std::min( (int)pu.cs->pps->getPicHeightInLumaSamples() - 1, ctuY + (int)pu.cs->sps->getCTUSize() - 1 );
   int verMin = std::max((int)0, ctuY);
 
   posX = std::min(horMax, std::max(horMin, posX));
@@ -3788,29 +2811,22 @@ bool PU::getInterMergeSubPuMvpCand(const PredictionUnit &pu, MergeCtx& mrgCtx, b
 
   const Picture *pColPic = slice.getRefPic(RefPicList(slice.isInterB() ? 1 - slice.getColFromL0Flag() : 0), slice.getColRefIdx());
   Mv cTMv;
-  RefPicList fetchRefPicList = RefPicList(slice.isInterB() ? 1 - slice.getColFromL0Flag() : 0);
 
-  bool terminate = false;
-  for (unsigned currRefListId = 0; currRefListId < (slice.getSliceType() == B_SLICE ? 2 : 1) && !terminate; currRefListId++)
+  if ( count )
   {
-    if ( count )
+    if ( (mrgCtx.interDirNeighbours[0] & (1 << REF_PIC_LIST_0)) && slice.getRefPic( REF_PIC_LIST_0, mrgCtx.mvFieldNeighbours[REF_PIC_LIST_0].refIdx ) == pColPic )
     {
-      RefPicList currRefPicList = RefPicList(slice.getCheckLDC() ? (slice.getColFromL0Flag() ? currRefListId : 1 - currRefListId) : currRefListId);
-
-      if ((mrgCtx.interDirNeighbours[0] & (1 << currRefPicList)) && slice.getRefPic(currRefPicList, mrgCtx.mvFieldNeighbours[0 * 2 + currRefPicList].refIdx) == pColPic)
-      {
-        cTMv = mrgCtx.mvFieldNeighbours[0 * 2 + currRefPicList].mv;
-        terminate = true;
-        fetchRefPicList = currRefPicList;
-        break;
-      }
+      cTMv = mrgCtx.mvFieldNeighbours[REF_PIC_LIST_0].mv;
+    }
+    else if ( slice.isInterB() && (mrgCtx.interDirNeighbours[0] & (1 << REF_PIC_LIST_1)) && slice.getRefPic( REF_PIC_LIST_1, mrgCtx.mvFieldNeighbours[REF_PIC_LIST_1].refIdx ) == pColPic )
+    {
+      cTMv = mrgCtx.mvFieldNeighbours[REF_PIC_LIST_1].mv;
     }
   }
 
   ///////////////////////////////////////////////////////////////////////
   ////////          GET Initial Temporal Vector                  ////////
   ///////////////////////////////////////////////////////////////////////
-  int mvPrec = MV_FRACTIONAL_BITS_INTERNAL;
 
   Mv cTempVector = cTMv;
   bool  tempLICFlag = false;
@@ -3824,6 +2840,7 @@ bool PU::getInterMergeSubPuMvpCand(const PredictionUnit &pu, MergeCtx& mrgCtx, b
   int puWidth = numPartLine == 1 ? puSize.width : 1 << ATMVP_SUB_BLOCK_SIZE;
 
   Mv cColMv;
+  int refIdx = 0;
   // use coldir.
   bool     bBSlice = slice.isInterB();
 
@@ -3831,8 +2848,10 @@ bool PU::getInterMergeSubPuMvpCand(const PredictionUnit &pu, MergeCtx& mrgCtx, b
 
   bool found = false;
   cTempVector = cTMv;
-  int tempX = cTempVector.getHor() >> mvPrec;
-  int tempY = cTempVector.getVer() >> mvPrec;
+
+  cTempVector.changePrecision(MV_PRECISION_SIXTEENTH, MV_PRECISION_INT);
+  int tempX = cTempVector.getHor();
+  int tempY = cTempVector.getVer();
 
   centerPos.x = puPos.x + (puSize.width >> 1) + tempX;
   centerPos.y = puPos.y + (puSize.height >> 1) + tempY;
@@ -3852,13 +2871,13 @@ bool PU::getInterMergeSubPuMvpCand(const PredictionUnit &pu, MergeCtx& mrgCtx, b
     {
       RefPicList  currRefPicList = RefPicList(currRefListId);
 
-      if (deriveScaledMotionTemporal(slice, centerPos, pColPic, currRefPicList, cColMv, fetchRefPicList))
+      if (getColocatedMVP(pu, currRefPicList, centerPos, cColMv, refIdx, true))
       {
         // set as default, for further motion vector field spanning
         mrgCtx.mvFieldNeighbours[(count << 1) + currRefListId].setMvField(cColMv, 0);
         mrgCtx.interDirNeighbours[count] |= (1 << currRefListId);
         LICFlag = tempLICFlag;
-        mrgCtx.GBiIdx[count] = GBI_DEFAULT;
+        mrgCtx.BcwIdx[count] = BCW_DEFAULT;
         found = true;
       }
       else
@@ -3905,7 +2924,7 @@ bool PU::getInterMergeSubPuMvpCand(const PredictionUnit &pu, MergeCtx& mrgCtx, b
         for (unsigned currRefListId = 0; currRefListId < (bBSlice ? 2 : 1); currRefListId++)
         {
           RefPicList currRefPicList = RefPicList(currRefListId);
-          if (deriveScaledMotionTemporal(slice, colPos, pColPic, currRefPicList, cColMv, fetchRefPicList))
+          if (getColocatedMVP(pu, currRefPicList, colPos, cColMv, refIdx, true))
           {
             mi.refIdx[currRefListId] = 0;
             mi.mv[currRefListId] = cColMv;
@@ -3935,7 +2954,7 @@ bool PU::getInterMergeSubPuMvpCand(const PredictionUnit &pu, MergeCtx& mrgCtx, b
     }
   }
   return true;
-  }
+}
 
 void PU::spanMotionInfo( PredictionUnit &pu, const MergeCtx &mrgCtx )
 {
@@ -3954,6 +2973,7 @@ void PU::spanMotionInfo( PredictionUnit &pu, const MergeCtx &mrgCtx )
     if( mi.isInter )
     {
       mi.interDir = pu.interDir;
+      mi.useAltHpelIf = pu.cu->imv == IMV_HPEL;
 
       for( int i = 0; i < NUM_REF_PIC_LIST_01; i++ )
       {
@@ -4026,469 +3046,147 @@ void PU::applyImv( PredictionUnit& pu, MergeCtx &mrgCtx, InterPrediction *interP
   {
     if( pu.interDir != 2 /* PRED_L1 */ )
     {
-      pu.mvd[0].changePrecisionAmvr( pu.cu->imv, MV_PRECISION_QUARTER);
-      unsigned mvp_idx = pu.mvpIdx[0];
-      AMVPInfo amvpInfo;
-      if (CU::isIBC(*pu.cu))
-      {
-        PU::fillIBCMvpCand(pu, amvpInfo);
-      }
-      else
-      PU::fillMvpCand(pu, REF_PIC_LIST_0, pu.refIdx[0], amvpInfo);
-      pu.mvpNum[0] = amvpInfo.numCand;
-      pu.mvpIdx[0] = mvp_idx;
-      pu.mv    [0] = amvpInfo.mvCand[mvp_idx] + pu.mvd[0];
-      pu.mv[0].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);
-    }
-
-    if (pu.interDir != 1 /* PRED_L0 */)
-    {
-      if( !( pu.cu->cs->slice->getMvdL1ZeroFlag() && pu.interDir == 3 ) && pu.cu->imv )/* PRED_BI */
-      {
-        pu.mvd[1].changePrecisionAmvr(pu.cu->imv, MV_PRECISION_QUARTER);
-      }
-      unsigned mvp_idx = pu.mvpIdx[1];
-      AMVPInfo amvpInfo;
-      PU::fillMvpCand(pu, REF_PIC_LIST_1, pu.refIdx[1], amvpInfo);
-      pu.mvpNum[1] = amvpInfo.numCand;
-      pu.mvpIdx[1] = mvp_idx;
-      pu.mv    [1] = amvpInfo.mvCand[mvp_idx] + pu.mvd[1];
-      pu.mv[1].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);
-    }
-  }
-  else
-  {
-    // this function is never called for merge
-    THROW("unexpected");
-    PU::getInterMergeCandidates ( pu, mrgCtx
-      , 0
-    );
-
-    mrgCtx.setMergeInfo( pu, pu.mergeIdx );
-  }
-
-  PU::spanMotionInfo( pu, mrgCtx );
-}
-
-bool PU::isBiPredFromDifferentDir( const PredictionUnit& pu )
-{
-  if ( pu.refIdx[0] >= 0 && pu.refIdx[1] >= 0 )
-  {
-    const int iPOC0 = pu.cu->slice->getRefPOC( REF_PIC_LIST_0, pu.refIdx[0] );
-    const int iPOC1 = pu.cu->slice->getRefPOC( REF_PIC_LIST_1, pu.refIdx[1] );
-    const int iPOC  = pu.cu->slice->getPOC();
-    if ( (iPOC - iPOC0)*(iPOC - iPOC1) < 0 )
-    {
-      return true;
-    }
-  }
-
-  return false;
-}
-bool PU::isBiPredFromDifferentDirEqDistPoc(const PredictionUnit& pu)
-{
-  if (pu.refIdx[0] >= 0 && pu.refIdx[1] >= 0)
-  {
-    const int poc0 = pu.cu->slice->getRefPOC(REF_PIC_LIST_0, pu.refIdx[0]);
-    const int poc1 = pu.cu->slice->getRefPOC(REF_PIC_LIST_1, pu.refIdx[1]);
-    const int poc = pu.cu->slice->getPOC();
-    if ((poc - poc0)*(poc - poc1) < 0)
-    {
-      if (abs(poc - poc0) == abs(poc - poc1))
-      {
-        return true;
-      }
-    }
-  }
-  return false;
-}
-void PU::restrictBiPredMergeCands( const PredictionUnit &pu, MergeCtx& mergeCtx )
-{
-  if( PU::isBipredRestriction( pu ) )
-  {
-    for( uint32_t mergeCand = 0; mergeCand < mergeCtx.numValidMergeCand; ++mergeCand )
-    {
-      if( mergeCtx.interDirNeighbours[ mergeCand ] == 3 )
-      {
-        mergeCtx.interDirNeighbours[ mergeCand ] = 1;
-        mergeCtx.mvFieldNeighbours[( mergeCand << 1 ) + 1].setMvField( Mv( 0, 0 ), -1 );
-        mergeCtx.GBiIdx[mergeCand] = GBI_DEFAULT;
-      }
-    }
-  }
-}
-
-void PU::restrictBiPredMergeCandsOne(PredictionUnit &pu)
-{
-  if (PU::isBipredRestriction(pu))
-  {
-    if (pu.interDir == 3)
-    {
-      pu.interDir = 1;
-      pu.refIdx[1] = -1;
-      pu.mv[1] = Mv(0, 0);
-      pu.cu->GBiIdx = GBI_DEFAULT;
-    }
-  }
-}
-
-void PU::getTriangleMergeCandidates( const PredictionUnit &pu, MergeCtx& triangleMrgCtx )
-{
-  const CodingStructure &cs  = *pu.cs;
-  const Slice &slice         = *pu.cs->slice;
-  const int32_t maxNumMergeCand = TRIANGLE_MAX_NUM_UNI_CANDS;
-  triangleMrgCtx.numValidMergeCand = 0;
-
-  for( int32_t i = 0; i < maxNumMergeCand; i++ )
-  {
-    triangleMrgCtx.interDirNeighbours[i] = 0;
-    triangleMrgCtx.mrgTypeNeighbours [i] = MRG_TYPE_DEFAULT_N;
-    triangleMrgCtx.mvFieldNeighbours[(i << 1)    ].refIdx = NOT_VALID;
-    triangleMrgCtx.mvFieldNeighbours[(i << 1) + 1].refIdx = NOT_VALID;
-    triangleMrgCtx.mvFieldNeighbours[(i << 1)    ].mv = Mv();
-    triangleMrgCtx.mvFieldNeighbours[(i << 1) + 1].mv = Mv();
-  }
-
-  MotionInfo candidate[TRIANGLE_MAX_NUM_CANDS_MEM];
-  int32_t candCount = 0;
-
-  const Position posLT = pu.Y().topLeft();
-  const Position posRT = pu.Y().topRight();
-  const Position posLB = pu.Y().bottomLeft();
-
-  MotionInfo miAbove, miLeft, miAboveLeft, miAboveRight, miBelowLeft;
-
-  //left
-  const PredictionUnit* puLeft = cs.getPURestricted( posLB.offset( -1, 0 ), pu, pu.chType );
-  const bool isAvailableA1 = puLeft && isDiffMER( pu, *puLeft ) && pu.cu != puLeft->cu && CU::isInter( *puLeft->cu )
-    ;
-  if( isAvailableA1 )
-  {
-    miLeft = puLeft->getMotionInfo( posLB.offset(-1, 0) );
-    candidate[candCount].isInter   = true;
-    candidate[candCount].interDir  = miLeft.interDir;
-    candidate[candCount].mv[0]     = miLeft.mv[0];
-    candidate[candCount].mv[1]     = miLeft.mv[1];
-    candidate[candCount].refIdx[0] = miLeft.refIdx[0];
-    candidate[candCount].refIdx[1] = miLeft.refIdx[1];
-    candCount++;
-  }
-
-  // above
-  const PredictionUnit *puAbove = cs.getPURestricted( posRT.offset( 0, -1 ), pu, pu.chType );
-  bool isAvailableB1 = puAbove && isDiffMER( pu, *puAbove ) && pu.cu != puAbove->cu && CU::isInter( *puAbove->cu )
-    ;
-  if( isAvailableB1 )
-  {
-    miAbove = puAbove->getMotionInfo( posRT.offset( 0, -1 ) );
-
-    if( !isAvailableA1 || ( miAbove != miLeft ) )
-    {
-      candidate[candCount].isInter   = true;
-      candidate[candCount].interDir  = miAbove.interDir;
-      candidate[candCount].mv[0]     = miAbove.mv[0];
-      candidate[candCount].mv[1]     = miAbove.mv[1];
-      candidate[candCount].refIdx[0] = miAbove.refIdx[0];
-      candidate[candCount].refIdx[1] = miAbove.refIdx[1];
-      candCount++;
-    }
-  }
-
-  // above right
-  const PredictionUnit *puAboveRight = cs.getPURestricted( posRT.offset( 1, -1 ), pu, pu.chType );
-  bool isAvailableB0 = puAboveRight && isDiffMER( pu, *puAboveRight ) && CU::isInter( *puAboveRight->cu )
-    ;
-
-  if( isAvailableB0 )
-  {
-    miAboveRight = puAboveRight->getMotionInfo( posRT.offset( 1, -1 ) );
-
-    if( ( !isAvailableB1 || ( miAbove != miAboveRight ) ) && ( !isAvailableA1 || ( miLeft != miAboveRight ) ) )
-    {
-      candidate[candCount].isInter   = true;
-      candidate[candCount].interDir  = miAboveRight.interDir;
-      candidate[candCount].mv[0]     = miAboveRight.mv[0];
-      candidate[candCount].mv[1]     = miAboveRight.mv[1];
-      candidate[candCount].refIdx[0] = miAboveRight.refIdx[0];
-      candidate[candCount].refIdx[1] = miAboveRight.refIdx[1];
-      candCount++;
-    }
-  }
-
-  //left bottom
-  const PredictionUnit *puLeftBottom = cs.getPURestricted( posLB.offset( -1, 1 ), pu, pu.chType );
-  bool isAvailableA0 = puLeftBottom && isDiffMER( pu, *puLeftBottom ) && CU::isInter( *puLeftBottom->cu )
-    ;
-  if( isAvailableA0 )
-  {
-    miBelowLeft = puLeftBottom->getMotionInfo( posLB.offset( -1, 1 ) );
-
-    if( ( !isAvailableA1 || ( miBelowLeft != miLeft ) ) && ( !isAvailableB1 || ( miBelowLeft != miAbove ) ) && ( !isAvailableB0 || ( miBelowLeft != miAboveRight ) ) )
-    {
-      candidate[candCount].isInter   = true;
-      candidate[candCount].interDir  = miBelowLeft.interDir;
-      candidate[candCount].mv[0]     = miBelowLeft.mv[0];
-      candidate[candCount].mv[1]     = miBelowLeft.mv[1];
-      candidate[candCount].refIdx[0] = miBelowLeft.refIdx[0];
-      candidate[candCount].refIdx[1] = miBelowLeft.refIdx[1];
-      candCount++;
-    }
-  }
-
-  // above left
-  const PredictionUnit *puAboveLeft = cs.getPURestricted( posLT.offset( -1, -1 ), pu, pu.chType );
-  bool isAvailableB2 = puAboveLeft && isDiffMER( pu, *puAboveLeft ) && CU::isInter( *puAboveLeft->cu )
-    ;
-
-  if( isAvailableB2 )
-  {
-    miAboveLeft = puAboveLeft->getMotionInfo( posLT.offset( -1, -1 ) );
-
-    if( ( !isAvailableA1 || ( miLeft != miAboveLeft ) ) && ( !isAvailableB1 || ( miAbove != miAboveLeft ) ) && ( !isAvailableA0 || ( miBelowLeft != miAboveLeft ) ) && ( !isAvailableB0 || ( miAboveRight != miAboveLeft ) ) )
-    {
-      candidate[candCount].isInter   = true;
-      candidate[candCount].interDir  = miAboveLeft.interDir;
-      candidate[candCount].mv[0]     = miAboveLeft.mv[0];
-      candidate[candCount].mv[1]     = miAboveLeft.mv[1];
-      candidate[candCount].refIdx[0] = miAboveLeft.refIdx[0];
-      candidate[candCount].refIdx[1] = miAboveLeft.refIdx[1];
-      candCount++;
-    }
-  }
-
-  if( slice.getEnableTMVPFlag() )
-  {
-    Position posRB = pu.Y().bottomRight().offset(-3, -3);
-
-    const PreCalcValues& pcv = *cs.pcv;
-
-    Position posC0;
-    Position posC1 = pu.Y().center();
-    bool isAvailableC0 = false;
-    bool isAvailableC1 = (posC1.x < pcv.lumaWidth) && (posC1.y < pcv.lumaHeight);
-
-    if (((posRB.x + pcv.minCUWidth) < pcv.lumaWidth) && ((posRB.y + pcv.minCUHeight) < pcv.lumaHeight))
-    {
-      Position posInCtu( posRB.x & pcv.maxCUWidthMask, posRB.y & pcv.maxCUHeightMask );
-
-      if( ( posInCtu.x + 4 < pcv.maxCUWidth ) &&           // is not at the last column of CTU
-          ( posInCtu.y + 4 < pcv.maxCUHeight ) )           // is not at the last row    of CTU
-      {
-        posC0 = posRB.offset( 4, 4 );
-        isAvailableC0 = true;
-      }
-      else if( posInCtu.x + 4 < pcv.maxCUWidth )           // is not at the last column of CTU But is last row of CTU
-      {
-        posC0 = posRB.offset( 4, 4 );
-        // in the reference the CTU address is not set - thus probably resulting in no using this C0 possibility
-      }
-      else if( posInCtu.y + 4 < pcv.maxCUHeight )          // is not at the last row of CTU But is last column of CTU
-      {
-        posC0 = posRB.offset( 4, 4 );
-        isAvailableC0 = true;
-      }
-      else //is the right bottom corner of CTU
+      pu.mvd[0].changeTransPrecAmvr2Internal(pu.cu->imv);
+      unsigned mvp_idx = pu.mvpIdx[0];
+      AMVPInfo amvpInfo;
+      if (CU::isIBC(*pu.cu))
       {
-        posC0 = posRB.offset( 4, 4 );
-        // same as for last column but not last row
+        PU::fillIBCMvpCand(pu, amvpInfo);
       }
+      else
+      PU::fillMvpCand(pu, REF_PIC_LIST_0, pu.refIdx[0], amvpInfo);
+      pu.mvpNum[0] = amvpInfo.numCand;
+      pu.mvpIdx[0] = mvp_idx;
+      pu.mv    [0] = amvpInfo.mvCand[mvp_idx] + pu.mvd[0];
+      pu.mv[0].mvCliptoStorageBitDepth();
     }
 
-    // C0
-    Mv        cColMv;
-    int32_t   refIdx     = 0;
-    bool      existMV    = ( isAvailableC0 && getColocatedMVP( pu, REF_PIC_LIST_0, posC0, cColMv, refIdx ) );
-    MotionInfo temporalMv;
-    temporalMv.interDir  = 0;
-    if( existMV )
-    {
-      temporalMv.isInter   = true;
-      temporalMv.interDir |= 1;
-      temporalMv.mv[0]     = cColMv;
-      temporalMv.refIdx[0] = refIdx;
-    }
-    existMV = ( isAvailableC0 && getColocatedMVP( pu, REF_PIC_LIST_1, posC0, cColMv, refIdx ) );
-    if( existMV )
+    if (pu.interDir != 1 /* PRED_L0 */)
     {
-      temporalMv.interDir |= 2;
-      temporalMv.mv[1]     = cColMv;
-      temporalMv.refIdx[1] = refIdx;
+      if( !( pu.cu->cs->picHeader->getMvdL1ZeroFlag() && pu.interDir == 3 ) && pu.cu->imv )/* PRED_BI */
+      {
+        pu.mvd[1].changeTransPrecAmvr2Internal(pu.cu->imv);
+      }
+      unsigned mvp_idx = pu.mvpIdx[1];
+      AMVPInfo amvpInfo;
+      PU::fillMvpCand(pu, REF_PIC_LIST_1, pu.refIdx[1], amvpInfo);
+      pu.mvpNum[1] = amvpInfo.numCand;
+      pu.mvpIdx[1] = mvp_idx;
+      pu.mv    [1] = amvpInfo.mvCand[mvp_idx] + pu.mvd[1];
+      pu.mv[1].mvCliptoStorageBitDepth();
     }
+  }
+  else
+  {
+    // this function is never called for merge
+    THROW("unexpected");
+    PU::getInterMergeCandidates ( pu, mrgCtx
+      , 0
+    );
 
-    if( temporalMv.interDir != 0 )
-    {
-      candidate[candCount].isInter   = true;
-      candidate[candCount].interDir  = temporalMv.interDir;
-      candidate[candCount].mv[0]     = temporalMv.mv[0];
-      candidate[candCount].mv[1]     = temporalMv.mv[1];
-      candidate[candCount].refIdx[0] = temporalMv.refIdx[0];
-      candidate[candCount].refIdx[1] = temporalMv.refIdx[1];
-      candCount++;
-    }
+    mrgCtx.setMergeInfo( pu, pu.mergeIdx );
+  }
+
+  PU::spanMotionInfo( pu, mrgCtx );
+}
 
-    // C1
-    temporalMv.interDir = 0;
-    existMV    = isAvailableC1 && getColocatedMVP(pu, REF_PIC_LIST_0, posC1, cColMv, refIdx );
-    if( existMV )
-    {
-      temporalMv.isInter   = true;
-      temporalMv.interDir |= 1;
-      temporalMv.mv[0]     = cColMv;
-      temporalMv.refIdx[0] = refIdx;
-    }
-    existMV    = isAvailableC1 && getColocatedMVP(pu, REF_PIC_LIST_1, posC1, cColMv, refIdx );
-    if( existMV )
-    {
-      temporalMv.interDir |= 2;
-      temporalMv.mv[1]     = cColMv;
-      temporalMv.refIdx[1] = refIdx;
-    }
 
-    if( temporalMv.interDir != 0 )
+bool PU::isBiPredFromDifferentDirEqDistPoc(const PredictionUnit& pu)
+{
+  if (pu.refIdx[0] >= 0 && pu.refIdx[1] >= 0)
+  {
+    if (pu.cu->slice->getRefPic(REF_PIC_LIST_0, pu.refIdx[0])->longTerm
+      || pu.cu->slice->getRefPic(REF_PIC_LIST_1, pu.refIdx[1])->longTerm)
     {
-      candidate[candCount].isInter   = true;
-      candidate[candCount].interDir  = temporalMv.interDir;
-      candidate[candCount].mv[0]     = temporalMv.mv[0];
-      candidate[candCount].mv[1]     = temporalMv.mv[1];
-      candidate[candCount].refIdx[0] = temporalMv.refIdx[0];
-      candidate[candCount].refIdx[1] = temporalMv.refIdx[1];
-      candCount++;
+      return false;
     }
-  }
-  // put uni-prediction candidate to the triangle candidate list
-  for( int32_t i = 0; i < candCount; i++ )
-  {
-    if( candidate[i].interDir != 3 )
+    const int poc0 = pu.cu->slice->getRefPOC(REF_PIC_LIST_0, pu.refIdx[0]);
+    const int poc1 = pu.cu->slice->getRefPOC(REF_PIC_LIST_1, pu.refIdx[1]);
+    const int poc = pu.cu->slice->getPOC();
+    if ((poc - poc0)*(poc - poc1) < 0)
     {
-      triangleMrgCtx.interDirNeighbours[triangleMrgCtx.numValidMergeCand] = candidate[i].interDir;
-      triangleMrgCtx.mrgTypeNeighbours [triangleMrgCtx.numValidMergeCand] = MRG_TYPE_DEFAULT_N;
-      triangleMrgCtx.mvFieldNeighbours [(triangleMrgCtx.numValidMergeCand << 1)    ].mv = candidate[i].mv[0];
-      triangleMrgCtx.mvFieldNeighbours [(triangleMrgCtx.numValidMergeCand << 1) + 1].mv = candidate[i].mv[1];
-      triangleMrgCtx.mvFieldNeighbours [(triangleMrgCtx.numValidMergeCand << 1)    ].refIdx = candidate[i].refIdx[0];
-      triangleMrgCtx.mvFieldNeighbours [(triangleMrgCtx.numValidMergeCand << 1) + 1].refIdx = candidate[i].refIdx[1];
-      triangleMrgCtx.numValidMergeCand += isUniqueTriangleCandidates(pu, triangleMrgCtx);
-      if( triangleMrgCtx.numValidMergeCand == TRIANGLE_MAX_NUM_UNI_CANDS )
+      if (abs(poc - poc0) == abs(poc - poc1))
       {
-        return;
+        return true;
       }
     }
   }
+  return false;
+}
 
-  // put L0 mv of bi-prediction candidate to the triangle candidate list
-  for( int32_t i = 0; i < candCount; i++ )
+void PU::restrictBiPredMergeCandsOne(PredictionUnit &pu)  // turns a bi-predicted PU into list-0 uni-prediction when PU::isBipredRestriction(pu) holds
+{
+  if (PU::isBipredRestriction(pu))  // the PU is left untouched unless the restriction check is true
   {
-    if( candidate[i].interDir == 3 )
+    if (pu.interDir == 3)  // 3 == bi-prediction (PRED_BI); uni-predicted PUs need no change
     {
-      triangleMrgCtx.interDirNeighbours[triangleMrgCtx.numValidMergeCand] = 1;
-      triangleMrgCtx.mrgTypeNeighbours [triangleMrgCtx.numValidMergeCand] = MRG_TYPE_DEFAULT_N;
-      triangleMrgCtx.mvFieldNeighbours [(triangleMrgCtx.numValidMergeCand << 1)    ].mv = candidate[i].mv[0];
-      triangleMrgCtx.mvFieldNeighbours [(triangleMrgCtx.numValidMergeCand << 1) + 1].mv = Mv(0, 0);
-      triangleMrgCtx.mvFieldNeighbours [(triangleMrgCtx.numValidMergeCand << 1)    ].refIdx = candidate[i].refIdx[0];
-      triangleMrgCtx.mvFieldNeighbours [(triangleMrgCtx.numValidMergeCand << 1) + 1].refIdx = -1;
-      triangleMrgCtx.numValidMergeCand += isUniqueTriangleCandidates(pu, triangleMrgCtx);
-      if( triangleMrgCtx.numValidMergeCand == TRIANGLE_MAX_NUM_UNI_CANDS )
-      {
-        return;
-      }
+      pu.interDir = 1;  // 1 == list 0 only (PRED_L0)
+      pu.refIdx[1] = -1;  // list-1 reference index cleared
+      pu.mv[1] = Mv(0, 0);  // list-1 motion vector zeroed
+      pu.cu->BcwIdx = BCW_DEFAULT;  // weight index on the owning CU goes back to its default
     }
   }
+}
+
+void PU::getTriangleMergeCandidates( const PredictionUnit &pu, MergeCtx& triangleMrgCtx )  // fills triangleMrgCtx with uni-prediction candidates taken from the regular merge list
+{
+  MergeCtx tmpMergeCtx;  // receives the regular merge candidates that are converted below
+
+  const Slice &slice = *pu.cs->slice;
+  const uint32_t maxNumMergeCand = slice.getPicHeader()->getMaxNumMergeCand();  // number of regular merge candidates scanned below
 
-  // put L1 mv of bi-prediction candidate to the triangle candidate list
-  for( int32_t i = 0; i < candCount; i++ )
+  triangleMrgCtx.numValidMergeCand = 0;  // output list is rebuilt from scratch
+
+  for (int32_t i = 0; i < TRIANGLE_MAX_NUM_UNI_CANDS; i++)  // reset every output slot to an empty default
   {
-    if( candidate[i].interDir == 3 )
-    {
-      triangleMrgCtx.interDirNeighbours[triangleMrgCtx.numValidMergeCand] = 2;
-      triangleMrgCtx.mrgTypeNeighbours [triangleMrgCtx.numValidMergeCand] = MRG_TYPE_DEFAULT_N;
-      triangleMrgCtx.mvFieldNeighbours [(triangleMrgCtx.numValidMergeCand << 1)    ].mv = Mv(0, 0);
-      triangleMrgCtx.mvFieldNeighbours [(triangleMrgCtx.numValidMergeCand << 1) + 1].mv = candidate[i].mv[1];
-      triangleMrgCtx.mvFieldNeighbours [(triangleMrgCtx.numValidMergeCand << 1)    ].refIdx = -1;
-      triangleMrgCtx.mvFieldNeighbours [(triangleMrgCtx.numValidMergeCand << 1) + 1].refIdx = candidate[i].refIdx[1];
-      triangleMrgCtx.numValidMergeCand += isUniqueTriangleCandidates(pu, triangleMrgCtx);
-      if( triangleMrgCtx.numValidMergeCand == TRIANGLE_MAX_NUM_UNI_CANDS )
-      {
-        return;
-      }
-    }
+    triangleMrgCtx.BcwIdx[i] = BCW_DEFAULT;
+    triangleMrgCtx.interDirNeighbours[i] = 0;
+    triangleMrgCtx.mrgTypeNeighbours[i] = MRG_TYPE_DEFAULT_N;
+    triangleMrgCtx.mvFieldNeighbours[(i << 1)].refIdx = NOT_VALID;  // entry 2*i holds the list-0 field of slot i
+    triangleMrgCtx.mvFieldNeighbours[(i << 1) + 1].refIdx = NOT_VALID;  // entry 2*i+1 holds the list-1 field of slot i
+    triangleMrgCtx.mvFieldNeighbours[(i << 1)].mv = Mv();
+    triangleMrgCtx.mvFieldNeighbours[(i << 1) + 1].mv = Mv();
+    triangleMrgCtx.useAltHpelIf[i] = false;
   }
 
-  // put average of L0 and L1 mvs of bi-prediction candidate to the triangle candidate list
-  for( int32_t i = 0; i < candCount; i++ )
+  PU::getInterMergeCandidates(pu, tmpMergeCtx, 0);  // gather the regular merge candidates into tmpMergeCtx
+
+  for (int32_t i = 0; i < maxNumMergeCand; i++)  // NOTE(review): signed i is compared against the unsigned maxNumMergeCand
   {
-    if( candidate[i].interDir == 3 )
+    int parity = i & 1;  // even i tries list 0 first, odd i tries list 1 first
+    if (tmpMergeCtx.interDirNeighbours[i] & (0x01 + parity))  // candidate i has motion in the preferred list
     {
-      int32_t curPicPoc   = slice.getPOC();
-      int32_t refPicPocL0 = slice.getRefPOC(REF_PIC_LIST_0, candidate[i].refIdx[0]);
-      int32_t refPicPocL1 = slice.getRefPOC(REF_PIC_LIST_1, candidate[i].refIdx[1]);
-      Mv aveMv = candidate[i].mv[1];
-      int32_t distscale = xGetDistScaleFactor( curPicPoc, refPicPocL0, curPicPoc, refPicPocL1 );
-      if( distscale != 4096 )
-      {
-        aveMv = aveMv.scaleMv( distscale ); // scaling to L0
-      }
-      aveMv = aveMv + candidate[i].mv[0];
-      roundAffineMv(aveMv.hor, aveMv.ver, 1);
-      triangleMrgCtx.interDirNeighbours[triangleMrgCtx.numValidMergeCand] = 1;
-      triangleMrgCtx.mrgTypeNeighbours [triangleMrgCtx.numValidMergeCand] = MRG_TYPE_DEFAULT_N;
-      triangleMrgCtx.mvFieldNeighbours [(triangleMrgCtx.numValidMergeCand << 1)    ].mv = aveMv;
-      triangleMrgCtx.mvFieldNeighbours [(triangleMrgCtx.numValidMergeCand << 1) + 1].mv = Mv(0, 0);
-      triangleMrgCtx.mvFieldNeighbours [(triangleMrgCtx.numValidMergeCand << 1)    ].refIdx = candidate[i].refIdx[0];
-      triangleMrgCtx.mvFieldNeighbours [(triangleMrgCtx.numValidMergeCand << 1) + 1].refIdx = -1;
-      triangleMrgCtx.numValidMergeCand += isUniqueTriangleCandidates(pu, triangleMrgCtx);
-      if( triangleMrgCtx.numValidMergeCand == TRIANGLE_MAX_NUM_UNI_CANDS )
+      triangleMrgCtx.interDirNeighbours[triangleMrgCtx.numValidMergeCand] = 1 + parity;  // 1 = list 0 only, 2 = list 1 only
+      triangleMrgCtx.mrgTypeNeighbours[triangleMrgCtx.numValidMergeCand] = MRG_TYPE_DEFAULT_N;
+      triangleMrgCtx.mvFieldNeighbours[(triangleMrgCtx.numValidMergeCand << 1) + !parity].mv = Mv(0, 0);  // the list not taken gets a zero mv
+      triangleMrgCtx.mvFieldNeighbours[(triangleMrgCtx.numValidMergeCand << 1) + parity].mv = tmpMergeCtx.mvFieldNeighbours[(i << 1) + parity].mv;
+      triangleMrgCtx.mvFieldNeighbours[(triangleMrgCtx.numValidMergeCand << 1) + !parity].refIdx = -1;  // ...and an unused reference index
+      triangleMrgCtx.mvFieldNeighbours[(triangleMrgCtx.numValidMergeCand << 1) + parity].refIdx = tmpMergeCtx.mvFieldNeighbours[(i << 1) + parity].refIdx;
+      triangleMrgCtx.numValidMergeCand++;
+      if (triangleMrgCtx.numValidMergeCand == TRIANGLE_MAX_NUM_UNI_CANDS)  // output list is full
       {
         return;
       }
+      continue;  // preferred list was used; the fallback below is skipped for this candidate
     }
-  }
 
-  // fill with Mv(0, 0)
-  int32_t numRefIdx = std::min( slice.getNumRefIdx(REF_PIC_LIST_0), slice.getNumRefIdx(REF_PIC_LIST_1) );
-  int32_t cnt = 0;
-  while( triangleMrgCtx.numValidMergeCand < TRIANGLE_MAX_NUM_UNI_CANDS )
-  {
-    if( cnt < numRefIdx )
+    if (tmpMergeCtx.interDirNeighbours[i] & (0x02 - parity))  // fallback: candidate only has motion in the other list
     {
-      triangleMrgCtx.interDirNeighbours[triangleMrgCtx.numValidMergeCand] = 1;
-      triangleMrgCtx.mvFieldNeighbours[triangleMrgCtx.numValidMergeCand << 1].setMvField(Mv(0, 0), cnt);
-      triangleMrgCtx.mvFieldNeighbours[(triangleMrgCtx.numValidMergeCand << 1) + 1].refIdx = NOT_VALID;
-      triangleMrgCtx.mvFieldNeighbours[(triangleMrgCtx.numValidMergeCand << 1) + 1].mv = Mv();
+      triangleMrgCtx.interDirNeighbours[triangleMrgCtx.numValidMergeCand] = 2 - parity;  // direction of the other list
+      triangleMrgCtx.mrgTypeNeighbours[triangleMrgCtx.numValidMergeCand] = MRG_TYPE_DEFAULT_N;
+      triangleMrgCtx.mvFieldNeighbours[(triangleMrgCtx.numValidMergeCand << 1) + !parity].mv = tmpMergeCtx.mvFieldNeighbours[(i << 1) + !parity].mv;
+      triangleMrgCtx.mvFieldNeighbours[(triangleMrgCtx.numValidMergeCand << 1) + parity].mv = Mv(0, 0);  // preferred list stays empty
+      triangleMrgCtx.mvFieldNeighbours[(triangleMrgCtx.numValidMergeCand << 1) + !parity].refIdx = tmpMergeCtx.mvFieldNeighbours[(i << 1) + !parity].refIdx;
+      triangleMrgCtx.mvFieldNeighbours[(triangleMrgCtx.numValidMergeCand << 1) + parity].refIdx = -1;
       triangleMrgCtx.numValidMergeCand++;
-
-      if( triangleMrgCtx.numValidMergeCand == TRIANGLE_MAX_NUM_UNI_CANDS )
+      if (triangleMrgCtx.numValidMergeCand == TRIANGLE_MAX_NUM_UNI_CANDS)  // output list is full
       {
         return;
       }
-
-      triangleMrgCtx.interDirNeighbours[triangleMrgCtx.numValidMergeCand] = 2;
-      triangleMrgCtx.mvFieldNeighbours [(triangleMrgCtx.numValidMergeCand << 1) + 1 ].setMvField(Mv(0, 0), cnt);
-      triangleMrgCtx.mvFieldNeighbours[triangleMrgCtx.numValidMergeCand << 1].refIdx = NOT_VALID;
-      triangleMrgCtx.mvFieldNeighbours[triangleMrgCtx.numValidMergeCand << 1].mv = Mv();
-      triangleMrgCtx.numValidMergeCand++;
-
-      cnt = (cnt + 1) % numRefIdx;
-    }
-  }
-}
-
-bool PU::isUniqueTriangleCandidates( const PredictionUnit &pu, MergeCtx& triangleMrgCtx )
-{
-  int newCand = triangleMrgCtx.numValidMergeCand;
-  for( int32_t i = 0; i < newCand; i++ )
-  {
-    int32_t predFlagCur  = triangleMrgCtx.interDirNeighbours[i] == 1 ? 0 : 1;
-    int32_t predFlagNew  = triangleMrgCtx.interDirNeighbours[newCand] == 1 ? 0 : 1;
-    int32_t refPicPocCur = pu.cs->slice->getRefPOC( (RefPicList)predFlagCur, triangleMrgCtx.mvFieldNeighbours[(i << 1) + predFlagCur].refIdx );
-    int32_t refPicPocNew = pu.cs->slice->getRefPOC( (RefPicList)predFlagNew, triangleMrgCtx.mvFieldNeighbours[(newCand << 1) + predFlagNew].refIdx);
-    if( refPicPocCur == refPicPocNew && triangleMrgCtx.mvFieldNeighbours[(i << 1) + predFlagCur].mv == triangleMrgCtx.mvFieldNeighbours[(newCand << 1) + predFlagNew].mv )
-    {
-      return false;
     }
   }
-  return true;
 }
 
-
 void PU::spanTriangleMotionInfo( PredictionUnit &pu, MergeCtx &triangleMrgCtx, const bool splitDir, const uint8_t candIdx0, const uint8_t candIdx1 )
 {
   pu.triangleSplitDir = splitDir;
@@ -4518,49 +3216,23 @@ void PU::spanTriangleMotionInfo( PredictionUnit &pu, MergeCtx &triangleMrgCtx, c
   }
   else if( triangleMrgCtx.interDirNeighbours[candIdx0] == 1 && triangleMrgCtx.interDirNeighbours[candIdx1] == 1 )
   {
-    int32_t refIdx = mappingRefPic( pu, pu.cs->slice->getRefPOC( REF_PIC_LIST_0, triangleMrgCtx.mvFieldNeighbours[candIdx1 << 1].refIdx ), REF_PIC_LIST_1 );
-    if( refIdx != -1 )
-    {
-      biMv.interDir  = 3;
-      biMv.mv[0]     = triangleMrgCtx.mvFieldNeighbours[candIdx0 << 1].mv;
-      biMv.mv[1]     = triangleMrgCtx.mvFieldNeighbours[candIdx1 << 1].mv;
-      biMv.refIdx[0] = triangleMrgCtx.mvFieldNeighbours[candIdx0 << 1].refIdx;
-      biMv.refIdx[1] = refIdx;
-    }
-    else
-    {
-      refIdx = mappingRefPic( pu, pu.cs->slice->getRefPOC( REF_PIC_LIST_0, triangleMrgCtx.mvFieldNeighbours[candIdx0 << 1].refIdx), REF_PIC_LIST_1 );
-      biMv.interDir  = ( refIdx != -1 ) ? 3 : 1;
-      biMv.mv[0]     = ( refIdx != -1 ) ? triangleMrgCtx.mvFieldNeighbours[candIdx1 << 1].mv : triangleMrgCtx.mvFieldNeighbours[candIdx0 << 1].mv;
-      biMv.mv[1]     = ( refIdx != -1 ) ? triangleMrgCtx.mvFieldNeighbours[candIdx0 << 1].mv : Mv(0, 0);
-      biMv.refIdx[0] = ( refIdx != -1 ) ? triangleMrgCtx.mvFieldNeighbours[candIdx1 << 1].refIdx : triangleMrgCtx.mvFieldNeighbours[candIdx0 << 1].refIdx;
-      biMv.refIdx[1] = ( refIdx != -1 ) ? refIdx : -1;
-    }
+    biMv.interDir = 1;
+    biMv.mv[0] = triangleMrgCtx.mvFieldNeighbours[candIdx1 << 1].mv;
+    biMv.mv[1] = Mv(0, 0);
+    biMv.refIdx[0] = triangleMrgCtx.mvFieldNeighbours[candIdx1 << 1].refIdx;
+    biMv.refIdx[1] = -1;
   }
   else if( triangleMrgCtx.interDirNeighbours[candIdx0] == 2 && triangleMrgCtx.interDirNeighbours[candIdx1] == 2 )
   {
-    int32_t refIdx = mappingRefPic( pu, pu.cs->slice->getRefPOC( REF_PIC_LIST_1, triangleMrgCtx.mvFieldNeighbours[(candIdx1 << 1) + 1].refIdx ), REF_PIC_LIST_0 );
-    if( refIdx != -1 )
-    {
-      biMv.interDir  = 3;
-      biMv.mv[0]     = triangleMrgCtx.mvFieldNeighbours[(candIdx1 << 1) + 1].mv;
-      biMv.mv[1]     = triangleMrgCtx.mvFieldNeighbours[(candIdx0 << 1) + 1].mv;
-      biMv.refIdx[0] = refIdx;
-      biMv.refIdx[1] = triangleMrgCtx.mvFieldNeighbours[(candIdx0 << 1) + 1].refIdx;
-    }
-    else
-    {
-      refIdx = mappingRefPic( pu, pu.cs->slice->getRefPOC( REF_PIC_LIST_1, triangleMrgCtx.mvFieldNeighbours[(candIdx0 << 1) + 1].refIdx ), REF_PIC_LIST_0 );
-      biMv.interDir  = ( refIdx != -1 ) ? 3 : 2;
-      biMv.mv[0]     = ( refIdx != -1 ) ? triangleMrgCtx.mvFieldNeighbours[(candIdx0 << 1) + 1].mv : Mv(0, 0);
-      biMv.mv[1]     = ( refIdx != -1 ) ? triangleMrgCtx.mvFieldNeighbours[(candIdx1 << 1) + 1].mv : triangleMrgCtx.mvFieldNeighbours[(candIdx0 << 1) + 1].mv;
-      biMv.refIdx[0] = ( refIdx != -1 ) ? refIdx : -1;
-      biMv.refIdx[1] = ( refIdx != -1 ) ? triangleMrgCtx.mvFieldNeighbours[(candIdx1 << 1) + 1].refIdx : triangleMrgCtx.mvFieldNeighbours[(candIdx0 << 1) + 1].refIdx;
-    }
+    biMv.interDir = 2;
+    biMv.mv[0] = Mv(0, 0);
+    biMv.mv[1] = triangleMrgCtx.mvFieldNeighbours[(candIdx1 << 1) + 1].mv;
+    biMv.refIdx[0] = -1;
+    biMv.refIdx[1] = triangleMrgCtx.mvFieldNeighbours[(candIdx1 << 1) + 1].refIdx;
   }
 
-  int32_t idxW  = (int32_t)(g_aucLog2[pu.lwidth() ] - MIN_CU_LOG2);
-  int32_t idxH  = (int32_t)(g_aucLog2[pu.lheight()] - MIN_CU_LOG2);
+  int32_t idxW  = (int32_t)(floorLog2(pu.lwidth() ) - MIN_CU_LOG2);
+  int32_t idxH  = (int32_t)(floorLog2(pu.lheight()) - MIN_CU_LOG2);
   for( int32_t y = 0; y < mb.height; y++ )
   {
     for( int32_t x = 0; x < mb.width; x++ )
@@ -4613,69 +3285,6 @@ int32_t PU::mappingRefPic( const PredictionUnit &pu, int32_t refPicPoc, bool tar
   return -1;
 }
 
-void CU::resetMVDandMV2Int( CodingUnit& cu, InterPrediction *interPred )
-{
-  for( auto &pu : CU::traversePUs( cu ) )
-  {
-    MergeCtx mrgCtx;
-
-    if( !pu.mergeFlag )
-    {
-      if( pu.interDir != 2 /* PRED_L1 */ )
-      {
-        Mv mv        = pu.mv[0];
-        Mv mvPred;
-        AMVPInfo amvpInfo;
-        if (CU::isIBC(*pu.cu))
-          PU::fillIBCMvpCand(pu, amvpInfo);
-        else
-        PU::fillMvpCand(pu, REF_PIC_LIST_0, pu.refIdx[0], amvpInfo);
-        pu.mvpNum[0] = amvpInfo.numCand;
-
-        mvPred       = amvpInfo.mvCand[pu.mvpIdx[0]];
-        mv.roundToAmvrSignalPrecision(MV_PRECISION_QUARTER, cu.imv);
-        pu.mv[0]     = mv;
-        Mv mvDiff    = mv - mvPred;
-        pu.mvd[0]    = mvDiff;
-      }
-      if( pu.interDir != 1 /* PRED_L0 */ )
-      {
-        Mv mv        = pu.mv[1];
-        Mv mvPred;
-        AMVPInfo amvpInfo;
-        PU::fillMvpCand(pu, REF_PIC_LIST_1, pu.refIdx[1], amvpInfo);
-        pu.mvpNum[1] = amvpInfo.numCand;
-
-        mvPred       = amvpInfo.mvCand[pu.mvpIdx[1]];
-        mv.roundToAmvrSignalPrecision(MV_PRECISION_QUARTER, cu.imv);
-        Mv mvDiff    = mv - mvPred;
-
-        if( pu.cu->cs->slice->getMvdL1ZeroFlag() && pu.interDir == 3 /* PRED_BI */ )
-        {
-          pu.mvd[1] = Mv();
-          mv = mvPred;
-        }
-        else
-        {
-          pu.mvd[1] = mvDiff;
-        }
-        pu.mv[1] = mv;
-      }
-
-    }
-    else
-    {
-        PU::getInterMergeCandidates ( pu, mrgCtx
-          , 0
-        );
-
-        mrgCtx.setMergeInfo( pu, pu.mergeIdx );
-    }
-
-    PU::spanMotionInfo( pu, mrgCtx );
-  }
-}
-
 bool CU::hasSubCUNonZeroMVd( const CodingUnit& cu )
 {
   bool bNonZeroMvd = false;
@@ -4691,7 +3300,7 @@ bool CU::hasSubCUNonZeroMVd( const CodingUnit& cu )
       }
       if( pu.interDir != 1 /* PRED_L0 */ )
       {
-        if( !pu.cu->cs->slice->getMvdL1ZeroFlag() || pu.interDir != 3 /* PRED_BI */ )
+        if( !pu.cu->cs->picHeader->getMvdL1ZeroFlag() || pu.interDir != 3 /* PRED_BI */ )
         {
           bNonZeroMvd |= pu.mvd[REF_PIC_LIST_1].getHor() != 0;
           bNonZeroMvd |= pu.mvd[REF_PIC_LIST_1].getVer() != 0;
@@ -4727,7 +3336,7 @@ bool CU::hasSubCUNonZeroAffineMVd( const CodingUnit& cu )
 
       if ( pu.interDir != 1 /* PRED_L0 */ )
       {
-        if ( !pu.cu->cs->slice->getMvdL1ZeroFlag() || pu.interDir != 3 /* PRED_BI */ )
+        if ( !pu.cu->cs->picHeader->getMvdL1ZeroFlag() || pu.interDir != 3 /* PRED_BI */ )
         {
           for ( int i = 0; i < ( cu.affineType == AFFINEMODEL_6PARAM ? 3 : 2 ); i++ )
           {
@@ -4742,30 +3351,6 @@ bool CU::hasSubCUNonZeroAffineMVd( const CodingUnit& cu )
   return nonZeroAffineMvd;
 }
 
-int CU::getMaxNeighboriMVCandNum( const CodingStructure& cs, const Position& pos )
-{
-  const int  numDefault     = 0;
-  int        maxImvNumCand  = 0;
-
-  // Get BCBP of left PU
-#if HEVC_TILES_WPP
-  const CodingUnit *cuLeft  = cs.getCURestricted( pos.offset( -1, 0 ), cs.slice->getIndependentSliceIdx(), cs.picture->tileMap->getTileIdxMap( pos ), CH_L );
-#else
-  const CodingUnit *cuLeft  = cs.getCURestricted( pos.offset( -1, 0 ), cs.slice->getIndependentSliceIdx(), CH_L );
-#endif
-  maxImvNumCand = ( cuLeft ) ? cuLeft->imvNumCand : numDefault;
-
-  // Get BCBP of above PU
-#if HEVC_TILES_WPP
-  const CodingUnit *cuAbove = cs.getCURestricted( pos.offset( 0, -1 ), cs.slice->getIndependentSliceIdx(), cs.picture->tileMap->getTileIdxMap( pos ), CH_L );
-#else
-  const CodingUnit *cuAbove = cs.getCURestricted( pos.offset( 0, -1 ), cs.slice->getIndependentSliceIdx(), CH_L );
-#endif
-  maxImvNumCand = std::max( maxImvNumCand, ( cuAbove ) ? cuAbove->imvNumCand : numDefault );
-
-  return maxImvNumCand;
-}
-
 uint8_t CU::getSbtInfo( uint8_t idx, uint8_t pos )
 {
   return ( pos << 4 ) + ( idx << 0 );
@@ -4856,11 +3441,6 @@ uint8_t CU::numSbtModeRdo( uint8_t sbtAllowed )
   return sum;
 }
 
-bool CU::isMtsMode( const uint8_t sbtInfo )
-{
-  return getSbtIdx( sbtInfo ) == SBT_OFF_MTS;
-}
-
 bool CU::isSbtMode( const uint8_t sbtInfo )
 {
   uint8_t sbtIdx = getSbtIdx( sbtInfo );
@@ -4879,11 +3459,34 @@ bool CU::isSameSbtSize( const uint8_t sbtInfo1, const uint8_t sbtInfo2 )
     return false;
 }
 
-bool CU::isGBiIdxCoded( const CodingUnit &cu )
+bool CU::isPredRegDiffFromTB(const CodingUnit &cu, const ComponentID compID)
+{
+  // True only for the luma component of a vertical-ISP CU whose block size passes isMinWidthPredEnabledForBlkSize().
+  if (compID != COMPONENT_Y || cu.ispMode != VER_INTRA_SUBPARTITIONS) return false;
+  const auto &blk = cu.blocks[compID];
+  return CU::isMinWidthPredEnabledForBlkSize(blk.width, blk.height);
+}
+
+bool CU::isMinWidthPredEnabledForBlkSize(const int w, const int h)
+{
+  return w == 4 || (w == 8 && h > 4);  // width 4 at any height, or width 8 with height above 4
+}
+
+bool CU::isFirstTBInPredReg(const CodingUnit& cu, const ComponentID compID, const CompArea &area)
+{
+  return compID == COMPONENT_Y && cu.ispMode && 0 == (area.topLeft().x - cu.Y().topLeft().x) % PRED_REG_MIN_WIDTH;  // luma ISP block whose x offset inside the CU is a multiple of PRED_REG_MIN_WIDTH
+}
+
+void CU::adjustPredArea(CompArea &area)
+{
+  if (static_cast<int>(area.width) < PRED_REG_MIN_WIDTH) area.width = PRED_REG_MIN_WIDTH;  // widen to at least PRED_REG_MIN_WIDTH; height is untouched
+}
+
+bool CU::isBcwIdxCoded( const CodingUnit &cu )
 {
-  if( cu.cs->sps->getUseGBi() == false )
+  if( cu.cs->sps->getUseBcw() == false )
   {
-    CHECK(cu.GBiIdx != GBI_DEFAULT, "Error: cu.GBiIdx != GBI_DEFAULT");
+    CHECK(cu.BcwIdx != BCW_DEFAULT, "Error: cu.BcwIdx != BCW_DEFAULT");
     return false;
   }
 
@@ -4897,7 +3500,7 @@ bool CU::isGBiIdxCoded( const CodingUnit &cu )
     return false;
   }
 
-  if( cu.lwidth() * cu.lheight() < GBI_SIZE_CONSTRAINT )
+  if( cu.lwidth() * cu.lheight() < BCW_SIZE_CONSTRAINT )
   {
     return false;
   }
@@ -4906,19 +3509,18 @@ bool CU::isGBiIdxCoded( const CodingUnit &cu )
   {
     if( cu.firstPU->interDir == 3 )
     {
-		WPScalingParam *wp0;
-		WPScalingParam *wp1;
-		int refIdx0 = cu.firstPU->refIdx[REF_PIC_LIST_0];
-		int refIdx1 = cu.firstPU->refIdx[REF_PIC_LIST_1];
-
-		cu.cs->slice->getWpScaling(REF_PIC_LIST_0, refIdx0, wp0);
-		cu.cs->slice->getWpScaling(REF_PIC_LIST_1, refIdx1, wp1);
-		if ((wp0[COMPONENT_Y].bPresentFlag || wp0[COMPONENT_Cb].bPresentFlag || wp0[COMPONENT_Cr].bPresentFlag
-			|| wp1[COMPONENT_Y].bPresentFlag || wp1[COMPONENT_Cb].bPresentFlag || wp1[COMPONENT_Cr].bPresentFlag)
-			)
-		{
-			return false;
-		}
+      WPScalingParam *wp0;
+      WPScalingParam *wp1;
+      int refIdx0 = cu.firstPU->refIdx[REF_PIC_LIST_0];
+      int refIdx1 = cu.firstPU->refIdx[REF_PIC_LIST_1];
+
+      cu.cs->slice->getWpScaling(REF_PIC_LIST_0, refIdx0, wp0);
+      cu.cs->slice->getWpScaling(REF_PIC_LIST_1, refIdx1, wp1);
+      if ((wp0[COMPONENT_Y].bPresentFlag || wp0[COMPONENT_Cb].bPresentFlag || wp0[COMPONENT_Cr].bPresentFlag
+        || wp1[COMPONENT_Y].bPresentFlag || wp1[COMPONENT_Cb].bPresentFlag || wp1[COMPONENT_Cr].bPresentFlag))
+      {
+        return false;
+      }
       return true;
     }
   }
@@ -4926,11 +3528,11 @@ bool CU::isGBiIdxCoded( const CodingUnit &cu )
   return false;
 }
 
-uint8_t CU::getValidGbiIdx( const CodingUnit &cu )
+uint8_t CU::getValidBcwIdx( const CodingUnit &cu )
 {
   if( cu.firstPU->interDir == 3 && !cu.firstPU->mergeFlag )
   {
-    return cu.GBiIdx;
+    return cu.BcwIdx;
   }
   else if( cu.firstPU->interDir == 3 && cu.firstPU->mergeFlag && cu.firstPU->mergeType == MRG_TYPE_DEFAULT_N )
   {
@@ -4938,23 +3540,23 @@ uint8_t CU::getValidGbiIdx( const CodingUnit &cu )
   }
   else if( cu.firstPU->mergeFlag && cu.firstPU->mergeType == MRG_TYPE_SUBPU_ATMVP )
   {
-    CHECK(cu.GBiIdx != GBI_DEFAULT, " cu.GBiIdx != GBI_DEFAULT ");
+    CHECK(cu.BcwIdx != BCW_DEFAULT, " cu.BcwIdx != BCW_DEFAULT ");
   }
   else
   {
-    CHECK(cu.GBiIdx != GBI_DEFAULT, " cu.GBiIdx != GBI_DEFAULT ");
+    CHECK(cu.BcwIdx != BCW_DEFAULT, " cu.BcwIdx != BCW_DEFAULT ");
   }
 
-  return GBI_DEFAULT;
+  return BCW_DEFAULT;
 }
 
-void CU::setGbiIdx( CodingUnit &cu, uint8_t uh )
+void CU::setBcwIdx( CodingUnit &cu, uint8_t uh )
 {
   int8_t uhCnt = 0;
 
   if( cu.firstPU->interDir == 3 && !cu.firstPU->mergeFlag )
   {
-    cu.GBiIdx = uh;
+    cu.BcwIdx = uh;
     ++uhCnt;
   }
   else if( cu.firstPU->interDir == 3 && cu.firstPU->mergeFlag && cu.firstPU->mergeType == MRG_TYPE_DEFAULT_N )
@@ -4963,44 +3565,74 @@ void CU::setGbiIdx( CodingUnit &cu, uint8_t uh )
   }
   else if( cu.firstPU->mergeFlag && cu.firstPU->mergeType == MRG_TYPE_SUBPU_ATMVP )
   {
-    cu.GBiIdx = GBI_DEFAULT;
+    cu.BcwIdx = BCW_DEFAULT;
   }
   else
   {
-    cu.GBiIdx = GBI_DEFAULT;
+    cu.BcwIdx = BCW_DEFAULT;
   }
 
   CHECK(uhCnt <= 0, " uhCnt <= 0 ");
 }
 
-uint8_t CU::deriveGbiIdx( uint8_t gbiLO, uint8_t gbiL1 )
+uint8_t CU::deriveBcwIdx( uint8_t bcwLO, uint8_t bcwL1 )  // combines two BCW indices into a single index
 {
-  if( gbiLO == gbiL1 )
+  if( bcwLO == bcwL1 )  // identical indices need no arbitration
   {
-    return gbiLO;
+    return bcwLO;
   }
-  const int8_t w0 = getGbiWeight(gbiLO, REF_PIC_LIST_0);
-  const int8_t w1 = getGbiWeight(gbiL1, REF_PIC_LIST_1);
-  const int8_t th = g_GbiWeightBase >> 1;
+  const int8_t w0 = getBcwWeight(bcwLO, REF_PIC_LIST_0);  // weight that index bcwLO yields for list 0
+  const int8_t w1 = getBcwWeight(bcwL1, REF_PIC_LIST_1);  // weight that index bcwL1 yields for list 1
+  const int8_t th = g_BcwWeightBase >> 1;  // threshold: half of the weight base
   const int8_t off = 1;
 
   if( w0 == w1 || (w0 < (th - off) && w1 < (th - off)) || (w0 >(th + off) && w1 >(th + off)) )
   {
-    return GBI_DEFAULT;
+    return BCW_DEFAULT;  // equal weights, or both weights beyond th -/+ off on the same side
   }
   else
   {
     if( w0 > w1 )
     {
-      return ( w0 >= th ? gbiLO : gbiL1 );
+      return ( w0 >= th ? bcwLO : bcwL1 );  // w0 is larger: keep its index if w0 reaches th, else take the other
     }
     else
     {
-      return ( w1 >= th ? gbiL1 : gbiLO );
+      return ( w1 >= th ? bcwL1 : bcwLO );  // w1 is larger: keep its index if w1 reaches th, else take the other
     }
   }
 }
 
+bool CU::bdpcmAllowed( const CodingUnit& cu, const ComponentID compID )
+{
+  // Size limit: 1 << the PPS log2 maximum transform-skip block size; both block dimensions must fit.
+  const SizeType maxTsSize = 1 << cu.cs->pps->getLog2MaxTransformSkipBlockSize();
+  const bool     lumaComp  = isLuma(compID);
+
+  bool allowed = cu.cs->sps->getBDPCMEnabled();
+  allowed &= (lumaComp || cu.cs->sps->getBDPCMEnabled() == BDPCM_LUMACHROMA);
+  allowed &= CU::isIntra( cu );
+  allowed &= lumaComp ? (cu.lwidth() <= maxTsSize && cu.lheight() <= maxTsSize)
+                      : (cu.chromaSize().width <= maxTsSize && cu.chromaSize().height <= maxTsSize);
+  return allowed;
+}
+
+bool CU::isMTSAllowed(const CodingUnit &cu, const ComponentID compID)
+{
+  const bool     intraCu  = CU::isIntra( cu );
+  const SizeType tsLimit  = 1 << cu.cs->pps->getLog2MaxTransformSkipBlockSize();
+  const int      mtsLimit = intraCu ? MTS_INTRA_MAX_CU_SIZE : MTS_INTER_MAX_CU_SIZE;
+  const int      lumaW    = cu.blocks[0].lumaSize().width;
+  const int      lumaH    = cu.blocks[0].lumaSize().height;
+  // Starts from "luma component of a luma-channel CU"; every following test can only narrow the result.
+  bool allowed = cu.chType == CHANNEL_TYPE_LUMA && compID == COMPONENT_Y;
+  allowed &= intraCu ? cu.cs->sps->getUseIntraMTS() : cu.cs->sps->getUseInterMTS() && CU::isInter( cu );
+  allowed &= lumaW <= mtsLimit && lumaH <= mtsLimit;
+  allowed &= !cu.ispMode && !cu.sbtInfo;
+  allowed &= !cu.bdpcmMode || lumaW > tsLimit || lumaH > tsLimit;
+  return allowed;
+}
+
 // TU tools
 
 bool TU::isNonTransformedResidualRotated(const TransformUnit &tu, const ComponentID &compID)
@@ -5015,6 +3647,8 @@ bool TU::getCbf( const TransformUnit &tu, const ComponentID &compID )
 
 bool TU::getCbfAtDepth(const TransformUnit &tu, const ComponentID &compID, const unsigned &depth)
 {
+  if( !tu.blocks[compID].valid() )  // consistency check only: a component without a valid block must not carry any cbf bit
+    CHECK( tu.cbf[compID] != 0, "cbf must be 0 if the component is not available" );
   return ((tu.cbf[compID] >> depth) & 1) == 1;
 }
 
@@ -5028,156 +3662,47 @@ void TU::setCbfAtDepth(TransformUnit &tu, const ComponentID &compID, const unsig
 
 bool TU::isTSAllowed(const TransformUnit &tu, const ComponentID compID)
 {
-  bool    tsAllowed = compID == COMPONENT_Y;
-  const int maxSize = tu.cs->pps->getPpsRangeExtension().getLog2MaxTransformSkipBlockSize();
+  const int maxSize = tu.cs->pps->getLog2MaxTransformSkipBlockSize();  // log2 of the size limit, now read from the PPS directly
 
-  tsAllowed &= tu.cs->pps->getUseTransformSkip();
-  tsAllowed &= !tu.cu->transQuantBypass;
+  bool tsAllowed = tu.cs->sps->getTransformSkipEnabledFlag();  // starts from the SPS enable flag; no longer restricted to COMPONENT_Y up front
   tsAllowed &= ( !tu.cu->ispMode || !isLuma(compID) );
-
   SizeType transformSkipMaxSize = 1 << maxSize;
-  tsAllowed &= tu.lwidth() <= transformSkipMaxSize && tu.lheight() <= transformSkipMaxSize;
+  tsAllowed &= !(tu.cu->bdpcmMode && isLuma(compID));  // not for luma when the CU uses luma BDPCM
+  tsAllowed &= !(tu.cu->bdpcmModeChroma && isChroma(compID));  // not for chroma when the CU uses chroma BDPCM
+  tsAllowed &= tu.blocks[compID].width <= transformSkipMaxSize && tu.blocks[compID].height <= transformSkipMaxSize;  // size test uses the block of the queried component
   tsAllowed &= !tu.cu->sbtInfo;
 
   return tsAllowed;
 }
 
-bool TU::isMTSAllowed(const TransformUnit &tu, const ComponentID compID)
-{
-  bool   mtsAllowed = compID == COMPONENT_Y;
-  const int maxSize = CU::isIntra( *tu.cu ) ? MTS_INTRA_MAX_CU_SIZE : MTS_INTER_MAX_CU_SIZE;
-
-  mtsAllowed &= CU::isIntra( *tu.cu ) ? tu.cs->sps->getUseIntraMTS() : tu.cs->sps->getUseInterMTS() && CU::isInter( *tu.cu );
-  mtsAllowed &= ( tu.lwidth() <= maxSize && tu.lheight() <= maxSize );
-  mtsAllowed &= !tu.cu->ispMode;
-  mtsAllowed &= !tu.cu->sbtInfo;
-  return mtsAllowed;
-}
-
-uint32_t TU::getGolombRiceStatisticsIndex(const TransformUnit &tu, const ComponentID &compID)
-{
-  const bool transformSkip    = tu.mtsIdx==1;
-  const bool transquantBypass = tu.cu->transQuantBypass;
-
-  //--------
-
-  const uint32_t channelTypeOffset = isChroma(compID) ? 2 : 0;
-  const uint32_t nonTransformedOffset = (transformSkip || transquantBypass) ? 1 : 0;
-
-  //--------
-
-  const uint32_t selectedIndex = channelTypeOffset + nonTransformedOffset;
-  CHECK( selectedIndex >= RExt__GOLOMB_RICE_ADAPTATION_STATISTICS_SETS, "Invalid golomb rice adaptation statistics set" );
 
-  return selectedIndex;
-}
-
-#if HEVC_USE_MDCS
-uint32_t TU::getCoefScanIdx(const TransformUnit &tu, const ComponentID &compID)
+int TU::getICTMode( const TransformUnit& tu, int jointCbCr )  // looks up g_ictModes by the picture-header joint Cb-Cr sign flag and a jointCbCr value
 {
-  //------------------------------------------------
-
-  //this mechanism is available for intra only
-
-  if( !CU::isIntra( *tu.cu ) )
+  if( jointCbCr < 0 )  // a negative argument means "use the value stored in the TU"
   {
-    return SCAN_DIAG;
-  }
-
-  //------------------------------------------------
-
-  //check that MDCS can be used for this TU
-
-
-  const CompArea &area      = tu.blocks[compID];
-  const SPS &sps            = *tu.cs->sps;
-  const ChromaFormat format = sps.getChromaFormatIdc();
-
-
-  const uint32_t maximumWidth  = MDCS_MAXIMUM_WIDTH  >> getComponentScaleX(compID, format);
-  const uint32_t maximumHeight = MDCS_MAXIMUM_HEIGHT >> getComponentScaleY(compID, format);
-
-  if ((area.width > maximumWidth) || (area.height > maximumHeight))
-  {
-    return SCAN_DIAG;
-  }
-
-  //------------------------------------------------
-
-  //otherwise, select the appropriate mode
-
-  const PredictionUnit &pu = *tu.cs->getPU( area.pos(), toChannelType( compID ) );
-
-  uint32_t uiDirMode = PU::getFinalIntraMode(pu, toChannelType(compID));
-
-  //------------------
-
-       if (abs((int) uiDirMode - VER_IDX) <= MDCS_ANGLE_LIMIT)
-  {
-    return SCAN_HOR;
-  }
-  else if (abs((int) uiDirMode - HOR_IDX) <= MDCS_ANGLE_LIMIT)
-  {
-    return SCAN_VER;
-  }
-  else
-  {
-    return SCAN_DIAG;
+    jointCbCr = tu.jointCbCr;
   }
+  return g_ictModes[ tu.cs->picHeader->getJointCbCrSignFlag() ][ jointCbCr ];  // first index: sign flag, second index: jointCbCr
 }
 
-#endif
 bool TU::hasCrossCompPredInfo( const TransformUnit &tu, const ComponentID &compID )
 {
   return (isChroma(compID) && tu.cs->pps->getPpsRangeExtension().getCrossComponentPredictionEnabledFlag() && TU::getCbf(tu, COMPONENT_Y) &&
     (!CU::isIntra(*tu.cu) || PU::isChromaIntraModeCrossCheckMode(*tu.cs->getPU(tu.blocks[compID].pos(), toChannelType(compID)))));
 }
 
-uint32_t TU::getNumNonZeroCoeffsNonTS( const TransformUnit& tu, const bool bLuma, const bool bChroma )
-{
-  uint32_t count = 0;
-  for( uint32_t i = 0; i < ::getNumberValidTBlocks( *tu.cs->pcv ); i++ )
-  {
-    if( tu.blocks[i].valid() && ( isLuma(ComponentID(i)) ? tu.mtsIdx !=1 : true ) && TU::getCbf( tu, ComponentID( i ) ) )
-    {
-      if( isLuma  ( tu.blocks[i].compID ) && !bLuma   ) continue;
-      if( isChroma( tu.blocks[i].compID ) && !bChroma ) continue;
-
-      uint32_t area = tu.blocks[i].area();
-      const TCoeff* coeff = tu.getCoeffs( ComponentID( i ) ).buf;
-      for( uint32_t j = 0; j < area; j++ )
-      {
-        count += coeff[j] != 0;
-      }
-    }
-  }
-  return count;
-}
 
 bool TU::needsSqrt2Scale( const TransformUnit &tu, const ComponentID &compID )
 {
   const Size &size=tu.blocks[compID];
-  const bool isTransformSkip = tu.mtsIdx==1 && isLuma(compID);
-  return (!isTransformSkip) && (((g_aucLog2[size.width] + g_aucLog2[size.height]) & 1) == 1);
+  const bool isTransformSkip = (tu.mtsIdx[compID] == MTS_SKIP);  // mtsIdx is indexed per component here
+  return (!isTransformSkip) && (((floorLog2(size.width) + floorLog2(size.height)) & 1) == 1);  // true when floorLog2(width) + floorLog2(height) is odd and the block is not transform-skipped
 }
 
-#if HM_QTBT_AS_IN_JEM_QUANT
-
 bool TU::needsBlockSizeTrafoScale( const TransformUnit &tu, const ComponentID &compID )
 {
   return needsSqrt2Scale( tu, compID ) || isNonLog2BlockSize( tu.blocks[compID] );
 }
-#else
-bool TU::needsQP3Offset(const TransformUnit &tu, const ComponentID &compID)
-{
-  if( !tu.transformSkip[compID] )
-  {
-    return ( ( ( g_aucLog2[tu.blocks[compID].width] + g_aucLog2[tu.blocks[compID].height] ) & 1 ) == 1 );
-  }
-  return false;
-}
-#endif
-
 
 TransformUnit* TU::getPrevTU( const TransformUnit &tu, const ComponentID compID )
 {
@@ -5197,44 +3722,50 @@ bool TU::getPrevTuCbfAtDepth( const TransformUnit &currentTu, const ComponentID
   return ( prevTU != nullptr ) ? TU::getCbfAtDepth( *prevTU, compID, trDepth ) : false;
 }
 
-void TU::getTransformTypeISP( const TransformUnit &tu, const ComponentID compID, int &typeH, int &typeV )
+
+// other tools
+
+uint32_t getCtuAddr( const Position& pos, const PreCalcValues& pcv )  // maps a position to a CTU index: CTU column + CTU row * widthInCtus
 {
-  typeH = DCT2, typeV = DCT2;
-  const int uiChFinalMode = PU::getFinalIntraMode( *tu.cu->firstPU, toChannelType( compID ) );
-  bool intraModeIsEven = uiChFinalMode % 2 == 0;
+  return ( pos.x >> pcv.maxCUWidthLog2 ) + ( pos.y >> pcv.maxCUHeightLog2 ) * pcv.widthInCtus;  // the shifts divide x and y by the CTU width and height
+}
 
-  if( uiChFinalMode == DC_IDX || uiChFinalMode == 33 || uiChFinalMode == 35 )
+int getNumModesMip(const Size& block)  // returns a mode count chosen by getMipSizeId(block)
+{
+  switch( getMipSizeId(block) )
   {
-    typeH = DCT2;
-    typeV = typeH;
+  case 0: return 16;  // size id 0
+  case 1: return  8;  // size id 1
+  case 2: return  6;  // size id 2
+  default: THROW( "Invalid mipSizeId" );  // any other id is reported through THROW
   }
-  else if( uiChFinalMode == PLANAR_IDX || ( uiChFinalMode >= 31 && uiChFinalMode <= 37 ) )
+}
+
+
+int getMipSizeId(const Size& block)  // classifies a block size into id 0, 1 or 2
+{
+  if( block.width == 4 && block.height == 4 )  // exactly 4x4
   {
-    typeH = DST7;
-    typeV = typeH;
+    return 0;
   }
-  else if( ( intraModeIsEven && uiChFinalMode >= 2 && uiChFinalMode <= 30 ) || ( !intraModeIsEven && uiChFinalMode >= 39 && uiChFinalMode <= 65 ) )
+  else if( block.width == 4 || block.height == 4 || (block.width == 8 && block.height == 8) )  // one side equal to 4, or exactly 8x8
   {
-    typeH = DST7;
-    typeV = DCT2;
+    return 1;
   }
-  else if( ( !intraModeIsEven && uiChFinalMode >= 3 && uiChFinalMode <= 29 ) || ( intraModeIsEven && uiChFinalMode >= 38 && uiChFinalMode <= 66 ) )
+  else  // every remaining size
   {
-    typeH = DCT2;
-    typeV = DST7;
+    return 2;
   }
-  //Size restriction for non-DCT-II transforms
-  Area tuArea = tu.blocks[compID];
-  typeH = tuArea.width  <= 2 || tuArea.width  >= 32 ? DCT2 : typeH;
-  typeV = tuArea.height <= 2 || tuArea.height >= 32 ? DCT2 : typeV;
-}
-
 
-// other tools
+}
 
-uint32_t getCtuAddr( const Position& pos, const PreCalcValues& pcv )
+bool allowLfnstWithMip(const Size& block)  // true only when both block dimensions are at least 16
 {
-  return ( pos.x >> pcv.maxCUWidthLog2 ) + ( pos.y >> pcv.maxCUHeightLog2 ) * pcv.widthInCtus;
+  if (block.width >= 16 && block.height >= 16)  // both sides must reach 16
+  {
+    return true;
+  }
+  return false;
 }
 
 
diff --git a/source/Lib/CommonLib/UnitTools.h b/source/Lib/CommonLib/UnitTools.h
index 344bac8fb20b2b0d64a461b0c6841c002166143d..8cead3c5ca37492e67158b262460de667902203f 100644
--- a/source/Lib/CommonLib/UnitTools.h
+++ b/source/Lib/CommonLib/UnitTools.h
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2019, ITU/ISO/IEC
+* Copyright (c) 2010-2020, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -59,42 +59,48 @@ namespace CU
   bool isIntra                        (const CodingUnit &cu);
   bool isInter                        (const CodingUnit &cu);
   bool isIBC                          (const CodingUnit &cu);
+  bool isPLT                          (const CodingUnit &cu);
   bool isRDPCMEnabled                 (const CodingUnit &cu);
-  bool isLosslessCoded                (const CodingUnit &cu);
-  uint32_t getIntraSizeIdx                (const CodingUnit &cu);
 
   bool isSameCtu                      (const CodingUnit &cu, const CodingUnit &cu2);
   bool isSameSlice                    (const CodingUnit &cu, const CodingUnit &cu2);
-#if HEVC_TILES_WPP
   bool isSameTile                     (const CodingUnit &cu, const CodingUnit &cu2);
   bool isSameSliceAndTile             (const CodingUnit &cu, const CodingUnit &cu2);
-#endif
   bool isLastSubCUOfCtu               (const CodingUnit &cu);
   uint32_t getCtuAddr                     (const CodingUnit &cu);
-
   int  predictQP                      (const CodingUnit& cu, const int prevQP );
 
   uint32_t getNumPUs                      (const CodingUnit& cu);
   void addPUs                         (      CodingUnit& cu);
 
+  void saveMotionInHMVP               (const CodingUnit& cu, const bool isToBeDone );
+
   PartSplit getSplitAtDepth           (const CodingUnit& cu, const unsigned depth);
+  ModeType  getModeTypeAtDepth        (const CodingUnit& cu, const unsigned depth);
 
-  bool hasNonTsCodedBlock             (const CodingUnit& cu);
-  uint32_t getNumNonZeroCoeffNonTs        (const CodingUnit& cu);
+  uint32_t getNumNonZeroCoeffNonTsCorner8x8( const CodingUnit& cu, const bool lumaFlag = true, const bool chromaFlag = true );
+  bool  isPredRegDiffFromTB(const CodingUnit& cu, const ComponentID compID);
+  bool  isFirstTBInPredReg(const CodingUnit& cu, const ComponentID compID, const CompArea &area);
+  bool  isMinWidthPredEnabledForBlkSize(const int w, const int h);
+  void  adjustPredArea(CompArea &area);
+  bool  isBcwIdxCoded                 (const CodingUnit& cu);
+  uint8_t getValidBcwIdx              (const CodingUnit& cu);
+  void  setBcwIdx                     (CodingUnit& cu, uint8_t uh);
+  uint8_t deriveBcwIdx                (uint8_t bcwLO, uint8_t bcwL1);
+  bool bdpcmAllowed                   (const CodingUnit& cu, const ComponentID compID);
+  bool isMTSAllowed                   (const CodingUnit& cu, const ComponentID compID);
 
-  bool  isGBiIdxCoded                 (const CodingUnit& cu);
-  uint8_t getValidGbiIdx              (const CodingUnit& cu);
-  void  setGbiIdx                     (CodingUnit& cu, uint8_t uh);
-  uint8_t deriveGbiIdx                (uint8_t gbiLO, uint8_t gbiL1);
 
   bool      divideTuInRows            ( const CodingUnit &cu );
-  bool      firstTestISPHorSplit      ( const int width, const int height,            const ComponentID compID, const CodingUnit *cuLeft = nullptr, const CodingUnit *cuAbove = nullptr );
   PartSplit getISPType                ( const CodingUnit &cu,                         const ComponentID compID );
   bool      isISPLast                 ( const CodingUnit &cu, const CompArea &tuArea, const ComponentID compID );
   bool      isISPFirst                ( const CodingUnit &cu, const CompArea &tuArea, const ComponentID compID );
-  ISPType   canUseISPSplit            ( const CodingUnit &cu,                         const ComponentID compID );
-  ISPType   canUseISPSplit            ( const int width, const int height, const int maxTrSize = MAX_TB_SIZEY );
+  bool      canUseISP                 ( const CodingUnit &cu,                         const ComponentID compID );
+  bool      canUseISP                 ( const int width, const int height, const int maxTrSize = MAX_TB_SIZEY );
+  bool      canUseLfnstWithISP        ( const CompArea& cuArea, const ISPType ispSplitType );
+  bool      canUseLfnstWithISP        ( const CodingUnit& cu, const ChannelType chType );
   uint32_t  getISPSplitDim            ( const int width, const int height, const PartSplit ispType );
+  bool      allLumaCBFsAreZero        ( const CodingUnit& cu );
 
   PUTraverser traversePUs             (      CodingUnit& cu);
   TUTraverser traverseTUs             (      CodingUnit& cu);
@@ -103,8 +109,6 @@ namespace CU
 
   bool  hasSubCUNonZeroMVd            (const CodingUnit& cu);
   bool  hasSubCUNonZeroAffineMVd      ( const CodingUnit& cu );
-  int   getMaxNeighboriMVCandNum      (const CodingStructure& cs, const Position& pos);
-  void  resetMVDandMV2Int             (      CodingUnit& cu, InterPrediction *interPred );
 
   uint8_t getSbtInfo                  (uint8_t idx, uint8_t pos);
   uint8_t getSbtIdx                   (const uint8_t sbtInfo);
@@ -114,17 +118,21 @@ namespace CU
   uint8_t getSbtPosFromSbtMode        (const uint8_t sbtMode);
   uint8_t targetSbtAllowed            (uint8_t idx, uint8_t sbtAllowed);
   uint8_t numSbtModeRdo               (uint8_t sbtAllowed);
-  bool    isMtsMode                   (const uint8_t sbtInfo);
   bool    isSbtMode                   (const uint8_t sbtInfo);
   bool    isSameSbtSize               (const uint8_t sbtInfo1, const uint8_t sbtInfo2);
+  bool    getRprScaling               ( const SPS* sps, const PPS* curPPS, Picture* refPic, int& xScale, int& yScale );
 }
 // PU tools
 namespace PU
 {
-  int  getLMSymbolList(const PredictionUnit &pu, int *pModeList);
+  int  getLMSymbolList(const PredictionUnit &pu, int *modeList);
   int  getIntraMPMs(const PredictionUnit &pu, unsigned *mpm, const ChannelType &channelType = CHANNEL_TYPE_LUMA);
+  bool          isMIP                 (const PredictionUnit &pu, const ChannelType &chType = CHANNEL_TYPE_LUMA);
+  uint32_t      getIntraDirLuma       (const PredictionUnit &pu);
   void getIntraChromaCandModes        (const PredictionUnit &pu, unsigned modeList[NUM_CHROMA_MODE]);
   uint32_t getFinalIntraMode              (const PredictionUnit &pu, const ChannelType &chType);
+  uint32_t getCoLocatedIntraLumaMode      (const PredictionUnit &pu);
+  int getWideAngIntraMode             ( const TransformUnit &tu, const uint32_t dirMode, const ComponentID compID );
   void getInterMergeCandidates        (const PredictionUnit &pu, MergeCtx& mrgCtx,
     int mmvdList,
     const int& mrgCandIdx = -1 );
@@ -132,92 +140,70 @@ namespace PU
   void getInterMMVDMergeCandidates(const PredictionUnit &pu, MergeCtx& mrgCtx, const int& mrgCandIdx = -1);
   int getDistScaleFactor(const int &currPOC, const int &currRefPOC, const int &colPOC, const int &colRefPOC);
   bool isDiffMER                      (const PredictionUnit &pu, const PredictionUnit &pu2);
-  bool getColocatedMVP                (const PredictionUnit &pu, const RefPicList &eRefPicList, const Position &pos, Mv& rcMv, const int &refIdx);
+  bool getColocatedMVP                (const PredictionUnit &pu, const RefPicList &eRefPicList, const Position &pos, Mv& rcMv, const int &refIdx, bool sbFlag);
   void fillMvpCand                    (      PredictionUnit &pu, const RefPicList &eRefPicList, const int &refIdx, AMVPInfo &amvpInfo );
   void fillIBCMvpCand                 (PredictionUnit &pu, AMVPInfo &amvpInfo);
-  bool addIBCMVPCand                  (const PredictionUnit &pu, const Position &pos, const MvpDir &eDir, AMVPInfo &amvpInfo);
   void fillAffineMvpCand              (      PredictionUnit &pu, const RefPicList &eRefPicList, const int &refIdx, AffineAMVPInfo &affiAMVPInfo);
   bool addMVPCandUnscaled             (const PredictionUnit &pu, const RefPicList &eRefPicList, const int &iRefIdx, const Position &pos, const MvpDir &eDir, AMVPInfo &amvpInfo);
-  bool addMVPCandWithScaling          (const PredictionUnit &pu, const RefPicList &eRefPicList, const int &iRefIdx, const Position &pos, const MvpDir &eDir, AMVPInfo &amvpInfo);
   void xInheritedAffineMv             ( const PredictionUnit &pu, const PredictionUnit* puNeighbour, RefPicList eRefPicList, Mv rcMv[3] );
-  bool xCheckSimilarMotion(const int mergeCandIndex, const int prevCnt, const MergeCtx mergeCandList, bool hasPruned[MRG_MAX_NUM_CANDS]);
-#if JVET_L0090_PAIR_AVG
-  bool addMergeHMVPCand(const CodingStructure &cs, MergeCtx& mrgCtx, bool canFastExit, const int& mrgCandIdx, const uint32_t maxNumMergeCandMin1, int &cnt, const int prevCnt, bool isAvailableSubPu, unsigned subPuMvpPos
-    , bool ibcFlag
-    , bool isShared
-  );
-#else
-  bool addMergeHMVPCand(const CodingStructure &cs, MergeCtx& mrgCtx, bool isCandInter[MRG_MAX_NUM_CANDS], bool canFastExit, const int& mrgCandIdx, const uint32_t maxNumMergeCandMin1, int &cnt, const int prevCnt, bool isAvailableSubPu, unsigned subPuMvpPos
+  bool addMergeHMVPCand               (const CodingStructure &cs, MergeCtx& mrgCtx, const int& mrgCandIdx, const uint32_t maxNumMergeCandMin1, int &cnt
+    , const bool isAvailableA1, const MotionInfo miLeft, const bool isAvailableB1, const MotionInfo miAbove
+    , const bool ibcFlag
+    , const bool isGt4x4
   );
-#endif
-  void addAMVPHMVPCand(const PredictionUnit &pu, const RefPicList eRefPicList, const RefPicList eRefPicList2nd, const int currRefPOC, AMVPInfo &info, uint8_t imv);
-  bool addAffineMVPCandUnscaled( const PredictionUnit &pu, const RefPicList &refPicList, const int &refIdx, const Position &pos, const MvpDir &dir, AffineAMVPInfo &affiAmvpInfo );
+  void addAMVPHMVPCand                (const PredictionUnit &pu, const RefPicList eRefPicList, const int currRefPOC, AMVPInfo &info);
+  bool addAffineMVPCandUnscaled       ( const PredictionUnit &pu, const RefPicList &refPicList, const int &refIdx, const Position &pos, const MvpDir &dir, AffineAMVPInfo &affiAmvpInfo );
   bool isBipredRestriction            (const PredictionUnit &pu);
   void spanMotionInfo                 (      PredictionUnit &pu, const MergeCtx &mrgCtx = MergeCtx() );
   void applyImv                       (      PredictionUnit &pu, MergeCtx &mrgCtx, InterPrediction *interPred = NULL );
-  void getAffineControlPointCand( const PredictionUnit &pu, MotionInfo mi[4], bool isAvailable[4], int verIdx[4], int modelIdx, int verNum, AffineMergeCtx& affMrgCtx );
+  void getAffineControlPointCand(const PredictionUnit &pu, MotionInfo mi[4], bool isAvailable[4], int verIdx[4], int8_t bcwIdx, int modelIdx, int verNum, AffineMergeCtx& affMrgCtx);
   void getAffineMergeCand( const PredictionUnit &pu, AffineMergeCtx& affMrgCtx, const int mrgCandIdx = -1 );
   void setAllAffineMvField            (      PredictionUnit &pu, MvField *mvField, RefPicList eRefList );
-  void setAllAffineMv                 (      PredictionUnit &pu, Mv affLT, Mv affRT, Mv affLB, RefPicList eRefList
-    , bool setHighPrec = false
-  );
-  bool getInterMergeSubPuMvpCand(const PredictionUnit &pu, MergeCtx &mrgCtx, bool& LICFlag, const int count
-    , int mmvdList
-  );
+  void setAllAffineMv                 (      PredictionUnit &pu, Mv affLT, Mv affRT, Mv affLB, RefPicList eRefList, bool clipCPMVs = false );
+  bool getInterMergeSubPuMvpCand(const PredictionUnit &pu, MergeCtx &mrgCtx, bool& LICFlag, const int count, int mmvdList);
   bool getInterMergeSubPuRecurCand(const PredictionUnit &pu, MergeCtx &mrgCtx, const int count);
-  bool isBiPredFromDifferentDir       (const PredictionUnit &pu);
   bool isBiPredFromDifferentDirEqDistPoc(const PredictionUnit &pu);
-  void restrictBiPredMergeCands       (const PredictionUnit &pu, MergeCtx& mrgCtx);
   void restrictBiPredMergeCandsOne    (PredictionUnit &pu);
 
   bool isLMCMode                      (                          unsigned mode);
   bool isLMCModeEnabled               (const PredictionUnit &pu, unsigned mode);
   bool isChromaIntraModeCrossCheckMode(const PredictionUnit &pu);
-  int  getMHIntraMPMs                 (const PredictionUnit &pu, unsigned *mpm, const ChannelType &channelType = CHANNEL_TYPE_LUMA, const bool isChromaMDMS = false, const unsigned startIdx = 0);
-  int  getNarrowShape                 (const int width, const int height);
   void getTriangleMergeCandidates     (const PredictionUnit &pu, MergeCtx &triangleMrgCtx);
-  bool isUniqueTriangleCandidates     (const PredictionUnit &pu, MergeCtx &triangleMrgCtx);
   void spanTriangleMotionInfo         (      PredictionUnit &pu, MergeCtx &triangleMrgCtx, const bool splitDir, const uint8_t candIdx0, const uint8_t candIdx1);
   int32_t mappingRefPic               (const PredictionUnit &pu, int32_t refPicPoc, bool targetRefPicList);
-  void getIbcMVPsEncOnly(PredictionUnit &pu, Mv* MvPred, int& nbPred);
+  bool isAddNeighborMv  (const Mv& currMv, Mv* neighborMvs, int numNeighborMv);
+  void getIbcMVPsEncOnly(PredictionUnit &pu, Mv* mvPred, int& nbPred);
   bool getDerivedBV(PredictionUnit &pu, const Mv& currentMv, Mv& derivedMv);
-  bool isBlockVectorValid(PredictionUnit& pu, int xPos, int yPos, int width, int height, int picWidth, int picHeight, int xStartInCU, int yStartInCU, int xBv, int yBv, int ctuSize);
   bool checkDMVRCondition(const PredictionUnit& pu);
+
 }
 
 // TU tools
 namespace TU
 {
-  uint32_t getNumNonZeroCoeffsNonTS       (const TransformUnit &tu, const bool bLuma = true, const bool bChroma = true);
+  uint32_t getNumNonZeroCoeffsNonTSCorner8x8( const TransformUnit &tu, const bool bLuma = true, const bool bChroma = true );
   bool isNonTransformedResidualRotated(const TransformUnit &tu, const ComponentID &compID);
   bool getCbf                         (const TransformUnit &tu, const ComponentID &compID);
   bool getCbfAtDepth                  (const TransformUnit &tu, const ComponentID &compID, const unsigned &depth);
   void setCbfAtDepth                  (      TransformUnit &tu, const ComponentID &compID, const unsigned &depth, const bool &cbf);
   bool isTSAllowed                    (const TransformUnit &tu, const ComponentID  compID);
-  bool isMTSAllowed                   (const TransformUnit &tu, const ComponentID  compID);
-  uint32_t getGolombRiceStatisticsIndex   (const TransformUnit &tu, const ComponentID &compID);
-#if HEVC_USE_MDCS
-  uint32_t getCoefScanIdx                 (const TransformUnit &tu, const ComponentID &compID);
-#endif
   bool hasCrossCompPredInfo           (const TransformUnit &tu, const ComponentID &compID);
 
 
   bool needsSqrt2Scale                ( const TransformUnit &tu, const ComponentID &compID );
-#if HM_QTBT_AS_IN_JEM_QUANT
   bool needsBlockSizeTrafoScale       ( const TransformUnit &tu, const ComponentID &compID );
-#else
-  bool needsQP3Offset                 (const TransformUnit &tu, const ComponentID &compID);
-#endif
   TransformUnit* getPrevTU          ( const TransformUnit &tu, const ComponentID compID );
   bool           getPrevTuCbfAtDepth( const TransformUnit &tu, const ComponentID compID, const int trDepth );
-  void           getTransformTypeISP( const TransformUnit &tu, const ComponentID compID, int &typeH, int &typeV );
+  int            getICTMode         ( const TransformUnit &tu, int jointCbCr = -1 );
 }
 
 uint32_t getCtuAddr        (const Position& pos, const PreCalcValues &pcv);
+int  getNumModesMip   (const Size& block); // 16, 8 or 6, chosen by getMipSizeId(block)
+int getMipSizeId      (const Size& block); // 0 for 4x4; 1 if one side is 4 or the block is 8x8; 2 otherwise
+bool allowLfnstWithMip(const Size& block); // true when width >= 16 and height >= 16
 
 template<typename T, size_t N>
 uint32_t updateCandList(T uiMode, double uiCost, static_vector<T, N>& candModeList, static_vector<double, N>& candCostList
-  , static_vector<int, N>& extendRefList, int extendRef
   , size_t uiFastCandNum = N, int* iserttPos = nullptr)
 {
   CHECK( std::min( uiFastCandNum, candModeList.size() ) != std::min( uiFastCandNum, candCostList.size() ), "Sizes do not match!" );
@@ -238,17 +224,9 @@ uint32_t updateCandList(T uiMode, double uiCost, static_vector<T, N>& candModeLi
     {
       candModeList[currSize - i] = candModeList[currSize - 1 - i];
       candCostList[currSize - i] = candCostList[currSize - 1 - i];
-      if (extendRef != -1)
-      {
-        extendRefList[currSize - i] = extendRefList[currSize - 1 - i];
-      }
     }
     candModeList[currSize - shift] = uiMode;
     candCostList[currSize - shift] = uiCost;
-    if (extendRef != -1)
-    {
-      extendRefList[currSize - shift] = extendRef;
-    }
     if (iserttPos != nullptr)
     {
       *iserttPos = int(currSize - shift);
@@ -259,10 +237,6 @@ uint32_t updateCandList(T uiMode, double uiCost, static_vector<T, N>& candModeLi
   {
     candModeList.insert( candModeList.end() - shift, uiMode );
     candCostList.insert( candCostList.end() - shift, uiCost );
-    if (extendRef != -1)
-    {
-      extendRefList.insert(extendRefList.end() - shift, extendRef);
-    }
     if (iserttPos != nullptr)
     {
       *iserttPos = int(candModeList.size() - shift - 1);
@@ -276,56 +250,4 @@ uint32_t updateCandList(T uiMode, double uiCost, static_vector<T, N>& candModeLi
   return 0;
 }
 
-template<typename T, size_t N>
-uint32_t updateDoubleCandList(T mode, double cost, static_vector<T, N>& candModeList, static_vector<double, N>& candCostList, static_vector<T, N>& candModeList2, T mode2, size_t fastCandNum = N, int* iserttPos = nullptr)
-{
-  CHECK(std::min(fastCandNum, candModeList.size()) != std::min(fastCandNum, candCostList.size()), "Sizes do not match!");
-  CHECK(fastCandNum > candModeList.capacity(), "The vector is to small to hold all the candidates!");
-
-  size_t i;
-  size_t shift = 0;
-  size_t currSize = std::min(fastCandNum, candCostList.size());
-
-  while (shift < fastCandNum && shift < currSize && cost < candCostList[currSize - 1 - shift])
-  {
-    shift++;
-  }
-
-  if (candModeList.size() >= fastCandNum && shift != 0)
-  {
-    for (i = 1; i < shift; i++)
-    {
-      candModeList[currSize - i] = candModeList[currSize - 1 - i];
-      candModeList2[currSize - i] = candModeList2[currSize - 1 - i];
-      candCostList[currSize - i] = candCostList[currSize - 1 - i];
-    }
-    candModeList[currSize - shift] = mode;
-    candModeList2[currSize - shift] = mode2;
-    candCostList[currSize - shift] = cost;
-    if (iserttPos != nullptr)
-    {
-      *iserttPos = int(currSize - shift);
-    }
-    return 1;
-  }
-  else if (currSize < fastCandNum)
-  {
-    candModeList.insert(candModeList.end() - shift, mode);
-    candModeList2.insert(candModeList2.end() - shift, mode2);
-    candCostList.insert(candCostList.end() - shift, cost);
-    if (iserttPos != nullptr)
-    {
-      *iserttPos = int(candModeList.size() - shift - 1);
-    }
-    return 1;
-  }
-
-  if (iserttPos != nullptr)
-  {
-    *iserttPos = -1;
-  }
-  return 0;
-}
-
-
 #endif
diff --git a/source/Lib/CommonLib/WeightPrediction.cpp b/source/Lib/CommonLib/WeightPrediction.cpp
index 8495736e89b85b9ef48ec2111027fd8a74986bcb..cf20eb20902d4fb6f4f00478b81d5dc983f77eb2 100644
--- a/source/Lib/CommonLib/WeightPrediction.cpp
+++ b/source/Lib/CommonLib/WeightPrediction.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -161,13 +161,19 @@ void WeightPrediction::addWeightBi(const CPelUnitBuf          &pcYuvSrc0,
                                    const WPScalingParam *const wp1,
                                          PelUnitBuf           &rpcYuvDst,
                                    const bool                  bRoundLuma /*= true*/,
-                                   const ComponentID           maxNumComp)
+                                   const ComponentID           maxNumComp
+                                  , bool                       lumaOnly
+                                  , bool                       chromaOnly
+)
 {
   const bool enableRounding[MAX_NUM_COMPONENT] = { bRoundLuma, true, true };
 
   const uint32_t numValidComponent = (const uint32_t)pcYuvSrc0.bufs.size();
 
-  for (int componentIndex = 0; componentIndex < numValidComponent && componentIndex <= maxNumComp; componentIndex++)
+  CHECK( lumaOnly && chromaOnly, "Not allowed to have both lumaOnly and chromaOnly selected" );
+  int firstComponent = chromaOnly ? 1 : 0;
+  int lastComponent = lumaOnly ? 0 : maxNumComp;
+  for (int componentIndex = firstComponent; componentIndex < numValidComponent && componentIndex <= lastComponent; componentIndex++)
   {
     const ComponentID compID = ComponentID(componentIndex);
 
@@ -215,15 +221,78 @@ void WeightPrediction::addWeightBi(const CPelUnitBuf          &pcYuvSrc0,
   } // compID loop
 }
 
+void WeightPrediction::addWeightBiComponent(const CPelUnitBuf          &pcYuvSrc0, // combines pcYuvSrc0 and pcYuvSrc1 into rpcYuvDst via weightBidir() for the single component Comp
+                                            const CPelUnitBuf          &pcYuvSrc1,
+                                            const ClpRngs              &clpRngs,
+                                            const WPScalingParam *const wp0,
+                                            const WPScalingParam *const wp1,
+                                                  PelUnitBuf           &rpcYuvDst,
+                                            const bool                  bRoundLuma /*= true*/,
+                                            const ComponentID           Comp)
+{
+  const bool enableRounding[MAX_NUM_COMPONENT] = { bRoundLuma, true, true }; // index 0 follows bRoundLuma, the other two entries always round
+
+  const ComponentID compID = ComponentID(Comp); // NOTE(review): compID indexes bufs[] below without a range check here
+
+  const Pel* src0 = pcYuvSrc0.bufs[compID].buf;
+  const Pel* src1 = pcYuvSrc1.bufs[compID].buf;
+        Pel* dst  = rpcYuvDst.bufs[compID].buf;
+
+  // Selected component (compID) : ------------------------------
+  const ClpRng& clpRng = clpRngs.comp[compID];
+  const int  w0       = wp0[compID].w;
+  const int  offset   = wp0[compID].offset; // offset and shift are read from wp0 only; wp1 contributes just its weight w1
+  const int  clipBD   = clpRng.bd;
+  const int  shiftNum = std::max<int>(2, (IF_INTERNAL_PREC - clipBD)); // never less than 2
+  const int  shift    = wp0[compID].shift + shiftNum;
+  const int  round    = (enableRounding[compID] && (shift > 0)) ? (1 << (shift - 1)) : 0; // 0 when rounding is disabled or shift is not positive
+  const int  w1       = wp1[compID].w;
+  const int  height  = rpcYuvDst.bufs[compID].height; // loop bounds come from the destination buffer
+  const int  width   = rpcYuvDst.bufs[compID].width;
+
+  const uint32_t src0Stride = pcYuvSrc0.bufs[compID].stride;
+  const uint32_t src1Stride = pcYuvSrc1.bufs[compID].stride;
+  const uint32_t dstStride =  rpcYuvDst.bufs[compID].stride;
+
+  for (int y = height - 1; y >= 0; y--) // y only counts rows; the row pointers advance by one stride per iteration
+  {
+    // do it in batches of 4 (partial unroll)
+    int x = width - 1; // columns are processed from the last index down to 0
+
+    for (; x >= 3; ) // x is decremented inside the body, four samples per pass
+    {
+      dst[x] = weightBidir(w0, src0[x], w1, src1[x], round, shift, offset, clpRng ); x--;
+      dst[x] = weightBidir(w0, src0[x], w1, src1[x], round, shift, offset, clpRng ); x--;
+      dst[x] = weightBidir(w0, src0[x], w1, src1[x], round, shift, offset, clpRng ); x--;
+      dst[x] = weightBidir(w0, src0[x], w1, src1[x], round, shift, offset, clpRng ); x--;
+    }
+    for (; x >= 0; x--) // remaining columns (fewer than four) handled one at a time
+    {
+      dst[x] = weightBidir(w0, src0[x], w1, src1[x], round, shift, offset, clpRng );
+    }
+
+    src0 += src0Stride;
+    src1 += src1Stride;
+    dst += dstStride;
+  } // y loop
+}
+
 void  WeightPrediction::addWeightUni(const CPelUnitBuf          &pcYuvSrc0,
                                      const ClpRngs              &clpRngs,
                                      const WPScalingParam *const wp0,
                                            PelUnitBuf           &rpcYuvDst,
-                                     const ComponentID           maxNumComp)
+                                     const ComponentID           maxNumComp
+                                    , bool                       lumaOnly
+                                    , bool                       chromaOnly
+)
 {
   const uint32_t numValidComponent = (const uint32_t)pcYuvSrc0.bufs.size();
 
-  for (int componentIndex = 0; componentIndex < numValidComponent && componentIndex <= maxNumComp; componentIndex++)
+  CHECK( lumaOnly && chromaOnly, "Not allowed to have both lumaOnly and chromaOnly selected" );
+  int firstComponent = chromaOnly ? 1 : 0;
+  int lastComponent  = lumaOnly ? 0 : maxNumComp;
+  for (int componentIndex = firstComponent; componentIndex < numValidComponent && componentIndex <= lastComponent;
+       componentIndex++)
   {
     const ComponentID compID = ComponentID(componentIndex);
 
@@ -315,7 +384,10 @@ void  WeightPrediction::xWeightedPredictionUni(const PredictionUnit       &pu,
                                                const RefPicList           &eRefPicList,
                                                      PelUnitBuf           &pcYuvPred,
                                                const int                   iRefIdx_input/* = -1*/,
-                                               const ComponentID           maxNumComp)
+                                               const ComponentID           maxNumComp
+                                              , bool                       lumaOnly
+                                              , bool                       chromaOnly
+)
 {
   WPScalingParam  *pwp, *pwpTmp;
 
@@ -335,14 +407,17 @@ void  WeightPrediction::xWeightedPredictionUni(const PredictionUnit       &pu,
   {
     getWpScaling(pu.cs->slice, -1, iRefIdx, pwpTmp, pwp, maxNumComp);
   }
-  addWeightUni(pcYuvSrc, pu.cu->slice->clpRngs(), pwp, pcYuvPred, maxNumComp);
+  addWeightUni(pcYuvSrc, pu.cu->slice->clpRngs(), pwp, pcYuvPred, maxNumComp, lumaOnly, chromaOnly);
 }
 
 void  WeightPrediction::xWeightedPredictionBi(const PredictionUnit       &pu,
                                               const CPelUnitBuf          &pcYuvSrc0,
                                               const CPelUnitBuf          &pcYuvSrc1,
                                                     PelUnitBuf           &rpcYuvDst,
-                                              const ComponentID           maxNumComp)
+                                              const ComponentID           maxNumComp
+                                              , bool                      lumaOnly
+                                              , bool                      chromaOnly
+)
 {
   const int iRefIdx0 = pu.refIdx[0];
   const int iRefIdx1 = pu.refIdx[1];
@@ -351,19 +426,21 @@ void  WeightPrediction::xWeightedPredictionBi(const PredictionUnit       &pu,
 
   CHECK( !pu.cs->pps->getWPBiPred(), "Weighted Bi-prediction disabled" );
 
+  if (iRefIdx0 < 0 && iRefIdx1 < 0) return;
+
   getWpScaling(pu.cu->slice, iRefIdx0, iRefIdx1, pwp0, pwp1, maxNumComp);
 
   if (iRefIdx0 >= 0 && iRefIdx1 >= 0)
   {
-    addWeightBi(pcYuvSrc0, pcYuvSrc1, pu.cu->slice->clpRngs(), pwp0, pwp1, rpcYuvDst, true, maxNumComp);
+    addWeightBi(pcYuvSrc0, pcYuvSrc1, pu.cu->slice->clpRngs(), pwp0, pwp1, rpcYuvDst, true, maxNumComp, lumaOnly, chromaOnly);
   }
   else if (iRefIdx0 >= 0 && iRefIdx1 < 0)
   {
-    addWeightUni(pcYuvSrc0, pu.cu->slice->clpRngs(), pwp0, rpcYuvDst, maxNumComp);
+    addWeightUni(pcYuvSrc0, pu.cu->slice->clpRngs(), pwp0, rpcYuvDst, maxNumComp, lumaOnly, chromaOnly);
   }
   else if (iRefIdx0 < 0 && iRefIdx1 >= 0)
   {
-    addWeightUni(pcYuvSrc1, pu.cu->slice->clpRngs(), pwp1, rpcYuvDst, maxNumComp);
+    addWeightUni(pcYuvSrc1, pu.cu->slice->clpRngs(), pwp1, rpcYuvDst, maxNumComp, lumaOnly, chromaOnly);
   }
   else
   {
diff --git a/source/Lib/CommonLib/WeightPrediction.h b/source/Lib/CommonLib/WeightPrediction.h
index 2cbb82fa608e06ad25ff95a80fbc822a7aa16c37..4cc91597c8251dda3cffd509d6b87a0cdc36ac4e 100644
--- a/source/Lib/CommonLib/WeightPrediction.h
+++ b/source/Lib/CommonLib/WeightPrediction.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -69,26 +69,47 @@ public:
                                 const WPScalingParam *const wp1,
                                       PelUnitBuf           &rpcYuvDst,
                                 const bool                  bRoundLuma = true,
-                                const ComponentID           maxNumComp = MAX_NUM_COMPONENT );
+                                const ComponentID           maxNumComp = MAX_NUM_COMPONENT 
+                                , bool                      lumaOnly = false
+                                , bool                      chromaOnly = false
+                                );
+
+  void addWeightBiComponent(    const CPelUnitBuf          &pcYuvSrc0, // single-component counterpart of addWeightBi: only component Comp is processed
+                                const CPelUnitBuf          &pcYuvSrc1,
+                                const ClpRngs              &clpRngs,
+                                const WPScalingParam *const wp0,
+                                const WPScalingParam *const wp1,
+                                      PelUnitBuf           &rpcYuvDst,
+                                const bool                  bRoundLuma = true,
+                                const ComponentID           Comp = COMPONENT_Y); // defaults to COMPONENT_Y
 
   void  addWeightUni(           const CPelUnitBuf          &pcYuvSrc0,
                                 const ClpRngs              &clpRngs,
                                 const WPScalingParam *const wp0,
                                       PelUnitBuf           &rpcYuvDst,
-                                const ComponentID           maxNumComp = MAX_NUM_COMPONENT);
+                                const ComponentID           maxNumComp = MAX_NUM_COMPONENT
+                                , bool                      lumaOnly = false
+                                , bool                      chromaOnly = false
+                                );
 
   void  xWeightedPredictionUni( const PredictionUnit       &pu,
                                 const CPelUnitBuf          &pcYuvSrc,
                                 const RefPicList           &eRefPicList,
                                       PelUnitBuf           &pcYuvPred,
                                 const int                   iRefIdx=-1,
-                                const ComponentID           maxNumComp = MAX_NUM_COMPONENT);
+                                const ComponentID           maxNumComp = MAX_NUM_COMPONENT
+                                , bool                      lumaOnly = false
+                                , bool                      chromaOnly = false
+                                );
 
   void  xWeightedPredictionBi(  const PredictionUnit       &pu,
                                 const CPelUnitBuf          &pcYuvSrc0,
                                 const CPelUnitBuf          &pcYuvSrc1,
                                       PelUnitBuf           &pcYuvDst,
-                                const ComponentID           maxNumComp = MAX_NUM_COMPONENT );
+                                const ComponentID           maxNumComp = MAX_NUM_COMPONENT 
+                                , bool                      lumaOnly = false
+                                , bool                      chromaOnly = false
+                                );
 };
 
 #endif
diff --git a/source/Lib/CommonLib/dtrace.cpp b/source/Lib/CommonLib/dtrace.cpp
index 503f53eec6313b222fbbbf6fd15d851ca8d01993..3500c6be69f4c885964cbec1cd10bc855144fa1b 100644
--- a/source/Lib/CommonLib/dtrace.cpp
+++ b/source/Lib/CommonLib/dtrace.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/dtrace.h b/source/Lib/CommonLib/dtrace.h
index b57efbd22dfa02edc57a02a5fbf3966ec7bd7b34..9e10e201b70042294d934eb4bf4bd44ae78d8dae 100644
--- a/source/Lib/CommonLib/dtrace.h
+++ b/source/Lib/CommonLib/dtrace.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/dtrace_blockstatistics.cpp b/source/Lib/CommonLib/dtrace_blockstatistics.cpp
index c78720917af4ffb15c340e446a649ac0c5d881a7..8005a537f23095a2ca901a3d95414f68ecd3dccc 100644
--- a/source/Lib/CommonLib/dtrace_blockstatistics.cpp
+++ b/source/Lib/CommonLib/dtrace_blockstatistics.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -277,8 +277,8 @@ void retrieveTriangularMvInfo(const PredictionUnit& pu, MotionInfo& mi0, MotionI
   CMotionBuf mb = pu.getMotionBuf();
   bool foundMv[2] = { false, false };
   bool foundBi = false;
-  int32_t idxW  = (int32_t)(g_aucLog2[pu.lwidth() ] - MIN_CU_LOG2);
-  int32_t idxH  = (int32_t)(g_aucLog2[pu.lheight()] - MIN_CU_LOG2);
+  int32_t idxW  = (int32_t)(floorLog2(pu.lwidth() ) - MIN_CU_LOG2);
+  int32_t idxH  = (int32_t)(floorLog2(pu.lheight()) - MIN_CU_LOG2);
   for (int32_t y = 0; y < mb.height; y++)
   {
     for (int32_t x = 0; x < mb.width; x++)
@@ -376,7 +376,7 @@ void writeBlockStatisticsHeader(const SPS *sps)
 
   DTRACE_HEADER( g_trace_ctx, "# VTMBMS Block Statistics\n");
   // sequence info
-  DTRACE_HEADER( g_trace_ctx, "# Sequence size: [%dx %d]\n", sps->getPicWidthInLumaSamples(), sps->getPicHeightInLumaSamples());
+  DTRACE_HEADER( g_trace_ctx, "# Sequence size: [%dx %d]\n", sps->getMaxPicWidthInLumaSamples(), sps->getMaxPicHeightInLumaSamples() );
   // list statistics
   for( auto i = static_cast<int>(BlockStatistic::PredMode); i < static_cast<int>(BlockStatistic::NumBlockStatistics); i++)
   {
@@ -426,17 +426,17 @@ void writeAllData(const CodingStructure& cs, const UnitArea& ctuArea)
         DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::QP), cu.qp);
         DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::SplitSeries), (int)cu.splitSeries);
 
-        if (cs.pps->getTransquantBypassEnabledFlag())
-        {
-          DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::TransQuantBypassFlag), cu.transQuantBypass);
-        }
-
         // skip flag
         if (!cs.slice->isIntra())
         {
           DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::SkipFlag), cu.skip);
         }
 
+        DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::BDPCM), cu.bdpcmMode);
+        DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::BDPCMChroma), cu.bdpcmModeChroma);
+        DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::TileIdx), cu.tileIdx);
+        DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::IndependentSliceIdx), cu.slice->getIndependentSliceIdx());
+        DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::LFNSTIdx), cu.lfnstIdx);
         DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::MMVDSkipFlag), cu.mmvdSkip);
       }
       else if( chType == CHANNEL_TYPE_CHROMA )
@@ -449,11 +449,7 @@ void writeAllData(const CodingStructure& cs, const UnitArea& ctuArea)
         DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::QP_Chroma), cu.qp);
         DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::SplitSeries_Chroma), (int)cu.splitSeries);
 
-        if (cs.pps->getTransquantBypassEnabledFlag())
-        {
-          DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::TransQuantBypassFlag_Chroma), cu.transQuantBypass);
-        }
-
+        DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::BDPCMChroma), cu.bdpcmModeChroma);
       }
 
 
@@ -467,6 +463,7 @@ void writeAllData(const CodingStructure& cs, const UnitArea& ctuArea)
             {
               DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::MergeFlag), pu.mergeFlag);
             }
+            DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::RegularMergeFlag), pu.regularMergeFlag);
             if( pu.mergeFlag )
             {
               DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::MergeIdx),  pu.mergeIdx);
@@ -476,12 +473,11 @@ void writeAllData(const CodingStructure& cs, const UnitArea& ctuArea)
               {
               DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::MMVDMergeIdx),  pu.mmvdMergeIdx);
               }
-              DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::MHIntraFlag),  pu.mhIntraFlag);
-              if (pu.mhIntraFlag)
+              DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::CiipFlag),  pu.ciipFlag);
+              if (pu.ciipFlag)
               {
                 DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::Luma_IntraMode),  pu.intraDir[COMPONENT_Y]);
               }
-              DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::TriangleFlag), pu.cu->triangle);
             }
             DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::AffineFlag), pu.cu->affine);
             DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::AffineType), pu.cu->affineType);
@@ -657,22 +653,20 @@ void writeAllData(const CodingStructure& cs, const UnitArea& ctuArea)
 
 
           }
+          DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::SMVDFlag), cu.smvdMode);
           DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::IMVMode), cu.imv);
           DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::RootCbf), cu.rootCbf);
-          DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::GBIIndex), cu.GBiIdx);
-          DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::IBCFlag), cu.predMode == MODE_IBC);
+          DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::BCWIndex), cu.BcwIdx);
+          DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::SbtIdx), cu.getSbtIdx());
+          DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::SbtPos), cu.getSbtPos());
         }
         break;
       case MODE_INTRA:
         {
-
           if(chType == CHANNEL_TYPE_LUMA)
           {
-            DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::IPCM), cu.ipcm);
-          }
-          else if(chType == CHANNEL_TYPE_CHROMA)
-          {
-            DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::IPCM_Chroma), cu.ipcm);
+            DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::MIPFlag), cu.mipFlag);
+            DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::ISPMode), cu.ispMode);
           }
 
           const uint32_t numChType = ::getNumberValidChannels( cu.chromaFormat );
@@ -693,9 +687,7 @@ void writeAllData(const CodingStructure& cs, const UnitArea& ctuArea)
                 {
                   const uint32_t uiChFinalMode  = PU::getFinalIntraMode( pu, ChannelType( chType ) );
                   DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::Chroma_IntraMode), uiChFinalMode);
-#if ENABLE_CHROMA_422
                     assert(0);
-#endif
                 }
               }
             }
@@ -712,12 +704,32 @@ void writeAllData(const CodingStructure& cs, const UnitArea& ctuArea)
         if (tu.Y().valid())
         {
           DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, tu, GetBlockStatisticName(BlockStatistic::Cbf_Y), tu.cbf[COMPONENT_Y]);
-          DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, tu, GetBlockStatisticName(BlockStatistic::MTSIdx), tu.mtsIdx);
+          DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, tu, GetBlockStatisticName(BlockStatistic::MTSIdx_Y), tu.mtsIdx[COMPONENT_Y]);
         }
-        if (!(cu.chromaFormat == CHROMA_400 || (CS::isDualITree(*cu.cs) && cu.chType == CHANNEL_TYPE_LUMA)))
+        if ( tu.Cb().valid() )
+        {
+          DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, tu, GetBlockStatisticName(BlockStatistic::JointCbCr), tu.jointCbCr);
+        }
+
+        bool lumaOnly  = ( cu.chromaFormat == CHROMA_400 || !tu.blocks[COMPONENT_Cb].valid() );
+        if( !lumaOnly )
+        {
+          if( TU::hasCrossCompPredInfo( tu, COMPONENT_Cb ) )
+          {
+            DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, tu, GetBlockStatisticName(BlockStatistic::CompAlphaCb), tu.compAlpha[COMPONENT_Cb] );
+          }
+          if( TU::hasCrossCompPredInfo( tu, COMPONENT_Cr ) )
+          {
+            DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, tu, GetBlockStatisticName(BlockStatistic::CompAlphaCr), tu.compAlpha[COMPONENT_Cr] );
+          }
+        }
+
+        if( !(cu.chromaFormat == CHROMA_400 || (cu.isSepTree() && cu.chType == CHANNEL_TYPE_LUMA)) )
         {
           DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_ALL, tu, GetBlockStatisticName(BlockStatistic::Cbf_Cb), tu.cbf[COMPONENT_Cb]);
           DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_ALL, tu, GetBlockStatisticName(BlockStatistic::Cbf_Cr), tu.cbf[COMPONENT_Cr]);
+          DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_ALL, tu, GetBlockStatisticName(BlockStatistic::MTSIdx_Cb), tu.mtsIdx[COMPONENT_Cb]);
+          DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_ALL, tu, GetBlockStatisticName(BlockStatistic::MTSIdx_Cr), tu.mtsIdx[COMPONENT_Cr]);
         }
       }
     }
@@ -733,7 +745,6 @@ void writeAllCodedData(const CodingStructure & cs, const UnitArea & ctuArea)
   for (int ch = 0; ch < maxNumChannelType; ch++)
   {
     const ChannelType chType = ChannelType(ch);
-    const SPS& sps = *cs.sps;
 
     for (const CodingUnit &cu : cs.traverseCUs(CS::getArea(cs, ctuArea, chType), chType))
     {
@@ -746,11 +757,6 @@ void writeAllCodedData(const CodingStructure & cs, const UnitArea & ctuArea)
         DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::ChromaQPAdj), cu.chromaQpAdj);
         DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::QP), cu.qp);
         DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::SplitSeries), (int)cu.splitSeries);
-        // transquant bypass flag
-        if (cs.pps->getTransquantBypassEnabledFlag())
-        {
-          DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::TransQuantBypassFlag), cu.transQuantBypass);
-        }
         // skip flag
         if (!cs.slice->isIntra() && cu.Y().valid())
         {
@@ -764,13 +770,6 @@ void writeAllCodedData(const CodingStructure & cs, const UnitArea & ctuArea)
         // prediction mode and partitioning data
         DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::PredMode), cu.predMode);
 
-        if (CU::isIntra(cu))
-        {
-          if (!(!sps.getPCMEnabledFlag() || cu.lumaSize().width > (1 << sps.getPCMLog2MaxSize()) || cu.lumaSize().width < (1 << sps.getPCMLog2MinSize())))
-          {
-            DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::IPCM), cu.ipcm);
-          }
-        }
       }
       else if (chType == CHANNEL_TYPE_CHROMA )
       {
@@ -781,19 +780,7 @@ void writeAllCodedData(const CodingStructure & cs, const UnitArea & ctuArea)
         DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::ChromaQPAdj_Chroma), cu.chromaQpAdj);
         DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::QP_Chroma), cu.qp);
         DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::SplitSeries_Chroma), (int)cu.splitSeries);
-        // transquant bypass flag
-        if (cs.pps->getTransquantBypassEnabledFlag())
-        {
-          DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::TransQuantBypassFlag_Chroma), cu.transQuantBypass);
-        }
 
-        if (CU::isIntra(cu))
-        {
-          if (!(!sps.getPCMEnabledFlag() || cu.lumaSize().width > (1 << sps.getPCMLog2MaxSize()) || cu.lumaSize().width < (1 << sps.getPCMLog2MinSize())))
-          {
-            DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::IPCM_Chroma), cu.ipcm);
-          }
-        }
       }
 
       for (const PredictionUnit &pu : CU::traversePUs(cu))
@@ -806,7 +793,7 @@ void writeAllCodedData(const CodingStructure & cs, const UnitArea & ctuArea)
             {
               DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, pu, GetBlockStatisticName(BlockStatistic::Luma_IntraMode), PU::getFinalIntraMode(pu, ChannelType(chType)));
             }
-            if (!(pu.chromaFormat == CHROMA_400 || (CS::isDualITree(*pu.cs) && pu.chType == CHANNEL_TYPE_LUMA)))
+            if (!(pu.chromaFormat == CHROMA_400 || (pu.cu->isSepTree() && pu.chType == CHANNEL_TYPE_LUMA)))
             {
               DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_CODED, pu, GetBlockStatisticName(BlockStatistic::Chroma_IntraMode), PU::getFinalIntraMode(pu, CHANNEL_TYPE_CHROMA));
             }
@@ -837,12 +824,12 @@ void writeAllCodedData(const CodingStructure & cs, const UnitArea & ctuArea)
               {
                 DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, pu, GetBlockStatisticName(BlockStatistic::AffineFlag), pu.cu->affine);
               }
-              if (pu.cs->sps->getUseMHIntra() && !pu.cu->skip && !pu.cu->affine && !(pu.cu->lwidth() * pu.cu->lheight() < 64 || pu.cu->lwidth() >= MAX_CU_SIZE || pu.cu->lheight() >= MAX_CU_SIZE)
+              if (pu.cs->sps->getUseCiip() && !pu.cu->skip && !pu.cu->affine && !(pu.cu->lwidth() * pu.cu->lheight() < 64 || pu.cu->lwidth() >= MAX_CU_SIZE || pu.cu->lheight() >= MAX_CU_SIZE)
                 && !pu.mmvdMergeFlag
                 )
               {
-                DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, pu, GetBlockStatisticName(BlockStatistic::MHIntraFlag), pu.mhIntraFlag);
-                if (pu.mhIntraFlag)
+                DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, pu, GetBlockStatisticName(BlockStatistic::CiipFlag), pu.ciipFlag);
+                if (pu.ciipFlag)
                 {
                   if (cu.Y().valid())
                   {
@@ -851,11 +838,6 @@ void writeAllCodedData(const CodingStructure & cs, const UnitArea & ctuArea)
                   }
                 }
               }
-              if (cu.cs->slice->getSPS()->getUseTriangle() && cu.cs->slice->isInterB() && cu.lwidth() * cu.lheight() >= TRIANGLE_MIN_SIZE && !cu.affine)
-              {
-                DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::TriangleFlag), cu.triangle);
-
-              }
             }
             else
             {
@@ -967,9 +949,9 @@ void writeAllCodedData(const CodingStructure & cs, const UnitArea & ctuArea)
             {
               DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::IMVMode), cu.imv);
             }
-            if (CU::isGBiIdxCoded(cu))
+            if (CU::isBcwIdxCoded(cu))
             {
-              DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::GBIIndex), cu.GBiIdx);
+              DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::BCWIndex), cu.BcwIdx);
             }
             break;
           }
@@ -995,12 +977,14 @@ void writeAllCodedData(const CodingStructure & cs, const UnitArea & ctuArea)
           if (tu.Y().valid())
           {
             DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, tu, GetBlockStatisticName(BlockStatistic::Cbf_Y), tu.cbf[COMPONENT_Y]);
-            DTRACE_BLOCK_SCALAR( g_trace_ctx, D_BLOCK_STATISTICS_CODED, tu, GetBlockStatisticName( BlockStatistic::MTSIdx ), tu.mtsIdx );
+            DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, tu, GetBlockStatisticName(BlockStatistic::MTSIdx_Y), tu.mtsIdx[COMPONENT_Y]);
           }
-          if (!(cu.chromaFormat == CHROMA_400 || (CS::isDualITree(*cu.cs) && cu.chType == CHANNEL_TYPE_LUMA)))
+          if (!(cu.chromaFormat == CHROMA_400 || (cu.isSepTree() && cu.chType == CHANNEL_TYPE_LUMA)))
           {
             DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_CODED, tu, GetBlockStatisticName(BlockStatistic::Cbf_Cb), tu.cbf[COMPONENT_Cb]);
             DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_CODED, tu, GetBlockStatisticName(BlockStatistic::Cbf_Cr), tu.cbf[COMPONENT_Cr]);
+            DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_CODED, tu, GetBlockStatisticName(BlockStatistic::MTSIdx_Cb), tu.mtsIdx[COMPONENT_Cb]);
+            DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_CODED, tu, GetBlockStatisticName(BlockStatistic::MTSIdx_Cr), tu.mtsIdx[COMPONENT_Cr]);
           }
         }
       }
diff --git a/source/Lib/CommonLib/dtrace_blockstatistics.h b/source/Lib/CommonLib/dtrace_blockstatistics.h
index 56b5adec63f37049fcc7bf518083f0b0c2e40fbf..2df8b9b3af20416ee918590671b48f374c40f1d9 100644
--- a/source/Lib/CommonLib/dtrace_blockstatistics.h
+++ b/source/Lib/CommonLib/dtrace_blockstatistics.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -63,23 +63,40 @@ enum class BlockStatistic {
   ChromaQPAdj,
   QP,
   SplitSeries,
-  TransQuantBypassFlag,
-  MTSIdx,
+  MTSIdx_Y,
+  MTSIdx_Cb,
+  MTSIdx_Cr,
+  BDPCM,
+  BDPCMChroma,
+  TileIdx,
+  IndependentSliceIdx,
+  LFNSTIdx,
+  JointCbCr,
+  CompAlphaCb,
+  CompAlphaCr,
+  RDPCM_Y,
+  RDPCM_Cb,
+  RDPCM_Cr,
 
   // intra
-  IPCM,
   Luma_IntraMode,
   Chroma_IntraMode,
   MultiRefIdx,
+  MIPFlag,
+  ISPMode,
+
   // inter
   SkipFlag,
   RootCbf,
+  SbtIdx,
+  SbtPos,
   Cbf_Y,
   Cbf_Cb,
   Cbf_Cr,
   IMVMode,
   InterDir,
   MergeFlag,
+  RegularMergeFlag,
   MergeIdx,
   MergeType,
   MVPIdxL0,
@@ -99,13 +116,12 @@ enum class BlockStatistic {
   MMVDSkipFlag,
   MMVDMergeFlag,
   MMVDMergeIdx,
-  MHIntraFlag,
-  TriangleFlag,
+  CiipFlag,
+  SMVDFlag,
   TrianglePartitioning,
   TriangleMVL0, //<< currently only uni-prediction enabled
   TriangleMVL1, //<< currently only uni-prediction enabled
-  GBIIndex,
-  IBCFlag,
+  BCWIndex,
 // for dual tree
   // general
   Depth_Chroma,
@@ -115,10 +131,8 @@ enum class BlockStatistic {
   ChromaQPAdj_Chroma,
   QP_Chroma,
   SplitSeries_Chroma,
-  TransQuantBypassFlag_Chroma,
 
   // intra
-  IPCM_Chroma,
 
   NumBlockStatistics,
 };
@@ -138,15 +152,31 @@ static const std::map<BlockStatistic, std::tuple<std::string, BlockStatisticType
 {
   // Statistics enum                                                                                Statistics name string         Statistic Type                              Type specific information:
   //                                                                                                                                                                           Value range, vector scale
-  { BlockStatistic::PredMode,               std::tuple<std::string, BlockStatisticType, std::string>{"PredMode",                    BlockStatisticType::Flag,                   ""}},
+  { BlockStatistic::PredMode,               std::tuple<std::string, BlockStatisticType, std::string>{"PredMode",                    BlockStatisticType::Integer,                "[0, " + std::to_string(NUMBER_OF_PREDICTION_MODES) + "]"}},
   { BlockStatistic::MergeFlag,              std::tuple<std::string, BlockStatisticType, std::string>{"MergeFlag",                   BlockStatisticType::Flag,                   ""}},
+  { BlockStatistic::RegularMergeFlag,       std::tuple<std::string, BlockStatisticType, std::string>{"RegularMergeFlag",            BlockStatisticType::Flag,                   ""}},
   { BlockStatistic::MVL0,                   std::tuple<std::string, BlockStatisticType, std::string>{"MVL0",                        BlockStatisticType::Vector,                 "Scale: 4"}},
   { BlockStatistic::MVL1,                   std::tuple<std::string, BlockStatisticType, std::string>{"MVL1",                        BlockStatisticType::Vector,                 "Scale: 4"}},
-  { BlockStatistic::IPCM,                   std::tuple<std::string, BlockStatisticType, std::string>{"IPCM",                        BlockStatisticType::Flag,                   ""}},
   { BlockStatistic::Luma_IntraMode,         std::tuple<std::string, BlockStatisticType, std::string>{"Luma_IntraMode",              BlockStatisticType::Integer,                "[0, " + std::to_string(NUM_INTRA_MODE) + "]"}},
   { BlockStatistic::Chroma_IntraMode,       std::tuple<std::string, BlockStatisticType, std::string>{"Chroma_IntraMode",            BlockStatisticType::Integer,                "[0, " + std::to_string(NUM_INTRA_MODE) + "]"}},
   { BlockStatistic::SkipFlag,               std::tuple<std::string, BlockStatisticType, std::string>{"SkipFlag",                    BlockStatisticType::Flag,                   ""}},
-  { BlockStatistic::MTSIdx,                 std::tuple<std::string, BlockStatisticType, std::string>{"TransformSkipFlag_Y",         BlockStatisticType::Integer,                ""}},
+  { BlockStatistic::MTSIdx_Y,               std::tuple<std::string, BlockStatisticType, std::string> {"MTS_Y",                      BlockStatisticType::Integer,                ""} },
+  { BlockStatistic::MTSIdx_Cb,               std::tuple<std::string, BlockStatisticType, std::string>{"MTS_Cb",                     BlockStatisticType::Integer,                ""} },
+  { BlockStatistic::MTSIdx_Cr,               std::tuple<std::string, BlockStatisticType, std::string>{"MTS_Cr",                     BlockStatisticType::Integer,                ""} },
+  { BlockStatistic::BDPCM,                  std::tuple<std::string, BlockStatisticType, std::string>{"BDPCM",                       BlockStatisticType::Flag,                   ""}},    // called bdpcmMode, but used like a flag in the software? related to intra, but signalled always?
+  { BlockStatistic::BDPCMChroma,            std::tuple<std::string, BlockStatisticType, std::string>{"BDPCMChroma",                 BlockStatisticType::Flag,                   ""}},
+  { BlockStatistic::TileIdx,                std::tuple<std::string, BlockStatisticType, std::string>{"TileIdx",                     BlockStatisticType::Integer,                ""}},
+  { BlockStatistic::IndependentSliceIdx,    std::tuple<std::string, BlockStatisticType, std::string>{"IndependentSliceIdx",         BlockStatisticType::Integer,                ""}},
+  { BlockStatistic::LFNSTIdx,               std::tuple<std::string, BlockStatisticType, std::string>{"LFNSTIdx",                    BlockStatisticType::Integer,                "[0, 3]"}},
+  { BlockStatistic::JointCbCr,              std::tuple<std::string, BlockStatisticType, std::string>{"JointCbCr",                   BlockStatisticType::Flag,                   ""}},
+  { BlockStatistic::CompAlphaCb,            std::tuple<std::string, BlockStatisticType, std::string>{"CompAlphaCb",                 BlockStatisticType::Integer,                ""}},
+  { BlockStatistic::CompAlphaCr,            std::tuple<std::string, BlockStatisticType, std::string>{"CompAlphaCr",                 BlockStatisticType::Integer,                ""}},
+  { BlockStatistic::RDPCM_Y,                std::tuple<std::string, BlockStatisticType, std::string>{"RDPCM_Y",                     BlockStatisticType::Integer,                "[0, " + std::to_string(NUMBER_OF_RDPCM_MODES) + "]"}},
+  { BlockStatistic::RDPCM_Cb,               std::tuple<std::string, BlockStatisticType, std::string>{"RDPCM_Cb",                    BlockStatisticType::Integer,                "[0, " + std::to_string(NUMBER_OF_RDPCM_MODES) + "]"}},
+  { BlockStatistic::RDPCM_Cr,               std::tuple<std::string, BlockStatisticType, std::string>{"RDPCM_Cr",                    BlockStatisticType::Integer,                "[0, " + std::to_string(NUMBER_OF_RDPCM_MODES) + "]"}},
+
+  { BlockStatistic::MIPFlag,                std::tuple<std::string, BlockStatisticType, std::string>{"MIPFlag",                     BlockStatisticType::Flag,                   ""}},
+  { BlockStatistic::ISPMode,                std::tuple<std::string, BlockStatisticType, std::string>{"ISPMode",                     BlockStatisticType::Integer,                "[0, " + std::to_string(NUM_INTRA_SUBPARTITIONS_MODES) + "]"}},
   { BlockStatistic::Depth,                  std::tuple<std::string, BlockStatisticType, std::string>{"Depth",                       BlockStatisticType::Integer,                "[0, 7]"}},
   { BlockStatistic::QT_Depth,               std::tuple<std::string, BlockStatisticType, std::string>{"QT_Depth",                    BlockStatisticType::Integer,                "[0, 7]"}},
   { BlockStatistic::BT_Depth,               std::tuple<std::string, BlockStatisticType, std::string>{"BT_Depth",                    BlockStatisticType::Integer,                "[0, 7]"}},
@@ -155,10 +185,11 @@ static const std::map<BlockStatistic, std::tuple<std::string, BlockStatisticType
   { BlockStatistic::QP,                     std::tuple<std::string, BlockStatisticType, std::string>{"QP",                          BlockStatisticType::Integer,                "[0, 51]"}},
   { BlockStatistic::SplitSeries,            std::tuple<std::string, BlockStatisticType, std::string>{"SplitSeries",                 BlockStatisticType::Integer,                "[0, " + std::to_string(std::numeric_limits<SplitSeries>::max()) + "]"}},
   { BlockStatistic::RootCbf,                std::tuple<std::string, BlockStatisticType, std::string>{"RootCbf",                     BlockStatisticType::Flag,                   ""}},
+  { BlockStatistic::SbtIdx,                 std::tuple<std::string, BlockStatisticType, std::string>{"SbtIdx",                      BlockStatisticType::Integer,                "[0, " + std::to_string(NUMBER_SBT_IDX) + "]"}},
+  { BlockStatistic::SbtPos,                 std::tuple<std::string, BlockStatisticType, std::string>{"SbtPos",                      BlockStatisticType::Integer,                "[0, " + std::to_string(NUMBER_SBT_POS) + "]"}},
   { BlockStatistic::Cbf_Y,                  std::tuple<std::string, BlockStatisticType, std::string>{"Cbf_Y",                       BlockStatisticType::Flag,                   ""}},
   { BlockStatistic::Cbf_Cb,                 std::tuple<std::string, BlockStatisticType, std::string>{"Cbf_Cb",                      BlockStatisticType::Flag,                   ""}},
   { BlockStatistic::Cbf_Cr,                 std::tuple<std::string, BlockStatisticType, std::string>{"Cbf_Cr",                      BlockStatisticType::Flag,                   ""}},
-  { BlockStatistic::TransQuantBypassFlag,   std::tuple<std::string, BlockStatisticType, std::string>{"TransQuantBypassFlag",        BlockStatisticType::Flag,                   ""}},
   { BlockStatistic::MergeIdx,               std::tuple<std::string, BlockStatisticType, std::string>{"MergeIdx",                    BlockStatisticType::Integer,                "[0, 7]"}},
   { BlockStatistic::InterDir,               std::tuple<std::string, BlockStatisticType, std::string>{"InterDir",                    BlockStatisticType::Integer,                "[1, 3]"}},
   { BlockStatistic::MergeType,              std::tuple<std::string, BlockStatisticType, std::string>{"MergeType",                   BlockStatisticType::Integer,                "[0, 2]"}},
@@ -179,13 +210,12 @@ static const std::map<BlockStatistic, std::tuple<std::string, BlockStatisticType
   { BlockStatistic::MMVDSkipFlag,           std::tuple<std::string, BlockStatisticType, std::string>{"MMVDSkipFlag",                BlockStatisticType::Flag,                   ""}},
   { BlockStatistic::MMVDMergeFlag,          std::tuple<std::string, BlockStatisticType, std::string>{"MMVDMergeFlag",               BlockStatisticType::Flag,                   ""}},
   { BlockStatistic::MMVDMergeIdx,           std::tuple<std::string, BlockStatisticType, std::string>{"MMVDMergeIdx",                BlockStatisticType::Integer,                "[0, 1]"}},
-  { BlockStatistic::MHIntraFlag,            std::tuple<std::string, BlockStatisticType, std::string>{"MHIntraFlag",                 BlockStatisticType::Flag,                   ""}},
-  { BlockStatistic::TriangleFlag,           std::tuple<std::string, BlockStatisticType, std::string>{"TriangleFlag",                BlockStatisticType::Flag,                   ""}},
+  { BlockStatistic::CiipFlag,            std::tuple<std::string, BlockStatisticType, std::string>{"CiipFlag",                 BlockStatisticType::Flag,                   ""}},
+  { BlockStatistic::SMVDFlag,               std::tuple<std::string, BlockStatisticType, std::string>{"SMVDFlag",                    BlockStatisticType::Flag,                   ""}},
   { BlockStatistic::TrianglePartitioning,   std::tuple<std::string, BlockStatisticType, std::string>{"TrianglePartitioning",        BlockStatisticType::Line,                   ""}},
   { BlockStatistic::TriangleMVL0,           std::tuple<std::string, BlockStatisticType, std::string>{"TriangleMVL0",                BlockStatisticType::VectorPolygon,          "Scale: 4"}},
   { BlockStatistic::TriangleMVL1,           std::tuple<std::string, BlockStatisticType, std::string>{"TriangleMVL1",                BlockStatisticType::VectorPolygon,          "Scale: 4"}},
-  { BlockStatistic::GBIIndex,               std::tuple<std::string, BlockStatisticType, std::string>{"GBIIndex",                    BlockStatisticType::Integer,                "[0, 4]"}},
-  { BlockStatistic::IBCFlag,                std::tuple<std::string, BlockStatisticType, std::string>{"IBCFlag",                     BlockStatisticType::Flag,                   ""}},
+  { BlockStatistic::BCWIndex,               std::tuple<std::string, BlockStatisticType, std::string>{"BCWIndex",                    BlockStatisticType::Integer,                "[0, 4]"}},
   // for dual tree
   { BlockStatistic::Depth_Chroma,                  std::tuple<std::string, BlockStatisticType, std::string>{"Depth_Chroma",                       BlockStatisticType::Integer,                "[0, 10]"}}, // todo: actual limits?
   { BlockStatistic::QT_Depth_Chroma,               std::tuple<std::string, BlockStatisticType, std::string>{"QT_Depth_Chroma",                    BlockStatisticType::Integer,                "[0, 10]"}}, // todo: actual limits?
@@ -194,8 +224,6 @@ static const std::map<BlockStatistic, std::tuple<std::string, BlockStatisticType
   { BlockStatistic::ChromaQPAdj_Chroma,            std::tuple<std::string, BlockStatisticType, std::string>{"ChromaQPAdj_Chroma",                 BlockStatisticType::Integer,                "[-10, 10]"}}, // todo: actual limits?
   { BlockStatistic::QP_Chroma,                     std::tuple<std::string, BlockStatisticType, std::string>{"QP_Chroma",                          BlockStatisticType::Integer,                "[0, 51]"}},
   { BlockStatistic::SplitSeries_Chroma,            std::tuple<std::string, BlockStatisticType, std::string>{"SplitSeries_Chroma",                 BlockStatisticType::Integer,                "[0, " + std::to_string(std::numeric_limits<SplitSeries>::max()) + "]"}},
-  { BlockStatistic::TransQuantBypassFlag_Chroma,   std::tuple<std::string, BlockStatisticType, std::string>{"TransQuantBypassFlag_Chroma",        BlockStatisticType::Flag,                   ""}},
-  { BlockStatistic::IPCM_Chroma,                   std::tuple<std::string, BlockStatisticType, std::string>{"IPCM_Chroma",                        BlockStatisticType::Flag,                   ""}},
 
 };
 
diff --git a/source/Lib/CommonLib/dtrace_buffer.h b/source/Lib/CommonLib/dtrace_buffer.h
index f5fcbdf61a542990ed952845786472b1e40d6077..afba4a3ced5a5e4e8db2d57faba03a055c53db1d 100644
--- a/source/Lib/CommonLib/dtrace_buffer.h
+++ b/source/Lib/CommonLib/dtrace_buffer.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/dtrace_codingstruct.h b/source/Lib/CommonLib/dtrace_codingstruct.h
index eafc916e1b9cbfbb4fb646a057cbd8e59f1bc436..656942903eb14621fe0db10cc3a721a465affb0c 100644
--- a/source/Lib/CommonLib/dtrace_codingstruct.h
+++ b/source/Lib/CommonLib/dtrace_codingstruct.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/dtrace_next.h b/source/Lib/CommonLib/dtrace_next.h
index 95681319b4d76c2b1a02236fd4dd41ec1b905d4b..7ef78cca960ba7d31b793788d4d0510bee803e52 100644
--- a/source/Lib/CommonLib/dtrace_next.h
+++ b/source/Lib/CommonLib/dtrace_next.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/version.h b/source/Lib/CommonLib/version.h
index 39d859b2b00201c0e31e0bc5b97c7fe3e0c7acc0..3ef053dca0c41d0a1edba42c9f4c2a67b976c164 100644
--- a/source/Lib/CommonLib/version.h
+++ b/source/Lib/CommonLib/version.h
@@ -1,3 +1,3 @@
 #if ! defined( VTM_VERSION )
-#define VTM_VERSION "4.1"
+#define VTM_VERSION "7.3"
 #endif
diff --git a/source/Lib/CommonLib/x86/AdaptiveLoopFilterX86.h b/source/Lib/CommonLib/x86/AdaptiveLoopFilterX86.h
index ef368d78f93a25eeccfa57e0481415fda4b588f8..30858585761678f3c87cdbb9c4bcc51b3e572a7b 100644
--- a/source/Lib/CommonLib/x86/AdaptiveLoopFilterX86.h
+++ b/source/Lib/CommonLib/x86/AdaptiveLoopFilterX86.h
@@ -31,881 +31,653 @@
  * THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-/** \file     AdaptiveLoopFilterX86.h
-    \brief    adaptive loop filter class
-*/
 #include "CommonDefX86.h"
 #include "../AdaptiveLoopFilter.h"
 
-//! \ingroup CommonLib
-//! \{
-
 #ifdef TARGET_SIMD_X86
 #if defined _MSC_VER
 #include <tmmintrin.h>
 #else
-#include <immintrin.h>
+#include <x86intrin.h>
 #endif
 
 template<X86_VEXT vext>
-static void simdDeriveClassificationBlk( AlfClassifier** classifier, int** laplacian[NUM_DIRECTIONS], const CPelBuf& srcLuma, const Area& blk, const int shift )
+static void simdDeriveClassificationBlk(AlfClassifier **classifier, int **laplacian[NUM_DIRECTIONS],
+                                        const CPelBuf &srcLuma, const Area &blkDst, const Area &blk, const int shift,
+                                        const int vbCTUHeight, int vbPos )
 {
-  const int img_stride = srcLuma.stride;
-  const Pel* srcExt = srcLuma.buf;
+  CHECK((blk.height & 7) != 0, "Block height must be a multiple of 8");
+  CHECK((blk.width & 7) != 0, "Block width must be a multiple of 8");
+  CHECK((vbCTUHeight & (vbCTUHeight - 1)) != 0, "vbCTUHeight must be a power of 2");
 
-  const int fl = 2;
-  const int flplusOne = fl + 1;
-  const int fl2plusTwo = 2 * fl + 2;
-  const int var_max = 15;
+  const size_t imgStride = srcLuma.stride;
+  const Pel *  srcExt    = srcLuma.buf;
 
-  const int imgHExtended = blk.height + fl2plusTwo;
-  const int imgWExtended = blk.width + fl2plusTwo;
+  const int imgHExtended = blk.height + 4;
+  const int imgWExtended = blk.width + 4;
 
   const int posX = blk.pos().x;
   const int posY = blk.pos().y;
-  const int start_height1 = posY - flplusOne;
 
-  static uint16_t _temp[( AdaptiveLoopFilter::m_CLASSIFICATION_BLK_SIZE + 4 ) >> 1][AdaptiveLoopFilter::m_CLASSIFICATION_BLK_SIZE + 4];
+  // 18x40 array
+  uint16_t colSums[(AdaptiveLoopFilter::m_CLASSIFICATION_BLK_SIZE + 4) >> 1]
+                  [AdaptiveLoopFilter::m_CLASSIFICATION_BLK_SIZE + 8];
 
-  for( int i = 0; i < imgHExtended - 2; i += 2 )
+  for (int i = 0; i < imgHExtended; i += 2)
   {
-    int yoffset = ( i + 1 + start_height1 ) * img_stride - flplusOne;
-
-    const Pel *p_imgY_pad_down = &srcExt[yoffset - img_stride];
-    const Pel *p_imgY_pad = &srcExt[yoffset];
-    const Pel *p_imgY_pad_up = &srcExt[yoffset + img_stride];
-    const Pel *p_imgY_pad_up2 = &srcExt[yoffset + img_stride * 2];
+    const size_t offset = (i + posY - 3) * imgStride + posX - 3;
 
-    __m128i mmStore = _mm_setzero_si128();
+    const Pel *imgY0 = &srcExt[offset];
+    const Pel *imgY1 = &srcExt[offset + imgStride];
+    const Pel *imgY2 = &srcExt[offset + imgStride * 2];
+    const Pel *imgY3 = &srcExt[offset + imgStride * 3];
 
-    for( int j = 2; j < imgWExtended; j += 8 )
+    // pixel padding for gradient calculation
+    int pos      = blkDst.pos().y - 2 + i;
+    int posInCTU = pos & (vbCTUHeight - 1);
+    if (pos > 0 && posInCTU == vbPos - 2)
     {
-      const int pixY = j - 1 + posX;
-
-      const __m128i* pY = ( __m128i* )( p_imgY_pad + pixY - 1 );
-      const __m128i* pYdown = ( __m128i* )( p_imgY_pad_down + pixY - 1 );
-      const __m128i* pYup = ( __m128i* )( p_imgY_pad_up + pixY - 1 );
-      const __m128i* pYup2 = ( __m128i* )( p_imgY_pad_up2 + pixY - 1 );
-
-      const __m128i* pY_next = ( __m128i* )( p_imgY_pad + pixY + 7 );
-      const __m128i* pYdown_next = ( __m128i* )( p_imgY_pad_down + pixY + 7 );
-      const __m128i* pYup_next = ( __m128i* )( p_imgY_pad_up + pixY + 7 );
-      const __m128i* pYup2_next = ( __m128i* )( p_imgY_pad_up2 + pixY + 7 );
-
-      __m128i xmm0 = _mm_loadu_si128( pYdown );
-      __m128i xmm1 = _mm_loadu_si128( pY );
-      __m128i xmm2 = _mm_loadu_si128( pYup );
-      __m128i xmm3 = _mm_loadu_si128( pYup2 );
-
-      const __m128i xmm0_next = _mm_loadu_si128( pYdown_next );
-      const __m128i xmm1_next = _mm_loadu_si128( pY_next );
-      const __m128i xmm2_next = _mm_loadu_si128( pYup_next );
-      const __m128i xmm3_next = _mm_loadu_si128( pYup2_next );
-
-      __m128i xmm4 = _mm_slli_epi16( _mm_alignr_epi8( xmm1_next, xmm1, 2 ), 1 );
-      __m128i xmm5 = _mm_slli_epi16( _mm_alignr_epi8( xmm2_next, xmm2, 2 ), 1 );
-
-      __m128i xmm15 = _mm_setzero_si128();
-
-      //dig0
-      __m128i xmm6 = _mm_add_epi16( _mm_alignr_epi8( xmm2_next, xmm2, 4 ), xmm0 );
-      xmm6 = _mm_sub_epi16( _mm_blend_epi16 ( xmm4, xmm15, 0xAA ), _mm_blend_epi16 ( xmm6, xmm15, 0xAA ) );
-      __m128i xmm8 = _mm_add_epi16( _mm_alignr_epi8( xmm3_next, xmm3, 4 ), xmm1 );
-      xmm8 = _mm_sub_epi16( _mm_blend_epi16 ( xmm5, xmm15, 0x55 ), _mm_blend_epi16 ( xmm8, xmm15, 0x55 ) );
-
-      //dig1
-      __m128i xmm9 = _mm_add_epi16( _mm_alignr_epi8( xmm0_next, xmm0, 4 ), xmm2 );
-      xmm9 = _mm_sub_epi16( _mm_blend_epi16 ( xmm4, xmm15, 0xAA ), _mm_blend_epi16 ( xmm9, xmm15, 0xAA ) );
-      __m128i xmm10 = _mm_add_epi16( _mm_alignr_epi8( xmm1_next, xmm1, 4 ), xmm3 );
-      xmm10 = _mm_sub_epi16( _mm_blend_epi16 ( xmm5, xmm15, 0x55 ), _mm_blend_epi16 ( xmm10, xmm15, 0x55 ) );
-
-      //hor
-      __m128i xmm13 = _mm_add_epi16( _mm_alignr_epi8( xmm1_next, xmm1, 4 ), xmm1 );
-      xmm13 = _mm_sub_epi16( _mm_blend_epi16 ( xmm4, xmm15, 0xAA ), _mm_blend_epi16 ( xmm13, xmm15, 0xAA ) );
-      __m128i xmm14 = _mm_add_epi16( _mm_alignr_epi8( xmm2_next, xmm2, 4 ), xmm2 );
-      xmm14 = _mm_sub_epi16( _mm_blend_epi16 ( xmm5, xmm15, 0x55 ), _mm_blend_epi16 ( xmm14, xmm15, 0x55 ) );
-
-      //ver
-      __m128i xmm11 = _mm_add_epi16( _mm_alignr_epi8( xmm0_next, xmm0, 2 ), _mm_alignr_epi8( xmm2_next, xmm2, 2 ) );
-      xmm11 = _mm_sub_epi16( _mm_blend_epi16 ( xmm4, xmm15, 0xAA ), _mm_blend_epi16 ( xmm11, xmm15, 0xAA ) );
-      __m128i xmm12 = _mm_add_epi16( _mm_alignr_epi8( xmm1_next, xmm1, 2 ), _mm_alignr_epi8( xmm3_next, xmm3, 2 ) );
-      xmm12 = _mm_sub_epi16( _mm_blend_epi16 ( xmm5, xmm15, 0x55 ), _mm_blend_epi16 ( xmm12, xmm15, 0x55 ) );
-
-      xmm6 = _mm_abs_epi16( xmm6 );
-      xmm8 = _mm_abs_epi16( xmm8 );
-      xmm9 = _mm_abs_epi16( xmm9 );
-      xmm10 = _mm_abs_epi16( xmm10 );
-      xmm11 = _mm_abs_epi16( xmm11 );
-      xmm12 = _mm_abs_epi16( xmm12 );
-      xmm13 = _mm_abs_epi16( xmm13 );
-      xmm14 = _mm_abs_epi16( xmm14 );
-
-      xmm6 = _mm_add_epi16( xmm6, xmm8 );
-      xmm9 = _mm_add_epi16( xmm9, xmm10 );
-      xmm11 = _mm_add_epi16( xmm11, xmm12 );
-      xmm13 = _mm_add_epi16( xmm13, xmm14 );
-
-      xmm6 = _mm_add_epi16( xmm6, _mm_srli_si128( xmm6, 2 ) );
-      xmm9 = _mm_add_epi16( xmm9, _mm_slli_si128( xmm9, 2 ) );
-      xmm11 = _mm_add_epi16( xmm11, _mm_srli_si128( xmm11, 2 ) );
-      xmm13 = _mm_add_epi16( xmm13, _mm_slli_si128( xmm13, 2 ) );
-
-      xmm6 = _mm_blend_epi16( xmm6, xmm9, 0xAA );
-      xmm11 = _mm_blend_epi16( xmm11, xmm13, 0xAA );
-
-      xmm6 = _mm_add_epi16( xmm6, _mm_slli_si128( xmm6, 4 ) );
-      xmm11 = _mm_add_epi16( xmm11, _mm_srli_si128( xmm11, 4 ) );
-
-      xmm6 = _mm_blend_epi16( xmm11, xmm6, 0xCC );
-
-      xmm9 = _mm_srli_si128( xmm6, 8 );
-
-      if( j > 2 )
-      {
-        _mm_storel_epi64( ( __m128i* )( &( _temp[i >> 1][j - 2 - 4] ) ), _mm_add_epi16( xmm6, mmStore ) );
-      }
+      imgY3 = imgY2;
+    }
+    else if (pos > 0 && posInCTU == vbPos)
+    {
+      imgY0 = imgY1;
+    }
 
-      xmm6 = _mm_add_epi16( xmm6, xmm9 );  //V H D0 D1
-      _mm_storel_epi64( ( __m128i* )( &( _temp[i >> 1][j - 2] ) ), xmm6 );
+    __m128i prev = _mm_setzero_si128();
 
-      mmStore = xmm9;
+    for (int j = 0; j < imgWExtended; j += 8)
+    {
+      const __m128i x0 = _mm_loadu_si128((const __m128i *) (imgY0 + j));
+      const __m128i x1 = _mm_loadu_si128((const __m128i *) (imgY1 + j));
+      const __m128i x2 = _mm_loadu_si128((const __m128i *) (imgY2 + j));
+      const __m128i x3 = _mm_loadu_si128((const __m128i *) (imgY3 + j));
+
+      const __m128i x4 = _mm_loadu_si128((const __m128i *) (imgY0 + j + 2));
+      const __m128i x5 = _mm_loadu_si128((const __m128i *) (imgY1 + j + 2));
+      const __m128i x6 = _mm_loadu_si128((const __m128i *) (imgY2 + j + 2));
+      const __m128i x7 = _mm_loadu_si128((const __m128i *) (imgY3 + j + 2));
+
+      const __m128i nw = _mm_blend_epi16(x0, x1, 0xaa);
+      const __m128i n  = _mm_blend_epi16(x0, x5, 0x55);
+      const __m128i ne = _mm_blend_epi16(x4, x5, 0xaa);
+      const __m128i w  = _mm_blend_epi16(x1, x2, 0xaa);
+      const __m128i e  = _mm_blend_epi16(x5, x6, 0xaa);
+      const __m128i sw = _mm_blend_epi16(x2, x3, 0xaa);
+      const __m128i s  = _mm_blend_epi16(x2, x7, 0x55);
+      const __m128i se = _mm_blend_epi16(x6, x7, 0xaa);
+
+      __m128i c = _mm_blend_epi16(x1, x6, 0x55);
+      c         = _mm_add_epi16(c, c);
+      __m128i d = _mm_shuffle_epi8(c, _mm_setr_epi8(2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13));
+
+      const __m128i ver = _mm_abs_epi16(_mm_sub_epi16(c, _mm_add_epi16(n, s)));
+      const __m128i hor = _mm_abs_epi16(_mm_sub_epi16(d, _mm_add_epi16(w, e)));
+      const __m128i di0 = _mm_abs_epi16(_mm_sub_epi16(d, _mm_add_epi16(nw, se)));
+      const __m128i di1 = _mm_abs_epi16(_mm_sub_epi16(d, _mm_add_epi16(ne, sw)));
+
+      const __m128i hv  = _mm_hadd_epi16(ver, hor);
+      const __m128i di  = _mm_hadd_epi16(di0, di1);
+      const __m128i all = _mm_hadd_epi16(hv, di);
+
+      const __m128i t = _mm_blend_epi16(all, prev, 0xaa);
+      _mm_storeu_si128((__m128i *) &colSums[i >> 1][j], _mm_hadd_epi16(t, all));
+      prev = all;
     }
   }
 
-  //const int offset = 8 << NO_VALS_LAGR_SHIFT;
-
-  const __m128i mm_0 = _mm_setzero_si128();
-  const __m128i mm_15 = _mm_set1_epi64x( 0x000000000000000F );
-  const __m128i mm_th = _mm_set1_epi64x( 0x4333333332222210 );
-
-  const __m128i xmm14 = _mm_set1_epi32( 1 ); //offset
-  const __m128i xmm13 = _mm_set1_epi32( var_max );
-
-  for( int i = 0; i < ( blk.height >> 1 ); i += 2 )
+  for (int i = 0; i < (blk.height >> 1); i += 4)
   {
-    for( int j = 0; j < blk.width; j += 8 )
+    for (int j = 0; j < blk.width; j += 8)
     {
-      __m128i xmm0 = _mm_loadu_si128( ( __m128i* )( &( _temp[i + 0][j] ) ) );
-      __m128i xmm1 = _mm_loadu_si128( ( __m128i* )( &( _temp[i + 1][j] ) ) );
-      __m128i xmm2 = _mm_loadu_si128( ( __m128i* )( &( _temp[i + 2][j] ) ) );
-      __m128i xmm3 = _mm_loadu_si128( ( __m128i* )( &( _temp[i + 3][j] ) ) );
-
-      __m128i xmm4 = _mm_add_epi16( xmm0, xmm1 );
-      __m128i xmm6 = _mm_add_epi16( xmm2, xmm3 );
-
-      xmm0 = _mm_unpackhi_epi16( xmm4, mm_0 );
-      xmm2 = _mm_unpackhi_epi16( xmm6, mm_0 );
-      xmm0 = _mm_add_epi32( xmm0, xmm2 );
-
-      xmm4 = _mm_unpacklo_epi16( xmm4, mm_0 );
-      xmm6 = _mm_unpacklo_epi16( xmm6, mm_0 );
-      xmm4 = _mm_add_epi32( xmm4, xmm6 );
-
-      __m128i xmm12 = _mm_blend_epi16( xmm4, _mm_shuffle_epi32( xmm0, 0x40 ), 0xF0 );
-      __m128i xmm10 = _mm_shuffle_epi32( xmm12, 0xB1 );
-      xmm12 = _mm_add_epi32( xmm10, xmm12 );
-      xmm12 = _mm_srai_epi32( xmm12, shift - 6 );
-      xmm12 = _mm_min_epi32( xmm12, xmm13 );
-
-      xmm12 = _mm_and_si128( xmm12, mm_15 );
-      xmm12 = _mm_slli_epi32( xmm12, 2 );
-      __m128i xmm11 = _mm_shuffle_epi32( xmm12, 0x0E ); //extracted from second half coz no different shifts are available
-      xmm12 = _mm_srl_epi64( mm_th, xmm12 );
-      xmm11 = _mm_srl_epi64( mm_th, xmm11 );
-      xmm12 = _mm_blend_epi16( xmm12, xmm11, 0xF0 );
-      xmm12 = _mm_and_si128( xmm12, mm_15 ); // avg_var in lower 4 bits of both halves
-
-      xmm6 = _mm_shuffle_epi32( xmm4, 0xB1 );
-      xmm2 = _mm_shuffle_epi32( xmm0, 0xB1 );
-
-      __m128i xmm7 = _mm_set_epi32( 0, 2, 1, 3 );
-      __m128i xmm9 = _mm_shuffle_epi32( xmm7, 0xB1 );
-
-      __m128i xmm5 = _mm_cmplt_epi32( xmm6, xmm4 );
-      __m128i xmm8 = _mm_cmplt_epi32( xmm2, xmm0 ); //2 masks coz 4 integers for every parts are compared
-
-      xmm5 = _mm_shuffle_epi32( xmm5, 0xA0 );
-      xmm8 = _mm_shuffle_epi32( xmm8, 0xA0 );
-
-      xmm4 = _mm_or_si128( _mm_andnot_si128( xmm5, xmm4 ), _mm_and_si128( xmm5, xmm6 ) ); //HV + D
-      xmm0 = _mm_or_si128( _mm_andnot_si128( xmm8, xmm0 ), _mm_and_si128( xmm8, xmm2 ) ); //HV + D <--second part
-
-      xmm10 = _mm_or_si128( _mm_andnot_si128( xmm8, xmm7 ), _mm_and_si128( xmm8, xmm9 ) ); //dirTemp <-- second part
-      xmm7 = _mm_or_si128( _mm_andnot_si128( xmm5, xmm7 ), _mm_and_si128( xmm5, xmm9 ) ); //dirTemp
-
-      xmm3 = _mm_shuffle_epi32( xmm0, 0x1B );  // need higher part from this
-      xmm6 = _mm_shuffle_epi32( xmm4, 0x1B );
-      xmm8 = _mm_blend_epi16( xmm4, xmm3, 0xF0 ); // 0 or 3
-      xmm6 = _mm_blend_epi16( xmm6, xmm0, 0xF0 );
-
-      xmm6 = _mm_mullo_epi32( xmm8, xmm6 );
-      xmm9 = _mm_shuffle_epi32( xmm6, 0xB1 );
-      xmm5 = _mm_cmpgt_epi32( xmm6, xmm9 );
-      xmm5 = _mm_shuffle_epi32( xmm5, 0xF0 ); //second mask is for all upper part
-
-      xmm8 = _mm_shuffle_epi32( xmm4, 0x0E );
-      xmm8 = _mm_blend_epi16( xmm8, xmm0, 0xF0 ); // (DL, DH in upepr part)
-      xmm4 = _mm_blend_epi16( xmm4, _mm_shuffle_epi32( xmm0, 0x40 ), 0xF0 ); //(HVL, HVH) in upper part
-
-      xmm7 = _mm_shuffle_epi32( xmm7, 0x08 ); // 2 -> 1
-      xmm7 = _mm_blend_epi16( xmm7, _mm_shuffle_epi32( xmm10, 0x80 ), 0xF0 );
-      xmm1 = _mm_shuffle_epi32( xmm7, 0xB1 ); // 1 -> 0, 0 -> 1
-
-      xmm4 = _mm_or_si128( _mm_andnot_si128( xmm5, xmm4 ), _mm_and_si128( xmm5, xmm8 ) ); //HV_D
-      xmm7 = _mm_or_si128( _mm_andnot_si128( xmm5, xmm7 ), _mm_and_si128( xmm5, xmm1 ) ); //main - secondary (upper halves are for second value)
-
-                                                                                          //xmm7 not to mix
-
-      xmm0 = _mm_shuffle_epi32( xmm4, 0xFA );
-      xmm4 = _mm_shuffle_epi32( xmm4, 0x50 ); //low, low, high, high
-      xmm6 = _mm_set_epi32( 2, 1, 9, 2 );
-
-      xmm2 = _mm_mullo_epi32( xmm0, xmm6 );
-      xmm6 = _mm_mullo_epi32( xmm4, xmm6 );
-      xmm4 = _mm_shuffle_epi32( xmm6, 0x4E );
-      xmm0 = _mm_shuffle_epi32( xmm2, 0x4E ); //p to xmm6
-      xmm6 = _mm_blend_epi16( xmm6, xmm0, 0xF0 );
-      xmm4 = _mm_blend_epi16( xmm4, xmm2, 0xF0 );
-
-      xmm5 = _mm_cmpgt_epi32( xmm4, xmm6 );
-      xmm4 = _mm_and_si128( xmm5, xmm14 ); // 1 + 1
-
-      xmm8 = _mm_and_si128( xmm7, xmm14 );
-      xmm8 = _mm_slli_epi32( xmm8, 1 );
-
-      xmm5 = _mm_add_epi32( xmm4, _mm_shuffle_epi32( xmm4, 0xB1 ) ); //directionStrength
-      xmm4 = _mm_cmpgt_epi32( xmm5, mm_0 ); //is a mask now
-      xmm4 = _mm_and_si128( _mm_add_epi32( xmm8, xmm5 ), xmm4 );
-
-      xmm4 = _mm_add_epi32( xmm4, _mm_slli_epi32( xmm4, 2 ) ); //x5
-      xmm4 = _mm_add_epi32( xmm4, xmm12 ); //+=
-
-      xmm9 = _mm_shuffle_epi32( xmm7, 0xB1 );// <--
-      xmm7 = _mm_slli_epi32( xmm7, 1 );
-      xmm9 = _mm_srai_epi32( xmm9, 1 );
-      xmm7 = _mm_add_epi32( xmm7, xmm9 );
-
-      //to write to struct
-      const int t0 = _mm_extract_epi32( xmm7, 0 );
-      const int t1 = _mm_extract_epi32( xmm7, 2 );
-      const int c0 = _mm_extract_epi32( xmm4, 0 );
-      const int c1 = _mm_extract_epi32( xmm4, 2 );
-
-      const int transposeTable[8] = { 0, 1, 0, 2, 2, 3, 1, 3 };
-      int transposeIdx0 = transposeTable[t0];
-      int transposeIdx1 = transposeTable[t1];
-      int classIdx0 = c0;
-      int classIdx1 = c1;
-
-      const int yOffset = ( i << 1 ) + posY;
-      const int xOffset = j + posX;
-
-      AlfClassifier *cl0 = classifier[yOffset] + xOffset;
-      AlfClassifier *cl1 = classifier[yOffset + 1] + xOffset;
-      AlfClassifier *cl2 = classifier[yOffset + 2] + xOffset;
-      AlfClassifier *cl3 = classifier[yOffset + 3] + xOffset;
-
-      AlfClassifier *_cl0 = cl0 + 4;
-      AlfClassifier *_cl1 = cl1 + 4;
-      AlfClassifier *_cl2 = cl2 + 4;
-      AlfClassifier *_cl3 = cl3 + 4;
-
-      cl0[0] = cl0[1] = cl0[2] = cl0[3] = cl1[0] = cl1[1] = cl1[2] = cl1[3] = cl2[0] = cl2[1] = cl2[2] = cl2[3] = cl3[0] = cl3[1] = cl3[2] = cl3[3] = AlfClassifier( classIdx0, transposeIdx0 );
-      _cl0[0] = _cl0[1] = _cl0[2] = _cl0[3] = _cl1[0] = _cl1[1] = _cl1[2] = _cl1[3] = _cl2[0] = _cl2[1] = _cl2[2] = _cl2[3] = _cl3[0] = _cl3[1] = _cl3[2] = _cl3[3] = AlfClassifier( classIdx1, transposeIdx1 );
+      __m128i x0, x1, x2, x3, x4, x5, x6, x7;
+
+      const uint32_t z = (2 * i + blkDst.pos().y) & (vbCTUHeight - 1);
+      const uint32_t z2 = (2 * i + 4 + blkDst.pos().y) & (vbCTUHeight - 1);
+
+      x0 = (z == vbPos) ? _mm_setzero_si128() : _mm_loadu_si128((__m128i *) &colSums[i + 0][j + 4]);
+      x1 = _mm_loadu_si128((__m128i *) &colSums[i + 1][j + 4]);
+      x2 = _mm_loadu_si128((__m128i *) &colSums[i + 2][j + 4]);
+      x3 = (z == vbPos - 4) ? _mm_setzero_si128() : _mm_loadu_si128((__m128i *) &colSums[i + 3][j + 4]);
+
+      x4 = (z2 == vbPos) ? _mm_setzero_si128() : _mm_loadu_si128((__m128i *) &colSums[i + 2][j + 4]);
+      x5 = _mm_loadu_si128((__m128i *) &colSums[i + 3][j + 4]);
+      x6 = _mm_loadu_si128((__m128i *) &colSums[i + 4][j + 4]);
+      x7 = (z2 == vbPos - 4) ? _mm_setzero_si128() : _mm_loadu_si128((__m128i *) &colSums[i + 5][j + 4]);
+
+      __m128i x0l = _mm_cvtepu16_epi32(x0);
+      __m128i x0h = _mm_unpackhi_epi16(x0, _mm_setzero_si128());
+      __m128i x1l = _mm_cvtepu16_epi32(x1);
+      __m128i x1h = _mm_unpackhi_epi16(x1, _mm_setzero_si128());
+      __m128i x2l = _mm_cvtepu16_epi32(x2);
+      __m128i x2h = _mm_unpackhi_epi16(x2, _mm_setzero_si128());
+      __m128i x3l = _mm_cvtepu16_epi32(x3);
+      __m128i x3h = _mm_unpackhi_epi16(x3, _mm_setzero_si128());
+      __m128i x4l = _mm_cvtepu16_epi32(x4);
+      __m128i x4h = _mm_unpackhi_epi16(x4, _mm_setzero_si128());
+      __m128i x5l = _mm_cvtepu16_epi32(x5);
+      __m128i x5h = _mm_unpackhi_epi16(x5, _mm_setzero_si128());
+      __m128i x6l = _mm_cvtepu16_epi32(x6);
+      __m128i x6h = _mm_unpackhi_epi16(x6, _mm_setzero_si128());
+      __m128i x7l = _mm_cvtepu16_epi32(x7);
+      __m128i x7h = _mm_unpackhi_epi16(x7, _mm_setzero_si128());
+
+      x0l = _mm_add_epi32(x0l, x1l);
+      x2l = _mm_add_epi32(x2l, x3l);
+      x4l = _mm_add_epi32(x4l, x5l);
+      x6l = _mm_add_epi32(x6l, x7l);
+      x0h = _mm_add_epi32(x0h, x1h);
+      x2h = _mm_add_epi32(x2h, x3h);
+      x4h = _mm_add_epi32(x4h, x5h);
+      x6h = _mm_add_epi32(x6h, x7h);
+
+      x0l = _mm_add_epi32(x0l, x2l);
+      x4l = _mm_add_epi32(x4l, x6l);
+      x0h = _mm_add_epi32(x0h, x2h);
+      x4h = _mm_add_epi32(x4h, x6h);
+
+      x2l = _mm_unpacklo_epi32(x0l, x4l);
+      x2h = _mm_unpackhi_epi32(x0l, x4l);
+      x6l = _mm_unpacklo_epi32(x0h, x4h);
+      x6h = _mm_unpackhi_epi32(x0h, x4h);
+
+      __m128i sumV  = _mm_unpacklo_epi32(x2l, x6l);
+      __m128i sumH  = _mm_unpackhi_epi32(x2l, x6l);
+      __m128i sumD0 = _mm_unpacklo_epi32(x2h, x6h);
+      __m128i sumD1 = _mm_unpackhi_epi32(x2h, x6h);
+
+      //      uint32_t tempAct = sumV + sumH;
+      __m128i tempAct = _mm_add_epi32(sumV, sumH);
+
+      //      const uint32_t activity = std::min<uint32_t>(15, tempAct * scale >> shift);
+      //      static const uint8_t th[16] = { 0, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4 };
+      //      uint8_t classIdx = th[activity];
+      const uint32_t scale  = (z == vbPos - 4 || z == vbPos) ? 96 : 64;
+      const uint32_t scale2 = (z2 == vbPos - 4 || z2 == vbPos) ? 96 : 64;
+      __m128i activity = _mm_mullo_epi32(tempAct, _mm_unpacklo_epi64(_mm_set1_epi32(scale), _mm_set1_epi32(scale2)));
+      activity         = _mm_srl_epi32(activity, _mm_cvtsi32_si128(shift));
+      activity         = _mm_min_epi32(activity, _mm_set1_epi32(15));
+      __m128i classIdx = _mm_shuffle_epi8(_mm_setr_epi8(0, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4), activity);
+
+      //      if (sumV > sumH)
+      //      {
+      //        hv1       = sumV;
+      //        hv0       = sumH;
+      //        dirTempHV = 0;
+      //      }
+      //      else
+      //      {
+      //        hv1       = sumH;
+      //        hv0       = sumV;
+      //        dirTempHV = 1;
+      //      }
+      __m128i dirTempHVMinus1 = _mm_cmpgt_epi32(sumV, sumH);
+      __m128i hv1             = _mm_max_epi32(sumV, sumH);
+      __m128i hv0             = _mm_min_epi32(sumV, sumH);
+
+      //      if (sumD0 > sumD1)
+      //      {
+      //        d1       = sumD0;
+      //        d0       = sumD1;
+      //        dirTempD = 0;
+      //      }
+      //      else
+      //      {
+      //        d1       = sumD1;
+      //        d0       = sumD0;
+      //        dirTempD = 1;
+      //      }
+      __m128i dirTempDMinus1 = _mm_cmpgt_epi32(sumD0, sumD1);
+      __m128i d1             = _mm_max_epi32(sumD0, sumD1);
+      __m128i d0             = _mm_min_epi32(sumD0, sumD1);
+
+      //      int dirIdx;
+      //      if (d1 * hv0 > hv1 * d0)
+      //      {
+      //        hvd1   = d1;
+      //        hvd0   = d0;
+      //        dirIdx = 0;
+      //      }
+      //      else
+      //      {
+      //        hvd1   = hv1;
+      //        hvd0   = hv0;
+      //        dirIdx = 2;
+      //      }
+      __m128i a      = _mm_xor_si128(_mm_mullo_epi32(d1, hv0), _mm_set1_epi32(0x80000000));
+      __m128i b      = _mm_xor_si128(_mm_mullo_epi32(hv1, d0), _mm_set1_epi32(0x80000000));
+      __m128i dirIdx = _mm_cmpgt_epi32(a, b);
+      __m128i hvd1   = _mm_blendv_epi8(hv1, d1, dirIdx);
+      __m128i hvd0   = _mm_blendv_epi8(hv0, d0, dirIdx);
+
+      //      if (hvd1 * 2 > 9 * hvd0)
+      //      {
+      //        classIdx += (dirIdx + 2) * 5;
+      //      }
+      //      else if (hvd1 > 2 * hvd0)
+      //      {
+      //        classIdx += (dirIdx + 1) * 5;
+      //      }
+      __m128i strength1 = _mm_cmpgt_epi32(hvd1, _mm_add_epi32(hvd0, hvd0));
+      __m128i strength2 = _mm_cmpgt_epi32(_mm_add_epi32(hvd1, hvd1), _mm_add_epi32(hvd0, _mm_slli_epi32(hvd0, 3)));
+      __m128i offset    = _mm_and_si128(strength1, _mm_set1_epi32(5));
+      classIdx          = _mm_add_epi32(classIdx, offset);
+      classIdx          = _mm_add_epi32(classIdx, _mm_and_si128(strength2, _mm_set1_epi32(5)));
+      offset            = _mm_andnot_si128(dirIdx, offset);
+      offset            = _mm_add_epi32(offset, offset);
+      classIdx          = _mm_add_epi32(classIdx, offset);
+
+      //      uint8_t transposeIdx = 2 * dirTempD + dirTempHV;
+      __m128i transposeIdx = _mm_set1_epi32(3);
+      transposeIdx         = _mm_add_epi32(transposeIdx, dirTempHVMinus1);
+      transposeIdx         = _mm_add_epi32(transposeIdx, dirTempDMinus1);
+      transposeIdx         = _mm_add_epi32(transposeIdx, dirTempDMinus1);
+
+      int yOffset = 2 * i + blkDst.pos().y;
+      int xOffset = j + blkDst.pos().x;
+
+      static_assert(sizeof(AlfClassifier) == 2, "ALFClassifier type must be 16 bits wide");
+      __m128i v;
+      v = _mm_unpacklo_epi8(classIdx, transposeIdx);
+      v = _mm_shuffle_epi8(v, _mm_setr_epi8(0, 1, 0, 1, 0, 1, 0, 1, 8, 9, 8, 9, 8, 9, 8, 9));
+      _mm_storeu_si128((__m128i *) (classifier[yOffset] + xOffset), v);
+      _mm_storeu_si128((__m128i *) (classifier[yOffset + 1] + xOffset), v);
+      _mm_storeu_si128((__m128i *) (classifier[yOffset + 2] + xOffset), v);
+      _mm_storeu_si128((__m128i *) (classifier[yOffset + 3] + xOffset), v);
+      v = _mm_unpackhi_epi8(classIdx, transposeIdx);
+      v = _mm_shuffle_epi8(v, _mm_setr_epi8(0, 1, 0, 1, 0, 1, 0, 1, 8, 9, 8, 9, 8, 9, 8, 9));
+      _mm_storeu_si128((__m128i *) (classifier[yOffset + 4] + xOffset), v);
+      _mm_storeu_si128((__m128i *) (classifier[yOffset + 5] + xOffset), v);
+      _mm_storeu_si128((__m128i *) (classifier[yOffset + 6] + xOffset), v);
+      _mm_storeu_si128((__m128i *) (classifier[yOffset + 7] + xOffset), v);
     }
   }
 }
 
 template<X86_VEXT vext>
-static void simdFilter5x5Blk( AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, const ClpRng& clpRng, CodingStructure& cs )
-{
-  static const unsigned char mask05[16] = { 8, 9, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
-  static const unsigned char mask03[16] = { 4, 5, 2, 3, 0, 1, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
-  static const unsigned char mask_c[16] = { 0, 1, 8, 9, 4, 5, 14, 15, 2, 3, 10, 11, 12, 13, 6, 7 };
-
-  const bool bChroma = isChroma( compId );
-
-  const SPS*     sps = cs.slice->getSPS();
-  bool isDualTree = CS::isDualITree(cs);
-  bool isPCMFilterDisabled = sps->getPCMFilterDisableFlag();
-  ChromaFormat nChromaFormat = sps->getChromaFormatIdc();
+static void simdFilter5x5Blk(AlfClassifier **classifier, const PelUnitBuf &recDst, const CPelUnitBuf &recSrc,
+  const Area &blkDst, const Area &blk, const ComponentID compId, const short *filterSet,
+  const short *fClipSet, const ClpRng &clpRng, CodingStructure &cs, const int vbCTUHeight,
+  int vbPos)
 
-  const CPelBuf srcLuma = recSrc.get( compId );
-  PelBuf dstLuma = recDst.get( compId );
-
-  const int srcStride = srcLuma.stride;
-  const int dstStride = dstLuma.stride;
-
-  const Pel* srcExt = srcLuma.buf;
-  Pel* dst = dstLuma.buf;
-
-  const Pel *pImgYPad0, *pImgYPad1, *pImgYPad2, *pImgYPad3, *pImgYPad4, *pImgYPad5;
-
-  short *coef = filterSet;
-  const Pel *pImg0, *pImg1, *pImg2, *pImg3, *pImg4, *pImg5;
-
-  const int numBitsMinus1 = AdaptiveLoopFilter::m_NUM_BITS - 1;
-  const int offset = ( 1 << ( AdaptiveLoopFilter::m_NUM_BITS - 2 ) );
-
-  const int startHeight = blk.y;
-  const int endHeight = blk.y + blk.height;
-  const int startWidth = blk.x;
-  const int endWidth = blk.x + blk.width;
+{
+  CHECK((vbCTUHeight & (vbCTUHeight - 1)) != 0, "vbCTUHeight must be a power of 2");
+  CHECK(!isChroma(compId), "ALF 5x5 filter is for chroma only");
 
-  Pel* imgYRecPost = dst;
-  imgYRecPost += startHeight * dstStride;
 
-  int transposeIdx = 0;
+  const CPelBuf srcBuffer = recSrc.get(compId);
+  PelBuf        dstBuffer = recDst.get(compId);
 
-  const int clsSizeY = 4;
-  const int clsSizeX = 4;
+  const size_t srcStride = srcBuffer.stride;
+  const size_t dstStride = dstBuffer.stride;
 
-  bool pcmFlags2x2[4] = {0,0,0,0};
-  Pel  pcmRec2x2[16];
+  constexpr int SHIFT = AdaptiveLoopFilter::m_NUM_BITS - 1;
+  constexpr int ROUND = 1 << (SHIFT - 1);
 
-  CHECK( startHeight % clsSizeY, "Wrong startHeight in filtering" );
-  CHECK( startWidth % clsSizeX, "Wrong startWidth in filtering" );
-  CHECK( ( endHeight - startHeight ) % clsSizeY, "Wrong endHeight in filtering" );
-  CHECK( ( endWidth - startWidth ) % clsSizeX, "Wrong endWidth in filtering" );
+  const size_t width  = blk.width;
+  const size_t height = blk.height;
 
-  const Pel* imgYRec = srcExt;
+  constexpr size_t STEP_X = 8;
+  constexpr size_t STEP_Y = 4;
 
-  Pel *pRec;
-  AlfClassifier *pClass = nullptr;
+  CHECK(blk.y % STEP_Y, "Wrong startHeight in filtering");
+  CHECK(blk.x % STEP_X, "Wrong startWidth in filtering");
+  CHECK(height % STEP_Y, "Wrong endHeight in filtering");
+  CHECK(width % 4, "Wrong endWidth in filtering");
 
-  int srcStride2 = srcStride * clsSizeY;
+  const Pel *src = srcBuffer.buf + blk.y * srcStride + blk.x;
+  Pel *      dst = dstBuffer.buf + blkDst.y * dstStride + blkDst.x;
 
-  const __m128i mmOffset = _mm_set1_epi32( offset );
-  const __m128i mmMin = _mm_set1_epi32( clpRng.min );
-  const __m128i mmMax = _mm_set1_epi32( clpRng.max );
 
-  const __m128i xmm10 = _mm_loadu_si128( ( __m128i* )mask03 );
-  const __m128i mm_mask05 = _mm_loadu_si128( ( __m128i* )mask05 );
 
-  pImgYPad0 = imgYRec + startHeight * srcStride + startWidth;
-  pImgYPad1 = pImgYPad0 + srcStride;
-  pImgYPad2 = pImgYPad0 - srcStride;
-  pImgYPad3 = pImgYPad1 + srcStride;
-  pImgYPad4 = pImgYPad2 - srcStride;
-  pImgYPad5 = pImgYPad3 + srcStride;
+  const __m128i mmOffset = _mm_set1_epi32(ROUND);
+  const __m128i mmMin = _mm_set1_epi16( clpRng.min );
+  const __m128i mmMax = _mm_set1_epi16( clpRng.max );
 
-  pRec = imgYRecPost + startWidth;
+  __m128i params[2][3];
+  __m128i fs   = _mm_loadu_si128((__m128i *) filterSet);
+  params[0][0] = _mm_shuffle_epi32(fs, 0x00);
+  params[0][1] = _mm_shuffle_epi32(fs, 0x55);
+  params[0][2] = _mm_shuffle_epi32(fs, 0xaa);
+  __m128i fc   = _mm_loadu_si128((__m128i *) fClipSet);
+  params[1][0] = _mm_shuffle_epi32(fc, 0x00);
+  params[1][1] = _mm_shuffle_epi32(fc, 0x55);
+  params[1][2] = _mm_shuffle_epi32(fc, 0xaa);
 
-  for( int i = 0; i < endHeight - startHeight; i += 4 )
+  for (size_t i = 0; i < height; i += STEP_Y)
   {
-    pRec = imgYRecPost + startWidth + i * dstStride;
-
-    if( !bChroma )
+    for (size_t j = 0; j < width; j += STEP_X)
     {
-      pClass = classifier[startHeight + i] + startWidth;
-    }
 
-    for( int j = 0; j < endWidth - startWidth; j += 4 )
-    {
-      if( !bChroma )
+      for (size_t ii = 0; ii < STEP_Y; ii++)
       {
-        AlfClassifier& cl = pClass[j];
-        transposeIdx = cl.transposeIdx;
-        if( isPCMFilterDisabled && cl.classIdx == AdaptiveLoopFilter::m_ALF_UNUSED_CLASSIDX && transposeIdx == AdaptiveLoopFilter::m_ALF_UNUSED_TRANSPOSIDX )
+        const Pel *pImg0, *pImg1, *pImg2, *pImg3, *pImg4;
+
+        pImg0 = src + j + ii * srcStride;
+        pImg1 = pImg0 + srcStride;
+        pImg2 = pImg0 - srcStride;
+        pImg3 = pImg1 + srcStride;
+        pImg4 = pImg2 - srcStride;
+
+        const int yVb = (blkDst.y + i + ii) & (vbCTUHeight - 1);
+        if (yVb < vbPos && (yVb >= vbPos - 2))   // above
         {
-          pRec += 4;
-          continue;
+          pImg1 = (yVb == vbPos - 1) ? pImg0 : pImg1;
+          pImg3 = (yVb >= vbPos - 2) ? pImg1 : pImg3;
+
+          pImg2 = (yVb == vbPos - 1) ? pImg0 : pImg2;
+          pImg4 = (yVb >= vbPos - 2) ? pImg2 : pImg4;
         }
-        coef = filterSet + cl.classIdx * MAX_NUM_ALF_LUMA_COEFF;
-      }
-      else if ( isPCMFilterDisabled )
-      {
-        int  blkX, blkY;
-        bool *flags  = pcmFlags2x2;
-        Pel  *pcmRec = pcmRec2x2;
+        else if (yVb >= vbPos && (yVb <= vbPos + 1))   // bottom
+        {
+          pImg2 = (yVb == vbPos) ? pImg0 : pImg2;
+          pImg4 = (yVb <= vbPos + 1) ? pImg2 : pImg4;
 
-        // check which chroma 2x2 blocks use PCM
-        // chroma PCM may not be aligned with 4x4 ALF processing grid
-        for( blkY=0; blkY<4; blkY+=2 )
+          pImg1 = (yVb == vbPos) ? pImg0 : pImg1;
+          pImg3 = (yVb <= vbPos + 1) ? pImg1 : pImg3;
+        }
+        __m128i cur = _mm_loadu_si128((const __m128i *) pImg0);
+        __m128i accumA = mmOffset;
+        __m128i accumB = mmOffset;
+
+        auto process2coeffs = [&](const int i, const Pel *ptr0, const Pel *ptr1, const Pel *ptr2, const Pel *ptr3) {
+          const __m128i val00 = _mm_sub_epi16(_mm_loadu_si128((const __m128i *) ptr0), cur);
+          const __m128i val10 = _mm_sub_epi16(_mm_loadu_si128((const __m128i *) ptr2), cur);
+          const __m128i val01 = _mm_sub_epi16(_mm_loadu_si128((const __m128i *) ptr1), cur);
+          const __m128i val11 = _mm_sub_epi16(_mm_loadu_si128((const __m128i *) ptr3), cur);
+          __m128i val01A = _mm_unpacklo_epi16(val00, val10);
+          __m128i val01B = _mm_unpackhi_epi16(val00, val10);
+          __m128i val01C = _mm_unpacklo_epi16(val01, val11);
+          __m128i val01D = _mm_unpackhi_epi16(val01, val11);
+
+          __m128i limit01A = params[1][i];
+
+          val01A = _mm_min_epi16(val01A, limit01A);
+          val01B = _mm_min_epi16(val01B, limit01A);
+          val01C = _mm_min_epi16(val01C, limit01A);
+          val01D = _mm_min_epi16(val01D, limit01A);
+
+          limit01A = _mm_sub_epi16(_mm_setzero_si128(), limit01A);
+
+          val01A = _mm_max_epi16(val01A, limit01A);
+          val01B = _mm_max_epi16(val01B, limit01A);
+          val01C = _mm_max_epi16(val01C, limit01A);
+          val01D = _mm_max_epi16(val01D, limit01A);
+
+          val01A = _mm_add_epi16(val01A, val01C);
+          val01B = _mm_add_epi16(val01B, val01D);
+
+          __m128i coeff01A = params[0][i];
+
+          accumA = _mm_add_epi32(accumA, _mm_madd_epi16(val01A, coeff01A));
+          accumB = _mm_add_epi32(accumB, _mm_madd_epi16(val01B, coeff01A));
+        };
+
+        process2coeffs(0, pImg3 + 0, pImg4 + 0, pImg1 + 1, pImg2 - 1);
+        process2coeffs(1, pImg1 + 0, pImg2 + 0, pImg1 - 1, pImg2 + 1);
+        process2coeffs(2, pImg0 + 2, pImg0 - 2, pImg0 + 1, pImg0 - 1);
+#if JVET_Q0150
+        bool isNearVBabove = yVb < vbPos && (yVb >= vbPos - 1);
+        bool isNearVBbelow = yVb >= vbPos && (yVb <= vbPos);
+        if (!(isNearVBabove || isNearVBbelow))
         {
-          for( blkX=0; blkX<4; blkX+=2 )
-          {
-            Position pos(j+startWidth+blkX, i+startHeight+blkY);
-            CodingUnit* cu = isDualTree ? cs.getCU(pos, CH_C) : cs.getCU(recalcPosition(nChromaFormat, CH_C, CH_L, pos), CH_L);
-            *flags++ = cu->ipcm ? 1 : 0;
-
-            // save original samples from 2x2 PCM blocks
-            if( cu->ipcm )
-            {
-              *pcmRec++ = pRec[(blkY+0)*dstStride + (blkX+0)];
-              *pcmRec++ = pRec[(blkY+0)*dstStride + (blkX+1)];
-              *pcmRec++ = pRec[(blkY+1)*dstStride + (blkX+0)];
-              *pcmRec++ = pRec[(blkY+1)*dstStride + (blkX+1)];
-            }
-          }
+          accumA = _mm_srai_epi32(accumA, SHIFT);
+          accumB = _mm_srai_epi32(accumB, SHIFT);
         }
-
-        // skip entire 4x4 if all chroma 2x2 blocks use PCM
-        if( pcmFlags2x2[0] && pcmFlags2x2[1] && pcmFlags2x2[2] && pcmFlags2x2[3] )
+        else
         {
-          pRec += 4;
-          continue;
+          accumA = _mm_srai_epi32(accumA, SHIFT + 3);
+          accumB = _mm_srai_epi32(accumB, SHIFT + 3);
         }
-      }
-
-      __m128i c0, t0 = _mm_setzero_si128();
-
-      c0 = _mm_loadu_si128( ( __m128i* )( coef + 0 ) );
-      c0 = _mm_alignr_epi8( c0, c0, 2 );
-      c0 = _mm_blend_epi16( c0, t0, 0x40 );
-
-      if( transposeIdx & 1 )
-      {
-        c0 = _mm_shuffle_epi8( c0, _mm_loadu_si128( ( __m128i* )mask_c ) );
-      }
-
-      if( transposeIdx == 0 || transposeIdx == 1 )
-      {
-        c0 = _mm_shuffle_epi8( c0, xmm10 );
-      }
-
-      pImg0 = pImgYPad0 + j;
-      pImg1 = pImgYPad1 + j;
-      pImg2 = pImgYPad2 + j;
-      pImg3 = pImgYPad3 + j;
-      pImg4 = pImgYPad4 + j;
-      pImg5 = pImgYPad5 + j;
-
-      for( int k = 0; k < 4; k++ )
-      {
-        __m128i xmm4 = _mm_lddqu_si128( ( __m128i* ) ( pImg4 ) );
-        __m128i xmm2 = _mm_lddqu_si128( ( __m128i* ) ( pImg2 - 1 ) );
-        __m128i xmm0 = _mm_lddqu_si128( ( __m128i* ) ( pImg0 - 2 ) );
-        __m128i xmm1 = _mm_lddqu_si128( ( __m128i* ) ( pImg1 - 1 - 1 ) );
-        __m128i xmm3 = _mm_lddqu_si128( ( __m128i* ) ( pImg3 - 0 - 2 ) );
-
-        __m128i xmm7 = _mm_setzero_si128();
-
-        __m128i xmm6 = _mm_shuffle_epi8( xmm0, mm_mask05 );
-        __m128i xmm8 = _mm_shuffle_epi8( _mm_srli_si128( xmm0, 2 ), mm_mask05 );
-        __m128i xmm9 = _mm_shuffle_epi8( _mm_srli_si128( xmm0, 4 ), mm_mask05 );
-        __m128i xmm11 = _mm_shuffle_epi8( _mm_srli_si128( xmm0, 6 ), mm_mask05 );
-
-        xmm6 = _mm_blend_epi16( xmm7, xmm6, 0x03 );
-        xmm8 = _mm_blend_epi16( xmm7, xmm8, 0x03 );
-        xmm9 = _mm_blend_epi16( xmm7, xmm9, 0x03 );
-        xmm11 = _mm_blend_epi16( xmm7, xmm11, 0x03 );
-
-        xmm6 = _mm_add_epi16( xmm6, xmm0 );
-        xmm8 = _mm_add_epi16( xmm8, _mm_srli_si128( xmm0, 2 ) );
-        xmm9 = _mm_add_epi16( xmm9, _mm_srli_si128( xmm0, 4 ) );
-        xmm11 = _mm_add_epi16( xmm11, _mm_srli_si128( xmm0, 6 ) );
-
-        xmm6 = _mm_slli_si128( xmm6, 6 );
-        xmm8 = _mm_slli_si128( xmm8, 6 );
-        xmm9 = _mm_slli_si128( xmm9, 6 );
-        xmm11 = _mm_slli_si128( xmm11, 6 );
-
-        xmm4 = _mm_add_epi16( xmm4, _mm_srli_si128( xmm3, 4 ) );
-        xmm6 = _mm_blend_epi16( xmm6, _mm_slli_si128( xmm4, 14 ), 0x80 );
-        xmm8 = _mm_blend_epi16( xmm8, _mm_slli_si128( xmm4, 12 ), 0x80 );
-        xmm9 = _mm_blend_epi16( xmm9, _mm_slli_si128( xmm4, 10 ), 0x80 );
-        xmm11 = _mm_blend_epi16( xmm11, _mm_slli_si128( xmm4, 8 ), 0x80 );
-
-        __m128i xmm12 = _mm_shuffle_epi8( xmm2, xmm10 );
-        __m128i xmm13 = _mm_shuffle_epi8( _mm_srli_si128( xmm2, 2 ), xmm10 );
-        __m128i xmm14 = _mm_shuffle_epi8( _mm_srli_si128( xmm2, 4 ), xmm10 );
-        __m128i xmm15 = _mm_shuffle_epi8( _mm_srli_si128( xmm2, 6 ), xmm10 );
-
-        xmm12 = _mm_add_epi16( xmm12, _mm_srli_si128( xmm1, 2 ) );
-        xmm13 = _mm_add_epi16( xmm13, _mm_srli_si128( xmm1, 4 ) );
-        xmm14 = _mm_add_epi16( xmm14, _mm_srli_si128( xmm1, 6 ) );
-        xmm15 = _mm_add_epi16( xmm15, _mm_srli_si128( xmm1, 8 ) );
-
-        xmm6 = _mm_blend_epi16( xmm6, xmm12, 0x07 );
-        xmm8 = _mm_blend_epi16( xmm8, xmm13, 0x07 );
-        xmm9 = _mm_blend_epi16( xmm9, xmm14, 0x07 );
-        xmm11 = _mm_blend_epi16( xmm11, xmm15, 0x07 );
-
-        xmm6 = _mm_madd_epi16( xmm6, c0 );
-        xmm8 = _mm_madd_epi16( xmm8, c0 );
-        xmm9 = _mm_madd_epi16( xmm9, c0 );
-        xmm11 = _mm_madd_epi16( xmm11, c0 );
-
-        xmm12 = _mm_shuffle_epi32( xmm6, 0x1B );
-        xmm13 = _mm_shuffle_epi32( xmm8, 0x1B );
-        xmm14 = _mm_shuffle_epi32( xmm9, 0x1B );
-        xmm15 = _mm_shuffle_epi32( xmm11, 0x1B );
-
-        xmm6 = _mm_add_epi32( xmm6, xmm12 );
-        xmm8 = _mm_add_epi32( xmm8, xmm13 );
-        xmm9 = _mm_add_epi32( xmm9, xmm14 );
-        xmm11 = _mm_add_epi32( xmm11, xmm15 );
-
-        xmm6 = _mm_blend_epi16( xmm6, xmm8, 0xF0 );
-        xmm9 = _mm_blend_epi16( xmm9, xmm11, 0xF0 );
-
-        xmm12 = _mm_hadd_epi32( xmm6, xmm9 );
-
-        xmm12 = _mm_add_epi32( xmm12, mmOffset );
-        xmm12 = _mm_srai_epi32( xmm12, numBitsMinus1 );
-
-        xmm12 = _mm_min_epi32( mmMax, _mm_max_epi32( xmm12, mmMin ) );
-
-        xmm12 = _mm_packus_epi32( xmm12, xmm12 );
-
-        _mm_storel_epi64( ( __m128i* )( pRec ), xmm12 );
-
-        pRec += dstStride;
-
-        pImg0 += srcStride;
-        pImg1 += srcStride;
-        pImg2 += srcStride;
-        pImg3 += srcStride;
-        pImg4 += srcStride;
-        pImg5 += srcStride;
-
-      } //<-- end of k-loop
-
-      pRec -= ( 4 * dstStride );
+#else
+        accumA = _mm_srai_epi32(accumA, SHIFT);
+        accumB = _mm_srai_epi32(accumB, SHIFT);
+#endif
+        accumA = _mm_packs_epi32(accumA, accumB);
+        accumA = _mm_add_epi16(accumA, cur);
+        accumA = _mm_min_epi16(mmMax, _mm_max_epi16(accumA, mmMin));
 
-      // restore 2x2 PCM chroma blocks
-      if( bChroma && isPCMFilterDisabled )
-      {
-        int  blkX, blkY;
-        bool *flags  = pcmFlags2x2;
-        Pel  *pcmRec = pcmRec2x2;
-        for( blkY=0; blkY<4; blkY+=2 )
+        if (j + STEP_X <= width)
+        {
+          _mm_storeu_si128((__m128i *) (dst + ii * dstStride + j), accumA);
+        }
+        else
         {
-          for( blkX=0; blkX<4; blkX+=2 )
-          {
-            if( *flags++ )
-            {
-              pRec[(blkY+0)*dstStride + (blkX+0)] = *pcmRec++;
-              pRec[(blkY+0)*dstStride + (blkX+1)] = *pcmRec++;
-              pRec[(blkY+1)*dstStride + (blkX+0)] = *pcmRec++;
-              pRec[(blkY+1)*dstStride + (blkX+1)] = *pcmRec++;
-            }
-          }
+          _mm_storel_epi64((__m128i *) (dst + ii * dstStride + j), accumA);
         }
       }
 
-      pRec += 4;
     }
 
-    pRec += 4 * dstStride;
-
-    pImgYPad0 += srcStride2;
-    pImgYPad1 += srcStride2;
-    pImgYPad2 += srcStride2;
-    pImgYPad3 += srcStride2;
-    pImgYPad4 += srcStride2;
-    pImgYPad5 += srcStride2;
+    src += srcStride * STEP_Y;
+    dst += dstStride * STEP_Y;
   }
 }
 
-template<X86_VEXT vext>
-static void simdFilter7x7Blk( AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, const ClpRng& clpRng, CodingStructure& cs )
+constexpr uint16_t sh(int x)   // 16-bit entry of a byte-shuffle control that selects 16-bit source element x
 {
-  static const unsigned char mask0[16] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 6, 7, 4, 5, 2, 3 };
-  static const unsigned char mask00[16] = { 2, 3, 0, 1, 0, 0, 0, 0, 8, 9, 0, 0, 0, 0, 0, 1 };
-  static const unsigned char mask02[16] = { 0, 0, 0, 0, 2, 3, 10, 11, 0, 0, 10, 11, 2, 3, 0, 0 };
-  static const unsigned char mask20[16] = { 0, 0, 4, 5, 0, 0, 0, 0, 0, 0, 6, 7, 0, 0, 0, 0 };
-  static const unsigned char mask22[16] = { 14, 15, 0, 0, 6, 7, 4, 5, 12, 13, 0, 0, 8, 9, 0, 1 };
-  static const unsigned char mask35[16] = { 4, 5, 2, 3, 0, 1, 14, 15, 12, 13, 10, 11, 8, 9, 6, 7 };
-
-  const bool bChroma = isChroma( compId );
+  return 0x0202 * (x & 7) + 0x0100 + 0x1010 * (x & 8);   // low byte = 2*(x&7), high byte = 2*(x&7)+1; bit 3 of x sets bit 7 in both bytes
+}
 
-  if( bChroma )
+static const uint16_t shuffleTab[4][2][8] = {   // [transposeIdx][0 = output elements 0-7, 1 = output elements 8-15][lane] -> source element, encoded by sh()
   {
-    CHECK( 0, "Chroma doesn't support 7x7" );
-  }
-  const SPS*     sps = cs.slice->getSPS();
-  bool isDualTree = CS::isDualITree(cs);
-  bool isPCMFilterDisabled = sps->getPCMFilterDisableFlag();
-  ChromaFormat nChromaFormat = sps->getChromaFormatIdc();
-  const CPelBuf srcLuma = recSrc.get( compId );
-  PelBuf dstLuma = recDst.get( compId );
-
-  const int srcStride = srcLuma.stride;
-  const int dstStride = dstLuma.stride;
-
-  const Pel* srcExt = srcLuma.buf;
-  Pel* dst = dstLuma.buf;
-
-  const Pel *pImgYPad0, *pImgYPad1, *pImgYPad2, *pImgYPad3, *pImgYPad4, *pImgYPad5, *pImgYPad6;
-
-  short *coef = filterSet;
-  const Pel *pImg0, *pImg1, *pImg2, *pImg3, *pImg4;
-  const Pel *pImg5, *pImg6;
-
-  const int numBitsMinus1 = AdaptiveLoopFilter::m_NUM_BITS - 1;
-  const int offset = ( 1 << ( AdaptiveLoopFilter::m_NUM_BITS - 2 ) );
-
-  const int startHeight = blk.y;
-  const int endHeight = blk.y + blk.height;
-  const int startWidth = blk.x;
-  const int endWidth = blk.x + blk.width;
-
-  Pel* imgYRecPost = dst;
-  imgYRecPost += startHeight * dstStride;
+    { sh(0), sh(1), sh(2), sh(3), sh(4), sh(5), sh(6), sh(7) },   // first index 0: elements stay in their original order
+    { sh(8), sh(9), sh(10), sh(11), sh(12), sh(13), sh(14), sh(15) },
+  },
+  {
+    { sh(9), sh(4), sh(10), sh(8), sh(1), sh(5), sh(11), sh(7) },
+    { sh(3), sh(0), sh(2), sh(6), sh(12), sh(13), sh(14), sh(15) },   // sh(12)..sh(15) are the same in every row
+  },
+  {
+    { sh(0), sh(3), sh(2), sh(1), sh(8), sh(7), sh(6), sh(5) },
+    { sh(4), sh(9), sh(10), sh(11), sh(12), sh(13), sh(14), sh(15) },
+  },
+  {
+    { sh(9), sh(8), sh(10), sh(4), sh(3), sh(7), sh(11), sh(5) },
+    { sh(1), sh(0), sh(2), sh(6), sh(12), sh(13), sh(14), sh(15) },
+  },
+};
 
-  int transposeIdx = 0;
+template<X86_VEXT vext>   // 128-bit SIMD 7x7 ALF filter for luma; works on tiles of STEP_X x STEP_Y = 8x4 samples
+static void simdFilter7x7Blk(AlfClassifier **classifier, const PelUnitBuf &recDst, const CPelUnitBuf &recSrc,
+  const Area &blkDst, const Area &blk, const ComponentID compId, const short *filterSet,   // blk addresses the source, blkDst the destination
+  const short *fClipSet, const ClpRng &clpRng, CodingStructure &cs, const int vbCTUHeight,
+  int vbPos)   // vbPos is compared against the destination row modulo vbCTUHeight (yVb below)
+{
+  CHECK((vbCTUHeight & (vbCTUHeight - 1)) != 0, "vbCTUHeight must be a power of 2");
+  CHECK(isChroma(compId), "7x7 ALF filter is meant for luma only");
 
-  const int clsSizeY = 4;
-  const int clsSizeX = 4;
 
-  bool pcmFlags2x2[4] = {0,0,0,0};
-  Pel  pcmRec2x2[16];
+  const CPelBuf srcBuffer = recSrc.get(compId);
+  PelBuf        dstBuffer = recDst.get(compId);
 
-  CHECK( startHeight % clsSizeY, "Wrong startHeight in filtering" );
-  CHECK( startWidth % clsSizeX, "Wrong startWidth in filtering" );
-  CHECK( ( endHeight - startHeight ) % clsSizeY, "Wrong endHeight in filtering" );
-  CHECK( ( endWidth - startWidth ) % clsSizeX, "Wrong endWidth in filtering" );
+  const size_t srcStride = srcBuffer.stride;
+  const size_t dstStride = dstBuffer.stride;
 
-  const Pel* imgYRec = srcExt;
+  constexpr int SHIFT = AdaptiveLoopFilter::m_NUM_BITS - 1;
+  constexpr int ROUND = 1 << (SHIFT - 1);   // half of 1 << SHIFT, used as rounding term
 
-  Pel *pRec;
-  AlfClassifier *pClass = nullptr;
+  const size_t width  = blk.width;
+  const size_t height = blk.height;
 
-  int dstStride2 = dstStride * clsSizeY;
-  int srcStride2 = srcStride * clsSizeY;
+  constexpr size_t STEP_X = 8;
+  constexpr size_t STEP_Y = 4;
 
-  const __m128i mmOffset = _mm_set1_epi32( offset );
-  const __m128i mmMin = _mm_set1_epi32( clpRng.min );
-  const __m128i mmMax = _mm_set1_epi32( clpRng.max );
+  CHECK(blk.y % STEP_Y, "Wrong startHeight in filtering");
+  CHECK(blk.x % STEP_X, "Wrong startWidth in filtering");
+  CHECK(height % STEP_Y, "Wrong endHeight in filtering");
+  CHECK(width % STEP_X, "Wrong endWidth in filtering");
 
-  const __m128i xmm10 = _mm_loadu_si128( ( __m128i* )mask35 );
+  const Pel *src = srcBuffer.buf + blk.y * srcStride + blk.x;
+  Pel *      dst = dstBuffer.buf + blkDst.y * dstStride + blkDst.x;
 
-  pImgYPad0 = imgYRec + startHeight * srcStride + startWidth;
-  pImgYPad1 = pImgYPad0 + srcStride;
-  pImgYPad2 = pImgYPad0 - srcStride;
-  pImgYPad3 = pImgYPad1 + srcStride;
-  pImgYPad4 = pImgYPad2 - srcStride;
-  pImgYPad5 = pImgYPad3 + srcStride;
-  pImgYPad6 = pImgYPad4 - srcStride;
+  const __m128i mmOffset = _mm_set1_epi32(ROUND);   // initial value of both accumulators
+  const __m128i mmMin = _mm_set1_epi16( clpRng.min );   // lower output clamp, 16-bit lanes
+  const __m128i mmMax = _mm_set1_epi16( clpRng.max );   // upper output clamp, 16-bit lanes
 
-  pRec = imgYRecPost + startWidth;
 
-  for( int i = 0; i < endHeight - startHeight; i += 4 )
+  for (size_t i = 0; i < height; i += STEP_Y)
   {
-    pRec = imgYRecPost + startWidth + i * dstStride;
+    const AlfClassifier *pClass = classifier[blkDst.y + i] + blkDst.x;   // classifier row used for this strip of STEP_Y rows
 
-    if( !bChroma )
+    for (size_t j = 0; j < width; j += STEP_X)
     {
-      pClass = classifier[startHeight + i] + startWidth;
-    }
+      __m128i params[2][2][6];   // [k: columns 0-3 / 4-7 of the tile][0 = coeff, 1 = clip][coefficient pair]
 
-    for( int j = 0; j < endWidth - startWidth; j += 4 )
-    {
-      if( !bChroma )
+      for (int k = 0; k < 2; ++k)
       {
-        AlfClassifier& cl = pClass[j];
-        transposeIdx = cl.transposeIdx;
-        if ( isPCMFilterDisabled && cl.classIdx == AdaptiveLoopFilter::m_ALF_UNUSED_CLASSIDX && transposeIdx == AdaptiveLoopFilter::m_ALF_UNUSED_TRANSPOSIDX )
-        {
-          pRec += 4;
-          continue;
-        }
-        coef = filterSet + cl.classIdx * MAX_NUM_ALF_LUMA_COEFF;
+        const AlfClassifier &cl = pClass[j + 4 * k];   // one classifier entry is read per 4 columns
+
+        const int transposeIdx = cl.transposeIdx;
+        const int classIdx     = cl.classIdx;
+
+        static_assert(sizeof(*filterSet) == 2, "ALF coeffs must be 16-bit wide");
+        static_assert(sizeof(*fClipSet) == 2, "ALF clip values must be 16-bit wide");
+
+        __m128i rawCoeff0, rawCoeff1;
+        __m128i rawClip0, rawClip1;
+
+          rawCoeff0 = _mm_loadu_si128((const __m128i *) (filterSet + classIdx * MAX_NUM_ALF_LUMA_COEFF));   // values 0..7 of this class
+          rawCoeff1 = _mm_loadl_epi64((const __m128i *) (filterSet + classIdx * MAX_NUM_ALF_LUMA_COEFF + 8));   // values 8..11; 64-bit load, upper half of the register is zero
+
+          rawClip0 = _mm_loadu_si128((const __m128i *) (fClipSet + classIdx * MAX_NUM_ALF_LUMA_COEFF));
+          rawClip1 = _mm_loadl_epi64((const __m128i *) (fClipSet + classIdx * MAX_NUM_ALF_LUMA_COEFF + 8));
+
+        const __m128i s0 = _mm_loadu_si128((const __m128i *) shuffleTab[transposeIdx][0]);
+        const __m128i s1 = _mm_xor_si128(s0, _mm_set1_epi8((char) 0x80));   // toggle bit 7: s1 keeps exactly the lanes that s0 zeroes
+        const __m128i s2 = _mm_loadu_si128((const __m128i *) shuffleTab[transposeIdx][1]);
+        const __m128i s3 = _mm_xor_si128(s2, _mm_set1_epi8((char) 0x80));
+
+        const __m128i rawCoeffLo = _mm_or_si128(_mm_shuffle_epi8(rawCoeff0, s0), _mm_shuffle_epi8(rawCoeff1, s1));   // byte shuffle yields 0 where bit 7 of the control byte is set, so OR merges both sources
+        const __m128i rawCoeffHi = _mm_or_si128(_mm_shuffle_epi8(rawCoeff0, s2), _mm_shuffle_epi8(rawCoeff1, s3));
+        const __m128i rawClipLo  = _mm_or_si128(_mm_shuffle_epi8(rawClip0, s0), _mm_shuffle_epi8(rawClip1, s1));
+        const __m128i rawClipHi  = _mm_or_si128(_mm_shuffle_epi8(rawClip0, s2), _mm_shuffle_epi8(rawClip1, s3));
+
+        params[k][0][0] = _mm_shuffle_epi32(rawCoeffLo, 0x00);   // broadcast one 32-bit lane (two adjacent 16-bit values) to all four lanes
+        params[k][0][1] = _mm_shuffle_epi32(rawCoeffLo, 0x55);
+        params[k][0][2] = _mm_shuffle_epi32(rawCoeffLo, 0xaa);
+        params[k][0][3] = _mm_shuffle_epi32(rawCoeffLo, 0xff);
+        params[k][0][4] = _mm_shuffle_epi32(rawCoeffHi, 0x00);
+        params[k][0][5] = _mm_shuffle_epi32(rawCoeffHi, 0x55);
+        params[k][1][0] = _mm_shuffle_epi32(rawClipLo, 0x00);
+        params[k][1][1] = _mm_shuffle_epi32(rawClipLo, 0x55);
+        params[k][1][2] = _mm_shuffle_epi32(rawClipLo, 0xaa);
+        params[k][1][3] = _mm_shuffle_epi32(rawClipLo, 0xff);
+        params[k][1][4] = _mm_shuffle_epi32(rawClipHi, 0x00);
+        params[k][1][5] = _mm_shuffle_epi32(rawClipHi, 0x55);
       }
-      else if ( isPCMFilterDisabled )
-      {
-        int  blkX, blkY;
-        bool *flags  = pcmFlags2x2;
-        Pel  *pcmRec = pcmRec2x2;
 
-        // check which chroma 2x2 blocks use PCM
-        // chroma PCM may not be aligned with 4x4 ALF processing grid
-        for( blkY=0; blkY<4; blkY+=2 )
+      for (size_t ii = 0; ii < STEP_Y; ii++)
+      {
+        const Pel *pImg0, *pImg1, *pImg2, *pImg3, *pImg4, *pImg5, *pImg6;
+
+        pImg0 = src + j + ii * srcStride;   // current row; odd-numbered pointers are rows below, even-numbered rows above
+        pImg1 = pImg0 + srcStride;
+        pImg2 = pImg0 - srcStride;
+        pImg3 = pImg1 + srcStride;
+        pImg4 = pImg2 - srcStride;
+        pImg5 = pImg3 + srcStride;
+        pImg6 = pImg4 - srcStride;
+
+        const int yVb = (blkDst.y + i + ii) & (vbCTUHeight - 1);   // destination row modulo vbCTUHeight
+        if (yVb < vbPos && (yVb >= vbPos - 4))   // above: one of the 4 rows before vbPos
         {
-          for( blkX=0; blkX<4; blkX+=2 )
-          {
-            Position pos(j+startWidth+blkX, i+startHeight+blkY);
-            CodingUnit* cu = isDualTree ? cs.getCU(pos, CH_C) : cs.getCU(recalcPosition(nChromaFormat, CH_C, CH_L, pos), CH_L);
-            *flags++ = cu->ipcm ? 1 : 0;
-
-            // save original samples from 2x2 PCM blocks
-            if( cu->ipcm )
-            {
-              *pcmRec++ = pRec[(blkY+0)*dstStride + (blkX+0)];
-              *pcmRec++ = pRec[(blkY+0)*dstStride + (blkX+1)];
-              *pcmRec++ = pRec[(blkY+1)*dstStride + (blkX+0)];
-              *pcmRec++ = pRec[(blkY+1)*dstStride + (blkX+1)];
-            }
-          }
-        }
+          pImg1 = (yVb == vbPos - 1) ? pImg0 : pImg1;   // lower rows that would reach row vbPos are replaced by the nearest row before it
+          pImg3 = (yVb >= vbPos - 2) ? pImg1 : pImg3;
+          pImg5 = (yVb >= vbPos - 3) ? pImg3 : pImg5;
 
-        // skip entire 4x4 if all chroma 2x2 blocks use PCM
-        if( pcmFlags2x2[0] && pcmFlags2x2[1] && pcmFlags2x2[2] && pcmFlags2x2[3] )
+          pImg2 = (yVb == vbPos - 1) ? pImg0 : pImg2;   // upper rows are limited to the same distance from the current row
+          pImg4 = (yVb >= vbPos - 2) ? pImg2 : pImg4;
+          pImg6 = (yVb >= vbPos - 3) ? pImg4 : pImg6;
+        }
+        else if (yVb >= vbPos && (yVb <= vbPos + 3))   // bottom: row vbPos or one of the 3 rows after it
         {
-          pRec += 4;
-          continue;
+          pImg2 = (yVb == vbPos) ? pImg0 : pImg2;   // upper rows that would go before row vbPos are replaced by the nearest row at or after it
+          pImg4 = (yVb <= vbPos + 1) ? pImg2 : pImg4;
+          pImg6 = (yVb <= vbPos + 2) ? pImg4 : pImg6;
+
+          pImg1 = (yVb == vbPos) ? pImg0 : pImg1;   // lower rows are limited to the same distance from the current row
+          pImg3 = (yVb <= vbPos + 1) ? pImg1 : pImg3;
+          pImg5 = (yVb <= vbPos + 2) ? pImg3 : pImg5;
         }
-      }
+        __m128i cur = _mm_loadu_si128((const __m128i *) pImg0);   // the 8 centre samples of this row
 
-      __m128i c0, c2, t1, t2;
+        __m128i accumA = mmOffset;   // 32-bit sums for samples 0-3
+        __m128i accumB = mmOffset;   // 32-bit sums for samples 4-7
 
-      t1 = _mm_loadu_si128( ( __m128i* )( coef + 0 ) );
-      t2 = _mm_loadu_si128( ( __m128i* )( coef + 1 ) );
-      c2 = _mm_loadu_si128( ( __m128i* )( coef + 4 - 3 ) );
-      c0 = _mm_loadu_si128( ( __m128i* )( coef + 9 - 1 ) );
+        auto process2coeffs = [&](const int i, const Pel *ptr0, const Pel *ptr1, const Pel *ptr2, const Pel *ptr3) {   // adds pair i: ptr0/ptr1 use its first coefficient, ptr2/ptr3 its second (parameter i shadows the row-loop i)
+          const __m128i val00 = _mm_sub_epi16(_mm_loadu_si128((const __m128i *) ptr0), cur);   // differences between neighbour and centre samples
+          const __m128i val10 = _mm_sub_epi16(_mm_loadu_si128((const __m128i *) ptr2), cur);
+          const __m128i val01 = _mm_sub_epi16(_mm_loadu_si128((const __m128i *) ptr1), cur);
+          const __m128i val11 = _mm_sub_epi16(_mm_loadu_si128((const __m128i *) ptr3), cur);
 
-      c0 = _mm_blend_epi16( c0, t1, 0x01 );
-      c2 = _mm_blend_epi16( c2, t2, 0x07 );
+          __m128i val01A = _mm_unpacklo_epi16(val00, val10);   // interleave so each 32-bit lane holds (ptr0 diff, ptr2 diff) of one sample; A/C = samples 0-3
+          __m128i val01B = _mm_unpackhi_epi16(val00, val10);   // B/D = samples 4-7
+          __m128i val01C = _mm_unpacklo_epi16(val01, val11);
+          __m128i val01D = _mm_unpackhi_epi16(val01, val11);
 
-      if( transposeIdx & 1 )
-      {
-        t1 = _mm_loadu_si128( ( __m128i* )mask00 );
-        t2 = _mm_loadu_si128( ( __m128i* )mask02 );
-        __m128i t3 = _mm_loadu_si128( ( __m128i* )mask20 );
-        __m128i t4 = _mm_loadu_si128( ( __m128i* )mask22 );
-
-        t1 = _mm_shuffle_epi8( c0, t1 );
-        t2 = _mm_shuffle_epi8( c2, t2 );
-        t3 = _mm_shuffle_epi8( c0, t3 );
-        t4 = _mm_shuffle_epi8( c2, t4 );
-
-        c0 = _mm_blend_epi16( t1, t2, 0x6C );
-        c2 = _mm_blend_epi16( t4, t3, 0x22 );
-      }
-      else
-      {
-        c0 = _mm_shuffle_epi8( c0, _mm_loadu_si128( ( __m128i* )mask0 ) );
-      }
+          __m128i limit01A = params[0][1][i];   // clip values for samples 0-3
+          __m128i limit01B = params[1][1][i];   // clip values for samples 4-7
 
-      if( transposeIdx == 0 || transposeIdx == 3 )
-      {
-        c2 = _mm_shuffle_epi8( c2, xmm10 );
-      }
+          val01A = _mm_min_epi16(val01A, limit01A);   // upper clamp: diff <= +clip
+          val01B = _mm_min_epi16(val01B, limit01B);
+          val01C = _mm_min_epi16(val01C, limit01A);
+          val01D = _mm_min_epi16(val01D, limit01B);
 
-      pImg0 = pImgYPad0 + j;
-      pImg1 = pImgYPad1 + j;
-      pImg2 = pImgYPad2 + j;
-      pImg3 = pImgYPad3 + j;
-      pImg4 = pImgYPad4 + j;
-      pImg5 = pImgYPad5 + j;
-      pImg6 = pImgYPad6 + j;
+          limit01A = _mm_sub_epi16(_mm_setzero_si128(), limit01A);   // negate to obtain the lower bound
+          limit01B = _mm_sub_epi16(_mm_setzero_si128(), limit01B);
 
-      for( int k = 0; k < 4; k++ )
-      {
-        __m128i xmm6 = _mm_lddqu_si128( ( __m128i* ) pImg6 );
-        __m128i xmm4 = _mm_lddqu_si128( ( __m128i* ) ( pImg4 - 1 ) );
-        __m128i xmm2 = _mm_lddqu_si128( ( __m128i* ) ( pImg2 - 2 ) );
-        __m128i xmm0 = _mm_lddqu_si128( ( __m128i* ) ( pImg0 - 3 ) );
-        __m128i xmm11 = _mm_lddqu_si128( ( __m128i* ) ( pImg0 + 5 ) );
-        __m128i xmm1 = _mm_lddqu_si128( ( __m128i* ) ( pImg1 - 2 - 1 ) );
-        __m128i xmm8 = _mm_lddqu_si128( ( __m128i* ) ( pImg1 + 5 ) );
-        __m128i xmm3 = _mm_lddqu_si128( ( __m128i* ) ( pImg3 - 2 ) );
-        __m128i xmm5 = _mm_lddqu_si128( ( __m128i* ) ( pImg5 - 1 ) );
-
-        xmm6 = _mm_add_epi16( xmm6, _mm_srli_si128( xmm5, 2 ) );
-
-        __m128i xmm12 = _mm_blend_epi16( _mm_slli_si128( xmm0, 2 ), xmm6, 0x01 );
-        __m128i xmm13 = _mm_blend_epi16( xmm0, _mm_srli_si128( xmm6, 2 ), 0x01 );
-
-        __m128i xmm14 = _mm_blend_epi16( _mm_slli_si128( xmm2, 6 ), xmm4, 0x07 );
-        __m128i xmm16 = _mm_blend_epi16( _mm_slli_si128( xmm1, 4 ), _mm_srli_si128( xmm3, 2 ), 0x07 );
-        xmm14 = _mm_shuffle_epi8( xmm14, xmm10 );
-        xmm14 = _mm_add_epi16( xmm14, xmm16 );
-        __m128i xmm15 = _mm_blend_epi16( _mm_slli_si128( xmm2, 4 ), _mm_srli_si128( xmm4, 2 ), 0x07 );
-        __m128i xmm17 = _mm_blend_epi16( _mm_slli_si128( xmm1, 2 ), _mm_srli_si128( xmm3, 4 ), 0x07 );
-        xmm15 = _mm_shuffle_epi8( xmm15, xmm10 );
-        xmm15 = _mm_add_epi16( xmm15, xmm17 );
-
-        xmm12 = _mm_madd_epi16( xmm12, c0 );
-        xmm13 = _mm_madd_epi16( xmm13, c0 );
-        xmm14 = _mm_madd_epi16( xmm14, c2 );
-        xmm15 = _mm_madd_epi16( xmm15, c2 );
-
-        xmm12 = _mm_add_epi32( xmm12, xmm14 );
-        xmm13 = _mm_add_epi32( xmm13, xmm15 );
-        xmm14 = _mm_shuffle_epi32( xmm12, 0x1B );
-        xmm15 = _mm_shuffle_epi32( xmm13, 0x1B );
-        xmm12 = _mm_add_epi32( xmm12, xmm14 );
-        xmm13 = _mm_add_epi32( xmm13, xmm15 );
-
-        __m128i xmm7 = _mm_blend_epi16( xmm12, xmm13, 0xF0 );
-
-        xmm12 = _mm_blend_epi16( _mm_alignr_epi8( xmm11, xmm0, 2 ), _mm_srli_si128( xmm6, 4 ), 0x01 );
-        xmm13 = _mm_blend_epi16( _mm_alignr_epi8( xmm11, xmm0, 4 ), _mm_srli_si128( xmm6, 6 ), 0x01 );
-
-        xmm14 = _mm_blend_epi16( _mm_slli_si128( xmm2, 2 ), _mm_srli_si128( xmm4, 4 ), 0x07 );
-        xmm16 = _mm_blend_epi16( xmm1, _mm_srli_si128( xmm3, 6 ), 0x07 );
-        xmm14 = _mm_shuffle_epi8( xmm14, xmm10 );
-        xmm14 = _mm_add_epi16( xmm14, xmm16 );
-        xmm15 = _mm_blend_epi16( xmm2, _mm_srli_si128( xmm4, 6 ), 0x07 );
-        xmm8 = _mm_alignr_epi8( xmm8, xmm1, 2 );
-        xmm17 = _mm_blend_epi16( xmm8, _mm_srli_si128( xmm3, 8 ), 0x07 );
-        xmm15 = _mm_shuffle_epi8( xmm15, xmm10 );
-        xmm15 = _mm_add_epi16( xmm15, xmm17 );
-
-        xmm12 = _mm_madd_epi16( xmm12, c0 );
-        xmm13 = _mm_madd_epi16( xmm13, c0 );
-        xmm14 = _mm_madd_epi16( xmm14, c2 );
-        xmm15 = _mm_madd_epi16( xmm15, c2 );
-
-        xmm12 = _mm_add_epi32( xmm12, xmm14 );
-        xmm13 = _mm_add_epi32( xmm13, xmm15 );
-        xmm14 = _mm_shuffle_epi32( xmm12, 0x1B );
-        xmm15 = _mm_shuffle_epi32( xmm13, 0x1B );
-        xmm12 = _mm_add_epi32( xmm12, xmm14 );
-        xmm13 = _mm_add_epi32( xmm13, xmm15 );
-
-        __m128i xmm9 = _mm_blend_epi16( xmm12, xmm13, 0xF0 );
-
-        xmm12 = _mm_hadd_epi32( xmm7, xmm9 );
-
-        xmm12 = _mm_add_epi32( xmm12, mmOffset );
-        xmm12 = _mm_srai_epi32( xmm12, numBitsMinus1 );
-
-        xmm12 = _mm_min_epi32( mmMax, _mm_max_epi32( xmm12, mmMin ) );
-
-        xmm12 = _mm_packus_epi32( xmm12, xmm12 );
-
-        _mm_storel_epi64( ( __m128i* )( pRec ), xmm12 );
-
-        pRec += dstStride;
-
-        pImg0 += srcStride;
-        pImg1 += srcStride;
-        pImg2 += srcStride;
-        pImg3 += srcStride;
-        pImg4 += srcStride;
-        pImg5 += srcStride;
-        pImg6 += srcStride;
-      }
+          val01A = _mm_max_epi16(val01A, limit01A);   // lower clamp: diff >= -clip
+          val01B = _mm_max_epi16(val01B, limit01B);
+          val01C = _mm_max_epi16(val01C, limit01A);
+          val01D = _mm_max_epi16(val01D, limit01B);
 
-      pRec -= ( 4 * dstStride );
+          val01A = _mm_add_epi16(val01A, val01C);   // sum the two clipped differences that share a coefficient
+          val01B = _mm_add_epi16(val01B, val01D);
 
-      // restore 2x2 PCM chroma blocks
-      if( bChroma && isPCMFilterDisabled )
-      {
-        int  blkX, blkY;
-        bool *flags  = pcmFlags2x2;
-        Pel  *pcmRec = pcmRec2x2;
-        for( blkY=0; blkY<4; blkY+=2 )
+          const __m128i coeff01A = params[0][0][i];
+          const __m128i coeff01B = params[1][0][i];
+
+          accumA = _mm_add_epi32(accumA, _mm_madd_epi16(val01A, coeff01A));   // per sample: first coeff * first sum + second coeff * second sum, as 32-bit
+          accumB = _mm_add_epi32(accumB, _mm_madd_epi16(val01B, coeff01B));
+        };
+
+
+        process2coeffs(0, pImg5 + 0, pImg6 + 0, pImg3 + 1, pImg4 - 1);
+        process2coeffs(1, pImg3 + 0, pImg4 + 0, pImg3 - 1, pImg4 + 1);
+        process2coeffs(2, pImg1 + 2, pImg2 - 2, pImg1 + 1, pImg2 - 1);
+        process2coeffs(3, pImg1 + 0, pImg2 + 0, pImg1 - 1, pImg2 + 1);
+        process2coeffs(4, pImg1 - 2, pImg2 + 2, pImg0 + 3, pImg0 - 3);
+        process2coeffs(5, pImg0 + 2, pImg0 - 2, pImg0 + 1, pImg0 - 1);
+
+
+#if JVET_Q0150
+        bool isNearVBabove = yVb < vbPos && (yVb >= vbPos - 1);   // true only for yVb == vbPos - 1
+        bool isNearVBbelow = yVb >= vbPos && (yVb <= vbPos);   // true only for yVb == vbPos
+        if (!(isNearVBabove || isNearVBbelow))
         {
-          for( blkX=0; blkX<4; blkX+=2 )
-          {
-            if( *flags++ )
-            {
-              pRec[(blkY+0)*dstStride + (blkX+0)] = *pcmRec++;
-              pRec[(blkY+0)*dstStride + (blkX+1)] = *pcmRec++;
-              pRec[(blkY+1)*dstStride + (blkX+0)] = *pcmRec++;
-              pRec[(blkY+1)*dstStride + (blkX+1)] = *pcmRec++;
-            }
-          }
+          accumA = _mm_srai_epi32(accumA, SHIFT);
+          accumB = _mm_srai_epi32(accumB, SHIFT);
         }
-      }
+        else
+        {
+          accumA = _mm_srai_epi32(accumA, SHIFT + 3);   // NOTE(review): shift is 3 bits larger here but the rounding term added above is still ROUND - confirm against the non-SIMD filter
+          accumB = _mm_srai_epi32(accumB, SHIFT + 3);
+        }
+#else
+        accumA = _mm_srai_epi32(accumA, SHIFT);
+        accumB = _mm_srai_epi32(accumB, SHIFT);
+#endif
+        accumA = _mm_packs_epi32(accumA, accumB);   // saturating pack to 16-bit: samples 0-3 from accumA, 4-7 from accumB
+        accumA = _mm_add_epi16(accumA, cur);   // the filtered sum is a correction added to the centre samples
+        accumA = _mm_min_epi16(mmMax, _mm_max_epi16(accumA, mmMin));   // clamp to [clpRng.min, clpRng.max]
 
-      pRec += 4;
+        _mm_storeu_si128((__m128i *) (dst + ii * dstStride + j), accumA);   // write 8 output samples
+      }
     }
 
-    pRec += dstStride2;
-
-    pImgYPad0 += srcStride2;
-    pImgYPad1 += srcStride2;
-    pImgYPad2 += srcStride2;
-    pImgYPad3 += srcStride2;
-    pImgYPad4 += srcStride2;
-    pImgYPad5 += srcStride2;
-    pImgYPad6 += srcStride2;
+    src += srcStride * STEP_Y;
+    dst += dstStride * STEP_Y;
   }
 }
 
@@ -918,5 +690,4 @@ void AdaptiveLoopFilter::_initAdaptiveLoopFilterX86()
 }
 
 template void AdaptiveLoopFilter::_initAdaptiveLoopFilterX86<SIMDX86>();
-#endif //#ifdef TARGET_SIMD_X86
-//! \}
+#endif   // TARGET_SIMD_X86
diff --git a/source/Lib/CommonLib/x86/AffineGradientSearchX86.h b/source/Lib/CommonLib/x86/AffineGradientSearchX86.h
index b49d703b9113dabc67a7c9d0637da9035a8cb3f7..bc8676e258c6090ce4577931d3aa1ac4e451c729 100644
--- a/source/Lib/CommonLib/x86/AffineGradientSearchX86.h
+++ b/source/Lib/CommonLib/x86/AffineGradientSearchX86.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/x86/BufferX86.h b/source/Lib/CommonLib/x86/BufferX86.h
index 00399014c56ec3d9e68b860ab55618e549f06238..c763a2977ca6fb745ea905d289bed80fba69787b 100644
--- a/source/Lib/CommonLib/x86/BufferX86.h
+++ b/source/Lib/CommonLib/x86/BufferX86.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -43,7 +43,7 @@
 #include "CommonDefX86.h"
 #include "CommonLib/Unit.h"
 #include "CommonLib/Buffer.h"
-
+#include "CommonLib/InterpolationFilter.h"
 
 #if ENABLE_SIMD_OPT_BUFFER
 #ifdef TARGET_SIMD_X86
@@ -53,44 +53,33 @@ void addAvg_SSE( const int16_t* src0, int src0Stride, const int16_t* src1, int s
 {
   if( W == 8 )
   {
-    // TODO: AVX2 impl
-    {
-      __m128i vzero    = _mm_setzero_si128();
-      __m128i voffset  = _mm_set1_epi32( offset );
-      __m128i vibdimin = _mm_set1_epi16( clpRng.min );
-      __m128i vibdimax = _mm_set1_epi16( clpRng.max );
+    CHECK(offset & 1, "offset must be even");
+    CHECK(offset < -32768 || offset > 32767, "offset must be a 16-bit value");
 
-      for( int row = 0; row < height; row++ )
-      {
-        for( int col = 0; col < width; col += 8 )
-        {
-          __m128i vsrc0 = _mm_loadu_si128( ( const __m128i * )&src0[col] );
-          __m128i vsrc1 = _mm_loadu_si128( ( const __m128i * )&src1[col] );
+    __m128i vibdimin = _mm_set1_epi16(clpRng.min);
+    __m128i vibdimax = _mm_set1_epi16(clpRng.max);
 
-          __m128i vtmp, vsum, vdst;
-          vsum = _mm_cvtepi16_epi32   ( vsrc0 );
-          vdst = _mm_cvtepi16_epi32   ( vsrc1 );
-          vsum = _mm_add_epi32        ( vsum, vdst );
-          vsum = _mm_add_epi32        ( vsum, voffset );
-          vtmp = _mm_srai_epi32       ( vsum, shift );
-
-          vsrc0 = _mm_unpackhi_epi64  ( vsrc0, vzero );
-          vsrc1 = _mm_unpackhi_epi64  ( vsrc1, vzero );
-          vsum = _mm_cvtepi16_epi32   ( vsrc0 );
-          vdst = _mm_cvtepi16_epi32   ( vsrc1 );
-          vsum = _mm_add_epi32        ( vsum, vdst );
-          vsum = _mm_add_epi32        ( vsum, voffset );
-          vsum = _mm_srai_epi32       ( vsum, shift );
-          vsum = _mm_packs_epi32      ( vtmp, vsum );
-
-          vsum = _mm_min_epi16( vibdimax, _mm_max_epi16( vibdimin, vsum ) );
-          _mm_storeu_si128( ( __m128i * )&dst[col], vsum );
-        }
-
-        src0 += src0Stride;
-        src1 += src1Stride;
-        dst  +=  dstStride;
+    for (int row = 0; row < height; row++)
+    {
+      for (int col = 0; col < width; col += 8)
+      {
+        __m128i vsrc0 = _mm_loadu_si128((const __m128i *) &src0[col]);
+        __m128i vsrc1 = _mm_loadu_si128((const __m128i *) &src1[col]);
+
+        vsrc0 = _mm_xor_si128(vsrc0, _mm_set1_epi16(0x7fff));
+        vsrc1 = _mm_xor_si128(vsrc1, _mm_set1_epi16(0x7fff));
+        vsrc0 = _mm_avg_epu16(vsrc0, vsrc1);
+        vsrc0 = _mm_xor_si128(vsrc0, _mm_set1_epi16(0x7fff));
+        vsrc0 = _mm_adds_epi16(vsrc0, _mm_set1_epi16(offset >> 1));
+        vsrc0 = _mm_sra_epi16(vsrc0, _mm_cvtsi32_si128(shift - 1));
+        vsrc0 = _mm_max_epi16(vsrc0, vibdimin);
+        vsrc0 = _mm_min_epi16(vsrc0, vibdimax);
+        _mm_storeu_si128((__m128i *) &dst[col], vsrc0);
       }
+
+      src0 += src0Stride;
+      src1 += src1Stride;
+      dst += dstStride;
     }
   }
   else if( W == 4 )
@@ -131,126 +120,118 @@ void addAvg_SSE( const int16_t* src0, int src0Stride, const int16_t* src1, int s
 template<X86_VEXT vext>
 void copyBufferSimd(Pel *src, int srcStride, Pel *dst, int dstStride, int width, int height)
 {
-  __m128i x;
-#ifdef USE_AVX2
-  __m256i x16;
-#endif
-  int j, temp;
-  for (int i = 0; i < height; i++)
+  if (width < 8)   // copies a width x height block from src to dst; narrow blocks use 64-bit column copies
   {
-    j = 0;
-    temp = width;
-#ifdef USE_AVX2
-    while ((temp >> 4) > 0)
-    {
-      x16 = _mm256_loadu_si256((const __m256i*)(&src[i * srcStride + j]));
-      _mm256_storeu_si256((__m256i*)(&dst[i * dstStride + j]), x16);
-      j += 16;
-      temp -= 16;
-    }
-#endif
-    while ((temp >> 3) > 0)
-    {
-      x = _mm_loadu_si128((const __m128i*)(&src[ i * srcStride + j]));
-      _mm_storeu_si128((__m128i*)(&dst[ i * dstStride + j]), x);
-      j += 8;
-      temp -= 8;
-    }
-    while ((temp >> 2) > 0)
+    CHECK(width < 4, "width must be at least 4");
+
+    for (int x = 0; x < width; x += 4)   // signed counters: same type as width/height/stride, no unsigned wraparound
     {
-      x = _mm_loadl_epi64((const __m128i*)(&src[i * srcStride + j]));
-      _mm_storel_epi64((__m128i*)(&dst[i*dstStride + j]), x);
-      j += 4;
-      temp -= 4;
+      if (x > width - 4)   // last column: step back so the 64-bit access ends exactly at width
+        x = width - 4;
+      for (int y = 0; y < height; y++)
+      {
+        __m128i val = _mm_loadl_epi64((const __m128i *) (src + y * srcStride + x));
+        _mm_storel_epi64((__m128i *) (dst + y * dstStride + x), val);
+      }
     }
-    while (temp > 0)
+  }
+  else
+  {
+    for (int x = 0; x < width; x += 8)   // wide blocks: 128-bit column copies
     {
-      dst[i * dstStride + j] = src[i * srcStride + j];
-      j++;
-      temp--;
+      if (x > width - 8)   // last column overlaps the previous one instead of running past width
+        x = width - 8;
+      for (int y = 0; y < height; y++)
+      {
+        __m128i val = _mm_loadu_si128((const __m128i *) (src + y * srcStride + x));
+        _mm_storeu_si128((__m128i *) (dst + y * dstStride + x), val);
+      }
     }
   }
 }
 
-
 template<X86_VEXT vext>
 void paddingSimd(Pel *dst, int stride, int width, int height, int padSize)
 {
-  __m128i x;
-#ifdef USE_AVX2
-  __m256i x16;
-#endif
-  int temp, j;
-  for (int i = 1; i <= padSize; i++)
+  const int extWidth = width + 2 * padSize;   // row width including left/right padding; signed like the parameters
+  CHECK(extWidth < 8, "width plus 2 times padding size must be at least 8");
+
+  if (padSize == 1)
   {
-    j = 0;
-    temp = width;
-#ifdef USE_AVX2
-    while ((temp >> 4) > 0)
+    for (int i = 0; i < height; i++)   // replicate first/last sample of every row sideways
     {
+      Pel left                = dst[i * stride];
+      Pel right               = dst[i * stride + width - 1];
+      dst[i * stride - 1]     = left;   // signed index: well-defined -1 offset for i == 0
+      dst[i * stride + width] = right;
+    }
 
-      x16 = _mm256_loadu_si256((const __m256i*)(&(dst[j])));
-      _mm256_storeu_si256((__m256i*)(dst + j - i*stride), x16);
-      x16 = _mm256_loadu_si256((const __m256i*)(dst + j + (height - 1)*stride));
-      _mm256_storeu_si256((__m256i*)(dst + j + (height - 1 + i)*stride), x16);
-
+    dst -= 1;   // dst now points at the left padding sample of the first row
 
-      j = j + 16;
-      temp = temp - 16;
-    }
-#endif
-    while ((temp >> 3) > 0)
+    for (int i = 0; i < extWidth - 8; i += 8)   // 8 samples per store, so advance by 8; tail handled below
     {
+      __m128i top = _mm_loadu_si128((const __m128i *) (dst + i));
+      _mm_storeu_si128((__m128i *) (dst - stride + i), top);
+    }
+    __m128i top = _mm_loadu_si128((const __m128i *) (dst + extWidth - 8));   // last 8 samples, may overlap the loop's range
+    _mm_storeu_si128((__m128i *) (dst - stride + extWidth - 8), top);
 
-      x = _mm_loadu_si128((const __m128i*)(&(dst[j])));
-      _mm_storeu_si128((__m128i*)(dst + j - i*stride), x);
-      x = _mm_loadu_si128((const __m128i*)(dst + j + (height - 1)*stride));
-      _mm_storeu_si128((__m128i*)(dst + j + (height - 1 + i)*stride), x);
+    dst += height * stride;   // first padding row below the block
 
-      j = j + 8;
-      temp = temp - 8;
-    }
-    while ((temp >> 2) > 0)
+    for (int i = 0; i < extWidth - 8; i += 8)   // copy the last block row into the padding row below
     {
-      x = _mm_loadl_epi64((const __m128i*)(&dst[j]));
-      _mm_storel_epi64((__m128i*)(dst + j - i*stride), x);
-      x = _mm_loadl_epi64((const __m128i*)(dst + j + (height - 1)*stride));
-      _mm_storel_epi64((__m128i*)(dst + j + (height - 1 + i)*stride), x);
-
-      j = j + 4;
-      temp = temp - 4;
+      __m128i bottom = _mm_loadu_si128((const __m128i *) (dst - stride + i));
+      _mm_storeu_si128((__m128i *) (dst + i), bottom);
     }
-    while (temp > 0)
+    __m128i bottom = _mm_loadu_si128((const __m128i *) (dst - stride + extWidth - 8));
+    _mm_storeu_si128((__m128i *) (dst + extWidth - 8), bottom);
+  }
+  else if (padSize == 2)
+  {
+    for (int i = 0; i < height; i++)   // replicate first/last sample of every row two samples sideways
     {
-      dst[j - i*stride] = dst[j];
-      dst[j + (height - 1 + i)*stride] = dst[j + (height - 1)*stride];
-      j++;
-      temp--;
+      Pel left                    = dst[i * stride];
+      Pel right                   = dst[i * stride + width - 1];
+      dst[i * stride - 2]         = left;
+      dst[i * stride - 1]         = left;
+      dst[i * stride + width]     = right;
+      dst[i * stride + width + 1] = right;
     }
-  }
 
+    dst -= 2;   // dst now points at the leftmost padding sample of the first row
 
-  //Left and Right Padding
-  Pel* ptr1 = dst - padSize*stride;
-  Pel* ptr2 = dst - padSize*stride + width - 1;
-  int offset = 0;
-  for (int i = 0; i < height + 2 * padSize; i++)
-  {
-    offset = stride * i;
-    for (int j = 1; j <= padSize; j++)
+    for (int i = 0; i < extWidth - 8; i += 8)   // first block row goes to both padding rows above
     {
-      *(ptr1 - j + offset) = *(ptr1 + offset);
-      *(ptr2 + j + offset) = *(ptr2 + offset);
+      __m128i top = _mm_loadu_si128((const __m128i *) (dst + i));
+      _mm_storeu_si128((__m128i *) (dst - 2 * stride + i), top);
+      _mm_storeu_si128((__m128i *) (dst - stride + i), top);
     }
+    __m128i top = _mm_loadu_si128((const __m128i *) (dst + extWidth - 8));   // last 8 samples, may overlap the loop's range
+    _mm_storeu_si128((__m128i *) (dst - 2 * stride + extWidth - 8), top);
+    _mm_storeu_si128((__m128i *) (dst - stride + extWidth - 8), top);
+
+    dst += height * stride;   // first padding row below the block
 
+    for (int i = 0; i < extWidth - 8; i += 8)   // last block row goes to both padding rows below
+    {
+      __m128i bottom = _mm_loadu_si128((const __m128i *) (dst - stride + i));
+      _mm_storeu_si128((__m128i *) (dst + i), bottom);
+      _mm_storeu_si128((__m128i *) (dst + stride + i), bottom);
+    }
+    __m128i bottom = _mm_loadu_si128((const __m128i *) (dst - stride + extWidth - 8));
+    _mm_storeu_si128((__m128i *) (dst + extWidth - 8), bottom);
+    _mm_storeu_si128((__m128i *) (dst + stride + extWidth - 8), bottom);
+  }
+  else
+  {
+    CHECK(false, "padding size must be 1 or 2");
   }
 }
+
 template< X86_VEXT vext >
 void addBIOAvg4_SSE(const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, const Pel *gradX0, const Pel *gradX1, const Pel *gradY0, const Pel*gradY1, int gradStride, int width, int height, int tmpx, int tmpy, int shift, int offset, const ClpRng& clpRng)
 {
-  __m128i mm_tmpx = _mm_unpacklo_epi64(_mm_set1_epi16(tmpx), _mm_set1_epi16(tmpy));
-  __m128i mm_boffset = _mm_set1_epi32(1);
-  __m128i mm_offset = _mm_set1_epi32(offset);
+  __m128i c        = _mm_unpacklo_epi16(_mm_set1_epi16(tmpx), _mm_set1_epi16(tmpy));
   __m128i vibdimin = _mm_set1_epi16(clpRng.min);
   __m128i vibdimax = _mm_set1_epi16(clpRng.max);
 
@@ -258,20 +239,22 @@ void addBIOAvg4_SSE(const Pel* src0, int src0Stride, const Pel* src1, int src1St
   {
     for (int x = 0; x < width; x += 4)
     {
-      __m128i mm_a = _mm_unpacklo_epi64(_mm_loadl_epi64((const __m128i *)(gradX0 + x)), _mm_loadl_epi64((const __m128i *)(gradY0 + x)));
-      __m128i mm_b = _mm_unpacklo_epi64(_mm_loadl_epi64((const __m128i *)(gradX1 + x)), _mm_loadl_epi64((const __m128i *)(gradY1 + x)));
-      mm_a = _mm_sub_epi16(mm_a, mm_b);
-      mm_b = _mm_mulhi_epi16(mm_a, mm_tmpx);
-      mm_a = _mm_mullo_epi16(mm_a, mm_tmpx);
-
-      __m128i mm_sum = _mm_add_epi32(_mm_unpacklo_epi16(mm_a, mm_b), _mm_unpackhi_epi16(mm_a, mm_b));
-      mm_sum = _mm_srai_epi32(_mm_add_epi32(mm_sum, mm_boffset), 1);
-      mm_a = _mm_cvtepi16_epi32(_mm_loadl_epi64((const __m128i *)(src0 + x)));
-      mm_b = _mm_cvtepi16_epi32(_mm_loadl_epi64((const __m128i *)(src1 + x)));
-      mm_sum = _mm_add_epi32(_mm_add_epi32(mm_sum, mm_a), _mm_add_epi32(mm_b, mm_offset));
-      mm_sum = _mm_packs_epi32(_mm_srai_epi32(mm_sum, shift), mm_a);
-      mm_sum = _mm_min_epi16(vibdimax, _mm_max_epi16(vibdimin, mm_sum));
-      _mm_storel_epi64((__m128i *)(dst + x), mm_sum);
+      __m128i a   = _mm_unpacklo_epi16(_mm_loadl_epi64((const __m128i *) (gradX0 + x)),
+                                     _mm_loadl_epi64((const __m128i *) (gradY0 + x)));
+      __m128i b   = _mm_unpacklo_epi16(_mm_loadl_epi64((const __m128i *) (gradX1 + x)),
+                                     _mm_loadl_epi64((const __m128i *) (gradY1 + x)));
+      a           = _mm_sub_epi16(a, b);
+      __m128i sum = _mm_madd_epi16(a, c);
+
+      a   = _mm_unpacklo_epi16(_mm_loadl_epi64((const __m128i *) (src0 + x)),
+                             _mm_loadl_epi64((const __m128i *) (src1 + x)));
+      sum = _mm_add_epi32(sum, _mm_madd_epi16(a, _mm_set1_epi16(1)));
+      sum = _mm_add_epi32(sum, _mm_set1_epi32(offset));
+      sum = _mm_sra_epi32(sum, _mm_cvtsi32_si128(shift));
+      sum = _mm_packs_epi32(sum, sum);
+      sum = _mm_max_epi16(sum, vibdimin);
+      sum = _mm_min_epi16(sum, vibdimax);
+      _mm_storel_epi64((__m128i *) (dst + x), sum);
     }
     dst += dstStride;       src0 += src0Stride;     src1 += src1Stride;
     gradX0 += gradStride; gradX1 += gradStride; gradY0 += gradStride; gradY1 += gradStride;
@@ -279,44 +262,343 @@ void addBIOAvg4_SSE(const Pel* src0, int src0Stride, const Pel* src1, int src1St
 }
 
 template< X86_VEXT vext >
+void calcBIOSums_SSE(const Pel* srcY0Tmp, const Pel* srcY1Tmp, Pel* gradX0, Pel* gradX1, Pel* gradY0, Pel* gradY1, int xu, int yu, const int src0Stride, const int src1Stride, const int widthG, const int bitDepth, int* sumAbsGX, int* sumAbsGY, int* sumDIX, int* sumDIY, int* sumSignGY_GX)
+// Accumulates five sums over 6 rows x 6 samples and writes them to the five int* outputs; xu, yu and bitDepth are not read here.
+{
+  int shift4 = 4;   // right shift applied to both source rows before differencing
+  int shift5 = 1;   // right shift applied to the summed gradients
+
+  __m128i sumAbsGXTmp = _mm_setzero_si128();
+  __m128i sumDIXTmp = _mm_setzero_si128();
+  __m128i sumAbsGYTmp = _mm_setzero_si128();
+  __m128i sumDIYTmp = _mm_setzero_si128();
+  __m128i sumSignGyGxTmp = _mm_setzero_si128();
+
+  for (int y = 0; y < 6; y++)
+  {
+    // Note: loading 8 values also works, but valgrind doesn't like it
+    auto load6values = [](const Pel *ptr) {
+      __m128i a = _mm_loadl_epi64((const __m128i *) ptr);   // 64 bits starting at ptr
+      __m128i b = _mm_cvtsi32_si128(*(uint32_t *) (ptr + 4));   // 32 bits starting at ptr + 4; remaining lanes are zero
+      return _mm_unpacklo_epi64(a, b);   // six 16-bit lanes of data followed by two zero lanes (assumes 16-bit Pel)
+    };
+
+    __m128i shiftSrcY0Tmp = _mm_srai_epi16(load6values(srcY0Tmp), shift4);
+    __m128i shiftSrcY1Tmp = _mm_srai_epi16(load6values(srcY1Tmp), shift4);
+    __m128i loadGradX0    = load6values(gradX0);
+    __m128i loadGradX1    = load6values(gradX1);
+    __m128i loadGradY0    = load6values(gradY0);
+    __m128i loadGradY1    = load6values(gradY1);
+
+    __m128i subTemp1 = _mm_sub_epi16(shiftSrcY1Tmp, shiftSrcY0Tmp);   // (src1 >> 4) - (src0 >> 4)
+    __m128i packTempX = _mm_srai_epi16(_mm_add_epi16(loadGradX0, loadGradX1), shift5);   // (gradX0 + gradX1) >> 1
+    __m128i packTempY = _mm_srai_epi16(_mm_add_epi16(loadGradY0, loadGradY1), shift5);   // (gradY0 + gradY1) >> 1
+    __m128i gX = _mm_abs_epi16(packTempX);
+    __m128i gY = _mm_abs_epi16(packTempY);
+    __m128i dIX       = _mm_sign_epi16(subTemp1,  packTempX );   // difference negated/zeroed/kept by the sign of the X gradient
+    __m128i dIY       = _mm_sign_epi16(subTemp1,  packTempY );   // difference negated/zeroed/kept by the sign of the Y gradient
+    __m128i signGY_GX = _mm_sign_epi16(packTempX, packTempY );   // X gradient negated/zeroed/kept by the sign of the Y gradient
+
+    sumAbsGXTmp = _mm_add_epi16(sumAbsGXTmp, gX);   // per-lane 16-bit accumulation across the six rows
+    sumDIXTmp = _mm_add_epi16(sumDIXTmp, dIX);
+    sumAbsGYTmp = _mm_add_epi16(sumAbsGYTmp, gY);
+    sumDIYTmp = _mm_add_epi16(sumDIYTmp, dIY);
+    sumSignGyGxTmp = _mm_add_epi16(sumSignGyGxTmp, signGY_GX);
+    srcY0Tmp += src0Stride;
+    srcY1Tmp += src1Stride;
+    gradX0 += widthG;   // all four gradient buffers advance by widthG per row
+    gradX1 += widthG;
+    gradY0 += widthG;
+    gradY1 += widthG;
+  }
+
+  sumAbsGXTmp    = _mm_madd_epi16(sumAbsGXTmp, _mm_setr_epi16(1, 1, 1, 1, 1, 1, 0, 0));   // pairwise add to 32 bits; lanes 6-7 weighted 0
+  sumDIXTmp      = _mm_madd_epi16(sumDIXTmp, _mm_setr_epi16(1, 1, 1, 1, 1, 1, 0, 0));
+  sumAbsGYTmp    = _mm_madd_epi16(sumAbsGYTmp, _mm_setr_epi16(1, 1, 1, 1, 1, 1, 0, 0));
+  sumDIYTmp      = _mm_madd_epi16(sumDIYTmp, _mm_setr_epi16(1, 1, 1, 1, 1, 1, 0, 0));
+  sumSignGyGxTmp = _mm_madd_epi16(sumSignGyGxTmp, _mm_setr_epi16(1, 1, 1, 1, 1, 1, 0, 0));
+
+  __m128i a12 = _mm_unpacklo_epi32(sumAbsGXTmp, sumAbsGYTmp);   // partial sums 0 and 1 of GX/GY, interleaved
+  __m128i a3  = _mm_unpackhi_epi32(sumAbsGXTmp, sumAbsGYTmp);   // partial sum 2 of GX/GY in the low half
+  __m128i b12 = _mm_unpacklo_epi32(sumDIXTmp, sumDIYTmp);
+  __m128i b3  = _mm_unpackhi_epi32(sumDIXTmp, sumDIYTmp);
+  __m128i c1  = _mm_unpacklo_epi64(a12, b12);   // partial sum 0 of { GX, GY, DIX, DIY }
+  __m128i c2  = _mm_unpackhi_epi64(a12, b12);   // partial sum 1 of { GX, GY, DIX, DIY }
+  __m128i c3  = _mm_unpacklo_epi64(a3, b3);     // partial sum 2 of { GX, GY, DIX, DIY }
+
+  c1 = _mm_add_epi32(c1, c2);
+  c1 = _mm_add_epi32(c1, c3);   // c1 = { sumAbsGX, sumAbsGY, sumDIX, sumDIY }
+
+  *sumAbsGX = _mm_cvtsi128_si32(c1);
+  *sumAbsGY = _mm_cvtsi128_si32(_mm_shuffle_epi32(c1, 0x55));   // broadcast lane 1
+  *sumDIX   = _mm_cvtsi128_si32(_mm_shuffle_epi32(c1, 0xaa));   // broadcast lane 2
+  *sumDIY   = _mm_cvtsi128_si32(_mm_shuffle_epi32(c1, 0xff));   // broadcast lane 3
+
+  sumSignGyGxTmp = _mm_add_epi32(sumSignGyGxTmp, _mm_shuffle_epi32(sumSignGyGxTmp, 0x4e));   // 01001110
+  sumSignGyGxTmp = _mm_add_epi32(sumSignGyGxTmp, _mm_shuffle_epi32(sumSignGyGxTmp, 0xb1));   // 10110001
+  *sumSignGY_GX  = _mm_cvtsi128_si32(sumSignGyGxTmp);   // horizontal sum of all four 32-bit lanes
+}
+
+template< X86_VEXT vext >
+void applyPROF_SSE(Pel* dstPel, int dstStride, const Pel* srcPel, int srcStride, int width, int height, const Pel* gradX, const Pel* gradY, int gradStride, const int* dMvX, const int* dMvY, int dMvStride, const bool& bi, int shiftNum, Pel offset, const ClpRng& clpRng)
+{
+  CHECKD((width & 3), "block width error!");
+
+  const int dILimit = 1 << std::max<int>(clpRng.bd + 1, 13);   // refinement term is clipped to [-dILimit, dILimit - 1]
+
+#ifdef USE_AVX2
+  __m256i mm_dmvx, mm_dmvy, mm_gradx, mm_grady, mm_dI, mm_dI0, mm_src;
+  __m256i mm_offset = _mm256_set1_epi16(offset);
+  __m256i vibdimin = _mm256_set1_epi16(clpRng.min);
+  __m256i vibdimax = _mm256_set1_epi16(clpRng.max);
+  __m256i mm_dimin = _mm256_set1_epi32(-dILimit);
+  __m256i mm_dimax = _mm256_set1_epi32(dILimit - 1);
+#else
+  __m128i mm_dmvx, mm_dmvy, mm_gradx, mm_grady, mm_dI, mm_dI0;
+  __m128i mm_offset = _mm_set1_epi16(offset);
+  __m128i vibdimin = _mm_set1_epi16(clpRng.min);
+  __m128i vibdimax = _mm_set1_epi16(clpRng.max);
+  __m128i mm_dimin = _mm_set1_epi32(-dILimit);
+  __m128i mm_dimax = _mm_set1_epi32(dILimit - 1);
+#endif
+
+#ifdef USE_AVX2
+  for (int h = 0; h < height; h += 4)   // AVX2 path handles four rows per iteration; presumably height is a multiple of 4 - confirm at call sites
+#else
+  for (int h = 0; h < height; h += 2)   // SSE path handles two rows per iteration
+#endif
+  {
+    const int* vX = dMvX;
+    const int* vY = dMvY;
+    const Pel* gX = gradX;
+    const Pel* gY = gradY;
+    const Pel* src = srcPel;
+    Pel*       dst = dstPel;
+
+    for (int w = 0; w < width; w += 4)   // four samples per row per iteration
+    {
+#ifdef USE_AVX2
+      const int *vX0 = vX, *vY0 = vY;
+      const Pel *gX0 = gX, *gY0 = gY;
+
+      // first two rows
+      mm_dmvx = _mm256_inserti128_si256(_mm256_castsi128_si256(_mm_loadu_si128((const __m128i *)vX0)), _mm_loadu_si128((const __m128i *)(vX0 + dMvStride)), 1);
+      mm_dmvy = _mm256_inserti128_si256(_mm256_castsi128_si256(_mm_loadu_si128((const __m128i *)vY0)), _mm_loadu_si128((const __m128i *)(vY0 + dMvStride)), 1);
+      mm_gradx = _mm256_inserti128_si256(
+        _mm256_castsi128_si256(_mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)gX0))),
+        _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(gX0 + gradStride))), 1);
+      mm_grady = _mm256_inserti128_si256(
+        _mm256_castsi128_si256(_mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)gY0))),
+        _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(gY0 + gradStride))), 1);
+      mm_dI0 = _mm256_add_epi32(_mm256_mullo_epi32(mm_dmvx, mm_gradx), _mm256_mullo_epi32(mm_dmvy, mm_grady));   // dMvX * gradX + dMvY * gradY
+      mm_dI0 = _mm256_min_epi32(mm_dimax, _mm256_max_epi32(mm_dimin, mm_dI0));
+
+      // next two rows
+      vX0 += (dMvStride << 1); vY0 += (dMvStride << 1); gX0 += (gradStride << 1); gY0 += (gradStride << 1);
+      mm_dmvx = _mm256_inserti128_si256(_mm256_castsi128_si256(_mm_loadu_si128((const __m128i *)vX0)), _mm_loadu_si128((const __m128i *)(vX0 + dMvStride)), 1);
+      mm_dmvy = _mm256_inserti128_si256(_mm256_castsi128_si256(_mm_loadu_si128((const __m128i *)vY0)), _mm_loadu_si128((const __m128i *)(vY0 + dMvStride)), 1);
+      mm_gradx = _mm256_inserti128_si256(
+        _mm256_castsi128_si256(_mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)gX0))),
+        _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(gX0 + gradStride))), 1);
+      mm_grady = _mm256_inserti128_si256(
+        _mm256_castsi128_si256(_mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)gY0))),
+        _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(gY0 + gradStride))), 1);
+      mm_dI = _mm256_add_epi32(_mm256_mullo_epi32(mm_dmvx, mm_gradx), _mm256_mullo_epi32(mm_dmvy, mm_grady));
+      mm_dI = _mm256_min_epi32(mm_dimax, _mm256_max_epi32(mm_dimin, mm_dI));
+
+      // combine four rows
+      mm_dI = _mm256_packs_epi32(mm_dI0, mm_dI);   // packs within each 128-bit half: low half = rows 0,2; high half = rows 1,3
+      const Pel* src0 = src + srcStride;
+      mm_src = _mm256_inserti128_si256(
+        _mm256_castsi128_si256(_mm_unpacklo_epi64(_mm_loadl_epi64((const __m128i *)src), _mm_loadl_epi64((const __m128i *)(src + (srcStride << 1))))),
+        _mm_unpacklo_epi64(_mm_loadl_epi64((const __m128i *)src0), _mm_loadl_epi64((const __m128i *)(src0 + (srcStride << 1)))),
+        1
+      );
+      mm_dI = _mm256_add_epi16(mm_dI, mm_src);
+      if (!bi)   // only when bi is false: add offset, shift and clip to the sample range here
+      {
+        mm_dI = _mm256_srai_epi16(_mm256_adds_epi16(mm_dI, mm_offset), shiftNum);
+        mm_dI = _mm256_min_epi16(vibdimax, _mm256_max_epi16(vibdimin, mm_dI));
+      }
+
+      // store final results
+      __m128i dITmp = _mm256_extractf128_si256(mm_dI, 1);
+      Pel* dst0 = dst;
+      _mm_storel_epi64((__m128i *)dst0, _mm256_castsi256_si128(mm_dI));
+      dst0 += dstStride; _mm_storel_epi64((__m128i *)dst0, dITmp);
+      dst0 += dstStride; _mm_storel_epi64((__m128i *)dst0, _mm_unpackhi_epi64(_mm256_castsi256_si128(mm_dI), _mm256_castsi256_si128(mm_dI)));
+      dst0 += dstStride; _mm_storel_epi64((__m128i *)dst0, _mm_unpackhi_epi64(dITmp, dITmp));
+#else
+      // first row
+      mm_dmvx = _mm_loadu_si128((const __m128i *)vX);
+      mm_dmvy = _mm_loadu_si128((const __m128i *)vY);
+      mm_gradx = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)gX));
+      mm_grady = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)gY));
+      mm_dI0 = _mm_add_epi32(_mm_mullo_epi32(mm_dmvx, mm_gradx), _mm_mullo_epi32(mm_dmvy, mm_grady));   // dMvX * gradX + dMvY * gradY
+      mm_dI0 = _mm_min_epi32(mm_dimax, _mm_max_epi32(mm_dimin, mm_dI0));
+
+      // second row
+      mm_dmvx = _mm_loadu_si128((const __m128i *)(vX + dMvStride));
+      mm_dmvy = _mm_loadu_si128((const __m128i *)(vY + dMvStride));
+      mm_gradx = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(gX + gradStride)));
+      mm_grady = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(gY + gradStride)));
+      mm_dI = _mm_add_epi32(_mm_mullo_epi32(mm_dmvx, mm_gradx), _mm_mullo_epi32(mm_dmvy, mm_grady));
+      mm_dI = _mm_min_epi32(mm_dimax, _mm_max_epi32(mm_dimin, mm_dI));
+
+      // combine both rows
+      mm_dI = _mm_packs_epi32(mm_dI0, mm_dI);   // low half = first row, high half = second row
+      mm_dI = _mm_add_epi16(_mm_unpacklo_epi64(_mm_loadl_epi64((const __m128i *)src), _mm_loadl_epi64((const __m128i *)(src + srcStride))), mm_dI);
+      if (!bi)   // only when bi is false: add offset, shift and clip to the sample range here
+      {
+        mm_dI = _mm_srai_epi16(_mm_adds_epi16(mm_dI, mm_offset), shiftNum);
+        mm_dI = _mm_min_epi16(vibdimax, _mm_max_epi16(vibdimin, mm_dI));
+      }
+
+      _mm_storel_epi64((__m128i *)dst, mm_dI);
+      _mm_storel_epi64((__m128i *)(dst + dstStride), _mm_unpackhi_epi64(mm_dI, mm_dI));
+#endif
+      vX += 4; vY += 4; gX += 4; gY += 4; src += 4; dst += 4;
+    }
+
+#ifdef USE_AVX2
+    dMvX += (dMvStride << 2);   // advance all row pointers by the four rows just processed
+    dMvY += (dMvStride << 2);
+    gradX += (gradStride << 2);
+    gradY += (gradStride << 2);
+    srcPel += (srcStride << 2);
+    dstPel += (dstStride << 2);
+#else
+    dMvX += (dMvStride << 1);   // advance all row pointers by the two rows just processed
+    dMvY += (dMvStride << 1);
+    gradX += (gradStride << 1);
+    gradY += (gradStride << 1);
+    srcPel += (srcStride << 1);
+    dstPel += (dstStride << 1);
+#endif
+  }
+}
+
+
+template< X86_VEXT vext >
+void roundIntVector_SIMD(int* v, int size, unsigned int nShift, const int dmvLimit)   // in place: round-shift each v[i] right by nShift, then clip to [-dmvLimit, dmvLimit]
+{
+  CHECKD(size % 16 != 0, "Size must be multiple of 16!");
+#ifdef USE_AVX512
+  if (vext >= AVX512 && size >= 16)
+  {
+    __m512i dMvMin = _mm512_set1_epi32(-dmvLimit);   // 512-bit broadcast; a 256-bit result cannot initialise __m512i
+    __m512i dMvMax = _mm512_set1_epi32( dmvLimit );
+    __m512i nOffset = _mm512_set1_epi32((1 << (nShift - 1)));   // rounding offset; expression requires nShift >= 1
+    __m512i vones = _mm512_set1_epi32(1);
+    __m512i vzero = _mm512_setzero_si512();
+    for (int i = 0; i < size; i += 16, v += 16)
+    {
+      __m512i src = _mm512_loadu_si512(v);
+      __mmask16 mask = _mm512_cmpge_epi32_mask(src, vzero);   // lanes with v >= 0
+      src = _mm512_add_epi32(src, nOffset);
+      __m512i dst = _mm512_srai_epi32(_mm512_mask_sub_epi32(src, mask, src, vones), nShift);   // subtract 1 in the masked lanes before shifting
+      dst = _mm512_min_epi32(dMvMax, _mm512_max_epi32(dMvMin, dst));
+      _mm512_storeu_si512(v, dst);
+    }
+  }
+  else
+#endif
+#ifdef USE_AVX2
+  if (vext >= AVX2 && size >= 8)
+  {
+    __m256i dMvMin = _mm256_set1_epi32(-dmvLimit);
+    __m256i dMvMax = _mm256_set1_epi32( dmvLimit );
+    __m256i nOffset = _mm256_set1_epi32(1 << (nShift - 1));   // rounding offset; expression requires nShift >= 1
+    __m256i vzero = _mm256_setzero_si256();
+    for (int i = 0; i < size; i += 8, v += 8)
+    {
+      __m256i src = _mm256_lddqu_si256((__m256i*)v);
+      __m256i of  = _mm256_cmpgt_epi32(src, vzero);   // all-ones (-1) in lanes with v > 0, else 0
+      __m256i dst = _mm256_srai_epi32(_mm256_add_epi32(_mm256_add_epi32(src, nOffset), of), nShift);   // (v + offset - (v > 0)) >> nShift
+      dst = _mm256_min_epi32(dMvMax, _mm256_max_epi32(dMvMin, dst));
+      _mm256_storeu_si256((__m256i*)v, dst);
+    }
+  }
+  else
+#endif
+  {
+    __m128i dMvMin = _mm_set1_epi32(-dmvLimit);
+    __m128i dMvMax = _mm_set1_epi32( dmvLimit );
+    __m128i nOffset = _mm_set1_epi32((1 << (nShift - 1)));   // rounding offset; expression requires nShift >= 1
+    __m128i vzero = _mm_setzero_si128();
+    for (int i = 0; i < size; i += 4, v += 4)
+    {
+      __m128i src = _mm_loadu_si128((__m128i*)v);
+      __m128i of  = _mm_cmpgt_epi32(src, vzero);   // all-ones (-1) in lanes with v > 0, else 0
+      __m128i dst = _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(src, nOffset), of), nShift);   // (v + offset - (v > 0)) >> nShift
+      dst = _mm_min_epi32(dMvMax, _mm_max_epi32(dMvMin, dst));
+      _mm_storeu_si128((__m128i*)v, dst);
+    }
+  }
+}
+
+template< X86_VEXT vext, bool PAD = true>
 void gradFilter_SSE(Pel* src, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY, const int bitDepth)
 {
-  __m128i vzero = _mm_setzero_si128();
   Pel* srcTmp = src + srcStride + 1;
   Pel* gradXTmp = gradX + gradStride + 1;
   Pel* gradYTmp = gradY + gradStride + 1;
 
   int widthInside = width - 2 * BIO_EXTEND_SIZE;
   int heightInside = height - 2 * BIO_EXTEND_SIZE;
-  int shift1 = std::max<int>(2, (14 - bitDepth));
+  int shift1 = 6;
+  __m128i mmShift1 = _mm_cvtsi32_si128( shift1 );
+  assert((widthInside & 3) == 0);
 
+  if ( ( widthInside & 7 ) == 0 )
+  {
+    for (int y = 0; y < heightInside; y++)
+    {
+      int x = 0;
+      for ( ; x < widthInside; x += 8 )
+      {
+        __m128i mmPixTop    = _mm_sra_epi16( _mm_loadu_si128( ( __m128i* ) ( srcTmp + x - srcStride ) ), mmShift1 );
+        __m128i mmPixBottom = _mm_sra_epi16( _mm_loadu_si128( ( __m128i* ) ( srcTmp + x + srcStride ) ), mmShift1 );
+        __m128i mmPixLeft   = _mm_sra_epi16( _mm_loadu_si128( ( __m128i* ) ( srcTmp + x - 1 ) ), mmShift1 );
+        __m128i mmPixRight  = _mm_sra_epi16( _mm_loadu_si128( ( __m128i* ) ( srcTmp + x + 1 ) ), mmShift1 );
 
-  assert((widthInside & 3) == 0);
+        __m128i mmGradVer = _mm_sub_epi16( mmPixBottom, mmPixTop );
+        __m128i mmGradHor = _mm_sub_epi16( mmPixRight, mmPixLeft );
 
-  for (int y = 0; y < heightInside; y++)
+        _mm_storeu_si128( ( __m128i * ) ( gradYTmp + x ), mmGradVer );
+        _mm_storeu_si128( ( __m128i * ) ( gradXTmp + x ), mmGradHor );
+      }
+      gradXTmp += gradStride;
+      gradYTmp += gradStride;
+      srcTmp += srcStride;
+    }
+  }
+  else
   {
-    int x = 0;
-    for (; x < widthInside; x += 4)
+    __m128i mmPixTop = _mm_sra_epi16( _mm_unpacklo_epi64( _mm_loadl_epi64( (__m128i*) ( srcTmp - srcStride ) ), _mm_loadl_epi64( (__m128i*) ( srcTmp ) ) ), mmShift1 );
+    for ( int y = 0; y < heightInside; y += 2 )
     {
-      __m128i mmPixTop = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(srcTmp + x - srcStride)));
-      __m128i mmPixBottom = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(srcTmp + x + srcStride)));
-      __m128i mmPixLeft = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(srcTmp + x - 1)));
-      __m128i mmPixRight = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(srcTmp + x + 1)));
-
-      __m128i mmGradVer = _mm_sra_epi32(_mm_sub_epi32(mmPixBottom, mmPixTop), _mm_cvtsi32_si128(shift1));
-      __m128i mmGradHor = _mm_sra_epi32(_mm_sub_epi32(mmPixRight, mmPixLeft), _mm_cvtsi32_si128(shift1));
-      mmGradVer = _mm_packs_epi32(mmGradVer, vzero);
-      mmGradHor = _mm_packs_epi32(mmGradHor, vzero);
-
-      _mm_storel_epi64((__m128i *)(gradYTmp + x), mmGradVer);
-      _mm_storel_epi64((__m128i *)(gradXTmp + x), mmGradHor);
+      __m128i mmPixBottom = _mm_sra_epi16( _mm_unpacklo_epi64( _mm_loadl_epi64( (__m128i*) ( srcTmp + srcStride ) ), _mm_loadl_epi64( (__m128i*) ( srcTmp + ( srcStride << 1 ) ) ) ), mmShift1 );
+      __m128i mmPixLeft   = _mm_sra_epi16( _mm_unpacklo_epi64( _mm_loadl_epi64( (__m128i*) ( srcTmp - 1 ) ), _mm_loadl_epi64( (__m128i*) ( srcTmp - 1 + srcStride ) ) ), mmShift1 );
+      __m128i mmPixRight  = _mm_sra_epi16( _mm_unpacklo_epi64( _mm_loadl_epi64( (__m128i*) ( srcTmp + 1 ) ), _mm_loadl_epi64( (__m128i*) ( srcTmp + 1 + srcStride ) ) ), mmShift1 );
+
+      __m128i mmGradVer = _mm_sub_epi16( mmPixBottom, mmPixTop );
+      __m128i mmGradHor = _mm_sub_epi16( mmPixRight, mmPixLeft );
+
+      _mm_storel_epi64( (__m128i *) gradYTmp, mmGradVer );
+      _mm_storel_epi64( (__m128i *) ( gradYTmp + gradStride ), _mm_unpackhi_epi64( mmGradVer, mmGradHor ) );
+      _mm_storel_epi64( (__m128i *) gradXTmp, mmGradHor );
+      _mm_storel_epi64( (__m128i *) ( gradXTmp + gradStride ), _mm_unpackhi_epi64( mmGradHor, mmGradVer ) );
+
+      mmPixTop = mmPixBottom;
+      gradXTmp += gradStride << 1;
+      gradYTmp += gradStride << 1;
+      srcTmp   += srcStride << 1;
     }
-
-    gradXTmp += gradStride;
-    gradYTmp += gradStride;
-    srcTmp += srcStride;
   }
 
+  if (PAD)
+  {
   gradXTmp = gradX + gradStride + 1;
   gradYTmp = gradY + gradStride + 1;
   for (int y = 0; y < heightInside; y++)
@@ -336,155 +618,10 @@ void gradFilter_SSE(Pel* src, int srcStride, int width, int height, int gradStri
   ::memcpy(gradXTmp + heightInside*gradStride, gradXTmp + (heightInside - 1)*gradStride, sizeof(Pel)*(width));
   ::memcpy(gradYTmp - gradStride, gradYTmp, sizeof(Pel)*(width));
   ::memcpy(gradYTmp + heightInside*gradStride, gradYTmp + (heightInside - 1)*gradStride, sizeof(Pel)*(width));
-}
-
-template< X86_VEXT vext >
-void calcBIOPar_SSE(const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel* gradY1, int* dotProductTemp1, int* dotProductTemp2, int* dotProductTemp3, int* dotProductTemp5, int* dotProductTemp6, const int src0Stride, const int src1Stride, const int gradStride, const int widthG, const int heightG, const int bitDepth)
-{
-  int shift4 = std::min<int>(8, (bitDepth - 4));
-  int shift5 = std::min<int>(5, (bitDepth - 7));
-  for (int y = 0; y < heightG; y++)
-  {
-    int x = 0;
-    for (; x < ((widthG >> 3) << 3); x += 8)
-    {
-      __m128i mmSrcY0Temp = _mm_sra_epi16(_mm_loadu_si128((__m128i*)(srcY0Temp + x)), _mm_cvtsi32_si128(shift4));
-      __m128i mmSrcY1Temp = _mm_sra_epi16(_mm_loadu_si128((__m128i*)(srcY1Temp + x)), _mm_cvtsi32_si128(shift4));
-      __m128i mmGradX0 = _mm_loadu_si128((__m128i*)(gradX0 + x));
-      __m128i mmGradX1 = _mm_loadu_si128((__m128i*)(gradX1 + x));
-      __m128i mmGradY0 = _mm_loadu_si128((__m128i*)(gradY0 + x));
-      __m128i mmGradY1 = _mm_loadu_si128((__m128i*)(gradY1 + x));
-
-      __m128i mmTemp1 = _mm_sub_epi16(mmSrcY1Temp, mmSrcY0Temp);
-      __m128i mmTempX = _mm_sra_epi16(_mm_add_epi16(mmGradX0, mmGradX1), _mm_cvtsi32_si128(shift5));
-      __m128i mmTempY = _mm_sra_epi16(_mm_add_epi16(mmGradY0, mmGradY1), _mm_cvtsi32_si128(shift5));
-
-      // m_piDotProductTemp1
-      __m128i mm_b = _mm_mulhi_epi16(mmTempX, mmTempX);
-      __m128i mm_a = _mm_mullo_epi16(mmTempX, mmTempX);
-
-      __m128i mm_l = _mm_unpacklo_epi16(mm_a, mm_b);
-      __m128i mm_h = _mm_unpackhi_epi16(mm_a, mm_b);
-
-      _mm_storeu_si128((__m128i *)(dotProductTemp1 + x), mm_l);
-      _mm_storeu_si128((__m128i *)(dotProductTemp1 + x + 4), mm_h);
-
-      // m_piDotProductTemp2
-      mm_b = _mm_mulhi_epi16(mmTempX, mmTempY);
-      mm_a = _mm_mullo_epi16(mmTempX, mmTempY);
-
-      mm_l = _mm_unpacklo_epi16(mm_a, mm_b);
-      mm_h = _mm_unpackhi_epi16(mm_a, mm_b);
-
-      _mm_storeu_si128((__m128i *)(dotProductTemp2 + x), mm_l);
-      _mm_storeu_si128((__m128i *)(dotProductTemp2 + x + 4), mm_h);
-
-      // m_piDotProductTemp3
-      mm_b = _mm_mulhi_epi16(mmTempX, mmTemp1);
-      mm_a = _mm_mullo_epi16(mmTempX, mmTemp1);
-
-      mm_l = _mm_unpacklo_epi16(mm_a, mm_b);
-      mm_h = _mm_unpackhi_epi16(mm_a, mm_b);
-
-      _mm_storeu_si128((__m128i *)(dotProductTemp3 + x), mm_l);
-      _mm_storeu_si128((__m128i *)(dotProductTemp3 + x + 4), mm_h);
-
-      // m_piDotProductTemp5
-      mm_b = _mm_mulhi_epi16(mmTempY, mmTempY);
-      mm_a = _mm_mullo_epi16(mmTempY, mmTempY);
-
-      mm_l = _mm_unpacklo_epi16(mm_a, mm_b);
-      mm_h = _mm_unpackhi_epi16(mm_a, mm_b);
-
-      _mm_storeu_si128((__m128i *)(dotProductTemp5 + x), mm_l);
-      _mm_storeu_si128((__m128i *)(dotProductTemp5 + x + 4), mm_h);
-
-      // m_piDotProductTemp6
-      mm_b = _mm_mulhi_epi16(mmTempY, mmTemp1);
-      mm_a = _mm_mullo_epi16(mmTempY, mmTemp1);
-
-      mm_l = _mm_unpacklo_epi16(mm_a, mm_b);
-      mm_h = _mm_unpackhi_epi16(mm_a, mm_b);
-
-      _mm_storeu_si128((__m128i *)(dotProductTemp6 + x), mm_l);
-      _mm_storeu_si128((__m128i *)(dotProductTemp6 + x + 4), mm_h);
-    }
-
-    for (; x < ((widthG >> 2) << 2); x += 4)
-    {
-      __m128i mmSrcY0Temp = _mm_sra_epi16(_mm_loadl_epi64((__m128i*)(srcY0Temp + x)), _mm_cvtsi32_si128(shift4));
-      __m128i mmSrcY1Temp = _mm_sra_epi16(_mm_loadl_epi64((__m128i*)(srcY1Temp + x)), _mm_cvtsi32_si128(shift4));
-      __m128i mmGradX0 = _mm_loadl_epi64((__m128i*)(gradX0 + x));
-      __m128i mmGradX1 = _mm_loadl_epi64((__m128i*)(gradX1 + x));
-      __m128i mmGradY0 = _mm_loadl_epi64((__m128i*)(gradY0 + x));
-      __m128i mmGradY1 = _mm_loadl_epi64((__m128i*)(gradY1 + x));
-
-      __m128i mmTemp1 = _mm_sub_epi16(mmSrcY1Temp, mmSrcY0Temp);
-      __m128i mmTempX = _mm_sra_epi16(_mm_add_epi16(mmGradX0, mmGradX1), _mm_cvtsi32_si128(shift5));
-      __m128i mmTempY = _mm_sra_epi16(_mm_add_epi16(mmGradY0, mmGradY1), _mm_cvtsi32_si128(shift5));
-
-      // m_piDotProductTemp1
-      __m128i mm_b = _mm_mulhi_epi16(mmTempX, mmTempX);
-      __m128i mm_a = _mm_mullo_epi16(mmTempX, mmTempX);
-      __m128i mm_l = _mm_unpacklo_epi16(mm_a, mm_b);
-
-      _mm_storeu_si128((__m128i *)(dotProductTemp1 + x), mm_l);
-
-      // m_piDotProductTemp2
-      mm_b = _mm_mulhi_epi16(mmTempX, mmTempY);
-      mm_a = _mm_mullo_epi16(mmTempX, mmTempY);
-      mm_l = _mm_unpacklo_epi16(mm_a, mm_b);
-
-      _mm_storeu_si128((__m128i *)(dotProductTemp2 + x), mm_l);
-
-      // m_piDotProductTemp3
-      mm_b = _mm_mulhi_epi16(mmTempX, mmTemp1);
-      mm_a = _mm_mullo_epi16(mmTempX, mmTemp1);
-      mm_l = _mm_unpacklo_epi16(mm_a, mm_b);
-
-      _mm_storeu_si128((__m128i *)(dotProductTemp3 + x), mm_l);
-
-      // m_piDotProductTemp5
-      mm_b = _mm_mulhi_epi16(mmTempY, mmTempY);
-      mm_a = _mm_mullo_epi16(mmTempY, mmTempY);
-      mm_l = _mm_unpacklo_epi16(mm_a, mm_b);
-
-      _mm_storeu_si128((__m128i *)(dotProductTemp5 + x), mm_l);
-
-      // m_piDotProductTemp6
-      mm_b = _mm_mulhi_epi16(mmTempY, mmTemp1);
-      mm_a = _mm_mullo_epi16(mmTempY, mmTemp1);
-      mm_l = _mm_unpacklo_epi16(mm_a, mm_b);
-
-      _mm_storeu_si128((__m128i *)(dotProductTemp6 + x), mm_l);
-    }
-
-    for (; x < widthG; x++)
-    {
-      int temp = (srcY0Temp[x] >> shift4) - (srcY1Temp[x] >> shift4);
-      int tempX = (gradX0[x] + gradX1[x]) >> shift5;
-      int tempY = (gradY0[x] + gradY1[x]) >> shift5;
-      dotProductTemp1[x] = tempX * tempX;
-      dotProductTemp2[x] = tempX * tempY;
-      dotProductTemp3[x] = -tempX * temp;
-      dotProductTemp5[x] = tempY * tempY;
-      dotProductTemp6[x] = -tempY * temp;
-    }
-
-    srcY0Temp += src0Stride;
-    srcY1Temp += src1Stride;
-    gradX0 += gradStride;
-    gradX1 += gradStride;
-    gradY0 += gradStride;
-    gradY1 += gradStride;
-    dotProductTemp1 += widthG;
-    dotProductTemp2 += widthG;
-    dotProductTemp3 += widthG;
-    dotProductTemp5 += widthG;
-    dotProductTemp6 += widthG;
   }
 }
 
+
 template< X86_VEXT vext >
 void calcBlkGradient_SSE(int sx, int sy, int     *arraysGx2, int     *arraysGxGy, int     *arraysGxdI, int     *arraysGy2, int     *arraysGydI, int     &sGx2, int     &sGy2, int     &sGxGy, int     &sGxdI, int     &sGydI, int width, int height, int unitSize)
 {
@@ -632,13 +769,13 @@ void reco_SSE( const int16_t* src0, int src0Stride, const int16_t* src1, int src
   }
 }
 
-#if ENABLE_SIMD_OPT_GBI
+#if ENABLE_SIMD_OPT_BCW
 template< X86_VEXT vext, int W >
-void removeWeightHighFreq_SSE(int16_t* src0, int src0Stride, const int16_t* src1, int src1Stride, int width, int height, int shift, int gbiWeight)
+void removeWeightHighFreq_SSE(int16_t* src0, int src0Stride, const int16_t* src1, int src1Stride, int width, int height, int shift, int bcwWeight)
 {
-  int normalizer = ((1 << 16) + (gbiWeight>0 ? (gbiWeight >> 1) : -(gbiWeight >> 1))) / gbiWeight;
-  int weight0 = normalizer << g_GbiLog2WeightBase;
-  int weight1 = (g_GbiWeightBase - gbiWeight)*normalizer;
+  int normalizer = ((1 << 16) + (bcwWeight>0 ? (bcwWeight >> 1) : -(bcwWeight >> 1))) / bcwWeight;
+  int weight0 = normalizer << g_BcwLog2WeightBase;
+  int weight1 = (g_BcwWeightBase - bcwWeight)*normalizer;
   int offset = 1 << (shift - 1);
   if (W == 8)
   {
@@ -651,8 +788,8 @@ void removeWeightHighFreq_SSE(int16_t* src0, int src0Stride, const int16_t* src1
     {
       for (int col = 0; col < width; col += 8)
       {
-        __m128i vsrc0 = _mm_load_si128((const __m128i *)&src0[col]);
-        __m128i vsrc1 = _mm_load_si128((const __m128i *)&src1[col]);
+        __m128i vsrc0 = _mm_loadu_si128( (const __m128i *)&src0[col] );
+        __m128i vsrc1 = _mm_loadu_si128( (const __m128i *)&src1[col] );
 
         __m128i vtmp, vdst, vsrc;
         vdst = _mm_cvtepi16_epi32(vsrc0);
@@ -721,8 +858,8 @@ void removeHighFreq_SSE(int16_t* src0, int src0Stride, const int16_t* src1, int
       {
         for (int col = 0; col < width; col += 8)
         {
-          __m128i vsrc0 = _mm_load_si128((const __m128i *)&src0[col]);
-          __m128i vsrc1 = _mm_load_si128((const __m128i *)&src1[col]);
+          __m128i vsrc0 = _mm_loadu_si128( (const __m128i *)&src0[col] );
+          __m128i vsrc1 = _mm_loadu_si128( (const __m128i *)&src1[col] );
 
           vsrc0 = _mm_sub_epi16(_mm_slli_epi16(vsrc0, 1), vsrc1);
           _mm_store_si128((__m128i *)&src0[col], vsrc0);
@@ -919,8 +1056,7 @@ void PelBufferOps::_initPelBufOpsX86()
 
   addBIOAvg4      = addBIOAvg4_SSE<vext>;
   bioGradFilter   = gradFilter_SSE<vext>;
-  calcBIOPar      = calcBIOPar_SSE<vext>;
-  calcBlkGradient = calcBlkGradient_SSE<vext>;
+  calcBIOSums = calcBIOSums_SSE<vext>;
 
   copyBuffer = copyBufferSimd<vext>;
   padding    = paddingSimd<vext>;
@@ -929,12 +1065,15 @@ void PelBufferOps::_initPelBufOpsX86()
 
   linTf8 = linTf_SSE_entry<vext, 8>;
   linTf4 = linTf_SSE_entry<vext, 4>;
-#if ENABLE_SIMD_OPT_GBI
+#if ENABLE_SIMD_OPT_BCW
   removeWeightHighFreq8 = removeWeightHighFreq_SSE<vext, 8>;
   removeWeightHighFreq4 = removeWeightHighFreq_SSE<vext, 4>;
   removeHighFreq8 = removeHighFreq_SSE<vext, 8>;
   removeHighFreq4 = removeHighFreq_SSE<vext, 4>;
 #endif
+  profGradFilter = gradFilter_SSE<vext, false>;
+  applyPROF      = applyPROF_SSE<vext>;
+  roundIntVector = roundIntVector_SIMD<vext>;
 }
 
 template void PelBufferOps::_initPelBufOpsX86<SIMDX86>();
diff --git a/source/Lib/CommonLib/x86/CommonDefX86.cpp b/source/Lib/CommonLib/x86/CommonDefX86.cpp
index d7b5f8cd2c4cc106d5eea32b705344c7fd189518..448b627bb7ed2e01a03ef24c94fa66dc0347f904 100644
--- a/source/Lib/CommonLib/x86/CommonDefX86.cpp
+++ b/source/Lib/CommonLib/x86/CommonDefX86.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/x86/CommonDefX86.h b/source/Lib/CommonLib/x86/CommonDefX86.h
index 28091299b667645d0c8781bbdf8b7b037c8b541a..29f90397f8521217176ec94724687d50382e18dc 100644
--- a/source/Lib/CommonLib/x86/CommonDefX86.h
+++ b/source/Lib/CommonLib/x86/CommonDefX86.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/x86/IbcHashMapX86.h b/source/Lib/CommonLib/x86/IbcHashMapX86.h
index 67e4c063d06741b1e4d7fe5dd7870326e6da83de..2d0ce5f033d844e8f8865fad0e431ec399c2621d 100644
--- a/source/Lib/CommonLib/x86/IbcHashMapX86.h
+++ b/source/Lib/CommonLib/x86/IbcHashMapX86.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/x86/InitX86.cpp b/source/Lib/CommonLib/x86/InitX86.cpp
index 334987013a8315ad5381a103efed5cbf226c448c..458839510c00487d6f61fc80ba13d15197a09de5 100644
--- a/source/Lib/CommonLib/x86/InitX86.cpp
+++ b/source/Lib/CommonLib/x86/InitX86.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/x86/InterpolationFilterX86.h b/source/Lib/CommonLib/x86/InterpolationFilterX86.h
index 59c455d30fc0a5d7a74e7a3c80226ec10056965d..2b5bda2df04e9ae2626a74e1696069675ad41aa8 100644
--- a/source/Lib/CommonLib/x86/InterpolationFilterX86.h
+++ b/source/Lib/CommonLib/x86/InterpolationFilterX86.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -195,21 +195,6 @@ static void fullPelCopyAVX2( const ClpRng& clpRng, const void*_src, int srcStrid
 template<X86_VEXT vext, bool isFirst, bool isLast>
 static void simdFilterCopy( const ClpRng& clpRng, const Pel* src, int srcStride, int16_t* dst, int dstStride, int width, int height, bool biMCForDMVR)
 {
-#if !HM_JEM_CLIP_PEL
-  if( vext >= AVX2 && ( width % 16 ) == 0 )
-  {
-    fullPelCopyAVX2<Pel, 16, isFirst, isLast >( clpRng, src, srcStride, dst, dstStride, width, height );
-  }
-  else if( ( width % 16 ) == 0 )
-  {
-    fullPelCopySSE<Pel, 16, isFirst, isLast >( clpRng, src, srcStride, dst, dstStride, width, height );
-  }
-  else if( ( width % 8 ) == 0 )
-  {
-    fullPelCopySSE<Pel, 8, isFirst, isLast>( clpRng, src, srcStride, dst, dstStride, width, height );
-  }
-  else
-#endif
   { //Scalar
     InterpolationFilter::filterCopy<isFirst, isLast>( clpRng, src, srcStride, dst, dstStride, width, height, biMCForDMVR);
   }
@@ -1023,6 +1008,110 @@ static inline __m128i simdInterpolateLuma10Bit2P4(int16_t const *src, int srcStr
   return sumLo;
 }
 
+#ifdef USE_AVX2 // 256-bit helper below is compiled only when USE_AVX2 is defined
+static inline __m256i simdInterpolateLumaHighBit2P16(int16_t const *src1, int srcStride, __m256i *mmCoeff, const __m256i & mmOffset, __m128i &mmShift) // 2-tap filter of 16 int16 samples: sat16( ( src1[i] * mmCoeff[0] + src1[i + srcStride] * mmCoeff[1] + mmOffset ) >> mmShift )
+{
+  __m256i mm_mul_lo = _mm256_setzero_si256();
+  __m256i mm_mul_hi = _mm256_setzero_si256();
+  // Accumulate full 32-bit products: mullo/mulhi yield the low/high 16 bits of each product and the epi16 unpacks interleave them.
+  for (int coefIdx = 0; coefIdx < 2; coefIdx++)
+  {
+    __m256i mmPix = _mm256_lddqu_si256((__m256i*)(src1 + coefIdx * srcStride)); // unaligned load of the 16 samples belonging to tap coefIdx
+    __m256i mm_hi = _mm256_mulhi_epi16(mmPix, mmCoeff[coefIdx]);
+    __m256i mm_lo = _mm256_mullo_epi16(mmPix, mmCoeff[coefIdx]);
+    mm_mul_lo = _mm256_add_epi32(mm_mul_lo, _mm256_unpacklo_epi16(mm_lo, mm_hi));
+    mm_mul_hi = _mm256_add_epi32(mm_mul_hi, _mm256_unpackhi_epi16(mm_lo, mm_hi));
+  }
+  mm_mul_lo = _mm256_sra_epi32(_mm256_add_epi32(mm_mul_lo, mmOffset), mmShift); // add rounding offset, then arithmetic shift right
+  mm_mul_hi = _mm256_sra_epi32(_mm256_add_epi32(mm_mul_hi, mmOffset), mmShift);
+  __m256i mm_sum = _mm256_packs_epi32(mm_mul_lo, mm_mul_hi); // saturating pack to int16; unpack and pack both act per 128-bit lane, so the original sample order is restored
+  return (mm_sum);
+}
+#endif
+// 128-bit 2-tap filter of 8 int16 samples: sat16( ( src1[i] * mmCoeff[0] + src1[i + srcStride] * mmCoeff[1] + mmOffset ) >> mmShift ).
+static inline __m128i simdInterpolateLumaHighBit2P8(int16_t const *src1, int srcStride, __m128i *mmCoeff, const __m128i & mmOffset, __m128i &mmShift)
+{
+  __m128i mm_mul_lo = _mm_setzero_si128();
+  __m128i mm_mul_hi = _mm_setzero_si128();
+  // 32-bit accumulators: mm_mul_lo holds samples 0..3, mm_mul_hi holds samples 4..7.
+  for (int coefIdx = 0; coefIdx < 2; coefIdx++)
+  {
+    __m128i mmPix = _mm_loadu_si128((__m128i*)(src1 + coefIdx * srcStride)); // unaligned load of the 8 samples belonging to tap coefIdx
+    __m128i mm_hi = _mm_mulhi_epi16(mmPix, mmCoeff[coefIdx]);
+    __m128i mm_lo = _mm_mullo_epi16(mmPix, mmCoeff[coefIdx]);
+    mm_mul_lo = _mm_add_epi32(mm_mul_lo, _mm_unpacklo_epi16(mm_lo, mm_hi)); // interleave low/high halves into full 32-bit products
+    mm_mul_hi = _mm_add_epi32(mm_mul_hi, _mm_unpackhi_epi16(mm_lo, mm_hi));
+  }
+  mm_mul_lo = _mm_sra_epi32(_mm_add_epi32(mm_mul_lo, mmOffset), mmShift); // add rounding offset, then arithmetic shift right
+  mm_mul_hi = _mm_sra_epi32(_mm_add_epi32(mm_mul_hi, mmOffset), mmShift);
+  __m128i mm_sum = _mm_packs_epi32(mm_mul_lo, mm_mul_hi); // saturating pack back to 8 x int16
+  return(mm_sum);
+}
+// 2-tap filter of 4 int16 samples; the result sits in the low 64 bits of the return value and the upper 64 bits are zero.
+static inline __m128i simdInterpolateLumaHighBit2P4(int16_t const *src1, int srcStride, __m128i *mmCoeff, const __m128i & mmOffset, __m128i &mmShift)
+{
+  __m128i mm_sum = _mm_setzero_si128();
+  __m128i mm_zero = _mm_setzero_si128();
+  for (int coefIdx = 0; coefIdx < 2; coefIdx++)
+  {
+    __m128i mmPix = _mm_loadl_epi64((__m128i*)(src1 + coefIdx * srcStride)); // 64-bit load: only 4 samples are read for tap coefIdx
+    __m128i mm_hi = _mm_mulhi_epi16(mmPix, mmCoeff[coefIdx]);
+    __m128i mm_lo = _mm_mullo_epi16(mmPix, mmCoeff[coefIdx]);
+    __m128i mm_mul = _mm_unpacklo_epi16(mm_lo, mm_hi); // full 32-bit products of the 4 loaded samples
+    mm_sum = _mm_add_epi32(mm_sum, mm_mul);
+  }
+  mm_sum = _mm_sra_epi32(_mm_add_epi32(mm_sum, mmOffset), mmShift); // add rounding offset, then arithmetic shift right
+  mm_sum = _mm_packs_epi32(mm_sum, mm_zero); // saturating pack to int16; packing with zero clears the upper four lanes
+  return(mm_sum);
+}
+// Row-by-row driver for the 2-tap filter built on the simdInterpolateLumaHighBit2P* helpers; taps are c[0] and c[1], applied to src[i] and src[i + cStride].
+template<X86_VEXT vext, bool isLast>
+static void simdInterpolateN2_HIGHBIT_M4(const int16_t* src, int srcStride, int16_t *dst, int dstStride, int cStride, int width, int height, int shift, int offset, const ClpRng& clpRng, int16_t const *c) // NOTE(review): clpRng and vext are not referenced in this body
+{
+#if USE_AVX2 // NOTE(review): the 16-wide helper is guarded with #ifdef USE_AVX2; both guards agree only if USE_AVX2 is defined to a non-zero value - confirm build flags
+  __m256i mm256Offset = _mm256_set1_epi32(offset);
+  __m256i mm256Coeff[2];
+  for (int n = 0; n < 2; n++)
+  {
+    mm256Coeff[n] = _mm256_set1_epi16(c[n]); // broadcast tap n to all 16 lanes
+  }
+#endif
+  __m128i mmOffset = _mm_set1_epi32(offset);
+  __m128i mmCoeff[2];
+  for (int n = 0; n < 2; n++)
+    mmCoeff[n] = _mm_set1_epi16(c[n]); // broadcast tap n to all 8 lanes
+  // The shift count is passed to the helpers in a vector register, as the *_sra_epi32 intrinsics take it.
+  __m128i mmShift = _mm_cvtsi64_si128(shift); // NOTE(review): _mm_cvtsi64_si128 is documented as a 64-bit-target intrinsic - confirm 32-bit builds are not required
+  // Preconditions; presumably CHECK reports an error when its first argument is true - TODO confirm against the macro definition.
+  CHECK(isLast, "Not Supported");
+  CHECK(width % 4 != 0, "Not Supported");
+  // Each row is covered with the widest vectors first (16 when AVX2 is enabled, then 8, then 4 samples).
+  for (int row = 0; row < height; row++)
+  {
+    int col = 0;
+#if USE_AVX2
+    for (; col < ((width >> 4) << 4); col += 16)
+    {
+      __m256i mmFiltered = simdInterpolateLumaHighBit2P16(src + col, cStride, mm256Coeff, mm256Offset, mmShift);
+      _mm256_storeu_si256((__m256i *)(dst + col), mmFiltered);
+    }
+#endif
+    for (; col < ((width >> 3) << 3); col += 8)
+    {
+      __m128i mmFiltered = simdInterpolateLumaHighBit2P8(src + col, cStride, mmCoeff, mmOffset, mmShift);
+      _mm_storeu_si128((__m128i *)(dst + col), mmFiltered);
+    }
+    // Remaining group of 4 samples; the width % 4 check above means no scalar tail loop is provided.
+    for (; col < ((width >> 2) << 2); col += 4)
+    {
+      __m128i mmFiltered = simdInterpolateLumaHighBit2P4(src + col, cStride, mmCoeff, mmOffset, mmShift);
+      _mm_storel_epi64((__m128i *)(dst + col), mmFiltered); // store only the low 4 results
+    }
+    src += srcStride;
+    dst += dstStride;
+  }
+}
+
 template<X86_VEXT vext, bool isLast>
 static void simdInterpolateN2_10BIT_M4(const int16_t* src, int srcStride, int16_t *dst, int dstStride, int cStride, int width, int height, int shift, int offset, const ClpRng& clpRng, int16_t const *c)
 {
@@ -1127,7 +1216,6 @@ static void simdFilter( const ClpRng& clpRng, Pel const *src, int srcStride, Pel
       offset = 1 << (shift - 1);
     }
   }
-  if( clpRng.bd <= 10 )
   {
     if( N == 8 && !( width & 0x07 ) )
     {
@@ -1179,7 +1267,14 @@ static void simdFilter( const ClpRng& clpRng, Pel const *src, int srcStride, Pel
     {
       if (N == 2 && !(width & 0x03))
       {
+        if (clpRng.bd <= 10)
+        {
         simdInterpolateN2_10BIT_M4<vext, isLast>(src, srcStride, dst, dstStride, cStride, width, height, shift, offset, clpRng, c);
+        }
+        else
+        {
+          simdInterpolateN2_HIGHBIT_M4<vext, isLast>(src, srcStride, dst, dstStride, cStride, width, height, shift, offset, clpRng, c);
+        }
         return;
       }
     }
@@ -1232,6 +1327,129 @@ static void simdFilter( const ClpRng& clpRng, Pel const *src, int srcStride, Pel
   }
 }
 
+template< X86_VEXT vext > // NOTE(review): vext is not referenced inside the body
+void xWeightedTriangleBlk_SSE(const PredictionUnit &pu, const uint32_t width, const uint32_t height, const ComponentID compIdx, const bool splitDir, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1) // blends component compIdx of predSrc0/predSrc1 into predDst with per-sample weights w and ( 8 - w ) read from g_triangleWeights
+{
+  Pel* dst = predDst.get(compIdx).buf;
+  Pel* src0 = predSrc0.get(compIdx).buf;
+  Pel* src1 = predSrc1.get(compIdx).buf;
+  int32_t strideDst = predDst.get(compIdx).stride;
+  int32_t strideSrc0 = predSrc0.get(compIdx).stride;
+  int32_t strideSrc1 = predSrc1.get(compIdx).stride;
+  // Weight-table indices are derived from the block size scaled up by the component's subsampling shifts.
+  int32_t chromaScaleX = getComponentScaleX(compIdx, pu.chromaFormat);
+  int32_t chromaScaleY = getComponentScaleY(compIdx, pu.chromaFormat);
+  int8_t log2WidthY = floorLog2(width << chromaScaleX) - 1;
+  int8_t log2HeightY = floorLog2(height << chromaScaleY) - 1;
+  const char    log2WeightBase = 3;
+  const ClpRng  clpRng = pu.cu->slice->clpRngs().comp[compIdx];
+  const int32_t shiftWeighted = std::max<int>(2, (IF_INTERNAL_PREC - clpRng.bd)) + log2WeightBase;
+  const int32_t offsetWeighted = (1 << (shiftWeighted - 1)) + (IF_INTERNAL_OFFS << log2WeightBase); // rounding term plus IF_INTERNAL_OFFS scaled by the weight base
+  int16_t *weight = g_triangleWeights[splitDir][log2HeightY][log2WidthY];
+  int16_t stepY = width << (chromaScaleX + chromaScaleY); // weight-pointer advance per output row; NOTE(review): narrowed to int16_t - confirm the maximum block size keeps this in range
+  // Loop-invariant vectors: 8 ( = 1 << log2WeightBase ), rounding offset, shift count and clipping bounds.
+  const __m128i mmEight = _mm_set1_epi16(8);
+  const __m128i mmOffset = _mm_set1_epi32(offsetWeighted);
+  const __m128i mmShift = _mm_cvtsi32_si128(shiftWeighted);
+  const __m128i mmMin = _mm_set1_epi16(clpRng.min);
+  const __m128i mmMax = _mm_set1_epi16(clpRng.max);
+  // Three code paths by row width: 2 samples, 4 samples, or groups of 8 samples.
+  if (width == 2)
+  {
+    const __m128i mask = _mm_set_epi16( (short) 0x8080, (short) 0x8080, (short) 0x8080, (short) 0x8080, (short) 0x8080, (short) 0x8080, 0x0504, 0x0100 ); // keeps weight elements 0 and 2, zeroes the other lanes
+    for (int y = 0; y < height; y++) // NOTE(review): int compared against uint32_t height
+    {
+      __m128i s0 = _mm_cvtsi32_si128(*(uint32_t *) src0); // reads one 32-bit word per row; assumes Pel is 16 bits wide (two samples) - TODO confirm
+      __m128i s1 = _mm_cvtsi32_si128(*(uint32_t *) src1);
+      __m128i w0 = _mm_loadl_epi64((__m128i *) (weight));
+      if (chromaScaleX == 1)
+      {
+        w0 = _mm_shuffle_epi8(w0, mask); // horizontally subsampled component: take every second weight
+      }
+      __m128i w1 = _mm_sub_epi16(mmEight, w0); // complementary weight 8 - w
+      s0 = _mm_unpacklo_epi16(s0, s1); // interleave (src0, src1) and (w0, w1) so that madd yields src0 * w0 + src1 * w1
+      w0 = _mm_unpacklo_epi16(w0, w1);
+      s0 = _mm_add_epi32(_mm_madd_epi16(s0, w0), mmOffset);
+      s0 = _mm_sra_epi32(s0, mmShift);
+      s0 = _mm_packs_epi32(s0, s0);
+      s0 = _mm_min_epi16(mmMax, _mm_max_epi16(s0, mmMin)); // clip to [clpRng.min, clpRng.max]
+      // Only the low 32 bits of the result are written back.
+      *(uint32_t *) dst = _mm_cvtsi128_si32(s0);
+      dst += strideDst;
+      src0 += strideSrc0;
+      src1 += strideSrc1;
+      weight += stepY;
+    }
+  }
+  else if(width == 4)
+  {
+    const __m128i mask = _mm_set_epi16( (short) 0x8080, (short) 0x8080, (short) 0x8080, (short) 0x8080, 0x0D0C, 0x0908, 0x0504, 0x0100 ); // gathers weight elements 0, 2, 4, 6 into the low four lanes
+    for (int y = 0; y < height; y++) // NOTE(review): int compared against uint32_t height
+    {
+      __m128i s0 = _mm_loadl_epi64((__m128i *) (src0)); // 64-bit loads of one row from each source
+      __m128i s1 = _mm_loadl_epi64((__m128i *) (src1));
+      __m128i w0 = _mm_loadu_si128((__m128i *) (weight)); // 8 weights are loaded; only the low four (after optional subsampling) are consumed below
+      if (chromaScaleX == 1)
+      {
+        w0 = _mm_shuffle_epi8(w0, mask);
+      }
+      __m128i w1 = _mm_sub_epi16(mmEight, w0);
+      s0 = _mm_unpacklo_epi16(s0, s1);
+      w0 = _mm_unpacklo_epi16(w0, w1);
+      s0 = _mm_add_epi32(_mm_madd_epi16(s0, w0), mmOffset); // src0 * w0 + src1 * ( 8 - w0 ) + offset per 32-bit lane
+      s0 = _mm_sra_epi32(s0, mmShift);
+      s0 = _mm_packs_epi32(s0, s0);
+      s0 = _mm_min_epi16(mmMax, _mm_max_epi16(s0, mmMin));
+      _mm_storel_epi64((__m128i *) (dst), s0); // write the low 64 bits of the result
+      dst += strideDst;
+      src0 += strideSrc0;
+      src1 += strideSrc1;
+      weight += stepY;
+    }
+  }
+  else
+  {
+    const __m128i mask1 = _mm_set_epi16( 0x0D0C, 0x0908, 0x0504, 0x0100, (short) 0x8080, (short) 0x8080, (short) 0x8080, (short) 0x8080 ); // even-indexed weights moved to the upper four lanes
+    const __m128i mask2 = _mm_set_epi16( (short) 0x8080, (short) 0x8080, (short) 0x8080, (short) 0x8080, 0x0D0C, 0x0908, 0x0504, 0x0100 ); // even-indexed weights moved to the lower four lanes
+    for (int y = 0; y < height; y++) // NOTE(review): int compared against uint32_t height
+    {
+      for (int x = 0; x < width; x += 8) // full 8-sample groups only; presumably width is a multiple of 8 on this path - verify against callers
+      {
+        __m128i s0 = _mm_loadu_si128((__m128i *) (src0 + x));
+        __m128i s1 = _mm_loadu_si128((__m128i *) (src1 + x));
+        // Weight column index is x scaled by the horizontal subsampling shift.
+        __m128i w0 = _mm_loadu_si128((__m128i *) (weight + (x << chromaScaleX)));
+        if (chromaScaleX == 1)
+        {
+          __m128i w01 = _mm_loadu_si128((__m128i *) (weight + (x << chromaScaleX) + 8)); // next 8 weights, so 16 weights are subsampled down to 8
+          w0 = _mm_shuffle_epi8(w0, mask1);
+          w01 = _mm_shuffle_epi8(w01, mask2);
+          w0 = _mm_alignr_epi8(w01, w0, 8); // upper half of w0 followed by lower half of w01: the 8 kept weights in order
+        }
+        __m128i w1 = _mm_sub_epi16(mmEight, w0);
+        // Samples 0..3 of the group.
+        __m128i s0tmp = _mm_unpacklo_epi16(s0, s1);
+        __m128i w0tmp = _mm_unpacklo_epi16(w0, w1);
+        s0tmp = _mm_add_epi32(_mm_madd_epi16(s0tmp, w0tmp), mmOffset);
+        s0tmp = _mm_sra_epi32(s0tmp, mmShift);
+        // Samples 4..7 of the group.
+        s0 = _mm_unpackhi_epi16(s0, s1);
+        w0 = _mm_unpackhi_epi16(w0, w1);
+        s0 = _mm_add_epi32(_mm_madd_epi16(s0, w0), mmOffset);
+        s0 = _mm_sra_epi32(s0, mmShift);
+        // Recombine both halves to 8 x int16 with saturation, then clip.
+        s0 = _mm_packs_epi32(s0tmp, s0);
+        s0 = _mm_min_epi16(mmMax, _mm_max_epi16(s0, mmMin));
+        _mm_storeu_si128((__m128i *) (dst + x), s0);
+      }
+      dst += strideDst;
+      src0 += strideSrc0;
+      src1 += strideSrc1;
+      weight += stepY;
+    }
+  }
+}
+
 template <X86_VEXT vext>
 void InterpolationFilter::_initInterpolationFilterX86()
 {
@@ -1270,6 +1488,8 @@ void InterpolationFilter::_initInterpolationFilterX86()
   m_filterCopy[0][1]   = simdFilterCopy<vext, false, true>;
   m_filterCopy[1][0]   = simdFilterCopy<vext, true, false>;
   m_filterCopy[1][1]   = simdFilterCopy<vext, true, true>;
+
+  m_weightedTriangleBlk = xWeightedTriangleBlk_SSE<vext>;
 }
 
 template void InterpolationFilter::_initInterpolationFilterX86<SIMDX86>();
diff --git a/source/Lib/CommonLib/x86/RdCostX86.h b/source/Lib/CommonLib/x86/RdCostX86.h
index 109b6b1a221c057ae0494581fd1544583e165d6f..b5e3288be1949c3540b222b59ef011d53d84b32a 100644
--- a/source/Lib/CommonLib/x86/RdCostX86.h
+++ b/source/Lib/CommonLib/x86/RdCostX86.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -43,7 +43,10 @@
 
 #ifdef TARGET_SIMD_X86
 
-template< typename Torg, typename Tcur, X86_VEXT vext >
+typedef Pel Torg;
+typedef Pel Tcur;
+
+template<X86_VEXT vext >
 Distortion RdCost::xGetSSE_SIMD( const DistParam &rcDtParam )
 {
   if( rcDtParam.bitDepth > 10 )
@@ -67,8 +70,8 @@ Distortion RdCost::xGetSSE_SIMD( const DistParam &rcDtParam )
     {
       for( int iX = 0; iX < iCols; iX+=16 )
       {
-        __m256i Src1 = ( sizeof( Torg ) > 1 ) ? ( _mm256_lddqu_si256( ( __m256i* )( &pSrc1[iX] ) ) ) : ( _mm256_unpacklo_epi8( _mm256_permute4x64_epi64( _mm256_castsi128_si256( _mm_lddqu_si128( ( __m128i* )( &pSrc1[iX] ) ) ), 0xD8 ), _mm256_setzero_si256() ) );
-        __m256i Src2 = ( sizeof( Tcur ) > 1 ) ? ( _mm256_lddqu_si256( ( __m256i* )( &pSrc2[iX] ) ) ) : ( _mm256_unpacklo_epi8( _mm256_permute4x64_epi64( _mm256_castsi128_si256( _mm_lddqu_si128( ( __m128i* )( &pSrc2[iX] ) ) ), 0xD8 ), _mm256_setzero_si256() ) );
+        __m256i Src1 = ( _mm256_lddqu_si256( ( __m256i* )( &pSrc1[iX] ) ) );
+        __m256i Src2 = ( _mm256_lddqu_si256( ( __m256i* )( &pSrc2[iX] ) ) );
         __m256i Diff = _mm256_sub_epi16( Src1, Src2 );
         __m256i Res = _mm256_madd_epi16( Diff, Diff );
         Sum = _mm256_add_epi32( Sum, Res );
@@ -125,7 +128,7 @@ Distortion RdCost::xGetSSE_SIMD( const DistParam &rcDtParam )
 }
 
 
-template< typename Torg, typename Tcur, int iWidth, X86_VEXT vext >
+template<int iWidth, X86_VEXT vext >
 Distortion RdCost::xGetSSE_NxN_SIMD( const DistParam &rcDtParam )
 {
   if( rcDtParam.bitDepth > 10 || rcDtParam.applyWeight )
@@ -354,7 +357,7 @@ Distortion RdCost::xGetSAD_NxN_SIMD( const DistParam &rcDtParam )
 
   if( iWidth == 4 )
   {
-    if( iRows == 4 )
+    if( iRows == 4 && iSubShift == 0 )
     {
       __m128i vzero = _mm_setzero_si128();
       __m128i vsum = vzero;
@@ -453,7 +456,6 @@ Distortion RdCost::xGetSAD_NxN_SIMD( const DistParam &rcDtParam )
 }
 
 
-template< typename Torg, typename Tcur >
 static uint32_t xCalcHAD4x4_SSE( const Torg *piOrg, const Tcur *piCur, const int iStrideOrg, const int iStrideCur )
 {
   __m128i r0 = ( sizeof( Torg ) > 1 ) ? ( _mm_loadl_epi64( ( const __m128i* )&piOrg[0] ) ) : ( _mm_unpacklo_epi8( _mm_cvtsi32_si128( *(const int*)&piOrg[0] ), _mm_setzero_si128() ) );
@@ -538,172 +540,124 @@ static uint32_t xCalcHAD4x4_SSE( const Torg *piOrg, const Tcur *piCur, const int
 }
 
 //working up to 12-bit
-template< typename Torg, typename Tcur/*, bool bHorDownsampling*/ >
 static uint32_t xCalcHAD8x8_SSE( const Torg *piOrg, const Tcur *piCur, const int iStrideOrg, const int iStrideCur, const int iBitDepth )
 {
-  __m128i m1[8], m2[8];
+  __m128i m1[8][2], m2[8][2];
 
   for( int k = 0; k < 8; k++ )
   {
     __m128i r0 = ( sizeof( Torg ) > 1 ) ? ( _mm_loadu_si128( ( __m128i* )piOrg ) ) : ( _mm_unpacklo_epi8( _mm_loadl_epi64( ( const __m128i* )piOrg ), _mm_setzero_si128() ) );
     __m128i r1 = ( sizeof( Tcur ) > 1 ) ? ( _mm_lddqu_si128( ( __m128i* )piCur ) ) : ( _mm_unpacklo_epi8( _mm_loadl_epi64( ( const __m128i* )piCur ), _mm_setzero_si128() ) ); // th  _mm_loadu_si128( (__m128i*)piCur )
-    m2[k] = _mm_sub_epi16( r0, r1 );
+    m2[k][0] = _mm_sub_epi16( r0, r1 );
+    m2[k][1] = _mm_cvtepi16_epi32( _mm_srli_si128( m2[k][0], 8 ) );
+    m2[k][0] = _mm_cvtepi16_epi32( m2[k][0] );
     piCur += iStrideCur;
     piOrg += iStrideOrg;
   }
 
-  //horizontal
-  m1[0] = _mm_add_epi16( m2[0], m2[4] );
-  m1[1] = _mm_add_epi16( m2[1], m2[5] );
-  m1[2] = _mm_add_epi16( m2[2], m2[6] );
-  m1[3] = _mm_add_epi16( m2[3], m2[7] );
-  m1[4] = _mm_sub_epi16( m2[0], m2[4] );
-  m1[5] = _mm_sub_epi16( m2[1], m2[5] );
-  m1[6] = _mm_sub_epi16( m2[2], m2[6] );
-  m1[7] = _mm_sub_epi16( m2[3], m2[7] );
-
-  m2[0] = _mm_add_epi16( m1[0], m1[2] );
-  m2[1] = _mm_add_epi16( m1[1], m1[3] );
-  m2[2] = _mm_sub_epi16( m1[0], m1[2] );
-  m2[3] = _mm_sub_epi16( m1[1], m1[3] );
-  m2[4] = _mm_add_epi16( m1[4], m1[6] );
-  m2[5] = _mm_add_epi16( m1[5], m1[7] );
-  m2[6] = _mm_sub_epi16( m1[4], m1[6] );
-  m2[7] = _mm_sub_epi16( m1[5], m1[7] );
+  for( int i = 0; i < 2; i++ )
+  {
+    //horizontal
+    m1[0][i] = _mm_add_epi32( m2[0][i], m2[4][i] );
+    m1[1][i] = _mm_add_epi32( m2[1][i], m2[5][i] );
+    m1[2][i] = _mm_add_epi32( m2[2][i], m2[6][i] );
+    m1[3][i] = _mm_add_epi32( m2[3][i], m2[7][i] );
+    m1[4][i] = _mm_sub_epi32( m2[0][i], m2[4][i] );
+    m1[5][i] = _mm_sub_epi32( m2[1][i], m2[5][i] );
+    m1[6][i] = _mm_sub_epi32( m2[2][i], m2[6][i] );
+    m1[7][i] = _mm_sub_epi32( m2[3][i], m2[7][i] );
+
+    m2[0][i] = _mm_add_epi32( m1[0][i], m1[2][i] );
+    m2[1][i] = _mm_add_epi32( m1[1][i], m1[3][i] );
+    m2[2][i] = _mm_sub_epi32( m1[0][i], m1[2][i] );
+    m2[3][i] = _mm_sub_epi32( m1[1][i], m1[3][i] );
+    m2[4][i] = _mm_add_epi32( m1[4][i], m1[6][i] );
+    m2[5][i] = _mm_add_epi32( m1[5][i], m1[7][i] );
+    m2[6][i] = _mm_sub_epi32( m1[4][i], m1[6][i] );
+    m2[7][i] = _mm_sub_epi32( m1[5][i], m1[7][i] );
+
+    m1[0][i] = _mm_add_epi32( m2[0][i], m2[1][i] );
+    m1[1][i] = _mm_sub_epi32( m2[0][i], m2[1][i] );
+    m1[2][i] = _mm_add_epi32( m2[2][i], m2[3][i] );
+    m1[3][i] = _mm_sub_epi32( m2[2][i], m2[3][i] );
+    m1[4][i] = _mm_add_epi32( m2[4][i], m2[5][i] );
+    m1[5][i] = _mm_sub_epi32( m2[4][i], m2[5][i] );
+    m1[6][i] = _mm_add_epi32( m2[6][i], m2[7][i] );
+    m1[7][i] = _mm_sub_epi32( m2[6][i], m2[7][i] );
+
+    m2[0][i] = _mm_unpacklo_epi32( m1[0][i], m1[1][i] );
+    m2[1][i] = _mm_unpacklo_epi32( m1[2][i], m1[3][i] );
+    m2[2][i] = _mm_unpackhi_epi32( m1[0][i], m1[1][i] );
+    m2[3][i] = _mm_unpackhi_epi32( m1[2][i], m1[3][i] );
+    m2[4][i] = _mm_unpacklo_epi32( m1[4][i], m1[5][i] );
+    m2[5][i] = _mm_unpacklo_epi32( m1[6][i], m1[7][i] );
+    m2[6][i] = _mm_unpackhi_epi32( m1[4][i], m1[5][i] );
+    m2[7][i] = _mm_unpackhi_epi32( m1[6][i], m1[7][i] );
+
+    m1[0][i] = _mm_unpacklo_epi64( m2[0][i], m2[1][i] );
+    m1[1][i] = _mm_unpackhi_epi64( m2[0][i], m2[1][i] );
+    m1[2][i] = _mm_unpacklo_epi64( m2[2][i], m2[3][i] );
+    m1[3][i] = _mm_unpackhi_epi64( m2[2][i], m2[3][i] );
+    m1[4][i] = _mm_unpacklo_epi64( m2[4][i], m2[5][i] );
+    m1[5][i] = _mm_unpackhi_epi64( m2[4][i], m2[5][i] );
+    m1[6][i] = _mm_unpacklo_epi64( m2[6][i], m2[7][i] );
+    m1[7][i] = _mm_unpackhi_epi64( m2[6][i], m2[7][i] );
+  }
 
-  m1[0] = _mm_add_epi16( m2[0], m2[1] );
-  m1[1] = _mm_sub_epi16( m2[0], m2[1] );
-  m1[2] = _mm_add_epi16( m2[2], m2[3] );
-  m1[3] = _mm_sub_epi16( m2[2], m2[3] );
-  m1[4] = _mm_add_epi16( m2[4], m2[5] );
-  m1[5] = _mm_sub_epi16( m2[4], m2[5] );
-  m1[6] = _mm_add_epi16( m2[6], m2[7] );
-  m1[7] = _mm_sub_epi16( m2[6], m2[7] );
+  __m128i n1[8][2];
+  __m128i n2[8][2];
 
+  for( int i = 0; i < 8; i++ )
   {
-    m2[0] = _mm_unpacklo_epi16( m1[0], m1[1] );
-    m2[1] = _mm_unpacklo_epi16( m1[2], m1[3] );
-    m2[2] = _mm_unpacklo_epi16( m1[4], m1[5] );
-    m2[3] = _mm_unpacklo_epi16( m1[6], m1[7] );
-    m2[4] = _mm_unpackhi_epi16( m1[0], m1[1] );
-    m2[5] = _mm_unpackhi_epi16( m1[2], m1[3] );
-    m2[6] = _mm_unpackhi_epi16( m1[4], m1[5] );
-    m2[7] = _mm_unpackhi_epi16( m1[6], m1[7] );
+    int ii = i % 4;
+    int ij = i >> 2;
 
-    m1[0] = _mm_unpacklo_epi32( m2[0], m2[1] );
-    m1[1] = _mm_unpackhi_epi32( m2[0], m2[1] );
-    m1[2] = _mm_unpacklo_epi32( m2[2], m2[3] );
-    m1[3] = _mm_unpackhi_epi32( m2[2], m2[3] );
-    m1[4] = _mm_unpacklo_epi32( m2[4], m2[5] );
-    m1[5] = _mm_unpackhi_epi32( m2[4], m2[5] );
-    m1[6] = _mm_unpacklo_epi32( m2[6], m2[7] );
-    m1[7] = _mm_unpackhi_epi32( m2[6], m2[7] );
-
-    m2[0] = _mm_unpacklo_epi64( m1[0], m1[2] );
-    m2[1] = _mm_unpackhi_epi64( m1[0], m1[2] );
-    m2[2] = _mm_unpacklo_epi64( m1[1], m1[3] );
-    m2[3] = _mm_unpackhi_epi64( m1[1], m1[3] );
-    m2[4] = _mm_unpacklo_epi64( m1[4], m1[6] );
-    m2[5] = _mm_unpackhi_epi64( m1[4], m1[6] );
-    m2[6] = _mm_unpacklo_epi64( m1[5], m1[7] );
-    m2[7] = _mm_unpackhi_epi64( m1[5], m1[7] );
+    n2[i][0] = m1[ii    ][ij];
+    n2[i][1] = m1[ii + 4][ij];
   }
 
-  if( iBitDepth >= 10 /*sizeof( Torg ) > 1 || sizeof( Tcur ) > 1*/ )
+  for( int i = 0; i < 2; i++ )
   {
-    //  if (g_bitDepthY >=10){
-    __m128i n1[8][2];
-    __m128i n2[8][2];
-
-    for( int i = 0; i < 8; i++ )
-    {
-      n2[i][0] = _mm_cvtepi16_epi32( m2[i] );
-      n2[i][1] = _mm_cvtepi16_epi32( _mm_shuffle_epi32( m2[i], 0xEE ) );
-    }
-
-    for( int i = 0; i < 2; i++ )
-    {
-      n1[0][i] = _mm_add_epi32( n2[0][i], n2[4][i] );
-      n1[1][i] = _mm_add_epi32( n2[1][i], n2[5][i] );
-      n1[2][i] = _mm_add_epi32( n2[2][i], n2[6][i] );
-      n1[3][i] = _mm_add_epi32( n2[3][i], n2[7][i] );
-      n1[4][i] = _mm_sub_epi32( n2[0][i], n2[4][i] );
-      n1[5][i] = _mm_sub_epi32( n2[1][i], n2[5][i] );
-      n1[6][i] = _mm_sub_epi32( n2[2][i], n2[6][i] );
-      n1[7][i] = _mm_sub_epi32( n2[3][i], n2[7][i] );
-
-      n2[0][i] = _mm_add_epi32( n1[0][i], n1[2][i] );
-      n2[1][i] = _mm_add_epi32( n1[1][i], n1[3][i] );
-      n2[2][i] = _mm_sub_epi32( n1[0][i], n1[2][i] );
-      n2[3][i] = _mm_sub_epi32( n1[1][i], n1[3][i] );
-      n2[4][i] = _mm_add_epi32( n1[4][i], n1[6][i] );
-      n2[5][i] = _mm_add_epi32( n1[5][i], n1[7][i] );
-      n2[6][i] = _mm_sub_epi32( n1[4][i], n1[6][i] );
-      n2[7][i] = _mm_sub_epi32( n1[5][i], n1[7][i] );
-
-      n1[0][i] = _mm_abs_epi32( _mm_add_epi32( n2[0][i], n2[1][i] ) );
-      n1[1][i] = _mm_abs_epi32( _mm_sub_epi32( n2[0][i], n2[1][i] ) );
-      n1[2][i] = _mm_abs_epi32( _mm_add_epi32( n2[2][i], n2[3][i] ) );
-      n1[3][i] = _mm_abs_epi32( _mm_sub_epi32( n2[2][i], n2[3][i] ) );
-      n1[4][i] = _mm_abs_epi32( _mm_add_epi32( n2[4][i], n2[5][i] ) );
-      n1[5][i] = _mm_abs_epi32( _mm_sub_epi32( n2[4][i], n2[5][i] ) );
-      n1[6][i] = _mm_abs_epi32( _mm_add_epi32( n2[6][i], n2[7][i] ) );
-      n1[7][i] = _mm_abs_epi32( _mm_sub_epi32( n2[6][i], n2[7][i] ) );
-    }
-    for( int i = 0; i < 8; i++ )
-    {
-      m1[i] = _mm_add_epi32( n1[i][0], n1[i][1] );
-    }
+    n1[0][i] = _mm_add_epi32( n2[0][i], n2[4][i] );
+    n1[1][i] = _mm_add_epi32( n2[1][i], n2[5][i] );
+    n1[2][i] = _mm_add_epi32( n2[2][i], n2[6][i] );
+    n1[3][i] = _mm_add_epi32( n2[3][i], n2[7][i] );
+    n1[4][i] = _mm_sub_epi32( n2[0][i], n2[4][i] );
+    n1[5][i] = _mm_sub_epi32( n2[1][i], n2[5][i] );
+    n1[6][i] = _mm_sub_epi32( n2[2][i], n2[6][i] );
+    n1[7][i] = _mm_sub_epi32( n2[3][i], n2[7][i] );
+
+    n2[0][i] = _mm_add_epi32( n1[0][i], n1[2][i] );
+    n2[1][i] = _mm_add_epi32( n1[1][i], n1[3][i] );
+    n2[2][i] = _mm_sub_epi32( n1[0][i], n1[2][i] );
+    n2[3][i] = _mm_sub_epi32( n1[1][i], n1[3][i] );
+    n2[4][i] = _mm_add_epi32( n1[4][i], n1[6][i] );
+    n2[5][i] = _mm_add_epi32( n1[5][i], n1[7][i] );
+    n2[6][i] = _mm_sub_epi32( n1[4][i], n1[6][i] );
+    n2[7][i] = _mm_sub_epi32( n1[5][i], n1[7][i] );
+
+    n1[0][i] = _mm_abs_epi32( _mm_add_epi32( n2[0][i], n2[1][i] ) );
+    n1[1][i] = _mm_abs_epi32( _mm_sub_epi32( n2[0][i], n2[1][i] ) );
+    n1[2][i] = _mm_abs_epi32( _mm_add_epi32( n2[2][i], n2[3][i] ) );
+    n1[3][i] = _mm_abs_epi32( _mm_sub_epi32( n2[2][i], n2[3][i] ) );
+    n1[4][i] = _mm_abs_epi32( _mm_add_epi32( n2[4][i], n2[5][i] ) );
+    n1[5][i] = _mm_abs_epi32( _mm_sub_epi32( n2[4][i], n2[5][i] ) );
+    n1[6][i] = _mm_abs_epi32( _mm_add_epi32( n2[6][i], n2[7][i] ) );
+    n1[7][i] = _mm_abs_epi32( _mm_sub_epi32( n2[6][i], n2[7][i] ) );
   }
-  else
+  for( int i = 0; i < 8; i++ )
   {
-    m1[0] = _mm_add_epi16( m2[0], m2[4] );
-    m1[1] = _mm_add_epi16( m2[1], m2[5] );
-    m1[2] = _mm_add_epi16( m2[2], m2[6] );
-    m1[3] = _mm_add_epi16( m2[3], m2[7] );
-    m1[4] = _mm_sub_epi16( m2[0], m2[4] );
-    m1[5] = _mm_sub_epi16( m2[1], m2[5] );
-    m1[6] = _mm_sub_epi16( m2[2], m2[6] );
-    m1[7] = _mm_sub_epi16( m2[3], m2[7] );
-
-    m2[0] = _mm_add_epi16( m1[0], m1[2] );
-    m2[1] = _mm_add_epi16( m1[1], m1[3] );
-    m2[2] = _mm_sub_epi16( m1[0], m1[2] );
-    m2[3] = _mm_sub_epi16( m1[1], m1[3] );
-    m2[4] = _mm_add_epi16( m1[4], m1[6] );
-    m2[5] = _mm_add_epi16( m1[5], m1[7] );
-    m2[6] = _mm_sub_epi16( m1[4], m1[6] );
-    m2[7] = _mm_sub_epi16( m1[5], m1[7] );
-
-    m1[0] = _mm_abs_epi16( _mm_add_epi16( m2[0], m2[1] ) );
-    m1[1] = _mm_abs_epi16( _mm_sub_epi16( m2[0], m2[1] ) );
-    m1[2] = _mm_abs_epi16( _mm_add_epi16( m2[2], m2[3] ) );
-    m1[3] = _mm_abs_epi16( _mm_sub_epi16( m2[2], m2[3] ) );
-    m1[4] = _mm_abs_epi16( _mm_add_epi16( m2[4], m2[5] ) );
-    m1[5] = _mm_abs_epi16( _mm_sub_epi16( m2[4], m2[5] ) );
-    m1[6] = _mm_abs_epi16( _mm_add_epi16( m2[6], m2[7] ) );
-    m1[7] = _mm_abs_epi16( _mm_sub_epi16( m2[6], m2[7] ) );
-
-    __m128i ma1, ma2;
-    __m128i vzero = _mm_setzero_si128();
-
-    for( int i = 0; i < 8; i++ )
-    {
-      ma1 = _mm_unpacklo_epi16( m1[i], vzero );
-      ma2 = _mm_unpackhi_epi16( m1[i], vzero );
-      m1[i] = _mm_add_epi32( ma1, ma2 );
-    }
+    m1[i][0] = _mm_add_epi32( n1[i][0], n1[i][1] );
   }
 
+  m1[0][0] = _mm_add_epi32( m1[0][0], m1[1][0] );
+  m1[2][0] = _mm_add_epi32( m1[2][0], m1[3][0] );
+  m1[4][0] = _mm_add_epi32( m1[4][0], m1[5][0] );
+  m1[6][0] = _mm_add_epi32( m1[6][0], m1[7][0] );
 
-  m1[0] = _mm_add_epi32( m1[0], m1[1] );
-  m1[2] = _mm_add_epi32( m1[2], m1[3] );
-  m1[4] = _mm_add_epi32( m1[4], m1[5] );
-  m1[6] = _mm_add_epi32( m1[6], m1[7] );
-
-  m1[0] = _mm_add_epi32( m1[0], m1[2] );
-  m1[4] = _mm_add_epi32( m1[4], m1[6] );
-  __m128i iSum = _mm_add_epi32( m1[0], m1[4] );
+  m1[0][0] = _mm_add_epi32( m1[0][0], m1[2][0] );
+  m1[4][0] = _mm_add_epi32( m1[4][0], m1[6][0] );
+  __m128i iSum = _mm_add_epi32( m1[0][0], m1[4][0] );
 
   iSum = _mm_hadd_epi32( iSum, iSum );
   iSum = _mm_hadd_epi32( iSum, iSum );
@@ -716,10 +670,9 @@ static uint32_t xCalcHAD8x8_SSE( const Torg *piOrg, const Tcur *piCur, const int
 
 
 //working up to 12-bit
-template< typename Torg, typename Tcur/*, bool bHorDownsampling*/ >
 static uint32_t xCalcHAD16x8_SSE( const Torg *piOrg, const Tcur *piCur, const int iStrideOrg, const int iStrideCur, const int iBitDepth )
 {
-  __m128i m1[16][2], m2[16][2];
+  __m128i m1[16][2][2], m2[16][2][2];
   __m128i iSum = _mm_setzero_si128();
 
   for( int l = 0; l < 2; l++ )
@@ -728,345 +681,186 @@ static uint32_t xCalcHAD16x8_SSE( const Torg *piOrg, const Tcur *piCur, const in
     const Tcur *piCurPtr = piCur + l*8;
     for( int k = 0; k < 8; k++ )
     {
-      __m128i r0 = (sizeof( Torg ) > 1) ? (_mm_loadu_si128( (__m128i*)piOrgPtr )) : (_mm_unpacklo_epi8( _mm_loadl_epi64( (const __m128i*)piOrgPtr ), _mm_setzero_si128() ));
-      __m128i r1 = (sizeof( Tcur ) > 1) ? (_mm_lddqu_si128( (__m128i*)piCurPtr )) : (_mm_unpacklo_epi8( _mm_loadl_epi64( (const __m128i*)piCurPtr ), _mm_setzero_si128() )); // th  _mm_loadu_si128( (__m128i*)piCurPtr )
-      m2[k][l] = _mm_sub_epi16( r0, r1 );
+      __m128i r0 = _mm_loadu_si128( (__m128i*) piOrgPtr );
+      __m128i r1 = _mm_lddqu_si128( (__m128i*) piCurPtr );
+      m2[k][l][0] = _mm_sub_epi16( r0, r1 );
+      m2[k][l][1] = _mm_cvtepi16_epi32( _mm_srli_si128( m2[k][l][0], 8 ) );
+      m2[k][l][0] = _mm_cvtepi16_epi32( m2[k][l][0] );
       piCurPtr += iStrideCur;
       piOrgPtr += iStrideOrg;
     }
 
-    //vertical
-    m1[0][l] = _mm_add_epi16( m2[0][l], m2[4][l] );
-    m1[1][l] = _mm_add_epi16( m2[1][l], m2[5][l] );
-    m1[2][l] = _mm_add_epi16( m2[2][l], m2[6][l] );
-    m1[3][l] = _mm_add_epi16( m2[3][l], m2[7][l] );
-    m1[4][l] = _mm_sub_epi16( m2[0][l], m2[4][l] );
-    m1[5][l] = _mm_sub_epi16( m2[1][l], m2[5][l] );
-    m1[6][l] = _mm_sub_epi16( m2[2][l], m2[6][l] );
-    m1[7][l] = _mm_sub_epi16( m2[3][l], m2[7][l] );
-
-    m2[0][l] = _mm_add_epi16( m1[0][l], m1[2][l] );
-    m2[1][l] = _mm_add_epi16( m1[1][l], m1[3][l] );
-    m2[2][l] = _mm_sub_epi16( m1[0][l], m1[2][l] );
-    m2[3][l] = _mm_sub_epi16( m1[1][l], m1[3][l] );
-    m2[4][l] = _mm_add_epi16( m1[4][l], m1[6][l] );
-    m2[5][l] = _mm_add_epi16( m1[5][l], m1[7][l] );
-    m2[6][l] = _mm_sub_epi16( m1[4][l], m1[6][l] );
-    m2[7][l] = _mm_sub_epi16( m1[5][l], m1[7][l] );
-
-    m1[0][l] = _mm_add_epi16( m2[0][l], m2[1][l] );
-    m1[1][l] = _mm_sub_epi16( m2[0][l], m2[1][l] );
-    m1[2][l] = _mm_add_epi16( m2[2][l], m2[3][l] );
-    m1[3][l] = _mm_sub_epi16( m2[2][l], m2[3][l] );
-    m1[4][l] = _mm_add_epi16( m2[4][l], m2[5][l] );
-    m1[5][l] = _mm_sub_epi16( m2[4][l], m2[5][l] );
-    m1[6][l] = _mm_add_epi16( m2[6][l], m2[7][l] );
-    m1[7][l] = _mm_sub_epi16( m2[6][l], m2[7][l] );
+    for( int i = 0; i < 2; i++ )
+    {
+      //vertical
+      m1[0][l][i] = _mm_add_epi32( m2[0][l][i], m2[4][l][i] );
+      m1[1][l][i] = _mm_add_epi32( m2[1][l][i], m2[5][l][i] );
+      m1[2][l][i] = _mm_add_epi32( m2[2][l][i], m2[6][l][i] );
+      m1[3][l][i] = _mm_add_epi32( m2[3][l][i], m2[7][l][i] );
+      m1[4][l][i] = _mm_sub_epi32( m2[0][l][i], m2[4][l][i] );
+      m1[5][l][i] = _mm_sub_epi32( m2[1][l][i], m2[5][l][i] );
+      m1[6][l][i] = _mm_sub_epi32( m2[2][l][i], m2[6][l][i] );
+      m1[7][l][i] = _mm_sub_epi32( m2[3][l][i], m2[7][l][i] );
+
+      m2[0][l][i] = _mm_add_epi32( m1[0][l][i], m1[2][l][i] );
+      m2[1][l][i] = _mm_add_epi32( m1[1][l][i], m1[3][l][i] );
+      m2[2][l][i] = _mm_sub_epi32( m1[0][l][i], m1[2][l][i] );
+      m2[3][l][i] = _mm_sub_epi32( m1[1][l][i], m1[3][l][i] );
+      m2[4][l][i] = _mm_add_epi32( m1[4][l][i], m1[6][l][i] );
+      m2[5][l][i] = _mm_add_epi32( m1[5][l][i], m1[7][l][i] );
+      m2[6][l][i] = _mm_sub_epi32( m1[4][l][i], m1[6][l][i] );
+      m2[7][l][i] = _mm_sub_epi32( m1[5][l][i], m1[7][l][i] );
+
+      m1[0][l][i] = _mm_add_epi32( m2[0][l][i], m2[1][l][i] );
+      m1[1][l][i] = _mm_sub_epi32( m2[0][l][i], m2[1][l][i] );
+      m1[2][l][i] = _mm_add_epi32( m2[2][l][i], m2[3][l][i] );
+      m1[3][l][i] = _mm_sub_epi32( m2[2][l][i], m2[3][l][i] );
+      m1[4][l][i] = _mm_add_epi32( m2[4][l][i], m2[5][l][i] );
+      m1[5][l][i] = _mm_sub_epi32( m2[4][l][i], m2[5][l][i] );
+      m1[6][l][i] = _mm_add_epi32( m2[6][l][i], m2[7][l][i] );
+      m1[7][l][i] = _mm_sub_epi32( m2[6][l][i], m2[7][l][i] );
+    }
   }
 
-  __m128i vzero = _mm_setzero_si128();
-
   // 4 x 8x4 blocks
   // 0 1
   // 2 3
 
-  if( iBitDepth >= 10 )
-  {
-    // transpose and do horizontal in two steps
-    for( int l = 0; l < 2; l++ )
-    {
-      int off = l * 4;
-
-      // transpose 8x4 -> 4x8
-      // 0 1  ->  0   and        ->  2
-      //          1         2 3      3
-
-      // transpose 8x4 -> 4x8, block 0(2)
-      m2[0][0] = _mm_unpacklo_epi16( m1[0 + off][0], m1[1 + off][0] );
-      m2[1][0] = _mm_unpacklo_epi16( m1[2 + off][0], m1[3 + off][0] );
-      m2[2][0] = _mm_unpackhi_epi16( m1[0 + off][0], m1[1 + off][0] );
-      m2[3][0] = _mm_unpackhi_epi16( m1[2 + off][0], m1[3 + off][0] );
-
-      m1[0][0] = _mm_unpacklo_epi32( m2[0][0], m2[1][0] );
-      m1[1][0] = _mm_unpackhi_epi32( m2[0][0], m2[1][0] );
-      m1[2][0] = _mm_unpacklo_epi32( m2[2][0], m2[3][0] );
-      m1[3][0] = _mm_unpackhi_epi32( m2[2][0], m2[3][0] );
-
-      m2[0][0] = _mm_unpacklo_epi64( m1[0][0], vzero );
-      m2[1][0] = _mm_unpackhi_epi64( m1[0][0], vzero );
-      m2[2][0] = _mm_unpacklo_epi64( m1[1][0], vzero );
-      m2[3][0] = _mm_unpackhi_epi64( m1[1][0], vzero );
-      m2[4][0] = _mm_unpacklo_epi64( m1[2][0], vzero );
-      m2[5][0] = _mm_unpackhi_epi64( m1[2][0], vzero );
-      m2[6][0] = _mm_unpacklo_epi64( m1[3][0], vzero );
-      m2[7][0] = _mm_unpackhi_epi64( m1[3][0], vzero );
-
-      // transpose 8x4 -> 4x8, block 1(3)
-      m2[0 + 8][0] = _mm_unpacklo_epi16( m1[0 + off][1], m1[1 + off][1] );
-      m2[1 + 8][0] = _mm_unpacklo_epi16( m1[2 + off][1], m1[3 + off][1] );
-      m2[2 + 8][0] = _mm_unpackhi_epi16( m1[0 + off][1], m1[1 + off][1] );
-      m2[3 + 8][0] = _mm_unpackhi_epi16( m1[2 + off][1], m1[3 + off][1] );
-
-      m1[0 + 8][0] = _mm_unpacklo_epi32( m2[0 + 8][0], m2[1 + 8][0] );
-      m1[1 + 8][0] = _mm_unpackhi_epi32( m2[0 + 8][0], m2[1 + 8][0] );
-      m1[2 + 8][0] = _mm_unpacklo_epi32( m2[2 + 8][0], m2[3 + 8][0] );
-      m1[3 + 8][0] = _mm_unpackhi_epi32( m2[2 + 8][0], m2[3 + 8][0] );
-
-      m2[0 + 8][0] = _mm_unpacklo_epi64( m1[0 + 8][0], vzero );
-      m2[1 + 8][0] = _mm_unpackhi_epi64( m1[0 + 8][0], vzero );
-      m2[2 + 8][0] = _mm_unpacklo_epi64( m1[1 + 8][0], vzero );
-      m2[3 + 8][0] = _mm_unpackhi_epi64( m1[1 + 8][0], vzero );
-      m2[4 + 8][0] = _mm_unpacklo_epi64( m1[2 + 8][0], vzero );
-      m2[5 + 8][0] = _mm_unpackhi_epi64( m1[2 + 8][0], vzero );
-      m2[6 + 8][0] = _mm_unpacklo_epi64( m1[3 + 8][0], vzero );
-      m2[7 + 8][0] = _mm_unpackhi_epi64( m1[3 + 8][0], vzero );
-
-      // horizontal
-      //if( iBitDepth >= 10 )
-      {
-        __m128i n1[16];
-        __m128i n2[16];
-
-        for( int i = 0; i < 16; i++ )
-        {
-          n1[i] = _mm_cvtepi16_epi32( m2[i][0] );
-        }
-
-        n2[0] = _mm_add_epi32( n1[0], n1[8] );
-        n2[1] = _mm_add_epi32( n1[1], n1[9] );
-        n2[2] = _mm_add_epi32( n1[2], n1[10] );
-        n2[3] = _mm_add_epi32( n1[3], n1[11] );
-        n2[4] = _mm_add_epi32( n1[4], n1[12] );
-        n2[5] = _mm_add_epi32( n1[5], n1[13] );
-        n2[6] = _mm_add_epi32( n1[6], n1[14] );
-        n2[7] = _mm_add_epi32( n1[7], n1[15] );
-        n2[8] = _mm_sub_epi32( n1[0], n1[8] );
-        n2[9] = _mm_sub_epi32( n1[1], n1[9] );
-        n2[10] = _mm_sub_epi32( n1[2], n1[10] );
-        n2[11] = _mm_sub_epi32( n1[3], n1[11] );
-        n2[12] = _mm_sub_epi32( n1[4], n1[12] );
-        n2[13] = _mm_sub_epi32( n1[5], n1[13] );
-        n2[14] = _mm_sub_epi32( n1[6], n1[14] );
-        n2[15] = _mm_sub_epi32( n1[7], n1[15] );
-
-        n1[0] = _mm_add_epi32( n2[0], n2[4] );
-        n1[1] = _mm_add_epi32( n2[1], n2[5] );
-        n1[2] = _mm_add_epi32( n2[2], n2[6] );
-        n1[3] = _mm_add_epi32( n2[3], n2[7] );
-        n1[4] = _mm_sub_epi32( n2[0], n2[4] );
-        n1[5] = _mm_sub_epi32( n2[1], n2[5] );
-        n1[6] = _mm_sub_epi32( n2[2], n2[6] );
-        n1[7] = _mm_sub_epi32( n2[3], n2[7] );
-        n1[8] = _mm_add_epi32( n2[8], n2[12] );
-        n1[9] = _mm_add_epi32( n2[9], n2[13] );
-        n1[10] = _mm_add_epi32( n2[10], n2[14] );
-        n1[11] = _mm_add_epi32( n2[11], n2[15] );
-        n1[12] = _mm_sub_epi32( n2[8], n2[12] );
-        n1[13] = _mm_sub_epi32( n2[9], n2[13] );
-        n1[14] = _mm_sub_epi32( n2[10], n2[14] );
-        n1[15] = _mm_sub_epi32( n2[11], n2[15] );
-
-        n2[0] = _mm_add_epi32( n1[0], n1[2] );
-        n2[1] = _mm_add_epi32( n1[1], n1[3] );
-        n2[2] = _mm_sub_epi32( n1[0], n1[2] );
-        n2[3] = _mm_sub_epi32( n1[1], n1[3] );
-        n2[4] = _mm_add_epi32( n1[4], n1[6] );
-        n2[5] = _mm_add_epi32( n1[5], n1[7] );
-        n2[6] = _mm_sub_epi32( n1[4], n1[6] );
-        n2[7] = _mm_sub_epi32( n1[5], n1[7] );
-        n2[8] = _mm_add_epi32( n1[8], n1[10] );
-        n2[9] = _mm_add_epi32( n1[9], n1[11] );
-        n2[10] = _mm_sub_epi32( n1[8], n1[10] );
-        n2[11] = _mm_sub_epi32( n1[9], n1[11] );
-        n2[12] = _mm_add_epi32( n1[12], n1[14] );
-        n2[13] = _mm_add_epi32( n1[13], n1[15] );
-        n2[14] = _mm_sub_epi32( n1[12], n1[14] );
-        n2[15] = _mm_sub_epi32( n1[13], n1[15] );
-
-        n1[0] = _mm_abs_epi32( _mm_add_epi32( n2[0], n2[1] ) );
-        n1[1] = _mm_abs_epi32( _mm_sub_epi32( n2[0], n2[1] ) );
-        n1[2] = _mm_abs_epi32( _mm_add_epi32( n2[2], n2[3] ) );
-        n1[3] = _mm_abs_epi32( _mm_sub_epi32( n2[2], n2[3] ) );
-        n1[4] = _mm_abs_epi32( _mm_add_epi32( n2[4], n2[5] ) );
-        n1[5] = _mm_abs_epi32( _mm_sub_epi32( n2[4], n2[5] ) );
-        n1[6] = _mm_abs_epi32( _mm_add_epi32( n2[6], n2[7] ) );
-        n1[7] = _mm_abs_epi32( _mm_sub_epi32( n2[6], n2[7] ) );
-        n1[8] = _mm_abs_epi32( _mm_add_epi32( n2[8], n2[9] ) );
-        n1[9] = _mm_abs_epi32( _mm_sub_epi32( n2[8], n2[9] ) );
-        n1[10] = _mm_abs_epi32( _mm_add_epi32( n2[10], n2[11] ) );
-        n1[11] = _mm_abs_epi32( _mm_sub_epi32( n2[10], n2[11] ) );
-        n1[12] = _mm_abs_epi32( _mm_add_epi32( n2[12], n2[13] ) );
-        n1[13] = _mm_abs_epi32( _mm_sub_epi32( n2[12], n2[13] ) );
-        n1[14] = _mm_abs_epi32( _mm_add_epi32( n2[14], n2[15] ) );
-        n1[15] = _mm_abs_epi32( _mm_sub_epi32( n2[14], n2[15] ) );
-
-        // sum up
-        n1[0] = _mm_add_epi32( n1[0], n1[1] );
-        n1[2] = _mm_add_epi32( n1[2], n1[3] );
-        n1[4] = _mm_add_epi32( n1[4], n1[5] );
-        n1[6] = _mm_add_epi32( n1[6], n1[7] );
-        n1[8] = _mm_add_epi32( n1[8], n1[9] );
-        n1[10] = _mm_add_epi32( n1[10], n1[11] );
-        n1[12] = _mm_add_epi32( n1[12], n1[13] );
-        n1[14] = _mm_add_epi32( n1[14], n1[15] );
-
-        n1[0] = _mm_add_epi32( n1[0], n1[2] );
-        n1[4] = _mm_add_epi32( n1[4], n1[6] );
-        n1[8] = _mm_add_epi32( n1[8], n1[10] );
-        n1[12] = _mm_add_epi32( n1[12], n1[14] );
-
-        n1[0] = _mm_add_epi32( n1[0], n1[4] );
-        n1[8] = _mm_add_epi32( n1[8], n1[12] );
-
-        n1[0] = _mm_add_epi32( n1[0], n1[8] );
-        iSum = _mm_add_epi32( iSum, n1[0] );
-      }
-    }
-  }
-  else
+  // transpose and do horizontal in two steps
+  for( int l = 0; l < 2; l++ )
   {
-    const int off = 4;
-    // transpose 8x8
-    // block 0
-    m2[0][0] = _mm_unpacklo_epi16( m1[0][0], m1[1][0] );
-    m2[1][0] = _mm_unpacklo_epi16( m1[2][0], m1[3][0] );
-    m2[2][0] = _mm_unpackhi_epi16( m1[0][0], m1[1][0] );
-    m2[3][0] = _mm_unpackhi_epi16( m1[2][0], m1[3][0] );
-
-    m1[0][0] = _mm_unpacklo_epi32( m2[0][0], m2[1][0] );
-    m1[1][0] = _mm_unpackhi_epi32( m2[0][0], m2[1][0] );
-    m1[2][0] = _mm_unpacklo_epi32( m2[2][0], m2[3][0] );
-    m1[3][0] = _mm_unpackhi_epi32( m2[2][0], m2[3][0] );
-
-    // block 2
-    m2[0 + off][0] = _mm_unpacklo_epi16( m1[0 + off][0], m1[1 + off][0] );
-    m2[1 + off][0] = _mm_unpacklo_epi16( m1[2 + off][0], m1[3 + off][0] );
-    m2[2 + off][0] = _mm_unpackhi_epi16( m1[0 + off][0], m1[1 + off][0] );
-    m2[3 + off][0] = _mm_unpackhi_epi16( m1[2 + off][0], m1[3 + off][0] );
-
-    m1[0 + off][0] = _mm_unpacklo_epi32( m2[0 + off][0], m2[1 + off][0] );
-    m1[1 + off][0] = _mm_unpackhi_epi32( m2[0 + off][0], m2[1 + off][0] );
-    m1[2 + off][0] = _mm_unpacklo_epi32( m2[2 + off][0], m2[3 + off][0] );
-    m1[3 + off][0] = _mm_unpackhi_epi32( m2[2 + off][0], m2[3 + off][0] );
-
-    m2[0][0] = _mm_unpacklo_epi64( m1[0][0], m1[0 + off][0] );
-    m2[1][0] = _mm_unpackhi_epi64( m1[0][0], m1[0 + off][0] );
-    m2[2][0] = _mm_unpacklo_epi64( m1[1][0], m1[1 + off][0] );
-    m2[3][0] = _mm_unpackhi_epi64( m1[1][0], m1[1 + off][0] );
-    m2[4][0] = _mm_unpacklo_epi64( m1[2][0], m1[2 + off][0] );
-    m2[5][0] = _mm_unpackhi_epi64( m1[2][0], m1[2 + off][0] );
-    m2[6][0] = _mm_unpacklo_epi64( m1[3][0], m1[3 + off][0] );
-    m2[7][0] = _mm_unpackhi_epi64( m1[3][0], m1[3 + off][0] );
-
-    // transpose 8x8
-    // block 1
-    m2[0][1] = _mm_unpacklo_epi16( m1[0][1], m1[1][1] );
-    m2[1][1] = _mm_unpacklo_epi16( m1[2][1], m1[3][1] );
-    m2[2][1] = _mm_unpackhi_epi16( m1[0][1], m1[1][1] );
-    m2[3][1] = _mm_unpackhi_epi16( m1[2][1], m1[3][1] );
-
-    m1[0][1] = _mm_unpacklo_epi32( m2[0][1], m2[1][1] );
-    m1[1][1] = _mm_unpackhi_epi32( m2[0][1], m2[1][1] );
-    m1[2][1] = _mm_unpacklo_epi32( m2[2][1], m2[3][1] );
-    m1[3][1] = _mm_unpackhi_epi32( m2[2][1], m2[3][1] );
-
-    // block 3
-    m2[0 + off][1] = _mm_unpacklo_epi16( m1[0 + off][1], m1[1 + off][1] );
-    m2[1 + off][1] = _mm_unpacklo_epi16( m1[2 + off][1], m1[3 + off][1] );
-    m2[2 + off][1] = _mm_unpackhi_epi16( m1[0 + off][1], m1[1 + off][1] );
-    m2[3 + off][1] = _mm_unpackhi_epi16( m1[2 + off][1], m1[3 + off][1] );
-
-    m1[0 + off][1] = _mm_unpacklo_epi32( m2[0 + off][1], m2[1 + off][1] );
-    m1[1 + off][1] = _mm_unpackhi_epi32( m2[0 + off][1], m2[1 + off][1] );
-    m1[2 + off][1] = _mm_unpacklo_epi32( m2[2 + off][1], m2[3 + off][1] );
-    m1[3 + off][1] = _mm_unpackhi_epi32( m2[2 + off][1], m2[3 + off][1] );
-
-    m2[0 + 8][0] = _mm_unpacklo_epi64( m1[0][1], m1[0 + off][1] );
-    m2[1 + 8][0] = _mm_unpackhi_epi64( m1[0][1], m1[0 + off][1] );
-    m2[2 + 8][0] = _mm_unpacklo_epi64( m1[1][1], m1[1 + off][1] );
-    m2[3 + 8][0] = _mm_unpackhi_epi64( m1[1][1], m1[1 + off][1] );
-    m2[4 + 8][0] = _mm_unpacklo_epi64( m1[2][1], m1[2 + off][1] );
-    m2[5 + 8][0] = _mm_unpackhi_epi64( m1[2][1], m1[2 + off][1] );
-    m2[6 + 8][0] = _mm_unpacklo_epi64( m1[3][1], m1[3 + off][1] );
-    m2[7 + 8][0] = _mm_unpackhi_epi64( m1[3][1], m1[3 + off][1] );
-
-    // horizontal
-    m1[0][0] = _mm_add_epi16( m2[0][0], m2[8][0] );
-    m1[1][0] = _mm_add_epi16( m2[1][0], m2[9][0] );
-    m1[2][0] = _mm_add_epi16( m2[2][0], m2[10][0] );
-    m1[3][0] = _mm_add_epi16( m2[3][0], m2[11][0] );
-    m1[4][0] = _mm_add_epi16( m2[4][0], m2[12][0] );
-    m1[5][0] = _mm_add_epi16( m2[5][0], m2[13][0] );
-    m1[6][0] = _mm_add_epi16( m2[6][0], m2[14][0] );
-    m1[7][0] = _mm_add_epi16( m2[7][0], m2[15][0] );
-    m1[8][0] = _mm_sub_epi16( m2[0][0], m2[8][0] );
-    m1[9][0] = _mm_sub_epi16( m2[1][0], m2[9][0] );
-    m1[10][0] = _mm_sub_epi16( m2[2][0], m2[10][0] );
-    m1[11][0] = _mm_sub_epi16( m2[3][0], m2[11][0] );
-    m1[12][0] = _mm_sub_epi16( m2[4][0], m2[12][0] );
-    m1[13][0] = _mm_sub_epi16( m2[5][0], m2[13][0] );
-    m1[14][0] = _mm_sub_epi16( m2[6][0], m2[14][0] );
-    m1[15][0] = _mm_sub_epi16( m2[7][0], m2[15][0] );
-
-    m2[0][0] = _mm_add_epi16( m1[0][0], m1[4][0] );
-    m2[1][0] = _mm_add_epi16( m1[1][0], m1[5][0] );
-    m2[2][0] = _mm_add_epi16( m1[2][0], m1[6][0] );
-    m2[3][0] = _mm_add_epi16( m1[3][0], m1[7][0] );
-    m2[4][0] = _mm_sub_epi16( m1[0][0], m1[4][0] );
-    m2[5][0] = _mm_sub_epi16( m1[1][0], m1[5][0] );
-    m2[6][0] = _mm_sub_epi16( m1[2][0], m1[6][0] );
-    m2[7][0] = _mm_sub_epi16( m1[3][0], m1[7][0] );
-    m2[8][0] = _mm_add_epi16( m1[8][0], m1[12][0] );
-    m2[9][0] = _mm_add_epi16( m1[9][0], m1[13][0] );
-    m2[10][0] = _mm_add_epi16( m1[10][0], m1[14][0] );
-    m2[11][0] = _mm_add_epi16( m1[11][0], m1[15][0] );
-    m2[12][0] = _mm_sub_epi16( m1[8][0], m1[12][0] );
-    m2[13][0] = _mm_sub_epi16( m1[9][0], m1[13][0] );
-    m2[14][0] = _mm_sub_epi16( m1[10][0], m1[14][0] );
-    m2[15][0] = _mm_sub_epi16( m1[11][0], m1[15][0] );
-
-    m1[0][0] = _mm_add_epi16( m2[0][0], m2[2][0] );
-    m1[1][0] = _mm_add_epi16( m2[1][0], m2[3][0] );
-    m1[2][0] = _mm_sub_epi16( m2[0][0], m2[2][0] );
-    m1[3][0] = _mm_sub_epi16( m2[1][0], m2[3][0] );
-    m1[4][0] = _mm_add_epi16( m2[4][0], m2[6][0] );
-    m1[5][0] = _mm_add_epi16( m2[5][0], m2[7][0] );
-    m1[6][0] = _mm_sub_epi16( m2[4][0], m2[6][0] );
-    m1[7][0] = _mm_sub_epi16( m2[5][0], m2[7][0] );
-    m1[8][0] = _mm_add_epi16( m2[8][0], m2[10][0] );
-    m1[9][0] = _mm_add_epi16( m2[9][0], m2[11][0] );
-    m1[10][0] = _mm_sub_epi16( m2[8][0], m2[10][0] );
-    m1[11][0] = _mm_sub_epi16( m2[9][0], m2[11][0] );
-    m1[12][0] = _mm_add_epi16( m2[12][0], m2[14][0] );
-    m1[13][0] = _mm_add_epi16( m2[13][0], m2[15][0] );
-    m1[14][0] = _mm_sub_epi16( m2[12][0], m2[14][0] );
-    m1[15][0] = _mm_sub_epi16( m2[13][0], m2[15][0] );
-
-    m2[0][0] = _mm_abs_epi16( _mm_add_epi16( m1[0][0], m1[1][0] ) );
-    m2[1][0] = _mm_abs_epi16( _mm_sub_epi16( m1[0][0], m1[1][0] ) );
-    m2[2][0] = _mm_abs_epi16( _mm_add_epi16( m1[2][0], m1[3][0] ) );
-    m2[3][0] = _mm_abs_epi16( _mm_sub_epi16( m1[2][0], m1[3][0] ) );
-    m2[4][0] = _mm_abs_epi16( _mm_add_epi16( m1[4][0], m1[5][0] ) );
-    m2[5][0] = _mm_abs_epi16( _mm_sub_epi16( m1[4][0], m1[5][0] ) );
-    m2[6][0] = _mm_abs_epi16( _mm_add_epi16( m1[6][0], m1[7][0] ) );
-    m2[7][0] = _mm_abs_epi16( _mm_sub_epi16( m1[6][0], m1[7][0] ) );
-    m2[8][0] = _mm_abs_epi16( _mm_add_epi16( m1[8][0], m1[9][0] ) );
-    m2[9][0] = _mm_abs_epi16( _mm_sub_epi16( m1[8][0], m1[9][0] ) );
-    m2[10][0] = _mm_abs_epi16( _mm_add_epi16( m1[10][0], m1[11][0] ) );
-    m2[11][0] = _mm_abs_epi16( _mm_sub_epi16( m1[10][0], m1[11][0] ) );
-    m2[12][0] = _mm_abs_epi16( _mm_add_epi16( m1[12][0], m1[13][0] ) );
-    m2[13][0] = _mm_abs_epi16( _mm_sub_epi16( m1[12][0], m1[13][0] ) );
-    m2[14][0] = _mm_abs_epi16( _mm_add_epi16( m1[14][0], m1[15][0] ) );
-    m2[15][0] = _mm_abs_epi16( _mm_sub_epi16( m1[14][0], m1[15][0] ) );
-
-    __m128i ma1, ma2;
+    int off = l * 4;
+
+    __m128i n1[16];
+    __m128i n2[16];
+
+    m2[0][0][0] = _mm_unpacklo_epi32( m1[0 + off][0][0], m1[1 + off][0][0] );
+    m2[1][0][0] = _mm_unpacklo_epi32( m1[2 + off][0][0], m1[3 + off][0][0] );
+    m2[2][0][0] = _mm_unpackhi_epi32( m1[0 + off][0][0], m1[1 + off][0][0] );
+    m2[3][0][0] = _mm_unpackhi_epi32( m1[2 + off][0][0], m1[3 + off][0][0] );
+
+    m2[0][0][1] = _mm_unpacklo_epi32( m1[0 + off][0][1], m1[1 + off][0][1] );
+    m2[1][0][1] = _mm_unpacklo_epi32( m1[2 + off][0][1], m1[3 + off][0][1] );
+    m2[2][0][1] = _mm_unpackhi_epi32( m1[0 + off][0][1], m1[1 + off][0][1] );
+    m2[3][0][1] = _mm_unpackhi_epi32( m1[2 + off][0][1], m1[3 + off][0][1] );
+
+    n1[0]       = _mm_unpacklo_epi64( m2[0][0][0], m2[1][0][0] );
+    n1[1]       = _mm_unpackhi_epi64( m2[0][0][0], m2[1][0][0] );
+    n1[2]       = _mm_unpacklo_epi64( m2[2][0][0], m2[3][0][0] );
+    n1[3]       = _mm_unpackhi_epi64( m2[2][0][0], m2[3][0][0] );
+    n1[4]       = _mm_unpacklo_epi64( m2[0][0][1], m2[1][0][1] );
+    n1[5]       = _mm_unpackhi_epi64( m2[0][0][1], m2[1][0][1] );
+    n1[6]       = _mm_unpacklo_epi64( m2[2][0][1], m2[3][0][1] );
+    n1[7]       = _mm_unpackhi_epi64( m2[2][0][1], m2[3][0][1] );
+
+    // transpose 8x4 -> 4x8, block 1(3)
+    m2[8+0][0][0] = _mm_unpacklo_epi32( m1[0 + off][1][0], m1[1 + off][1][0] );
+    m2[8+1][0][0] = _mm_unpacklo_epi32( m1[2 + off][1][0], m1[3 + off][1][0] );
+    m2[8+2][0][0] = _mm_unpackhi_epi32( m1[0 + off][1][0], m1[1 + off][1][0] );
+    m2[8+3][0][0] = _mm_unpackhi_epi32( m1[2 + off][1][0], m1[3 + off][1][0] );
+
+    m2[8+0][0][1] = _mm_unpacklo_epi32( m1[0 + off][1][1], m1[1 + off][1][1] );
+    m2[8+1][0][1] = _mm_unpacklo_epi32( m1[2 + off][1][1], m1[3 + off][1][1] );
+    m2[8+2][0][1] = _mm_unpackhi_epi32( m1[0 + off][1][1], m1[1 + off][1][1] );
+    m2[8+3][0][1] = _mm_unpackhi_epi32( m1[2 + off][1][1], m1[3 + off][1][1] );
+
+    n1[8+0]       = _mm_unpacklo_epi64( m2[8+0][0][0], m2[8+1][0][0] );
+    n1[8+1]       = _mm_unpackhi_epi64( m2[8+0][0][0], m2[8+1][0][0] );
+    n1[8+2]       = _mm_unpacklo_epi64( m2[8+2][0][0], m2[8+3][0][0] );
+    n1[8+3]       = _mm_unpackhi_epi64( m2[8+2][0][0], m2[8+3][0][0] );
+    n1[8+4]       = _mm_unpacklo_epi64( m2[8+0][0][1], m2[8+1][0][1] );
+    n1[8+5]       = _mm_unpackhi_epi64( m2[8+0][0][1], m2[8+1][0][1] );
+    n1[8+6]       = _mm_unpacklo_epi64( m2[8+2][0][1], m2[8+3][0][1] );
+    n1[8+7]       = _mm_unpackhi_epi64( m2[8+2][0][1], m2[8+3][0][1] );
+
+    n2[0] = _mm_add_epi32( n1[0], n1[8] );
+    n2[1] = _mm_add_epi32( n1[1], n1[9] );
+    n2[2] = _mm_add_epi32( n1[2], n1[10] );
+    n2[3] = _mm_add_epi32( n1[3], n1[11] );
+    n2[4] = _mm_add_epi32( n1[4], n1[12] );
+    n2[5] = _mm_add_epi32( n1[5], n1[13] );
+    n2[6] = _mm_add_epi32( n1[6], n1[14] );
+    n2[7] = _mm_add_epi32( n1[7], n1[15] );
+    n2[8] = _mm_sub_epi32( n1[0], n1[8] );
+    n2[9] = _mm_sub_epi32( n1[1], n1[9] );
+    n2[10] = _mm_sub_epi32( n1[2], n1[10] );
+    n2[11] = _mm_sub_epi32( n1[3], n1[11] );
+    n2[12] = _mm_sub_epi32( n1[4], n1[12] );
+    n2[13] = _mm_sub_epi32( n1[5], n1[13] );
+    n2[14] = _mm_sub_epi32( n1[6], n1[14] );
+    n2[15] = _mm_sub_epi32( n1[7], n1[15] );
+
+    n1[0] = _mm_add_epi32( n2[0], n2[4] );
+    n1[1] = _mm_add_epi32( n2[1], n2[5] );
+    n1[2] = _mm_add_epi32( n2[2], n2[6] );
+    n1[3] = _mm_add_epi32( n2[3], n2[7] );
+    n1[4] = _mm_sub_epi32( n2[0], n2[4] );
+    n1[5] = _mm_sub_epi32( n2[1], n2[5] );
+    n1[6] = _mm_sub_epi32( n2[2], n2[6] );
+    n1[7] = _mm_sub_epi32( n2[3], n2[7] );
+    n1[8] = _mm_add_epi32( n2[8], n2[12] );
+    n1[9] = _mm_add_epi32( n2[9], n2[13] );
+    n1[10] = _mm_add_epi32( n2[10], n2[14] );
+    n1[11] = _mm_add_epi32( n2[11], n2[15] );
+    n1[12] = _mm_sub_epi32( n2[8], n2[12] );
+    n1[13] = _mm_sub_epi32( n2[9], n2[13] );
+    n1[14] = _mm_sub_epi32( n2[10], n2[14] );
+    n1[15] = _mm_sub_epi32( n2[11], n2[15] );
+
+    n2[0] = _mm_add_epi32( n1[0], n1[2] );
+    n2[1] = _mm_add_epi32( n1[1], n1[3] );
+    n2[2] = _mm_sub_epi32( n1[0], n1[2] );
+    n2[3] = _mm_sub_epi32( n1[1], n1[3] );
+    n2[4] = _mm_add_epi32( n1[4], n1[6] );
+    n2[5] = _mm_add_epi32( n1[5], n1[7] );
+    n2[6] = _mm_sub_epi32( n1[4], n1[6] );
+    n2[7] = _mm_sub_epi32( n1[5], n1[7] );
+    n2[8] = _mm_add_epi32( n1[8], n1[10] );
+    n2[9] = _mm_add_epi32( n1[9], n1[11] );
+    n2[10] = _mm_sub_epi32( n1[8], n1[10] );
+    n2[11] = _mm_sub_epi32( n1[9], n1[11] );
+    n2[12] = _mm_add_epi32( n1[12], n1[14] );
+    n2[13] = _mm_add_epi32( n1[13], n1[15] );
+    n2[14] = _mm_sub_epi32( n1[12], n1[14] );
+    n2[15] = _mm_sub_epi32( n1[13], n1[15] );
+
+    n1[0] = _mm_abs_epi32( _mm_add_epi32( n2[0], n2[1] ) );
+    n1[1] = _mm_abs_epi32( _mm_sub_epi32( n2[0], n2[1] ) );
+    n1[2] = _mm_abs_epi32( _mm_add_epi32( n2[2], n2[3] ) );
+    n1[3] = _mm_abs_epi32( _mm_sub_epi32( n2[2], n2[3] ) );
+    n1[4] = _mm_abs_epi32( _mm_add_epi32( n2[4], n2[5] ) );
+    n1[5] = _mm_abs_epi32( _mm_sub_epi32( n2[4], n2[5] ) );
+    n1[6] = _mm_abs_epi32( _mm_add_epi32( n2[6], n2[7] ) );
+    n1[7] = _mm_abs_epi32( _mm_sub_epi32( n2[6], n2[7] ) );
+    n1[8] = _mm_abs_epi32( _mm_add_epi32( n2[8], n2[9] ) );
+    n1[9] = _mm_abs_epi32( _mm_sub_epi32( n2[8], n2[9] ) );
+    n1[10] = _mm_abs_epi32( _mm_add_epi32( n2[10], n2[11] ) );
+    n1[11] = _mm_abs_epi32( _mm_sub_epi32( n2[10], n2[11] ) );
+    n1[12] = _mm_abs_epi32( _mm_add_epi32( n2[12], n2[13] ) );
+    n1[13] = _mm_abs_epi32( _mm_sub_epi32( n2[12], n2[13] ) );
+    n1[14] = _mm_abs_epi32( _mm_add_epi32( n2[14], n2[15] ) );
+    n1[15] = _mm_abs_epi32( _mm_sub_epi32( n2[14], n2[15] ) );
 
-    for( int i = 0; i < 16; i++ )
-    {
-      ma1 = _mm_unpacklo_epi16( m2[i][0], vzero );
-      ma2 = _mm_unpackhi_epi16( m2[i][0], vzero );
-      iSum = _mm_add_epi32( iSum, _mm_add_epi32( ma1, ma2 ) );
-    }
+    // sum up
+    n1[0] = _mm_add_epi32( n1[0], n1[1] );
+    n1[2] = _mm_add_epi32( n1[2], n1[3] );
+    n1[4] = _mm_add_epi32( n1[4], n1[5] );
+    n1[6] = _mm_add_epi32( n1[6], n1[7] );
+    n1[8] = _mm_add_epi32( n1[8], n1[9] );
+    n1[10] = _mm_add_epi32( n1[10], n1[11] );
+    n1[12] = _mm_add_epi32( n1[12], n1[13] );
+    n1[14] = _mm_add_epi32( n1[14], n1[15] );
+
+    n1[0] = _mm_add_epi32( n1[0], n1[2] );
+    n1[4] = _mm_add_epi32( n1[4], n1[6] );
+    n1[8] = _mm_add_epi32( n1[8], n1[10] );
+    n1[12] = _mm_add_epi32( n1[12], n1[14] );
+
+    n1[0] = _mm_add_epi32( n1[0], n1[4] );
+    n1[8] = _mm_add_epi32( n1[8], n1[12] );
+
+    n1[0] = _mm_add_epi32( n1[0], n1[8] );
+    iSum = _mm_add_epi32( iSum, n1[0] );
   }
 
   iSum = _mm_hadd_epi32( iSum, iSum );
@@ -1081,223 +875,174 @@ static uint32_t xCalcHAD16x8_SSE( const Torg *piOrg, const Tcur *piCur, const in
 
 
 //working up to 12-bit
-template< typename Torg, typename Tcur/*, bool bHorDownsampling*/ >
 static uint32_t xCalcHAD8x16_SSE( const Torg *piOrg, const Tcur *piCur, const int iStrideOrg, const int iStrideCur, const int iBitDepth )
 {
-  __m128i m1[16], m2[16];
+  __m128i m1[2][16], m2[2][16];
   __m128i iSum = _mm_setzero_si128();
 
   for( int k = 0; k < 16; k++ )
   {
-    __m128i r0 = (sizeof( Torg ) > 1) ? (_mm_loadu_si128( (__m128i*)piOrg )) : (_mm_unpacklo_epi8( _mm_loadl_epi64( (const __m128i*)piOrg ), _mm_setzero_si128() ));
-    __m128i r1 = (sizeof( Tcur ) > 1) ? (_mm_lddqu_si128( (__m128i*)piCur )) : (_mm_unpacklo_epi8( _mm_loadl_epi64( (const __m128i*)piCur ), _mm_setzero_si128() )); // th  _mm_loadu_si128( (__m128i*)piCur )
-    m1[k] = _mm_sub_epi16( r0, r1 );
+    __m128i r0 =_mm_loadu_si128( (__m128i*)piOrg );
+    __m128i r1 =_mm_lddqu_si128( (__m128i*)piCur );
+    m1[0][k] = _mm_sub_epi16( r0, r1 );
+    m1[1][k] = _mm_cvtepi16_epi32( _mm_srli_si128( m1[0][k], 8 ) );
+    m1[0][k] = _mm_cvtepi16_epi32( m1[0][k] );
     piCur += iStrideCur;
     piOrg += iStrideOrg;
   }
 
-  // vertical
-  m2[0] = _mm_add_epi16( m1[0], m1[8] );
-  m2[1] = _mm_add_epi16( m1[1], m1[9] );
-  m2[2] = _mm_add_epi16( m1[2], m1[10] );
-  m2[3] = _mm_add_epi16( m1[3], m1[11] );
-  m2[4] = _mm_add_epi16( m1[4], m1[12] );
-  m2[5] = _mm_add_epi16( m1[5], m1[13] );
-  m2[6] = _mm_add_epi16( m1[6], m1[14] );
-  m2[7] = _mm_add_epi16( m1[7], m1[15] );
-  m2[8] = _mm_sub_epi16( m1[0], m1[8] );
-  m2[9] = _mm_sub_epi16( m1[1], m1[9] );
-  m2[10] = _mm_sub_epi16( m1[2], m1[10] );
-  m2[11] = _mm_sub_epi16( m1[3], m1[11] );
-  m2[12] = _mm_sub_epi16( m1[4], m1[12] );
-  m2[13] = _mm_sub_epi16( m1[5], m1[13] );
-  m2[14] = _mm_sub_epi16( m1[6], m1[14] );
-  m2[15] = _mm_sub_epi16( m1[7], m1[15] );
-
-  m1[0] = _mm_add_epi16( m2[0], m2[4] );
-  m1[1] = _mm_add_epi16( m2[1], m2[5] );
-  m1[2] = _mm_add_epi16( m2[2], m2[6] );
-  m1[3] = _mm_add_epi16( m2[3], m2[7] );
-  m1[4] = _mm_sub_epi16( m2[0], m2[4] );
-  m1[5] = _mm_sub_epi16( m2[1], m2[5] );
-  m1[6] = _mm_sub_epi16( m2[2], m2[6] );
-  m1[7] = _mm_sub_epi16( m2[3], m2[7] );
-  m1[8] = _mm_add_epi16( m2[8], m2[12] );
-  m1[9] = _mm_add_epi16( m2[9], m2[13] );
-  m1[10] = _mm_add_epi16( m2[10], m2[14] );
-  m1[11] = _mm_add_epi16( m2[11], m2[15] );
-  m1[12] = _mm_sub_epi16( m2[8], m2[12] );
-  m1[13] = _mm_sub_epi16( m2[9], m2[13] );
-  m1[14] = _mm_sub_epi16( m2[10], m2[14] );
-  m1[15] = _mm_sub_epi16( m2[11], m2[15] );
+  for( int i = 0; i < 2; i++ )
+  {
+    // vertical
+    m2[i][ 0] = _mm_add_epi32( m1[i][ 0], m1[i][ 8] );
+    m2[i][ 1] = _mm_add_epi32( m1[i][ 1], m1[i][ 9] );
+    m2[i][ 2] = _mm_add_epi32( m1[i][ 2], m1[i][10] );
+    m2[i][ 3] = _mm_add_epi32( m1[i][ 3], m1[i][11] );
+    m2[i][ 4] = _mm_add_epi32( m1[i][ 4], m1[i][12] );
+    m2[i][ 5] = _mm_add_epi32( m1[i][ 5], m1[i][13] );
+    m2[i][ 6] = _mm_add_epi32( m1[i][ 6], m1[i][14] );
+    m2[i][ 7] = _mm_add_epi32( m1[i][ 7], m1[i][15] );
+    m2[i][ 8] = _mm_sub_epi32( m1[i][ 0], m1[i][ 8] );
+    m2[i][ 9] = _mm_sub_epi32( m1[i][ 1], m1[i][ 9] );
+    m2[i][10] = _mm_sub_epi32( m1[i][ 2], m1[i][10] );
+    m2[i][11] = _mm_sub_epi32( m1[i][ 3], m1[i][11] );
+    m2[i][12] = _mm_sub_epi32( m1[i][ 4], m1[i][12] );
+    m2[i][13] = _mm_sub_epi32( m1[i][ 5], m1[i][13] );
+    m2[i][14] = _mm_sub_epi32( m1[i][ 6], m1[i][14] );
+    m2[i][15] = _mm_sub_epi32( m1[i][ 7], m1[i][15] );
+
+    m1[i][ 0] = _mm_add_epi32( m2[i][ 0], m2[i][ 4] );
+    m1[i][ 1] = _mm_add_epi32( m2[i][ 1], m2[i][ 5] );
+    m1[i][ 2] = _mm_add_epi32( m2[i][ 2], m2[i][ 6] );
+    m1[i][ 3] = _mm_add_epi32( m2[i][ 3], m2[i][ 7] );
+    m1[i][ 4] = _mm_sub_epi32( m2[i][ 0], m2[i][ 4] );
+    m1[i][ 5] = _mm_sub_epi32( m2[i][ 1], m2[i][ 5] );
+    m1[i][ 6] = _mm_sub_epi32( m2[i][ 2], m2[i][ 6] );
+    m1[i][ 7] = _mm_sub_epi32( m2[i][ 3], m2[i][ 7] );
+    m1[i][ 8] = _mm_add_epi32( m2[i][ 8], m2[i][12] );
+    m1[i][ 9] = _mm_add_epi32( m2[i][ 9], m2[i][13] );
+    m1[i][10] = _mm_add_epi32( m2[i][10], m2[i][14] );
+    m1[i][11] = _mm_add_epi32( m2[i][11], m2[i][15] );
+    m1[i][12] = _mm_sub_epi32( m2[i][ 8], m2[i][12] );
+    m1[i][13] = _mm_sub_epi32( m2[i][ 9], m2[i][13] );
+    m1[i][14] = _mm_sub_epi32( m2[i][10], m2[i][14] );
+    m1[i][15] = _mm_sub_epi32( m2[i][11], m2[i][15] );
+
+    m2[i][ 0] = _mm_add_epi32( m1[i][ 0], m1[i][ 2] );
+    m2[i][ 1] = _mm_add_epi32( m1[i][ 1], m1[i][ 3] );
+    m2[i][ 2] = _mm_sub_epi32( m1[i][ 0], m1[i][ 2] );
+    m2[i][ 3] = _mm_sub_epi32( m1[i][ 1], m1[i][ 3] );
+    m2[i][ 4] = _mm_add_epi32( m1[i][ 4], m1[i][ 6] );
+    m2[i][ 5] = _mm_add_epi32( m1[i][ 5], m1[i][ 7] );
+    m2[i][ 6] = _mm_sub_epi32( m1[i][ 4], m1[i][ 6] );
+    m2[i][ 7] = _mm_sub_epi32( m1[i][ 5], m1[i][ 7] );
+    m2[i][ 8] = _mm_add_epi32( m1[i][ 8], m1[i][10] );
+    m2[i][ 9] = _mm_add_epi32( m1[i][ 9], m1[i][11] );
+    m2[i][10] = _mm_sub_epi32( m1[i][ 8], m1[i][10] );
+    m2[i][11] = _mm_sub_epi32( m1[i][ 9], m1[i][11] );
+    m2[i][12] = _mm_add_epi32( m1[i][12], m1[i][14] );
+    m2[i][13] = _mm_add_epi32( m1[i][13], m1[i][15] );
+    m2[i][14] = _mm_sub_epi32( m1[i][12], m1[i][14] );
+    m2[i][15] = _mm_sub_epi32( m1[i][13], m1[i][15] );
+
+    m1[i][ 0] = _mm_add_epi32( m2[i][ 0], m2[i][ 1] );
+    m1[i][ 1] = _mm_sub_epi32( m2[i][ 0], m2[i][ 1] );
+    m1[i][ 2] = _mm_add_epi32( m2[i][ 2], m2[i][ 3] );
+    m1[i][ 3] = _mm_sub_epi32( m2[i][ 2], m2[i][ 3] );
+    m1[i][ 4] = _mm_add_epi32( m2[i][ 4], m2[i][ 5] );
+    m1[i][ 5] = _mm_sub_epi32( m2[i][ 4], m2[i][ 5] );
+    m1[i][ 6] = _mm_add_epi32( m2[i][ 6], m2[i][ 7] );
+    m1[i][ 7] = _mm_sub_epi32( m2[i][ 6], m2[i][ 7] );
+    m1[i][ 8] = _mm_add_epi32( m2[i][ 8], m2[i][ 9] );
+    m1[i][ 9] = _mm_sub_epi32( m2[i][ 8], m2[i][ 9] );
+    m1[i][10] = _mm_add_epi32( m2[i][10], m2[i][11] );
+    m1[i][11] = _mm_sub_epi32( m2[i][10], m2[i][11] );
+    m1[i][12] = _mm_add_epi32( m2[i][12], m2[i][13] );
+    m1[i][13] = _mm_sub_epi32( m2[i][12], m2[i][13] );
+    m1[i][14] = _mm_add_epi32( m2[i][14], m2[i][15] );
+    m1[i][15] = _mm_sub_epi32( m2[i][14], m2[i][15] );
+  }
 
-  m2[0] = _mm_add_epi16( m1[0], m1[2] );
-  m2[1] = _mm_add_epi16( m1[1], m1[3] );
-  m2[2] = _mm_sub_epi16( m1[0], m1[2] );
-  m2[3] = _mm_sub_epi16( m1[1], m1[3] );
-  m2[4] = _mm_add_epi16( m1[4], m1[6] );
-  m2[5] = _mm_add_epi16( m1[5], m1[7] );
-  m2[6] = _mm_sub_epi16( m1[4], m1[6] );
-  m2[7] = _mm_sub_epi16( m1[5], m1[7] );
-  m2[8] = _mm_add_epi16( m1[8], m1[10] );
-  m2[9] = _mm_add_epi16( m1[9], m1[11] );
-  m2[10] = _mm_sub_epi16( m1[8], m1[10] );
-  m2[11] = _mm_sub_epi16( m1[9], m1[11] );
-  m2[12] = _mm_add_epi16( m1[12], m1[14] );
-  m2[13] = _mm_add_epi16( m1[13], m1[15] );
-  m2[14] = _mm_sub_epi16( m1[12], m1[14] );
-  m2[15] = _mm_sub_epi16( m1[13], m1[15] );
-
-  m1[ 0] = _mm_add_epi16( m2[0], m2[1] );
-  m1[ 1] = _mm_sub_epi16( m2[0], m2[1] );
-  m1[ 2] = _mm_add_epi16( m2[2], m2[3] );
-  m1[ 3] = _mm_sub_epi16( m2[2], m2[3] );
-  m1[ 4] = _mm_add_epi16( m2[4], m2[5] );
-  m1[ 5] = _mm_sub_epi16( m2[4], m2[5] );
-  m1[ 6] = _mm_add_epi16( m2[6], m2[7] );
-  m1[ 7] = _mm_sub_epi16( m2[6], m2[7] );
-  m1[ 8] = _mm_add_epi16( m2[8], m2[9] );
-  m1[ 9] = _mm_sub_epi16( m2[8], m2[9] );
-  m1[10] = _mm_add_epi16( m2[10], m2[11] );
-  m1[11] = _mm_sub_epi16( m2[10], m2[11] );
-  m1[12] = _mm_add_epi16( m2[12], m2[13] );
-  m1[13] = _mm_sub_epi16( m2[12], m2[13] );
-  m1[14] = _mm_add_epi16( m2[14], m2[15] );
-  m1[15] = _mm_sub_epi16( m2[14], m2[15] );
+  // process horizontal in two steps ( 2 x 8x8 blocks )
 
+  for( int l = 0; l < 4; l++ )
+  {
+    int off = l * 4;
 
-  // process horizontal in two steps ( 2 x 8x8 blocks )
+    for( int i = 0; i < 2; i++ )
+    {
+      // transpose 4x4
+      m2[i][0 + off] = _mm_unpacklo_epi32( m1[i][0 + off], m1[i][1 + off] );
+      m2[i][1 + off] = _mm_unpackhi_epi32( m1[i][0 + off], m1[i][1 + off] );
+      m2[i][2 + off] = _mm_unpacklo_epi32( m1[i][2 + off], m1[i][3 + off] );
+      m2[i][3 + off] = _mm_unpackhi_epi32( m1[i][2 + off], m1[i][3 + off] );
+
+      m1[i][0 + off] = _mm_unpacklo_epi64( m2[i][0 + off], m2[i][2 + off] );
+      m1[i][1 + off] = _mm_unpackhi_epi64( m2[i][0 + off], m2[i][2 + off] );
+      m1[i][2 + off] = _mm_unpacklo_epi64( m2[i][1 + off], m2[i][3 + off] );
+      m1[i][3 + off] = _mm_unpackhi_epi64( m2[i][1 + off], m2[i][3 + off] );
+    }
+  }
 
   for( int l = 0; l < 2; l++ )
   {
     int off = l * 8;
 
-    // transpose 8x8
-    // blocks 0,1  and 2,3
-    m2[0] = _mm_unpacklo_epi16( m1[0 + off], m1[1 + off] );
-    m2[1] = _mm_unpacklo_epi16( m1[2 + off], m1[3 + off] );
-    m2[2] = _mm_unpacklo_epi16( m1[4 + off], m1[5 + off] );
-    m2[3] = _mm_unpacklo_epi16( m1[6 + off], m1[7 + off] );
-
-    m2[0 + 4] = _mm_unpackhi_epi16( m1[0 + off], m1[1 + off] );
-    m2[1 + 4] = _mm_unpackhi_epi16( m1[2 + off], m1[3 + off] );
-    m2[2 + 4] = _mm_unpackhi_epi16( m1[4 + off], m1[5 + off] );
-    m2[3 + 4] = _mm_unpackhi_epi16( m1[6 + off], m1[7 + off] );
+    __m128i n1[2][8];
+    __m128i n2[2][8];
 
-    m1[0] = _mm_unpacklo_epi32( m2[0], m2[1] );
-    m1[1] = _mm_unpackhi_epi32( m2[0], m2[1] );
-    m1[2] = _mm_unpacklo_epi32( m2[2], m2[3] );
-    m1[3] = _mm_unpackhi_epi32( m2[2], m2[3] );
-
-    m2[0] = _mm_unpacklo_epi64( m1[0], m1[2] );
-    m2[1] = _mm_unpackhi_epi64( m1[0], m1[2] );
-    m2[2] = _mm_unpacklo_epi64( m1[1], m1[3] );
-    m2[3] = _mm_unpackhi_epi64( m1[1], m1[3] );
-
-    m1[0 + 4] = _mm_unpacklo_epi32( m2[0 + 4], m2[1 + 4] );
-    m1[1 + 4] = _mm_unpackhi_epi32( m2[0 + 4], m2[1 + 4] );
-    m1[2 + 4] = _mm_unpacklo_epi32( m2[2 + 4], m2[3 + 4] );
-    m1[3 + 4] = _mm_unpackhi_epi32( m2[2 + 4], m2[3 + 4] );
-
-    m2[0 + 4] = _mm_unpacklo_epi64( m1[0 + 4], m1[2 + 4] );
-    m2[1 + 4] = _mm_unpackhi_epi64( m1[0 + 4], m1[2 + 4] );
-    m2[2 + 4] = _mm_unpacklo_epi64( m1[1 + 4], m1[3 + 4] );
-    m2[3 + 4] = _mm_unpackhi_epi64( m1[1 + 4], m1[3 + 4] );
-
-    // horizontal calculation
-    if( iBitDepth >= 10 )
+    for( int i = 0; i < 8; i++ )
     {
-      __m128i n1[8][2];
-      __m128i n2[8][2];
+      int ii = i % 4;
+      int ij = i >> 2;
 
-      for( int i = 0; i < 8; i++ )
-      {
-        n2[i][0] = _mm_cvtepi16_epi32( m2[i] );
-        n2[i][1] = _mm_cvtepi16_epi32( _mm_shuffle_epi32( m2[i], 0xEE ) );
-      }
-
-      for( int i = 0; i < 2; i++ )
-      {
-        n1[0][i] = _mm_add_epi32( n2[0][i], n2[4][i] );
-        n1[1][i] = _mm_add_epi32( n2[1][i], n2[5][i] );
-        n1[2][i] = _mm_add_epi32( n2[2][i], n2[6][i] );
-        n1[3][i] = _mm_add_epi32( n2[3][i], n2[7][i] );
-        n1[4][i] = _mm_sub_epi32( n2[0][i], n2[4][i] );
-        n1[5][i] = _mm_sub_epi32( n2[1][i], n2[5][i] );
-        n1[6][i] = _mm_sub_epi32( n2[2][i], n2[6][i] );
-        n1[7][i] = _mm_sub_epi32( n2[3][i], n2[7][i] );
-
-        n2[0][i] = _mm_add_epi32( n1[0][i], n1[2][i] );
-        n2[1][i] = _mm_add_epi32( n1[1][i], n1[3][i] );
-        n2[2][i] = _mm_sub_epi32( n1[0][i], n1[2][i] );
-        n2[3][i] = _mm_sub_epi32( n1[1][i], n1[3][i] );
-        n2[4][i] = _mm_add_epi32( n1[4][i], n1[6][i] );
-        n2[5][i] = _mm_add_epi32( n1[5][i], n1[7][i] );
-        n2[6][i] = _mm_sub_epi32( n1[4][i], n1[6][i] );
-        n2[7][i] = _mm_sub_epi32( n1[5][i], n1[7][i] );
-
-        n1[0][i] = _mm_abs_epi32( _mm_add_epi32( n2[0][i], n2[1][i] ) );
-        n1[1][i] = _mm_abs_epi32( _mm_sub_epi32( n2[0][i], n2[1][i] ) );
-        n1[2][i] = _mm_abs_epi32( _mm_add_epi32( n2[2][i], n2[3][i] ) );
-        n1[3][i] = _mm_abs_epi32( _mm_sub_epi32( n2[2][i], n2[3][i] ) );
-        n1[4][i] = _mm_abs_epi32( _mm_add_epi32( n2[4][i], n2[5][i] ) );
-        n1[5][i] = _mm_abs_epi32( _mm_sub_epi32( n2[4][i], n2[5][i] ) );
-        n1[6][i] = _mm_abs_epi32( _mm_add_epi32( n2[6][i], n2[7][i] ) );
-        n1[7][i] = _mm_abs_epi32( _mm_sub_epi32( n2[6][i], n2[7][i] ) );
-      }
-      for( int i = 0; i < 8; i++ )
-      {
-        m1[i] = _mm_add_epi32( n1[i][0], n1[i][1] );
-      }
+      n2[0][i] = m1[ij][off + ii    ];
+      n2[1][i] = m1[ij][off + ii + 4];
     }
-    else
+
+    for( int i = 0; i < 2; i++ )
     {
-      m1[0] = _mm_add_epi16( m2[0], m2[4] );
-      m1[1] = _mm_add_epi16( m2[1], m2[5] );
-      m1[2] = _mm_add_epi16( m2[2], m2[6] );
-      m1[3] = _mm_add_epi16( m2[3], m2[7] );
-      m1[4] = _mm_sub_epi16( m2[0], m2[4] );
-      m1[5] = _mm_sub_epi16( m2[1], m2[5] );
-      m1[6] = _mm_sub_epi16( m2[2], m2[6] );
-      m1[7] = _mm_sub_epi16( m2[3], m2[7] );
-
-      m2[0] = _mm_add_epi16( m1[0], m1[2] );
-      m2[1] = _mm_add_epi16( m1[1], m1[3] );
-      m2[2] = _mm_sub_epi16( m1[0], m1[2] );
-      m2[3] = _mm_sub_epi16( m1[1], m1[3] );
-      m2[4] = _mm_add_epi16( m1[4], m1[6] );
-      m2[5] = _mm_add_epi16( m1[5], m1[7] );
-      m2[6] = _mm_sub_epi16( m1[4], m1[6] );
-      m2[7] = _mm_sub_epi16( m1[5], m1[7] );
-
-      m1[0] = _mm_abs_epi16( _mm_add_epi16( m2[0], m2[1] ) );
-      m1[1] = _mm_abs_epi16( _mm_sub_epi16( m2[0], m2[1] ) );
-      m1[2] = _mm_abs_epi16( _mm_add_epi16( m2[2], m2[3] ) );
-      m1[3] = _mm_abs_epi16( _mm_sub_epi16( m2[2], m2[3] ) );
-      m1[4] = _mm_abs_epi16( _mm_add_epi16( m2[4], m2[5] ) );
-      m1[5] = _mm_abs_epi16( _mm_sub_epi16( m2[4], m2[5] ) );
-      m1[6] = _mm_abs_epi16( _mm_add_epi16( m2[6], m2[7] ) );
-      m1[7] = _mm_abs_epi16( _mm_sub_epi16( m2[6], m2[7] ) );
-
-      __m128i ma1, ma2;
-      __m128i vzero = _mm_setzero_si128();
+      n1[i][0] = _mm_add_epi32( n2[i][0], n2[i][4] );
+      n1[i][1] = _mm_add_epi32( n2[i][1], n2[i][5] );
+      n1[i][2] = _mm_add_epi32( n2[i][2], n2[i][6] );
+      n1[i][3] = _mm_add_epi32( n2[i][3], n2[i][7] );
+      n1[i][4] = _mm_sub_epi32( n2[i][0], n2[i][4] );
+      n1[i][5] = _mm_sub_epi32( n2[i][1], n2[i][5] );
+      n1[i][6] = _mm_sub_epi32( n2[i][2], n2[i][6] );
+      n1[i][7] = _mm_sub_epi32( n2[i][3], n2[i][7] );
+
+      n2[i][0] = _mm_add_epi32( n1[i][0], n1[i][2] );
+      n2[i][1] = _mm_add_epi32( n1[i][1], n1[i][3] );
+      n2[i][2] = _mm_sub_epi32( n1[i][0], n1[i][2] );
+      n2[i][3] = _mm_sub_epi32( n1[i][1], n1[i][3] );
+      n2[i][4] = _mm_add_epi32( n1[i][4], n1[i][6] );
+      n2[i][5] = _mm_add_epi32( n1[i][5], n1[i][7] );
+      n2[i][6] = _mm_sub_epi32( n1[i][4], n1[i][6] );
+      n2[i][7] = _mm_sub_epi32( n1[i][5], n1[i][7] );
+
+      n1[i][0] = _mm_abs_epi32( _mm_add_epi32( n2[i][0], n2[i][1] ) );
+      n1[i][1] = _mm_abs_epi32( _mm_sub_epi32( n2[i][0], n2[i][1] ) );
+      n1[i][2] = _mm_abs_epi32( _mm_add_epi32( n2[i][2], n2[i][3] ) );
+      n1[i][3] = _mm_abs_epi32( _mm_sub_epi32( n2[i][2], n2[i][3] ) );
+      n1[i][4] = _mm_abs_epi32( _mm_add_epi32( n2[i][4], n2[i][5] ) );
+      n1[i][5] = _mm_abs_epi32( _mm_sub_epi32( n2[i][4], n2[i][5] ) );
+      n1[i][6] = _mm_abs_epi32( _mm_add_epi32( n2[i][6], n2[i][7] ) );
+      n1[i][7] = _mm_abs_epi32( _mm_sub_epi32( n2[i][6], n2[i][7] ) );
+    }
 
-      for( int i = 0; i < 8; i++ )
-      {
-        ma1 = _mm_unpacklo_epi16( m1[i], vzero );
-        ma2 = _mm_unpackhi_epi16( m1[i], vzero );
-        m1[i] = _mm_add_epi32( ma1, ma2 );
-      }
+    for( int i = 0; i < 8; i++ )
+    {
+      n2[0][i] = _mm_add_epi32( n1[0][i], n1[1][i] );
     }
 
-    m1[0] = _mm_add_epi32( m1[0], m1[1] );
-    m1[2] = _mm_add_epi32( m1[2], m1[3] );
-    m1[4] = _mm_add_epi32( m1[4], m1[5] );
-    m1[6] = _mm_add_epi32( m1[6], m1[7] );
+    n2[0][0] = _mm_add_epi32( n2[0][0], n2[0][1] );
+    n2[0][2] = _mm_add_epi32( n2[0][2], n2[0][3] );
+    n2[0][4] = _mm_add_epi32( n2[0][4], n2[0][5] );
+    n2[0][6] = _mm_add_epi32( n2[0][6], n2[0][7] );
 
-    m1[0] = _mm_add_epi32( m1[0], m1[2] );
-    m1[4] = _mm_add_epi32( m1[4], m1[6] );
-    iSum = _mm_add_epi32( iSum, _mm_add_epi32( m1[0], m1[4] ) );
+    n2[0][0] = _mm_add_epi32( n2[0][0], n2[0][2] );
+    n2[0][4] = _mm_add_epi32( n2[0][4], n2[0][6] );
+    iSum = _mm_add_epi32( iSum, _mm_add_epi32( n2[0][0], n2[0][4] ) );
   }
 
   iSum = _mm_hadd_epi32( iSum, iSum );
@@ -1453,7 +1198,6 @@ static uint32_t xCalcHAD8x4_SSE( const Torg *piOrg, const Tcur *piCur, const int
 }
 
 
-template< typename Torg, typename Tcur/*, bool bHorDownsampling*/ >
 static uint32_t xCalcHAD4x8_SSE( const Torg *piOrg, const Tcur *piCur, const int iStrideOrg, const int iStrideCur, const int iBitDepth )
 {
   __m128i m1[8], m2[8];
@@ -1585,182 +1329,154 @@ static uint32_t xCalcHAD4x8_SSE( const Torg *piOrg, const Tcur *piCur, const int
 }
 
 
-template< typename Torg, typename Tcur/*, bool bHorDownsampling*/ >
 static uint32_t xCalcHAD16x16_AVX2( const Torg *piOrg, const Tcur *piCur, const int iStrideOrg, const int iStrideCur, const int iBitDepth )
 {
   uint32_t sad = 0;
 
 #ifdef USE_AVX2
-  // const int iLoops = ( bHorDownsampling && HAD_DOWNSAMPLING_HOR ) ? ( 1 ) : ( 2 );
   const int iLoops = 2;
-  __m256i m1[8], m2[8];
+  __m256i m1[2][8], m2[2][8];
 
   for( int l = 0; l < iLoops; l++ )
   {
     {
       for( int k = 0; k < 8; k++ )
       {
-        __m256i r0 = ( sizeof( Torg ) > 1 ) ? ( _mm256_lddqu_si256( ( __m256i* )piOrg ) ) : ( _mm256_unpacklo_epi8( _mm256_permute4x64_epi64( _mm256_castsi128_si256( _mm_lddqu_si128( ( __m128i* )piOrg ) ), 0xD8 ), _mm256_setzero_si256() ) );
-        __m256i r1 = ( sizeof( Tcur ) > 1 ) ? ( _mm256_lddqu_si256( ( __m256i* )piCur ) ) : ( _mm256_unpacklo_epi8( _mm256_permute4x64_epi64( _mm256_castsi128_si256( _mm_lddqu_si128( ( __m128i* )piCur ) ), 0xD8 ), _mm256_setzero_si256() ) );
-        m2[k] = _mm256_sub_epi16( r0, r1 );
+        __m256i r0 = _mm256_lddqu_si256( ( __m256i* ) piOrg );
+        __m256i r1 = _mm256_lddqu_si256( ( __m256i* ) piCur );
+        m2[0][k] = _mm256_sub_epi16( r0, r1 );
+        m2[1][k] = _mm256_cvtepi16_epi32( _mm256_extracti128_si256( m2[0][k], 1 ) );
+        m2[0][k] = _mm256_cvtepi16_epi32( _mm256_castsi256_si128( m2[0][k] ) );
         piCur += iStrideCur;
         piOrg += iStrideOrg;
       }
     }
 
-    // horizontal
-
-    m1[0] = _mm256_add_epi16( m2[0], m2[4] );
-    m1[1] = _mm256_add_epi16( m2[1], m2[5] );
-    m1[2] = _mm256_add_epi16( m2[2], m2[6] );
-    m1[3] = _mm256_add_epi16( m2[3], m2[7] );
-    m1[4] = _mm256_sub_epi16( m2[0], m2[4] );
-    m1[5] = _mm256_sub_epi16( m2[1], m2[5] );
-    m1[6] = _mm256_sub_epi16( m2[2], m2[6] );
-    m1[7] = _mm256_sub_epi16( m2[3], m2[7] );
-
-    m2[0] = _mm256_add_epi16( m1[0], m1[2] );
-    m2[1] = _mm256_add_epi16( m1[1], m1[3] );
-    m2[2] = _mm256_sub_epi16( m1[0], m1[2] );
-    m2[3] = _mm256_sub_epi16( m1[1], m1[3] );
-    m2[4] = _mm256_add_epi16( m1[4], m1[6] );
-    m2[5] = _mm256_add_epi16( m1[5], m1[7] );
-    m2[6] = _mm256_sub_epi16( m1[4], m1[6] );
-    m2[7] = _mm256_sub_epi16( m1[5], m1[7] );
-
-    m1[0] = _mm256_add_epi16( m2[0], m2[1] );
-    m1[1] = _mm256_sub_epi16( m2[0], m2[1] );
-    m1[2] = _mm256_add_epi16( m2[2], m2[3] );
-    m1[3] = _mm256_sub_epi16( m2[2], m2[3] );
-    m1[4] = _mm256_add_epi16( m2[4], m2[5] );
-    m1[5] = _mm256_sub_epi16( m2[4], m2[5] );
-    m1[6] = _mm256_add_epi16( m2[6], m2[7] );
-    m1[7] = _mm256_sub_epi16( m2[6], m2[7] );
-
-    // transpose 2 8x8 blocks in parallel
-
-    m2[0] = _mm256_unpacklo_epi16( m1[0], m1[1] );
-    m2[1] = _mm256_unpacklo_epi16( m1[2], m1[3] );
-    m2[2] = _mm256_unpacklo_epi16( m1[4], m1[5] );
-    m2[3] = _mm256_unpacklo_epi16( m1[6], m1[7] );
-    m2[4] = _mm256_unpackhi_epi16( m1[0], m1[1] );
-    m2[5] = _mm256_unpackhi_epi16( m1[2], m1[3] );
-    m2[6] = _mm256_unpackhi_epi16( m1[4], m1[5] );
-    m2[7] = _mm256_unpackhi_epi16( m1[6], m1[7] );
+    constexpr int perm_unpacklo_epi128 = ( 0 << 0 ) + ( 2 << 4 );
+    constexpr int perm_unpackhi_epi128 = ( 1 << 0 ) + ( 3 << 4 );
 
-    m1[0] = _mm256_unpacklo_epi32( m2[0], m2[1] );
-    m1[1] = _mm256_unpackhi_epi32( m2[0], m2[1] );
-    m1[2] = _mm256_unpacklo_epi32( m2[2], m2[3] );
-    m1[3] = _mm256_unpackhi_epi32( m2[2], m2[3] );
-    m1[4] = _mm256_unpacklo_epi32( m2[4], m2[5] );
-    m1[5] = _mm256_unpackhi_epi32( m2[4], m2[5] );
-    m1[6] = _mm256_unpacklo_epi32( m2[6], m2[7] );
-    m1[7] = _mm256_unpackhi_epi32( m2[6], m2[7] );
-
-    m2[0] = _mm256_unpacklo_epi64( m1[0], m1[2] );
-    m2[1] = _mm256_unpackhi_epi64( m1[0], m1[2] );
-    m2[2] = _mm256_unpacklo_epi64( m1[1], m1[3] );
-    m2[3] = _mm256_unpackhi_epi64( m1[1], m1[3] );
-    m2[4] = _mm256_unpacklo_epi64( m1[4], m1[6] );
-    m2[5] = _mm256_unpackhi_epi64( m1[4], m1[6] );
-    m2[6] = _mm256_unpacklo_epi64( m1[5], m1[7] );
-    m2[7] = _mm256_unpackhi_epi64( m1[5], m1[7] );
-
-    // vertical
-    if( iBitDepth >= 10 )
+    for( int i = 0; i < 2; i++ )
     {
-      __m256i n1[8][2];
-      __m256i n2[8][2];
+      m1[i][0] = _mm256_add_epi32( m2[i][0], m2[i][4] );
+      m1[i][1] = _mm256_add_epi32( m2[i][1], m2[i][5] );
+      m1[i][2] = _mm256_add_epi32( m2[i][2], m2[i][6] );
+      m1[i][3] = _mm256_add_epi32( m2[i][3], m2[i][7] );
+      m1[i][4] = _mm256_sub_epi32( m2[i][0], m2[i][4] );
+      m1[i][5] = _mm256_sub_epi32( m2[i][1], m2[i][5] );
+      m1[i][6] = _mm256_sub_epi32( m2[i][2], m2[i][6] );
+      m1[i][7] = _mm256_sub_epi32( m2[i][3], m2[i][7] );
+
+      m2[i][0] = _mm256_add_epi32( m1[i][0], m1[i][2] );
+      m2[i][1] = _mm256_add_epi32( m1[i][1], m1[i][3] );
+      m2[i][2] = _mm256_sub_epi32( m1[i][0], m1[i][2] );
+      m2[i][3] = _mm256_sub_epi32( m1[i][1], m1[i][3] );
+      m2[i][4] = _mm256_add_epi32( m1[i][4], m1[i][6] );
+      m2[i][5] = _mm256_add_epi32( m1[i][5], m1[i][7] );
+      m2[i][6] = _mm256_sub_epi32( m1[i][4], m1[i][6] );
+      m2[i][7] = _mm256_sub_epi32( m1[i][5], m1[i][7] );
+
+      m1[i][0] = _mm256_add_epi32( m2[i][0], m2[i][1] );
+      m1[i][1] = _mm256_sub_epi32( m2[i][0], m2[i][1] );
+      m1[i][2] = _mm256_add_epi32( m2[i][2], m2[i][3] );
+      m1[i][3] = _mm256_sub_epi32( m2[i][2], m2[i][3] );
+      m1[i][4] = _mm256_add_epi32( m2[i][4], m2[i][5] );
+      m1[i][5] = _mm256_sub_epi32( m2[i][4], m2[i][5] );
+      m1[i][6] = _mm256_add_epi32( m2[i][6], m2[i][7] );
+      m1[i][7] = _mm256_sub_epi32( m2[i][6], m2[i][7] );
+
+      // transpose
+      // 8x8
+      m2[i][0] = _mm256_unpacklo_epi32( m1[i][0], m1[i][1] );
+      m2[i][1] = _mm256_unpacklo_epi32( m1[i][2], m1[i][3] );
+      m2[i][2] = _mm256_unpacklo_epi32( m1[i][4], m1[i][5] );
+      m2[i][3] = _mm256_unpacklo_epi32( m1[i][6], m1[i][7] );
+      m2[i][4] = _mm256_unpackhi_epi32( m1[i][0], m1[i][1] );
+      m2[i][5] = _mm256_unpackhi_epi32( m1[i][2], m1[i][3] );
+      m2[i][6] = _mm256_unpackhi_epi32( m1[i][4], m1[i][5] );
+      m2[i][7] = _mm256_unpackhi_epi32( m1[i][6], m1[i][7] );
+
+      m1[i][0] = _mm256_unpacklo_epi64( m2[i][0], m2[i][1] );
+      m1[i][1] = _mm256_unpackhi_epi64( m2[i][0], m2[i][1] );
+      m1[i][2] = _mm256_unpacklo_epi64( m2[i][2], m2[i][3] );
+      m1[i][3] = _mm256_unpackhi_epi64( m2[i][2], m2[i][3] );
+      m1[i][4] = _mm256_unpacklo_epi64( m2[i][4], m2[i][5] );
+      m1[i][5] = _mm256_unpackhi_epi64( m2[i][4], m2[i][5] );
+      m1[i][6] = _mm256_unpacklo_epi64( m2[i][6], m2[i][7] );
+      m1[i][7] = _mm256_unpackhi_epi64( m2[i][6], m2[i][7] );
+
+      m2[i][0] = _mm256_permute2x128_si256( m1[i][0], m1[i][2], perm_unpacklo_epi128 );
+      m2[i][1] = _mm256_permute2x128_si256( m1[i][0], m1[i][2], perm_unpackhi_epi128 );
+      m2[i][2] = _mm256_permute2x128_si256( m1[i][1], m1[i][3], perm_unpacklo_epi128 );
+      m2[i][3] = _mm256_permute2x128_si256( m1[i][1], m1[i][3], perm_unpackhi_epi128 );
+      m2[i][4] = _mm256_permute2x128_si256( m1[i][4], m1[i][6], perm_unpacklo_epi128 );
+      m2[i][5] = _mm256_permute2x128_si256( m1[i][4], m1[i][6], perm_unpackhi_epi128 );
+      m2[i][6] = _mm256_permute2x128_si256( m1[i][5], m1[i][7], perm_unpacklo_epi128 );
+      m2[i][7] = _mm256_permute2x128_si256( m1[i][5], m1[i][7], perm_unpackhi_epi128 );
+    }
 
-      for( int i = 0; i < 8; i++ )
-      {
-        n2[i][0] = _mm256_cvtepi16_epi32( _mm256_castsi256_si128( _mm256_permute4x64_epi64( m2[i], 0xD8 ) ) );
-        n2[i][1] = _mm256_cvtepi16_epi32( _mm256_castsi256_si128( _mm256_permute4x64_epi64( m2[i], 0x8D ) ) );
-      }
+    m1[0][0] = _mm256_permute2x128_si256( m2[0][0], m2[1][0], perm_unpacklo_epi128 );
+    m1[0][1] = _mm256_permute2x128_si256( m2[0][1], m2[1][1], perm_unpacklo_epi128 );
+    m1[0][2] = _mm256_permute2x128_si256( m2[0][2], m2[1][2], perm_unpacklo_epi128 );
+    m1[0][3] = _mm256_permute2x128_si256( m2[0][3], m2[1][3], perm_unpacklo_epi128 );
+    m1[0][4] = _mm256_permute2x128_si256( m2[0][4], m2[1][4], perm_unpacklo_epi128 );
+    m1[0][5] = _mm256_permute2x128_si256( m2[0][5], m2[1][5], perm_unpacklo_epi128 );
+    m1[0][6] = _mm256_permute2x128_si256( m2[0][6], m2[1][6], perm_unpacklo_epi128 );
+    m1[0][7] = _mm256_permute2x128_si256( m2[0][7], m2[1][7], perm_unpacklo_epi128 );
+
+    m1[1][0] = _mm256_permute2x128_si256( m2[0][0], m2[1][0], perm_unpackhi_epi128 );
+    m1[1][1] = _mm256_permute2x128_si256( m2[0][1], m2[1][1], perm_unpackhi_epi128 );
+    m1[1][2] = _mm256_permute2x128_si256( m2[0][2], m2[1][2], perm_unpackhi_epi128 );
+    m1[1][3] = _mm256_permute2x128_si256( m2[0][3], m2[1][3], perm_unpackhi_epi128 );
+    m1[1][4] = _mm256_permute2x128_si256( m2[0][4], m2[1][4], perm_unpackhi_epi128 );
+    m1[1][5] = _mm256_permute2x128_si256( m2[0][5], m2[1][5], perm_unpackhi_epi128 );
+    m1[1][6] = _mm256_permute2x128_si256( m2[0][6], m2[1][6], perm_unpackhi_epi128 );
+    m1[1][7] = _mm256_permute2x128_si256( m2[0][7], m2[1][7], perm_unpackhi_epi128 );
 
-      for( int i = 0; i < 2; i++ )
-      {
-        n1[0][i] = _mm256_add_epi32( n2[0][i], n2[4][i] );
-        n1[1][i] = _mm256_add_epi32( n2[1][i], n2[5][i] );
-        n1[2][i] = _mm256_add_epi32( n2[2][i], n2[6][i] );
-        n1[3][i] = _mm256_add_epi32( n2[3][i], n2[7][i] );
-        n1[4][i] = _mm256_sub_epi32( n2[0][i], n2[4][i] );
-        n1[5][i] = _mm256_sub_epi32( n2[1][i], n2[5][i] );
-        n1[6][i] = _mm256_sub_epi32( n2[2][i], n2[6][i] );
-        n1[7][i] = _mm256_sub_epi32( n2[3][i], n2[7][i] );
-
-        n2[0][i] = _mm256_add_epi32( n1[0][i], n1[2][i] );
-        n2[1][i] = _mm256_add_epi32( n1[1][i], n1[3][i] );
-        n2[2][i] = _mm256_sub_epi32( n1[0][i], n1[2][i] );
-        n2[3][i] = _mm256_sub_epi32( n1[1][i], n1[3][i] );
-        n2[4][i] = _mm256_add_epi32( n1[4][i], n1[6][i] );
-        n2[5][i] = _mm256_add_epi32( n1[5][i], n1[7][i] );
-        n2[6][i] = _mm256_sub_epi32( n1[4][i], n1[6][i] );
-        n2[7][i] = _mm256_sub_epi32( n1[5][i], n1[7][i] );
-
-        n1[0][i] = _mm256_abs_epi32( _mm256_add_epi32( n2[0][i], n2[1][i] ) );
-        n1[1][i] = _mm256_abs_epi32( _mm256_sub_epi32( n2[0][i], n2[1][i] ) );
-        n1[2][i] = _mm256_abs_epi32( _mm256_add_epi32( n2[2][i], n2[3][i] ) );
-        n1[3][i] = _mm256_abs_epi32( _mm256_sub_epi32( n2[2][i], n2[3][i] ) );
-        n1[4][i] = _mm256_abs_epi32( _mm256_add_epi32( n2[4][i], n2[5][i] ) );
-        n1[5][i] = _mm256_abs_epi32( _mm256_sub_epi32( n2[4][i], n2[5][i] ) );
-        n1[6][i] = _mm256_abs_epi32( _mm256_add_epi32( n2[6][i], n2[7][i] ) );
-        n1[7][i] = _mm256_abs_epi32( _mm256_sub_epi32( n2[6][i], n2[7][i] ) );
-      }
-      for( int i = 0; i < 8; i++ )
-      {
-        m1[i] = _mm256_add_epi32( n1[i][0], n1[i][1] );
-      }
-    }
-    else
+    for( int i = 0; i < 2; i++ )
     {
-      m1[0] = _mm256_add_epi16( m2[0], m2[4] );
-      m1[1] = _mm256_add_epi16( m2[1], m2[5] );
-      m1[2] = _mm256_add_epi16( m2[2], m2[6] );
-      m1[3] = _mm256_add_epi16( m2[3], m2[7] );
-      m1[4] = _mm256_sub_epi16( m2[0], m2[4] );
-      m1[5] = _mm256_sub_epi16( m2[1], m2[5] );
-      m1[6] = _mm256_sub_epi16( m2[2], m2[6] );
-      m1[7] = _mm256_sub_epi16( m2[3], m2[7] );
-
-      m2[0] = _mm256_add_epi16( m1[0], m1[2] );
-      m2[1] = _mm256_add_epi16( m1[1], m1[3] );
-      m2[2] = _mm256_sub_epi16( m1[0], m1[2] );
-      m2[3] = _mm256_sub_epi16( m1[1], m1[3] );
-      m2[4] = _mm256_add_epi16( m1[4], m1[6] );
-      m2[5] = _mm256_add_epi16( m1[5], m1[7] );
-      m2[6] = _mm256_sub_epi16( m1[4], m1[6] );
-      m2[7] = _mm256_sub_epi16( m1[5], m1[7] );
-
-      m1[0] = _mm256_abs_epi16( _mm256_add_epi16( m2[0], m2[1] ) );
-      m1[1] = _mm256_abs_epi16( _mm256_sub_epi16( m2[0], m2[1] ) );
-      m1[2] = _mm256_abs_epi16( _mm256_add_epi16( m2[2], m2[3] ) );
-      m1[3] = _mm256_abs_epi16( _mm256_sub_epi16( m2[2], m2[3] ) );
-      m1[4] = _mm256_abs_epi16( _mm256_add_epi16( m2[4], m2[5] ) );
-      m1[5] = _mm256_abs_epi16( _mm256_sub_epi16( m2[4], m2[5] ) );
-      m1[6] = _mm256_abs_epi16( _mm256_add_epi16( m2[6], m2[7] ) );
-      m1[7] = _mm256_abs_epi16( _mm256_sub_epi16( m2[6], m2[7] ) );
-
-      __m256i ma1, ma2;
-      __m256i vzero = _mm256_setzero_si256();
+      m2[i][0] = _mm256_add_epi32( m1[i][0], m1[i][4] );
+      m2[i][1] = _mm256_add_epi32( m1[i][1], m1[i][5] );
+      m2[i][2] = _mm256_add_epi32( m1[i][2], m1[i][6] );
+      m2[i][3] = _mm256_add_epi32( m1[i][3], m1[i][7] );
+      m2[i][4] = _mm256_sub_epi32( m1[i][0], m1[i][4] );
+      m2[i][5] = _mm256_sub_epi32( m1[i][1], m1[i][5] );
+      m2[i][6] = _mm256_sub_epi32( m1[i][2], m1[i][6] );
+      m2[i][7] = _mm256_sub_epi32( m1[i][3], m1[i][7] );
+
+      m1[i][0] = _mm256_add_epi32( m2[i][0], m2[i][2] );
+      m1[i][1] = _mm256_add_epi32( m2[i][1], m2[i][3] );
+      m1[i][2] = _mm256_sub_epi32( m2[i][0], m2[i][2] );
+      m1[i][3] = _mm256_sub_epi32( m2[i][1], m2[i][3] );
+      m1[i][4] = _mm256_add_epi32( m2[i][4], m2[i][6] );
+      m1[i][5] = _mm256_add_epi32( m2[i][5], m2[i][7] );
+      m1[i][6] = _mm256_sub_epi32( m2[i][4], m2[i][6] );
+      m1[i][7] = _mm256_sub_epi32( m2[i][5], m2[i][7] );
+
+      m2[i][0] = _mm256_abs_epi32( _mm256_add_epi32( m1[i][0], m1[i][1] ) );
+      m2[i][1] = _mm256_abs_epi32( _mm256_sub_epi32( m1[i][0], m1[i][1] ) );
+      m2[i][2] = _mm256_abs_epi32( _mm256_add_epi32( m1[i][2], m1[i][3] ) );
+      m2[i][3] = _mm256_abs_epi32( _mm256_sub_epi32( m1[i][2], m1[i][3] ) );
+      m2[i][4] = _mm256_abs_epi32( _mm256_add_epi32( m1[i][4], m1[i][5] ) );
+      m2[i][5] = _mm256_abs_epi32( _mm256_sub_epi32( m1[i][4], m1[i][5] ) );
+      m2[i][6] = _mm256_abs_epi32( _mm256_add_epi32( m1[i][6], m1[i][7] ) );
+      m2[i][7] = _mm256_abs_epi32( _mm256_sub_epi32( m1[i][6], m1[i][7] ) );
+    }
 
-      for( int i = 0; i < 8; i++ )
-      {
-        ma1 = _mm256_unpacklo_epi16( m1[i], vzero );
-        ma2 = _mm256_unpackhi_epi16( m1[i], vzero );
-        m1[i] = _mm256_add_epi32( ma1, ma2 );
-      }
+    for( int i = 0; i < 8; i++ )
+    {
+      m1[0][i] = _mm256_add_epi32( m2[0][i], m2[1][i] );
     }
 
-    m1[0] = _mm256_add_epi32( m1[0], m1[1] );
-    m1[2] = _mm256_add_epi32( m1[2], m1[3] );
-    m1[4] = _mm256_add_epi32( m1[4], m1[5] );
-    m1[6] = _mm256_add_epi32( m1[6], m1[7] );
+    m1[0][0] = _mm256_add_epi32( m1[0][0], m1[0][1] );
+    m1[0][2] = _mm256_add_epi32( m1[0][2], m1[0][3] );
+    m1[0][4] = _mm256_add_epi32( m1[0][4], m1[0][5] );
+    m1[0][6] = _mm256_add_epi32( m1[0][6], m1[0][7] );
 
-    m1[0] = _mm256_add_epi32( m1[0], m1[2] );
-    m1[4] = _mm256_add_epi32( m1[4], m1[6] );
+    m1[0][0] = _mm256_add_epi32( m1[0][0], m1[0][2] );
+    m1[0][4] = _mm256_add_epi32( m1[0][4], m1[0][6] );
+
+    __m256i iSum = _mm256_add_epi32( m1[0][0], m1[0][4] );
 
-    __m256i iSum = _mm256_add_epi32( m1[0], m1[4] );
     iSum = _mm256_hadd_epi32( iSum, iSum );
     iSum = _mm256_hadd_epi32( iSum, iSum );
 
@@ -1778,103 +1494,143 @@ static uint32_t xCalcHAD16x16_AVX2( const Torg *piOrg, const Tcur *piCur, const
   return ( sad );
 }
 
-template< typename Torg, typename Tcur/*, bool bHorDownsampling*/ >
 static uint32_t xCalcHAD16x8_AVX2( const Torg *piOrg, const Tcur *piCur, const int iStrideOrg, const int iStrideCur, const int iBitDepth )
 {
   uint32_t sad = 0;
 
 #ifdef USE_AVX2
-  // const int iLoops = ( bHorDownsampling && HAD_DOWNSAMPLING_HOR ) ? ( 1 ) : ( 2 );
-  //const int iLoops = 1;
   __m256i m1[16], m2[16];
-  __m256i vzero = _mm256_setzero_si256();
 
-
-  //for( int l = 0; l < iLoops; l++ )
   {
     {
       for( int k = 0; k < 8; k++ )
       {
-        __m256i r0 = (sizeof( Torg ) > 1) ? (_mm256_lddqu_si256( (__m256i*)piOrg )) : (_mm256_unpacklo_epi8( _mm256_castsi128_si256( _mm_lddqu_si128( (__m128i*)piOrg ) ), vzero ));
-        __m256i r1 = (sizeof( Tcur ) > 1) ? (_mm256_lddqu_si256( (__m256i*)piCur )) : (_mm256_unpacklo_epi8( _mm256_castsi128_si256( _mm_lddqu_si128( (__m128i*)piCur ) ), vzero ));
-        m1[k] = _mm256_sub_epi16( r0, r1 );
+        __m256i r0 = _mm256_lddqu_si256( (__m256i*)piOrg );
+        __m256i r1 = _mm256_lddqu_si256( (__m256i*)piCur );
+        m1[k]   = _mm256_sub_epi16( r0, r1 );
+        m1[k+8] = _mm256_cvtepi16_epi32( _mm256_extracti128_si256( m1[k], 1 ) );
+        m1[k]   = _mm256_cvtepi16_epi32( _mm256_castsi256_si128  ( m1[k]    ) );
         piCur += iStrideCur;
         piOrg += iStrideOrg;
       }
     }
 
-    // vertical
-    m2[0] = _mm256_add_epi16( m1[0], m1[4] );
-    m2[1] = _mm256_add_epi16( m1[1], m1[5] );
-    m2[2] = _mm256_add_epi16( m1[2], m1[6] );
-    m2[3] = _mm256_add_epi16( m1[3], m1[7] );
-    m2[4] = _mm256_sub_epi16( m1[0], m1[4] );
-    m2[5] = _mm256_sub_epi16( m1[1], m1[5] );
-    m2[6] = _mm256_sub_epi16( m1[2], m1[6] );
-    m2[7] = _mm256_sub_epi16( m1[3], m1[7] );
-
-    m1[0] = _mm256_add_epi16( m2[0], m2[2] );
-    m1[1] = _mm256_add_epi16( m2[1], m2[3] );
-    m1[2] = _mm256_sub_epi16( m2[0], m2[2] );
-    m1[3] = _mm256_sub_epi16( m2[1], m2[3] );
-    m1[4] = _mm256_add_epi16( m2[4], m2[6] );
-    m1[5] = _mm256_add_epi16( m2[5], m2[7] );
-    m1[6] = _mm256_sub_epi16( m2[4], m2[6] );
-    m1[7] = _mm256_sub_epi16( m2[5], m2[7] );
-
-    m2[0] = _mm256_add_epi16( m1[0], m1[1] );
-    m2[1] = _mm256_sub_epi16( m1[0], m1[1] );
-    m2[2] = _mm256_add_epi16( m1[2], m1[3] );
-    m2[3] = _mm256_sub_epi16( m1[2], m1[3] );
-    m2[4] = _mm256_add_epi16( m1[4], m1[5] );
-    m2[5] = _mm256_sub_epi16( m1[4], m1[5] );
-    m2[6] = _mm256_add_epi16( m1[6], m1[7] );
-    m2[7] = _mm256_sub_epi16( m1[6], m1[7] );
+    // vertical, first 8x8
+    m2[0] = _mm256_add_epi32( m1[0], m1[4] );
+    m2[1] = _mm256_add_epi32( m1[1], m1[5] );
+    m2[2] = _mm256_add_epi32( m1[2], m1[6] );
+    m2[3] = _mm256_add_epi32( m1[3], m1[7] );
+    m2[4] = _mm256_sub_epi32( m1[0], m1[4] );
+    m2[5] = _mm256_sub_epi32( m1[1], m1[5] );
+    m2[6] = _mm256_sub_epi32( m1[2], m1[6] );
+    m2[7] = _mm256_sub_epi32( m1[3], m1[7] );
+
+    m1[0] = _mm256_add_epi32( m2[0], m2[2] );
+    m1[1] = _mm256_add_epi32( m2[1], m2[3] );
+    m1[2] = _mm256_sub_epi32( m2[0], m2[2] );
+    m1[3] = _mm256_sub_epi32( m2[1], m2[3] );
+    m1[4] = _mm256_add_epi32( m2[4], m2[6] );
+    m1[5] = _mm256_add_epi32( m2[5], m2[7] );
+    m1[6] = _mm256_sub_epi32( m2[4], m2[6] );
+    m1[7] = _mm256_sub_epi32( m2[5], m2[7] );
+
+    m2[0] = _mm256_add_epi32( m1[0], m1[1] );
+    m2[1] = _mm256_sub_epi32( m1[0], m1[1] );
+    m2[2] = _mm256_add_epi32( m1[2], m1[3] );
+    m2[3] = _mm256_sub_epi32( m1[2], m1[3] );
+    m2[4] = _mm256_add_epi32( m1[4], m1[5] );
+    m2[5] = _mm256_sub_epi32( m1[4], m1[5] );
+    m2[6] = _mm256_add_epi32( m1[6], m1[7] );
+    m2[7] = _mm256_sub_epi32( m1[6], m1[7] );
+
+    // vertical, second 8x8
+    m2[8+0] = _mm256_add_epi32( m1[8+0], m1[8+4] );
+    m2[8+1] = _mm256_add_epi32( m1[8+1], m1[8+5] );
+    m2[8+2] = _mm256_add_epi32( m1[8+2], m1[8+6] );
+    m2[8+3] = _mm256_add_epi32( m1[8+3], m1[8+7] );
+    m2[8+4] = _mm256_sub_epi32( m1[8+0], m1[8+4] );
+    m2[8+5] = _mm256_sub_epi32( m1[8+1], m1[8+5] );
+    m2[8+6] = _mm256_sub_epi32( m1[8+2], m1[8+6] );
+    m2[8+7] = _mm256_sub_epi32( m1[8+3], m1[8+7] );
+
+    m1[8+0] = _mm256_add_epi32( m2[8+0], m2[8+2] );
+    m1[8+1] = _mm256_add_epi32( m2[8+1], m2[8+3] );
+    m1[8+2] = _mm256_sub_epi32( m2[8+0], m2[8+2] );
+    m1[8+3] = _mm256_sub_epi32( m2[8+1], m2[8+3] );
+    m1[8+4] = _mm256_add_epi32( m2[8+4], m2[8+6] );
+    m1[8+5] = _mm256_add_epi32( m2[8+5], m2[8+7] );
+    m1[8+6] = _mm256_sub_epi32( m2[8+4], m2[8+6] );
+    m1[8+7] = _mm256_sub_epi32( m2[8+5], m2[8+7] );
+
+    m2[8+0] = _mm256_add_epi32( m1[8+0], m1[8+1] );
+    m2[8+1] = _mm256_sub_epi32( m1[8+0], m1[8+1] );
+    m2[8+2] = _mm256_add_epi32( m1[8+2], m1[8+3] );
+    m2[8+3] = _mm256_sub_epi32( m1[8+2], m1[8+3] );
+    m2[8+4] = _mm256_add_epi32( m1[8+4], m1[8+5] );
+    m2[8+5] = _mm256_sub_epi32( m1[8+4], m1[8+5] );
+    m2[8+6] = _mm256_add_epi32( m1[8+6], m1[8+7] );
+    m2[8+7] = _mm256_sub_epi32( m1[8+6], m1[8+7] );
 
     // transpose
-    m1[0] = _mm256_unpacklo_epi16( m2[0], m2[1] );
-    m1[1] = _mm256_unpacklo_epi16( m2[2], m2[3] );
-    m1[2] = _mm256_unpacklo_epi16( m2[4], m2[5] );
-    m1[3] = _mm256_unpacklo_epi16( m2[6], m2[7] );
-    m1[4] = _mm256_unpackhi_epi16( m2[0], m2[1] );
-    m1[5] = _mm256_unpackhi_epi16( m2[2], m2[3] );
-    m1[6] = _mm256_unpackhi_epi16( m2[4], m2[5] );
-    m1[7] = _mm256_unpackhi_epi16( m2[6], m2[7] );
+    constexpr int perm_unpacklo_epi128 = ( 0 << 0 ) + ( 2 << 4 );
+    constexpr int perm_unpackhi_epi128 = ( 1 << 0 ) + ( 3 << 4 );
 
-    m2[0] = _mm256_unpacklo_epi32( m1[0], m1[1] );
-    m2[1] = _mm256_unpackhi_epi32( m1[0], m1[1] );
-    m2[2] = _mm256_unpacklo_epi32( m1[2], m1[3] );
-    m2[3] = _mm256_unpackhi_epi32( m1[2], m1[3] );
-    m2[4] = _mm256_unpacklo_epi32( m1[4], m1[5] );
-    m2[5] = _mm256_unpackhi_epi32( m1[4], m1[5] );
-    m2[6] = _mm256_unpacklo_epi32( m1[6], m1[7] );
-    m2[7] = _mm256_unpackhi_epi32( m1[6], m1[7] );
+    m1[0] = _mm256_unpacklo_epi32( m2[0], m2[1] );
+    m1[1] = _mm256_unpacklo_epi32( m2[2], m2[3] );
+    m1[2] = _mm256_unpacklo_epi32( m2[4], m2[5] );
+    m1[3] = _mm256_unpacklo_epi32( m2[6], m2[7] );
+    m1[4] = _mm256_unpackhi_epi32( m2[0], m2[1] );
+    m1[5] = _mm256_unpackhi_epi32( m2[2], m2[3] );
+    m1[6] = _mm256_unpackhi_epi32( m2[4], m2[5] );
+    m1[7] = _mm256_unpackhi_epi32( m2[6], m2[7] );
 
-    m1[0] = _mm256_unpacklo_epi64( m2[0], m2[2] );
-    m1[1] = _mm256_unpackhi_epi64( m2[0], m2[2] );
-    m1[2] = _mm256_unpacklo_epi64( m2[1], m2[3] );
-    m1[3] = _mm256_unpackhi_epi64( m2[1], m2[3] );
-    m1[4] = _mm256_unpacklo_epi64( m2[4], m2[6] );
-    m1[5] = _mm256_unpackhi_epi64( m2[4], m2[6] );
-    m1[6] = _mm256_unpacklo_epi64( m2[5], m2[7] );
-    m1[7] = _mm256_unpackhi_epi64( m2[5], m2[7] );
-    m1[8] = _mm256_permute2x128_si256( m1[0], vzero, 0x31 );
-    m1[9] = _mm256_permute2x128_si256( m1[1], vzero, 0x31 );
-    m1[10] = _mm256_permute2x128_si256( m1[2], vzero, 0x31 );
-    m1[11] = _mm256_permute2x128_si256( m1[3], vzero, 0x31 );
-    m1[12] = _mm256_permute2x128_si256( m1[4], vzero, 0x31 );
-    m1[13] = _mm256_permute2x128_si256( m1[5], vzero, 0x31 );
-    m1[14] = _mm256_permute2x128_si256( m1[6], vzero, 0x31 );
-    m1[15] = _mm256_permute2x128_si256( m1[7], vzero, 0x31 );
+    m2[0] = _mm256_unpacklo_epi64( m1[0], m1[1] );
+    m2[1] = _mm256_unpackhi_epi64( m1[0], m1[1] );
+    m2[2] = _mm256_unpacklo_epi64( m1[2], m1[3] );
+    m2[3] = _mm256_unpackhi_epi64( m1[2], m1[3] );
+    m2[4] = _mm256_unpacklo_epi64( m1[4], m1[5] );
+    m2[5] = _mm256_unpackhi_epi64( m1[4], m1[5] );
+    m2[6] = _mm256_unpacklo_epi64( m1[6], m1[7] );
+    m2[7] = _mm256_unpackhi_epi64( m1[6], m1[7] );
+
+    m1[0] = _mm256_permute2x128_si256( m2[0], m2[2], perm_unpacklo_epi128 );
+    m1[1] = _mm256_permute2x128_si256( m2[0], m2[2], perm_unpackhi_epi128 );
+    m1[2] = _mm256_permute2x128_si256( m2[1], m2[3], perm_unpacklo_epi128 );
+    m1[3] = _mm256_permute2x128_si256( m2[1], m2[3], perm_unpackhi_epi128 );
+    m1[4] = _mm256_permute2x128_si256( m2[4], m2[6], perm_unpacklo_epi128 );
+    m1[5] = _mm256_permute2x128_si256( m2[4], m2[6], perm_unpackhi_epi128 );
+    m1[6] = _mm256_permute2x128_si256( m2[5], m2[7], perm_unpacklo_epi128 );
+    m1[7] = _mm256_permute2x128_si256( m2[5], m2[7], perm_unpackhi_epi128 );
+
+    m1[8+0] = _mm256_unpacklo_epi32( m2[8+0], m2[8+1] );
+    m1[8+1] = _mm256_unpacklo_epi32( m2[8+2], m2[8+3] );
+    m1[8+2] = _mm256_unpacklo_epi32( m2[8+4], m2[8+5] );
+    m1[8+3] = _mm256_unpacklo_epi32( m2[8+6], m2[8+7] );
+    m1[8+4] = _mm256_unpackhi_epi32( m2[8+0], m2[8+1] );
+    m1[8+5] = _mm256_unpackhi_epi32( m2[8+2], m2[8+3] );
+    m1[8+6] = _mm256_unpackhi_epi32( m2[8+4], m2[8+5] );
+    m1[8+7] = _mm256_unpackhi_epi32( m2[8+6], m2[8+7] );
+
+    m2[8+0] = _mm256_unpacklo_epi64( m1[8+0], m1[8+1] );
+    m2[8+1] = _mm256_unpackhi_epi64( m1[8+0], m1[8+1] );
+    m2[8+2] = _mm256_unpacklo_epi64( m1[8+2], m1[8+3] );
+    m2[8+3] = _mm256_unpackhi_epi64( m1[8+2], m1[8+3] );
+    m2[8+4] = _mm256_unpacklo_epi64( m1[8+4], m1[8+5] );
+    m2[8+5] = _mm256_unpackhi_epi64( m1[8+4], m1[8+5] );
+    m2[8+6] = _mm256_unpacklo_epi64( m1[8+6], m1[8+7] );
+    m2[8+7] = _mm256_unpackhi_epi64( m1[8+6], m1[8+7] );
+
+    m1[8+0] = _mm256_permute2x128_si256( m2[8+0], m2[8+2], perm_unpacklo_epi128 );
+    m1[8+1] = _mm256_permute2x128_si256( m2[8+0], m2[8+2], perm_unpackhi_epi128 );
+    m1[8+2] = _mm256_permute2x128_si256( m2[8+1], m2[8+3], perm_unpacklo_epi128 );
+    m1[8+3] = _mm256_permute2x128_si256( m2[8+1], m2[8+3], perm_unpackhi_epi128 );
+    m1[8+4] = _mm256_permute2x128_si256( m2[8+4], m2[8+6], perm_unpacklo_epi128 );
+    m1[8+5] = _mm256_permute2x128_si256( m2[8+4], m2[8+6], perm_unpackhi_epi128 );
+    m1[8+6] = _mm256_permute2x128_si256( m2[8+5], m2[8+7], perm_unpacklo_epi128 );
+    m1[8+7] = _mm256_permute2x128_si256( m2[8+5], m2[8+7], perm_unpackhi_epi128 );
 
     // horizontal
     {
-      // extend to 32bit
-      for( int i = 0; i < 16; i++ )
-      {
-        m1[i] = _mm256_cvtepi16_epi32( _mm256_castsi256_si128( m1[i] ) );
-      }
-
       m2[ 0] = _mm256_add_epi32( m1[0], m1[ 8] );
       m2[ 1] = _mm256_add_epi32( m1[1], m1[ 9] );
       m2[ 2] = _mm256_add_epi32( m1[2], m1[10] );
@@ -1978,25 +1734,20 @@ static uint32_t xCalcHAD16x8_AVX2( const Torg *piOrg, const Tcur *piCur, const i
 }
 
 
-template< typename Torg, typename Tcur/*, bool bHorDownsampling*/ >
-static uint32_t xCalcHAD8x16_AVX2( const Torg *piOrg, const Tcur *piCur, const int iStrideOrg, const int iStrideCur, const int iBitDepth )
+static uint32_t xCalcHAD8x16_AVX2( const Pel* piOrg, const Pel* piCur, const int iStrideOrg, const int iStrideCur, const int iBitDepth )
 {
   uint32_t sad = 0;
 
 #ifdef USE_AVX2
-  // const int iLoops = ( bHorDownsampling && HAD_DOWNSAMPLING_HOR ) ? ( 1 ) : ( 2 );
-  //const int iLoops = 1;
   __m256i m1[16], m2[16];
-  __m256i vzero = _mm256_setzero_si256();
 
-  //for( int l = 0; l < iLoops; l++ )
   {
     {
       for( int k = 0; k < 16; k++ )
       {
-        __m256i r0 = (sizeof( Torg ) > 1) ? ( _mm256_castsi128_si256( _mm_lddqu_si128( (__m128i*)piOrg ) ) ): (_mm256_unpacklo_epi8( _mm256_castsi128_si256( _mm_loadl_epi64( (__m128i*)piOrg ) ), vzero ));
-        __m256i r1 = (sizeof( Tcur ) > 1) ? ( _mm256_castsi128_si256( _mm_lddqu_si128( (__m128i*)piCur ) ) ): (_mm256_unpacklo_epi8( _mm256_castsi128_si256( _mm_loadl_epi64( (__m128i*)piCur ) ), vzero ));
-        m1[k] = _mm256_sub_epi16( r0, r1 );
+        __m256i r0 = _mm256_cvtepi16_epi32( _mm_lddqu_si128( (__m128i*)piOrg ) );
+        __m256i r1 = _mm256_cvtepi16_epi32( _mm_lddqu_si128( (__m128i*)piCur ) );
+        m1[k] = _mm256_sub_epi32( r0, r1 );
         piCur += iStrideCur;
         piOrg += iStrideOrg;
       }
@@ -2004,261 +1755,198 @@ static uint32_t xCalcHAD8x16_AVX2( const Torg *piOrg, const Tcur *piCur, const i
 
     // vertical
 
-    m2[ 0] = _mm256_add_epi16( m1[0], m1[ 8] );
-    m2[ 1] = _mm256_add_epi16( m1[1], m1[ 9] );
-    m2[ 2] = _mm256_add_epi16( m1[2], m1[10] );
-    m2[ 3] = _mm256_add_epi16( m1[3], m1[11] );
-    m2[ 4] = _mm256_add_epi16( m1[4], m1[12] );
-    m2[ 5] = _mm256_add_epi16( m1[5], m1[13] );
-    m2[ 6] = _mm256_add_epi16( m1[6], m1[14] );
-    m2[ 7] = _mm256_add_epi16( m1[7], m1[15] );
-    m2[ 8] = _mm256_sub_epi16( m1[0], m1[ 8] );
-    m2[ 9] = _mm256_sub_epi16( m1[1], m1[ 9] );
-    m2[10] = _mm256_sub_epi16( m1[2], m1[10] );
-    m2[11] = _mm256_sub_epi16( m1[3], m1[11] );
-    m2[12] = _mm256_sub_epi16( m1[4], m1[12] );
-    m2[13] = _mm256_sub_epi16( m1[5], m1[13] );
-    m2[14] = _mm256_sub_epi16( m1[6], m1[14] );
-    m2[15] = _mm256_sub_epi16( m1[7], m1[15] );
-
-    m1[ 0] = _mm256_add_epi16( m2[ 0], m2[ 4] );
-    m1[ 1] = _mm256_add_epi16( m2[ 1], m2[ 5] );
-    m1[ 2] = _mm256_add_epi16( m2[ 2], m2[ 6] );
-    m1[ 3] = _mm256_add_epi16( m2[ 3], m2[ 7] );
-    m1[ 4] = _mm256_sub_epi16( m2[ 0], m2[ 4] );
-    m1[ 5] = _mm256_sub_epi16( m2[ 1], m2[ 5] );
-    m1[ 6] = _mm256_sub_epi16( m2[ 2], m2[ 6] );
-    m1[ 7] = _mm256_sub_epi16( m2[ 3], m2[ 7] );
-    m1[ 8] = _mm256_add_epi16( m2[ 8], m2[12] );
-    m1[ 9] = _mm256_add_epi16( m2[ 9], m2[13] );
-    m1[10] = _mm256_add_epi16( m2[10], m2[14] );
-    m1[11] = _mm256_add_epi16( m2[11], m2[15] );
-    m1[12] = _mm256_sub_epi16( m2[ 8], m2[12] );
-    m1[13] = _mm256_sub_epi16( m2[ 9], m2[13] );
-    m1[14] = _mm256_sub_epi16( m2[10], m2[14] );
-    m1[15] = _mm256_sub_epi16( m2[11], m2[15] );
-
-    m2[ 0] = _mm256_add_epi16( m1[ 0], m1[ 2] );
-    m2[ 1] = _mm256_add_epi16( m1[ 1], m1[ 3] );
-    m2[ 2] = _mm256_sub_epi16( m1[ 0], m1[ 2] );
-    m2[ 3] = _mm256_sub_epi16( m1[ 1], m1[ 3] );
-    m2[ 4] = _mm256_add_epi16( m1[ 4], m1[ 6] );
-    m2[ 5] = _mm256_add_epi16( m1[ 5], m1[ 7] );
-    m2[ 6] = _mm256_sub_epi16( m1[ 4], m1[ 6] );
-    m2[ 7] = _mm256_sub_epi16( m1[ 5], m1[ 7] );
-    m2[ 8] = _mm256_add_epi16( m1[ 8], m1[10] );
-    m2[ 9] = _mm256_add_epi16( m1[ 9], m1[11] );
-    m2[10] = _mm256_sub_epi16( m1[ 8], m1[10] );
-    m2[11] = _mm256_sub_epi16( m1[ 9], m1[11] );
-    m2[12] = _mm256_add_epi16( m1[12], m1[14] );
-    m2[13] = _mm256_add_epi16( m1[13], m1[15] );
-    m2[14] = _mm256_sub_epi16( m1[12], m1[14] );
-    m2[15] = _mm256_sub_epi16( m1[13], m1[15] );
-
-    m1[ 0] = _mm256_add_epi16( m2[ 0], m2[ 1] );
-    m1[ 1] = _mm256_sub_epi16( m2[ 0], m2[ 1] );
-    m1[ 2] = _mm256_add_epi16( m2[ 2], m2[ 3] );
-    m1[ 3] = _mm256_sub_epi16( m2[ 2], m2[ 3] );
-    m1[ 4] = _mm256_add_epi16( m2[ 4], m2[ 5] );
-    m1[ 5] = _mm256_sub_epi16( m2[ 4], m2[ 5] );
-    m1[ 6] = _mm256_add_epi16( m2[ 6], m2[ 7] );
-    m1[ 7] = _mm256_sub_epi16( m2[ 6], m2[ 7] );
-    m1[ 8] = _mm256_add_epi16( m2[ 8], m2[ 9] );
-    m1[ 9] = _mm256_sub_epi16( m2[ 8], m2[ 9] );
-    m1[10] = _mm256_add_epi16( m2[10], m2[11] );
-    m1[11] = _mm256_sub_epi16( m2[10], m2[11] );
-    m1[12] = _mm256_add_epi16( m2[12], m2[13] );
-    m1[13] = _mm256_sub_epi16( m2[12], m2[13] );
-    m1[14] = _mm256_add_epi16( m2[14], m2[15] );
-    m1[15] = _mm256_sub_epi16( m2[14], m2[15] );
-
+    m2[ 0] = _mm256_add_epi32( m1[0], m1[ 8] );
+    m2[ 1] = _mm256_add_epi32( m1[1], m1[ 9] );
+    m2[ 2] = _mm256_add_epi32( m1[2], m1[10] );
+    m2[ 3] = _mm256_add_epi32( m1[3], m1[11] );
+    m2[ 4] = _mm256_add_epi32( m1[4], m1[12] );
+    m2[ 5] = _mm256_add_epi32( m1[5], m1[13] );
+    m2[ 6] = _mm256_add_epi32( m1[6], m1[14] );
+    m2[ 7] = _mm256_add_epi32( m1[7], m1[15] );
+    m2[ 8] = _mm256_sub_epi32( m1[0], m1[ 8] );
+    m2[ 9] = _mm256_sub_epi32( m1[1], m1[ 9] );
+    m2[10] = _mm256_sub_epi32( m1[2], m1[10] );
+    m2[11] = _mm256_sub_epi32( m1[3], m1[11] );
+    m2[12] = _mm256_sub_epi32( m1[4], m1[12] );
+    m2[13] = _mm256_sub_epi32( m1[5], m1[13] );
+    m2[14] = _mm256_sub_epi32( m1[6], m1[14] );
+    m2[15] = _mm256_sub_epi32( m1[7], m1[15] );
+
+    m1[ 0] = _mm256_add_epi32( m2[ 0], m2[ 4] );
+    m1[ 1] = _mm256_add_epi32( m2[ 1], m2[ 5] );
+    m1[ 2] = _mm256_add_epi32( m2[ 2], m2[ 6] );
+    m1[ 3] = _mm256_add_epi32( m2[ 3], m2[ 7] );
+    m1[ 4] = _mm256_sub_epi32( m2[ 0], m2[ 4] );
+    m1[ 5] = _mm256_sub_epi32( m2[ 1], m2[ 5] );
+    m1[ 6] = _mm256_sub_epi32( m2[ 2], m2[ 6] );
+    m1[ 7] = _mm256_sub_epi32( m2[ 3], m2[ 7] );
+    m1[ 8] = _mm256_add_epi32( m2[ 8], m2[12] );
+    m1[ 9] = _mm256_add_epi32( m2[ 9], m2[13] );
+    m1[10] = _mm256_add_epi32( m2[10], m2[14] );
+    m1[11] = _mm256_add_epi32( m2[11], m2[15] );
+    m1[12] = _mm256_sub_epi32( m2[ 8], m2[12] );
+    m1[13] = _mm256_sub_epi32( m2[ 9], m2[13] );
+    m1[14] = _mm256_sub_epi32( m2[10], m2[14] );
+    m1[15] = _mm256_sub_epi32( m2[11], m2[15] );
+
+    m2[ 0] = _mm256_add_epi32( m1[ 0], m1[ 2] );
+    m2[ 1] = _mm256_add_epi32( m1[ 1], m1[ 3] );
+    m2[ 2] = _mm256_sub_epi32( m1[ 0], m1[ 2] );
+    m2[ 3] = _mm256_sub_epi32( m1[ 1], m1[ 3] );
+    m2[ 4] = _mm256_add_epi32( m1[ 4], m1[ 6] );
+    m2[ 5] = _mm256_add_epi32( m1[ 5], m1[ 7] );
+    m2[ 6] = _mm256_sub_epi32( m1[ 4], m1[ 6] );
+    m2[ 7] = _mm256_sub_epi32( m1[ 5], m1[ 7] );
+    m2[ 8] = _mm256_add_epi32( m1[ 8], m1[10] );
+    m2[ 9] = _mm256_add_epi32( m1[ 9], m1[11] );
+    m2[10] = _mm256_sub_epi32( m1[ 8], m1[10] );
+    m2[11] = _mm256_sub_epi32( m1[ 9], m1[11] );
+    m2[12] = _mm256_add_epi32( m1[12], m1[14] );
+    m2[13] = _mm256_add_epi32( m1[13], m1[15] );
+    m2[14] = _mm256_sub_epi32( m1[12], m1[14] );
+    m2[15] = _mm256_sub_epi32( m1[13], m1[15] );
+
+    m1[ 0] = _mm256_add_epi32( m2[ 0], m2[ 1] );
+    m1[ 1] = _mm256_sub_epi32( m2[ 0], m2[ 1] );
+    m1[ 2] = _mm256_add_epi32( m2[ 2], m2[ 3] );
+    m1[ 3] = _mm256_sub_epi32( m2[ 2], m2[ 3] );
+    m1[ 4] = _mm256_add_epi32( m2[ 4], m2[ 5] );
+    m1[ 5] = _mm256_sub_epi32( m2[ 4], m2[ 5] );
+    m1[ 6] = _mm256_add_epi32( m2[ 6], m2[ 7] );
+    m1[ 7] = _mm256_sub_epi32( m2[ 6], m2[ 7] );
+    m1[ 8] = _mm256_add_epi32( m2[ 8], m2[ 9] );
+    m1[ 9] = _mm256_sub_epi32( m2[ 8], m2[ 9] );
+    m1[10] = _mm256_add_epi32( m2[10], m2[11] );
+    m1[11] = _mm256_sub_epi32( m2[10], m2[11] );
+    m1[12] = _mm256_add_epi32( m2[12], m2[13] );
+    m1[13] = _mm256_sub_epi32( m2[12], m2[13] );
+    m1[14] = _mm256_add_epi32( m2[14], m2[15] );
+    m1[15] = _mm256_sub_epi32( m2[14], m2[15] );
 
     // transpose
-    // 1. 8x8
-    m2[0] = _mm256_unpacklo_epi16( m1[0], m1[1] );
-    m2[1] = _mm256_unpacklo_epi16( m1[2], m1[3] );
-    m2[2] = _mm256_unpacklo_epi16( m1[4], m1[5] );
-    m2[3] = _mm256_unpacklo_epi16( m1[6], m1[7] );
-    m2[4] = _mm256_unpackhi_epi16( m1[0], m1[1] );
-    m2[5] = _mm256_unpackhi_epi16( m1[2], m1[3] );
-    m2[6] = _mm256_unpackhi_epi16( m1[4], m1[5] );
-    m2[7] = _mm256_unpackhi_epi16( m1[6], m1[7] );
+    constexpr int perm_unpacklo_epi128 = ( 0 << 0 ) + ( 2 << 4 );
+    constexpr int perm_unpackhi_epi128 = ( 1 << 0 ) + ( 3 << 4 );
 
-    m1[0] = _mm256_unpacklo_epi32( m2[0], m2[1] );
-    m1[1] = _mm256_unpackhi_epi32( m2[0], m2[1] );
-    m1[2] = _mm256_unpacklo_epi32( m2[2], m2[3] );
-    m1[3] = _mm256_unpackhi_epi32( m2[2], m2[3] );
-    m1[4] = _mm256_unpacklo_epi32( m2[4], m2[5] );
-    m1[5] = _mm256_unpackhi_epi32( m2[4], m2[5] );
-    m1[6] = _mm256_unpacklo_epi32( m2[6], m2[7] );
-    m1[7] = _mm256_unpackhi_epi32( m2[6], m2[7] );
+    // 1. 8x8
+    m2[0] = _mm256_unpacklo_epi32( m1[0], m1[1] );
+    m2[1] = _mm256_unpacklo_epi32( m1[2], m1[3] );
+    m2[2] = _mm256_unpacklo_epi32( m1[4], m1[5] );
+    m2[3] = _mm256_unpacklo_epi32( m1[6], m1[7] );
+    m2[4] = _mm256_unpackhi_epi32( m1[0], m1[1] );
+    m2[5] = _mm256_unpackhi_epi32( m1[2], m1[3] );
+    m2[6] = _mm256_unpackhi_epi32( m1[4], m1[5] );
+    m2[7] = _mm256_unpackhi_epi32( m1[6], m1[7] );
 
-    m2[0] = _mm256_unpacklo_epi64( m1[0], m1[2] );
-    m2[1] = _mm256_unpackhi_epi64( m1[0], m1[2] );
-    m2[2] = _mm256_unpacklo_epi64( m1[1], m1[3] );
-    m2[3] = _mm256_unpackhi_epi64( m1[1], m1[3] );
-    m2[4] = _mm256_unpacklo_epi64( m1[4], m1[6] );
-    m2[5] = _mm256_unpackhi_epi64( m1[4], m1[6] );
-    m2[6] = _mm256_unpacklo_epi64( m1[5], m1[7] );
-    m2[7] = _mm256_unpackhi_epi64( m1[5], m1[7] );
+    m1[0] = _mm256_unpacklo_epi64( m2[0], m2[1] );
+    m1[1] = _mm256_unpackhi_epi64( m2[0], m2[1] );
+    m1[2] = _mm256_unpacklo_epi64( m2[2], m2[3] );
+    m1[3] = _mm256_unpackhi_epi64( m2[2], m2[3] );
+    m1[4] = _mm256_unpacklo_epi64( m2[4], m2[5] );
+    m1[5] = _mm256_unpackhi_epi64( m2[4], m2[5] );
+    m1[6] = _mm256_unpacklo_epi64( m2[6], m2[7] );
+    m1[7] = _mm256_unpackhi_epi64( m2[6], m2[7] );
+
+    m2[0] = _mm256_permute2x128_si256( m1[0], m1[2], perm_unpacklo_epi128 );
+    m2[1] = _mm256_permute2x128_si256( m1[0], m1[2], perm_unpackhi_epi128 );
+    m2[2] = _mm256_permute2x128_si256( m1[1], m1[3], perm_unpacklo_epi128 );
+    m2[3] = _mm256_permute2x128_si256( m1[1], m1[3], perm_unpackhi_epi128 );
+    m2[4] = _mm256_permute2x128_si256( m1[4], m1[6], perm_unpacklo_epi128 );
+    m2[5] = _mm256_permute2x128_si256( m1[4], m1[6], perm_unpackhi_epi128 );
+    m2[6] = _mm256_permute2x128_si256( m1[5], m1[7], perm_unpacklo_epi128 );
+    m2[7] = _mm256_permute2x128_si256( m1[5], m1[7], perm_unpackhi_epi128 );
 
     // 2. 8x8
-    m2[0+8] = _mm256_unpacklo_epi16( m1[0+8], m1[1+8] );
-    m2[1+8] = _mm256_unpacklo_epi16( m1[2+8], m1[3+8] );
-    m2[2+8] = _mm256_unpacklo_epi16( m1[4+8], m1[5+8] );
-    m2[3+8] = _mm256_unpacklo_epi16( m1[6+8], m1[7+8] );
-    m2[4+8] = _mm256_unpackhi_epi16( m1[0+8], m1[1+8] );
-    m2[5+8] = _mm256_unpackhi_epi16( m1[2+8], m1[3+8] );
-    m2[6+8] = _mm256_unpackhi_epi16( m1[4+8], m1[5+8] );
-    m2[7+8] = _mm256_unpackhi_epi16( m1[6+8], m1[7+8] );
-
-    m1[0+8] = _mm256_unpacklo_epi32( m2[0+8], m2[1+8] );
-    m1[1+8] = _mm256_unpackhi_epi32( m2[0+8], m2[1+8] );
-    m1[2+8] = _mm256_unpacklo_epi32( m2[2+8], m2[3+8] );
-    m1[3+8] = _mm256_unpackhi_epi32( m2[2+8], m2[3+8] );
-    m1[4+8] = _mm256_unpacklo_epi32( m2[4+8], m2[5+8] );
-    m1[5+8] = _mm256_unpackhi_epi32( m2[4+8], m2[5+8] );
-    m1[6+8] = _mm256_unpacklo_epi32( m2[6+8], m2[7+8] );
-    m1[7+8] = _mm256_unpackhi_epi32( m2[6+8], m2[7+8] );
-
-    m2[0+8] = _mm256_unpacklo_epi64( m1[0+8], m1[2+8] );
-    m2[1+8] = _mm256_unpackhi_epi64( m1[0+8], m1[2+8] );
-    m2[2+8] = _mm256_unpacklo_epi64( m1[1+8], m1[3+8] );
-    m2[3+8] = _mm256_unpackhi_epi64( m1[1+8], m1[3+8] );
-    m2[4+8] = _mm256_unpacklo_epi64( m1[4+8], m1[6+8] );
-    m2[5+8] = _mm256_unpackhi_epi64( m1[4+8], m1[6+8] );
-    m2[6+8] = _mm256_unpacklo_epi64( m1[5+8], m1[7+8] );
-    m2[7+8] = _mm256_unpackhi_epi64( m1[5+8], m1[7+8] );
-
+    m2[0+8] = _mm256_unpacklo_epi32( m1[0+8], m1[1+8] );
+    m2[1+8] = _mm256_unpacklo_epi32( m1[2+8], m1[3+8] );
+    m2[2+8] = _mm256_unpacklo_epi32( m1[4+8], m1[5+8] );
+    m2[3+8] = _mm256_unpacklo_epi32( m1[6+8], m1[7+8] );
+    m2[4+8] = _mm256_unpackhi_epi32( m1[0+8], m1[1+8] );
+    m2[5+8] = _mm256_unpackhi_epi32( m1[2+8], m1[3+8] );
+    m2[6+8] = _mm256_unpackhi_epi32( m1[4+8], m1[5+8] );
+    m2[7+8] = _mm256_unpackhi_epi32( m1[6+8], m1[7+8] );
+
+    m1[0+8] = _mm256_unpacklo_epi64( m2[0+8], m2[1+8] );
+    m1[1+8] = _mm256_unpackhi_epi64( m2[0+8], m2[1+8] );
+    m1[2+8] = _mm256_unpacklo_epi64( m2[2+8], m2[3+8] );
+    m1[3+8] = _mm256_unpackhi_epi64( m2[2+8], m2[3+8] );
+    m1[4+8] = _mm256_unpacklo_epi64( m2[4+8], m2[5+8] );
+    m1[5+8] = _mm256_unpackhi_epi64( m2[4+8], m2[5+8] );
+    m1[6+8] = _mm256_unpacklo_epi64( m2[6+8], m2[7+8] );
+    m1[7+8] = _mm256_unpackhi_epi64( m2[6+8], m2[7+8] );
+
+    m2[0+8] = _mm256_permute2x128_si256( m1[0+8], m1[2+8], perm_unpacklo_epi128 );
+    m2[1+8] = _mm256_permute2x128_si256( m1[0+8], m1[2+8], perm_unpackhi_epi128 );
+    m2[2+8] = _mm256_permute2x128_si256( m1[1+8], m1[3+8], perm_unpacklo_epi128 );
+    m2[3+8] = _mm256_permute2x128_si256( m1[1+8], m1[3+8], perm_unpackhi_epi128 );
+    m2[4+8] = _mm256_permute2x128_si256( m1[4+8], m1[6+8], perm_unpacklo_epi128 );
+    m2[5+8] = _mm256_permute2x128_si256( m1[4+8], m1[6+8], perm_unpackhi_epi128 );
+    m2[6+8] = _mm256_permute2x128_si256( m1[5+8], m1[7+8], perm_unpacklo_epi128 );
+    m2[7+8] = _mm256_permute2x128_si256( m1[5+8], m1[7+8], perm_unpackhi_epi128 );
 
     // horizontal
-    if( iBitDepth >= 10 )
-    {
-      // extend to 32bit
-      //for( int j = 0; j < 16; j+=8 )
-      {
-        for( int i = 0; i < 8; i++ )
-        {
-          m2[i] = _mm256_cvtepi16_epi32( _mm256_castsi256_si128( m2[i] ) );
-        }
-
-        m1[0] = _mm256_add_epi32( m2[0], m2[4] );
-        m1[1] = _mm256_add_epi32( m2[1], m2[5] );
-        m1[2] = _mm256_add_epi32( m2[2], m2[6] );
-        m1[3] = _mm256_add_epi32( m2[3], m2[7] );
-        m1[4] = _mm256_sub_epi32( m2[0], m2[4] );
-        m1[5] = _mm256_sub_epi32( m2[1], m2[5] );
-        m1[6] = _mm256_sub_epi32( m2[2], m2[6] );
-        m1[7] = _mm256_sub_epi32( m2[3], m2[7] );
-
-        m2[0] = _mm256_add_epi32( m1[0], m1[2] );
-        m2[1] = _mm256_add_epi32( m1[1], m1[3] );
-        m2[2] = _mm256_sub_epi32( m1[0], m1[2] );
-        m2[3] = _mm256_sub_epi32( m1[1], m1[3] );
-        m2[4] = _mm256_add_epi32( m1[4], m1[6] );
-        m2[5] = _mm256_add_epi32( m1[5], m1[7] );
-        m2[6] = _mm256_sub_epi32( m1[4], m1[6] );
-        m2[7] = _mm256_sub_epi32( m1[5], m1[7] );
-
-        m1[0] = _mm256_abs_epi32( _mm256_add_epi32( m2[0], m2[1] ) );
-        m1[1] = _mm256_abs_epi32( _mm256_sub_epi32( m2[0], m2[1] ) );
-        m1[2] = _mm256_abs_epi32( _mm256_add_epi32( m2[2], m2[3] ) );
-        m1[3] = _mm256_abs_epi32( _mm256_sub_epi32( m2[2], m2[3] ) );
-        m1[4] = _mm256_abs_epi32( _mm256_add_epi32( m2[4], m2[5] ) );
-        m1[5] = _mm256_abs_epi32( _mm256_sub_epi32( m2[4], m2[5] ) );
-        m1[6] = _mm256_abs_epi32( _mm256_add_epi32( m2[6], m2[7] ) );
-        m1[7] = _mm256_abs_epi32( _mm256_sub_epi32( m2[6], m2[7] ) );
-
-        for( int i = 0; i < 8; i++ )
-        {
-          m2[i+8] = _mm256_cvtepi16_epi32( _mm256_castsi256_si128( m2[i+8] ) );
-        }
+    m1[0] = _mm256_add_epi32( m2[0], m2[4] );
+    m1[1] = _mm256_add_epi32( m2[1], m2[5] );
+    m1[2] = _mm256_add_epi32( m2[2], m2[6] );
+    m1[3] = _mm256_add_epi32( m2[3], m2[7] );
+    m1[4] = _mm256_sub_epi32( m2[0], m2[4] );
+    m1[5] = _mm256_sub_epi32( m2[1], m2[5] );
+    m1[6] = _mm256_sub_epi32( m2[2], m2[6] );
+    m1[7] = _mm256_sub_epi32( m2[3], m2[7] );
+
+    m2[0] = _mm256_add_epi32( m1[0], m1[2] );
+    m2[1] = _mm256_add_epi32( m1[1], m1[3] );
+    m2[2] = _mm256_sub_epi32( m1[0], m1[2] );
+    m2[3] = _mm256_sub_epi32( m1[1], m1[3] );
+    m2[4] = _mm256_add_epi32( m1[4], m1[6] );
+    m2[5] = _mm256_add_epi32( m1[5], m1[7] );
+    m2[6] = _mm256_sub_epi32( m1[4], m1[6] );
+    m2[7] = _mm256_sub_epi32( m1[5], m1[7] );
+
+    m1[0] = _mm256_abs_epi32( _mm256_add_epi32( m2[0], m2[1] ) );
+    m1[1] = _mm256_abs_epi32( _mm256_sub_epi32( m2[0], m2[1] ) );
+    m1[2] = _mm256_abs_epi32( _mm256_add_epi32( m2[2], m2[3] ) );
+    m1[3] = _mm256_abs_epi32( _mm256_sub_epi32( m2[2], m2[3] ) );
+    m1[4] = _mm256_abs_epi32( _mm256_add_epi32( m2[4], m2[5] ) );
+    m1[5] = _mm256_abs_epi32( _mm256_sub_epi32( m2[4], m2[5] ) );
+    m1[6] = _mm256_abs_epi32( _mm256_add_epi32( m2[6], m2[7] ) );
+    m1[7] = _mm256_abs_epi32( _mm256_sub_epi32( m2[6], m2[7] ) );
+
+    m1[0 + 8] = _mm256_add_epi32( m2[0 + 8], m2[4 + 8] );
+    m1[1 + 8] = _mm256_add_epi32( m2[1 + 8], m2[5 + 8] );
+    m1[2 + 8] = _mm256_add_epi32( m2[2 + 8], m2[6 + 8] );
+    m1[3 + 8] = _mm256_add_epi32( m2[3 + 8], m2[7 + 8] );
+    m1[4 + 8] = _mm256_sub_epi32( m2[0 + 8], m2[4 + 8] );
+    m1[5 + 8] = _mm256_sub_epi32( m2[1 + 8], m2[5 + 8] );
+    m1[6 + 8] = _mm256_sub_epi32( m2[2 + 8], m2[6 + 8] );
+    m1[7 + 8] = _mm256_sub_epi32( m2[3 + 8], m2[7 + 8] );
+
+    m2[0 + 8] = _mm256_add_epi32( m1[0 + 8], m1[2 + 8] );
+    m2[1 + 8] = _mm256_add_epi32( m1[1 + 8], m1[3 + 8] );
+    m2[2 + 8] = _mm256_sub_epi32( m1[0 + 8], m1[2 + 8] );
+    m2[3 + 8] = _mm256_sub_epi32( m1[1 + 8], m1[3 + 8] );
+    m2[4 + 8] = _mm256_add_epi32( m1[4 + 8], m1[6 + 8] );
+    m2[5 + 8] = _mm256_add_epi32( m1[5 + 8], m1[7 + 8] );
+    m2[6 + 8] = _mm256_sub_epi32( m1[4 + 8], m1[6 + 8] );
+    m2[7 + 8] = _mm256_sub_epi32( m1[5 + 8], m1[7 + 8] );
+
+    m1[0 + 8] = _mm256_abs_epi32( _mm256_add_epi32( m2[0 + 8], m2[1 + 8] ) );
+    m1[1 + 8] = _mm256_abs_epi32( _mm256_sub_epi32( m2[0 + 8], m2[1 + 8] ) );
+    m1[2 + 8] = _mm256_abs_epi32( _mm256_add_epi32( m2[2 + 8], m2[3 + 8] ) );
+    m1[3 + 8] = _mm256_abs_epi32( _mm256_sub_epi32( m2[2 + 8], m2[3 + 8] ) );
+    m1[4 + 8] = _mm256_abs_epi32( _mm256_add_epi32( m2[4 + 8], m2[5 + 8] ) );
+    m1[5 + 8] = _mm256_abs_epi32( _mm256_sub_epi32( m2[4 + 8], m2[5 + 8] ) );
+    m1[6 + 8] = _mm256_abs_epi32( _mm256_add_epi32( m2[6 + 8], m2[7 + 8] ) );
+    m1[7 + 8] = _mm256_abs_epi32( _mm256_sub_epi32( m2[6 + 8], m2[7 + 8] ) );
 
-        m1[0 + 8] = _mm256_add_epi32( m2[0 + 8], m2[4 + 8] );
-        m1[1 + 8] = _mm256_add_epi32( m2[1 + 8], m2[5 + 8] );
-        m1[2 + 8] = _mm256_add_epi32( m2[2 + 8], m2[6 + 8] );
-        m1[3 + 8] = _mm256_add_epi32( m2[3 + 8], m2[7 + 8] );
-        m1[4 + 8] = _mm256_sub_epi32( m2[0 + 8], m2[4 + 8] );
-        m1[5 + 8] = _mm256_sub_epi32( m2[1 + 8], m2[5 + 8] );
-        m1[6 + 8] = _mm256_sub_epi32( m2[2 + 8], m2[6 + 8] );
-        m1[7 + 8] = _mm256_sub_epi32( m2[3 + 8], m2[7 + 8] );
-
-        m2[0 + 8] = _mm256_add_epi32( m1[0 + 8], m1[2 + 8] );
-        m2[1 + 8] = _mm256_add_epi32( m1[1 + 8], m1[3 + 8] );
-        m2[2 + 8] = _mm256_sub_epi32( m1[0 + 8], m1[2 + 8] );
-        m2[3 + 8] = _mm256_sub_epi32( m1[1 + 8], m1[3 + 8] );
-        m2[4 + 8] = _mm256_add_epi32( m1[4 + 8], m1[6 + 8] );
-        m2[5 + 8] = _mm256_add_epi32( m1[5 + 8], m1[7 + 8] );
-        m2[6 + 8] = _mm256_sub_epi32( m1[4 + 8], m1[6 + 8] );
-        m2[7 + 8] = _mm256_sub_epi32( m1[5 + 8], m1[7 + 8] );
-
-        m1[0 + 8] = _mm256_abs_epi32( _mm256_add_epi32( m2[0 + 8], m2[1 + 8] ) );
-        m1[1 + 8] = _mm256_abs_epi32( _mm256_sub_epi32( m2[0 + 8], m2[1 + 8] ) );
-        m1[2 + 8] = _mm256_abs_epi32( _mm256_add_epi32( m2[2 + 8], m2[3 + 8] ) );
-        m1[3 + 8] = _mm256_abs_epi32( _mm256_sub_epi32( m2[2 + 8], m2[3 + 8] ) );
-        m1[4 + 8] = _mm256_abs_epi32( _mm256_add_epi32( m2[4 + 8], m2[5 + 8] ) );
-        m1[5 + 8] = _mm256_abs_epi32( _mm256_sub_epi32( m2[4 + 8], m2[5 + 8] ) );
-        m1[6 + 8] = _mm256_abs_epi32( _mm256_add_epi32( m2[6 + 8], m2[7 + 8] ) );
-        m1[7 + 8] = _mm256_abs_epi32( _mm256_sub_epi32( m2[6 + 8], m2[7 + 8] ) );
-      }
-      // sum up
-      m1[0] = _mm256_add_epi32( m1[0], m1[1] );
-      m1[1] = _mm256_add_epi32( m1[2], m1[3] );
-      m1[2] = _mm256_add_epi32( m1[4], m1[5] );
-      m1[3] = _mm256_add_epi32( m1[6], m1[7] );
-      m1[4] = _mm256_add_epi32( m1[8], m1[9] );
-      m1[5] = _mm256_add_epi32( m1[10], m1[11] );
-      m1[6] = _mm256_add_epi32( m1[12], m1[13] );
-      m1[7] = _mm256_add_epi32( m1[14], m1[15] );
-    }
-    else
-    {
-      // 16x8
-      m2[0] = _mm256_permute2x128_si256( m2[0], m2[0 + 8], 0x20 );
-      m2[1] = _mm256_permute2x128_si256( m2[1], m2[1 + 8], 0x20 );
-      m2[2] = _mm256_permute2x128_si256( m2[2], m2[2 + 8], 0x20 );
-      m2[3] = _mm256_permute2x128_si256( m2[3], m2[3 + 8], 0x20 );
-      m2[4] = _mm256_permute2x128_si256( m2[4], m2[4 + 8], 0x20 );
-      m2[5] = _mm256_permute2x128_si256( m2[5], m2[5 + 8], 0x20 );
-      m2[6] = _mm256_permute2x128_si256( m2[6], m2[6 + 8], 0x20 );
-      m2[7] = _mm256_permute2x128_si256( m2[7], m2[7 + 8], 0x20 );
-
-      m1[0] = _mm256_add_epi16( m2[0], m2[4] );
-      m1[1] = _mm256_add_epi16( m2[1], m2[5] );
-      m1[2] = _mm256_add_epi16( m2[2], m2[6] );
-      m1[3] = _mm256_add_epi16( m2[3], m2[7] );
-      m1[4] = _mm256_sub_epi16( m2[0], m2[4] );
-      m1[5] = _mm256_sub_epi16( m2[1], m2[5] );
-      m1[6] = _mm256_sub_epi16( m2[2], m2[6] );
-      m1[7] = _mm256_sub_epi16( m2[3], m2[7] );
-
-      m2[0] = _mm256_add_epi16( m1[0], m1[2] );
-      m2[1] = _mm256_add_epi16( m1[1], m1[3] );
-      m2[2] = _mm256_sub_epi16( m1[0], m1[2] );
-      m2[3] = _mm256_sub_epi16( m1[1], m1[3] );
-      m2[4] = _mm256_add_epi16( m1[4], m1[6] );
-      m2[5] = _mm256_add_epi16( m1[5], m1[7] );
-      m2[6] = _mm256_sub_epi16( m1[4], m1[6] );
-      m2[7] = _mm256_sub_epi16( m1[5], m1[7] );
-
-      m1[0] = _mm256_abs_epi16( _mm256_add_epi16( m2[0], m2[1] ) );
-      m1[1] = _mm256_abs_epi16( _mm256_sub_epi16( m2[0], m2[1] ) );
-      m1[2] = _mm256_abs_epi16( _mm256_add_epi16( m2[2], m2[3] ) );
-      m1[3] = _mm256_abs_epi16( _mm256_sub_epi16( m2[2], m2[3] ) );
-      m1[4] = _mm256_abs_epi16( _mm256_add_epi16( m2[4], m2[5] ) );
-      m1[5] = _mm256_abs_epi16( _mm256_sub_epi16( m2[4], m2[5] ) );
-      m1[6] = _mm256_abs_epi16( _mm256_add_epi16( m2[6], m2[7] ) );
-      m1[7] = _mm256_abs_epi16( _mm256_sub_epi16( m2[6], m2[7] ) );
-
-      __m256i ma1, ma2;
-
-      for( int i = 0; i < 8; i++ )
-      {
-        ma1 = _mm256_unpacklo_epi16( m1[i], vzero );
-        ma2 = _mm256_unpackhi_epi16( m1[i], vzero );
-        m1[i] = _mm256_add_epi32( ma1, ma2 );
-      }
-    }
+    // sum up
+    m1[0] = _mm256_add_epi32( m1[0], m1[1] );
+    m1[1] = _mm256_add_epi32( m1[2], m1[3] );
+    m1[2] = _mm256_add_epi32( m1[4], m1[5] );
+    m1[3] = _mm256_add_epi32( m1[6], m1[7] );
+    m1[4] = _mm256_add_epi32( m1[8], m1[9] );
+    m1[5] = _mm256_add_epi32( m1[10], m1[11] );
+    m1[6] = _mm256_add_epi32( m1[12], m1[13] );
+    m1[7] = _mm256_add_epi32( m1[14], m1[15] );
 
     // sum up
     m1[ 0] = _mm256_add_epi32( m1[ 0], m1[ 1] );
@@ -2286,7 +1974,7 @@ static uint32_t xCalcHAD8x16_AVX2( const Torg *piOrg, const Tcur *piCur, const i
 }
 
 
-template< typename Torg, typename Tcur, X86_VEXT vext >
+template<X86_VEXT vext>
 Distortion RdCost::xGetHADs_SIMD( const DistParam &rcDtParam )
 {
   if( rcDtParam.bitDepth > 10 || rcDtParam.applyWeight )
@@ -2294,8 +1982,8 @@ Distortion RdCost::xGetHADs_SIMD( const DistParam &rcDtParam )
     return RdCost::xGetHADs( rcDtParam );
   }
 
-  const Torg*  piOrg = (const Torg*)rcDtParam.org.buf;
-  const Tcur*  piCur = (const Tcur*)rcDtParam.cur.buf;
+  const Pel*  piOrg = rcDtParam.org.buf;
+  const Pel*  piCur = rcDtParam.cur.buf;
   const int iRows = rcDtParam.org.height;
   const int iCols = rcDtParam.org.width;
   const int iStrideCur = rcDtParam.cur.stride;
@@ -2312,9 +2000,9 @@ Distortion RdCost::xGetHADs_SIMD( const DistParam &rcDtParam )
       for( x = 0; x < iCols; x += 16 )
       {
         if( vext >= AVX2 )
-          uiSum += xCalcHAD16x8_AVX2<Torg, Tcur>( &piOrg[x], &piCur[x], iStrideOrg, iStrideCur, iBitDepth );
+          uiSum += xCalcHAD16x8_AVX2( &piOrg[x], &piCur[x], iStrideOrg, iStrideCur, iBitDepth );
         else
-          uiSum += xCalcHAD16x8_SSE<Torg, Tcur>( &piOrg[x], &piCur[x], iStrideOrg, iStrideCur, iBitDepth );
+          uiSum += xCalcHAD16x8_SSE( &piOrg[x], &piCur[x], iStrideOrg, iStrideCur, iBitDepth );
       }
       piOrg += iStrideOrg * 8;
       piCur += iStrideCur * 8;
@@ -2327,9 +2015,9 @@ Distortion RdCost::xGetHADs_SIMD( const DistParam &rcDtParam )
       for( x = 0; x < iCols; x += 8 )
       {
         if( vext >= AVX2 )
-          uiSum += xCalcHAD8x16_AVX2<Torg, Tcur>( &piOrg[x], &piCur[x], iStrideOrg, iStrideCur, iBitDepth );
+          uiSum += xCalcHAD8x16_AVX2( &piOrg[x], &piCur[x], iStrideOrg, iStrideCur, iBitDepth );
         else
-          uiSum += xCalcHAD8x16_SSE<Torg, Tcur>( &piOrg[x], &piCur[x], iStrideOrg, iStrideCur, iBitDepth );
+          uiSum += xCalcHAD8x16_SSE( &piOrg[x], &piCur[x], iStrideOrg, iStrideCur, iBitDepth );
       }
       piOrg += iStrideOrg * 16;
       piCur += iStrideCur * 16;
@@ -2341,7 +2029,7 @@ Distortion RdCost::xGetHADs_SIMD( const DistParam &rcDtParam )
     {
       for( x = 0; x < iCols; x += 8 )
       {
-        uiSum += xCalcHAD8x4_SSE<Torg, Tcur>( &piOrg[x], &piCur[x], iStrideOrg, iStrideCur, iBitDepth );
+        uiSum += xCalcHAD8x4_SSE( &piOrg[x], &piCur[x], iStrideOrg, iStrideCur, iBitDepth );
       }
       piOrg += iStrideOrg * 4;
       piCur += iStrideCur * 4;
@@ -2367,7 +2055,7 @@ Distortion RdCost::xGetHADs_SIMD( const DistParam &rcDtParam )
     {
       for( x = 0; x < iCols; x += 16 )
       {
-        uiSum += xCalcHAD16x16_AVX2<Torg, Tcur>( &piOrg[x], &piCur[x], iStrideOrg, iStrideCur, iBitDepth );
+        uiSum += xCalcHAD16x16_AVX2( &piOrg[x], &piCur[x], iStrideOrg, iStrideCur, iBitDepth );
       }
       piOrg += iOffsetOrg;
       piCur += iOffsetCur;
@@ -2381,7 +2069,7 @@ Distortion RdCost::xGetHADs_SIMD( const DistParam &rcDtParam )
     {
       for( x = 0; x < iCols; x += 8 )
       {
-        uiSum += xCalcHAD8x8_SSE<Torg, Tcur>( &piOrg[x], &piCur[x], iStrideOrg, iStrideCur, iBitDepth );
+        uiSum += xCalcHAD8x8_SSE( &piOrg[x], &piCur[x], iStrideOrg, iStrideCur, iBitDepth );
       }
       piOrg += iOffsetOrg;
       piCur += iOffsetCur;
@@ -2410,7 +2098,7 @@ Distortion RdCost::xGetHADs_SIMD( const DistParam &rcDtParam )
     {
       for( x = 0; x < iCols; x += 2 )
       {
-        uiSum += xCalcHADs2x2( (Torg*)&piOrg[x], (Tcur*)&piCur[x*rcDtParam.step], iStrideOrg, iStrideCur, rcDtParam.step );
+        uiSum += xCalcHADs2x2( &piOrg[x], &piCur[x*rcDtParam.step], iStrideOrg, iStrideCur, rcDtParam.step );
       }
       piOrg += iOffsetOrg;
       piCur += iOffsetCur;
@@ -2421,6 +2109,7 @@ Distortion RdCost::xGetHADs_SIMD( const DistParam &rcDtParam )
     THROW( "Unsupported size" );
   }
 
+
   return uiSum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth);
 }
 
@@ -2451,14 +2140,14 @@ void RdCost::_initRdCostX86()
   m_afpDistortFunc[DF_SAD24  ] = RdCost::xGetSAD_SIMD<vext>;
   m_afpDistortFunc[DF_SAD48  ] = RdCost::xGetSAD_SIMD<vext>;
 
-  m_afpDistortFunc[DF_HAD]     = RdCost::xGetHADs_SIMD<Pel, Pel, vext>;
-  m_afpDistortFunc[DF_HAD2]    = RdCost::xGetHADs_SIMD<Pel, Pel, vext>;
-  m_afpDistortFunc[DF_HAD4]    = RdCost::xGetHADs_SIMD<Pel, Pel, vext>;
-  m_afpDistortFunc[DF_HAD8]    = RdCost::xGetHADs_SIMD<Pel, Pel, vext>;
-  m_afpDistortFunc[DF_HAD16]   = RdCost::xGetHADs_SIMD<Pel, Pel, vext>;
-  m_afpDistortFunc[DF_HAD32]   = RdCost::xGetHADs_SIMD<Pel, Pel, vext>;
-  m_afpDistortFunc[DF_HAD64]   = RdCost::xGetHADs_SIMD<Pel, Pel, vext>;
-  m_afpDistortFunc[DF_HAD16N]  = RdCost::xGetHADs_SIMD<Pel, Pel, vext>;
+  m_afpDistortFunc[DF_HAD]     = RdCost::xGetHADs_SIMD<vext>;
+  m_afpDistortFunc[DF_HAD2]    = RdCost::xGetHADs_SIMD<vext>;
+  m_afpDistortFunc[DF_HAD4]    = RdCost::xGetHADs_SIMD<vext>;
+  m_afpDistortFunc[DF_HAD8]    = RdCost::xGetHADs_SIMD<vext>;
+  m_afpDistortFunc[DF_HAD16]   = RdCost::xGetHADs_SIMD<vext>;
+  m_afpDistortFunc[DF_HAD32]   = RdCost::xGetHADs_SIMD<vext>;
+  m_afpDistortFunc[DF_HAD64]   = RdCost::xGetHADs_SIMD<vext>;
+  m_afpDistortFunc[DF_HAD16N]  = RdCost::xGetHADs_SIMD<vext>;
 
   m_afpDistortFunc[DF_SAD_INTERMEDIATE_BITDEPTH] = RdCost::xGetSAD_IBD_SIMD<vext>;
 }
diff --git a/source/Lib/DecoderLib/AnnexBread.cpp b/source/Lib/DecoderLib/AnnexBread.cpp
index 870e2b19381e9efcf5bf8f85664f44818717b85a..7058de923b32d00ecf690cabd1961393eda7a466 100644
--- a/source/Lib/DecoderLib/AnnexBread.cpp
+++ b/source/Lib/DecoderLib/AnnexBread.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -79,7 +79,11 @@ _byteStreamNALUnit(
   {
     uint8_t leading_zero_8bits = bs.readByte();
 #if RExt__DECODER_DEBUG_BIT_STATISTICS
+#if EPBINCOUNT_FIX
+    statBits.bits+=8;
+#else
     statBits.bits+=8; statBits.count++;
+#endif
 #endif
     if(leading_zero_8bits != 0) { THROW( "Leading zero bits not zero" ); }
     stats.m_numLeadingZero8BitsBytes++;
@@ -97,7 +101,11 @@ _byteStreamNALUnit(
   {
     uint8_t zero_byte = bs.readByte();
 #if RExt__DECODER_DEBUG_BIT_STATISTICS
+#if EPBINCOUNT_FIX
+    statBits.bits+=8;
+#else
     statBits.bits+=8; statBits.count++;
+#endif
 #endif
     CHECK( zero_byte != 0, "Zero byte not '0'" );
     stats.m_numZeroByteBytes++;
@@ -111,7 +119,11 @@ _byteStreamNALUnit(
   /* NB, (1) guarantees that the next three bytes are 0x00 00 01 */
   uint32_t start_code_prefix_one_3bytes = bs.readBytes(24/8);
 #if RExt__DECODER_DEBUG_BIT_STATISTICS
+#if EPBINCOUNT_FIX
+  statBits.bits+=24;
+#else
   statBits.bits+=24; statBits.count+=3;
+#endif
 #endif
   if(start_code_prefix_one_3bytes != 0x000001) { THROW( "Invalid code prefix" );}
   stats.m_numStartCodePrefixBytes += 3;
@@ -163,7 +175,11 @@ _byteStreamNALUnit(
   {
     uint8_t trailing_zero_8bits = bs.readByte();
 #if RExt__DECODER_DEBUG_BIT_STATISTICS
+#if EPBINCOUNT_FIX
+    statBits.bits+=8;
+#else
     statBits.bits+=8; statBits.count++;
+#endif
 #endif
     CHECK( trailing_zero_8bits != 0, "Trailing zero bits not '0'" );
     stats.m_numTrailingZero8BitsBytes++;
diff --git a/source/Lib/DecoderLib/AnnexBread.h b/source/Lib/DecoderLib/AnnexBread.h
index 659c4bc2c8fadbe9e92b0da9474a409f4b8ea6ad..6f9c7334d7133ccddc0c402c11542109d3e12b90 100644
--- a/source/Lib/DecoderLib/AnnexBread.h
+++ b/source/Lib/DecoderLib/AnnexBread.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/DecoderLib/BinDecoder.cpp b/source/Lib/DecoderLib/BinDecoder.cpp
index 49b3ea2f3a2d91734b2acdda7f3d881c1673c878..81d4783ba41efb3cc7bcf75c1d203ea2c5ee40bc 100644
--- a/source/Lib/DecoderLib/BinDecoder.cpp
+++ b/source/Lib/DecoderLib/BinDecoder.cpp
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2019, ITU/ISO/IEC
+* Copyright (c) 2010-2020, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -180,48 +180,33 @@ unsigned BinDecoderBase::decodeBinsEP( unsigned numBins )
   return bins;
 }
 
-unsigned BinDecoderBase::decodeRemAbsEP( unsigned goRicePar, bool useLimitedPrefixLength, int maxLog2TrDynamicRange )
+unsigned BinDecoderBase::decodeRemAbsEP(unsigned goRicePar, unsigned cutoff, int maxLog2TrDynamicRange)
 {
-  unsigned cutoff = COEF_REMAIN_BIN_REDUCTION;
   unsigned prefix = 0;
-  useLimitedPrefixLength = true;
-  if( useLimitedPrefixLength )
   {
     const unsigned  maxPrefix = 32 - maxLog2TrDynamicRange;
-    unsigned        codeWord  = 0;
+    unsigned        codeWord = 0;
     do
     {
       prefix++;
       codeWord = decodeBinEP();
-    }
-    while( codeWord && prefix < maxPrefix );
+    } while (codeWord && prefix < maxPrefix);
     prefix -= 1 - codeWord;
   }
-  else
-  {
-    while( decodeBinEP() )
-    {
-      prefix++;
-    }
-  }
+
   unsigned length = goRicePar, offset;
-  if( prefix < cutoff )
+  if (prefix < cutoff)
   {
-    offset    = prefix << goRicePar;
+    offset = prefix << goRicePar;
   }
   else
   {
-    offset    = ( ( ( 1 << ( prefix - cutoff ) ) + cutoff - 1 ) << goRicePar );
-    if( useLimitedPrefixLength )
+    offset = (((1 << (prefix - cutoff)) + cutoff - 1) << goRicePar);
     {
-      length += ( prefix == ( 32 - maxLog2TrDynamicRange ) ? maxLog2TrDynamicRange - goRicePar : prefix - COEF_REMAIN_BIN_REDUCTION );
-    }
-    else
-    {
-      length += ( prefix - cutoff );
+      length += (prefix == (32 - maxLog2TrDynamicRange) ? maxLog2TrDynamicRange - goRicePar : prefix - cutoff);
     }
   }
-  return offset + decodeBinsEP( length );
+  return offset + decodeBinsEP(length);
 }
 
 
@@ -257,15 +242,6 @@ unsigned BinDecoderBase::decodeBinTrm()
 }
 
 
-unsigned BinDecoderBase::decodeBinsPCM( unsigned numBins )
-{
-  unsigned bins = 0;
-  m_Bitstream->read( numBins, bins );
-#if RExt__DECODER_DEBUG_BIT_STATISTICS
-  CodingStatistics::IncrementStatisticEP( STATS__CABAC_PCM_CODE_BITS, numBins, int(bins) );
-#endif
-  return bins;
-}
 
 
 void BinDecoderBase::align()
diff --git a/source/Lib/DecoderLib/BinDecoder.h b/source/Lib/DecoderLib/BinDecoder.h
index 2e45c0d250b3e27208ffdc275f40f2dd5ead1cd8..11a4260974e322f69032b782d7f64522b93b686d 100644
--- a/source/Lib/DecoderLib/BinDecoder.h
+++ b/source/Lib/DecoderLib/BinDecoder.h
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2019, ITU/ISO/IEC
+* Copyright (c) 2010-2020, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -71,9 +71,8 @@ public:
 public:
   unsigned          decodeBinEP         ();
   unsigned          decodeBinsEP        ( unsigned numBins  );
-  unsigned          decodeRemAbsEP      ( unsigned goRicePar, bool useLimitedPrefixLength, int maxLog2TrDynamicRange );
+  unsigned          decodeRemAbsEP      ( unsigned goRicePar, unsigned cutoff, int maxLog2TrDynamicRange );
   unsigned          decodeBinTrm        ();
-  unsigned          decodeBinsPCM       ( unsigned numBins  );
   void              align               ();
   unsigned          getNumBitsRead      () { return m_Bitstream->getNumBitsRead() + m_bitsNeeded; }
 private:
diff --git a/source/Lib/DecoderLib/CABACReader.cpp b/source/Lib/DecoderLib/CABACReader.cpp
index 6a5c179aee44bb4673824c41d683a9ba7a6d3838..17abe59be5eba95409a82b8364c6a4dcf08569d8 100644
--- a/source/Lib/DecoderLib/CABACReader.cpp
+++ b/source/Lib/DecoderLib/CABACReader.cpp
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2019, ITU/ISO/IEC
+* Copyright (c) 2010-2020, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -130,45 +130,38 @@ void CABACReader::remaining_bytes( bool noTrailingBytesExpected )
 //================================================================================
 //  clause 7.3.8.2
 //--------------------------------------------------------------------------------
-//    bool  coding_tree_unit( cs, area, qpL, qpC, ctuRsAddr )
+//    void  coding_tree_unit( cs, area, qpL, qpC, ctuRsAddr )
 //================================================================================
 
-bool CABACReader::coding_tree_unit( CodingStructure& cs, const UnitArea& area, int (&qps)[2], unsigned ctuRsAddr )
+void CABACReader::coding_tree_unit( CodingStructure& cs, const UnitArea& area, int (&qps)[2], unsigned ctuRsAddr )
 {
   CUCtx cuCtx( qps[CH_L] );
-  Partitioner *partitioner = PartitionerFactory::get( *cs.slice );
+  QTBTPartitioner partitioner;
 
-  partitioner->initCtu( area, CH_L, *cs.slice );
+  partitioner.initCtu(area, CH_L, *cs.slice);
+  cs.treeType = partitioner.treeType = TREE_D;
+  cs.modeType = partitioner.modeType = MODE_TYPE_ALL;
 
 
   sao( cs, ctuRsAddr );
-
-  if (cs.sps->getALFEnabledFlag() && (cs.slice->getTileGroupAlfEnabledFlag()))
+  if (cs.sps->getALFEnabledFlag() && (cs.slice->getTileGroupAlfEnabledFlag(COMPONENT_Y)))
   {
-    CHECK(cs.aps == nullptr, "APS not initialized");
-    const AlfSliceParam& alfSliceParam = cs.aps->getAlfAPSParam();
-
     const PreCalcValues& pcv = *cs.pcv;
     int                 frame_width_in_ctus = pcv.widthInCtus;
     int                 ry = ctuRsAddr / frame_width_in_ctus;
     int                 rx = ctuRsAddr - ry * frame_width_in_ctus;
     const Position      pos( rx * cs.pcv->maxCUWidth, ry * cs.pcv->maxCUHeight );
     const uint32_t          curSliceIdx = cs.slice->getIndependentSliceIdx();
-#if HEVC_TILES_WPP
-    const uint32_t          curTileIdx = cs.picture->tileMap->getTileIdxMap( pos );
-    bool                leftAvail = cs.getCURestricted( pos.offset( -(int)pcv.maxCUWidth, 0 ), curSliceIdx, curTileIdx, CH_L ) ? true : false;
-    bool                aboveAvail = cs.getCURestricted( pos.offset( 0, -(int)pcv.maxCUHeight ), curSliceIdx, curTileIdx, CH_L ) ? true : false;
-#else
-    bool                leftAvail = cs.getCURestricted( pos.offset( -(int)pcv.maxCUWidth, 0 ), curSliceIdx, CH_L ) ? true : false;
-    bool                aboveAvail = cs.getCURestricted( pos.offset( 0, -(int)pcv.maxCUHeight ), curSliceIdx, CH_L ) ? true : false;
-#endif
+    const uint32_t          curTileIdx = cs.pps->getTileIdx( pos );
+    bool                leftAvail = cs.getCURestricted( pos.offset( -(int)pcv.maxCUWidth, 0 ), pos, curSliceIdx, curTileIdx, CH_L ) ? true : false;
+    bool                aboveAvail = cs.getCURestricted( pos.offset( 0, -(int)pcv.maxCUHeight ), pos, curSliceIdx, curTileIdx, CH_L ) ? true : false;
 
     int leftCTUAddr = leftAvail ? ctuRsAddr - 1 : -1;
     int aboveCTUAddr = aboveAvail ? ctuRsAddr - frame_width_in_ctus : -1;
 
     for( int compIdx = 0; compIdx < MAX_NUM_COMPONENT; compIdx++ )
     {
-      if( alfSliceParam.enabledFlag[compIdx] )
+      if (cs.slice->getTileGroupAlfEnabledFlag((ComponentID)compIdx))
       {
         uint8_t* ctbAlfFlag = cs.slice->getPic()->getAlfCtuEnableFlag( compIdx );
         int ctx = 0;
@@ -177,31 +170,50 @@ bool CABACReader::coding_tree_unit( CodingStructure& cs, const UnitArea& area, i
 
         RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET(STATS__CABAC_BITS__ALF);
         ctbAlfFlag[ctuRsAddr] = m_BinDecoder.decodeBin( Ctx::ctbAlfFlag( compIdx * 3 + ctx ) );
+
+        if (isLuma((ComponentID)compIdx) && ctbAlfFlag[ctuRsAddr])
+        {
+          readAlfCtuFilterIndex(cs, ctuRsAddr);
+        }
+        if( isChroma( (ComponentID)compIdx ) )
+        {
+          int apsIdx = cs.slice->getTileGroupApsIdChroma();
+          CHECK(cs.slice->getAlfAPSs()[apsIdx] == nullptr, "APS not initialized");
+          const AlfParam& alfParam = cs.slice->getAlfAPSs()[apsIdx]->getAlfAPSParam();
+          const int numAlts = alfParam.numAlternativesChroma;
+          uint8_t* ctbAlfAlternative = cs.slice->getPic()->getAlfCtuAlternativeData( compIdx );
+          ctbAlfAlternative[ctuRsAddr] = 0;
+          if( ctbAlfFlag[ctuRsAddr] )
+          {
+            uint8_t decoded = 0;
+            while( decoded < numAlts-1 && m_BinDecoder.decodeBin( Ctx::ctbAlfAlternative( compIdx-1 ) ) )
+              ++ decoded;
+            ctbAlfAlternative[ctuRsAddr] = decoded;
+          }
+        }
       }
     }
   }
 
-  bool isLast = false;
 
   if ( CS::isDualITree(cs) && cs.pcv->chrFormat != CHROMA_400 && cs.pcv->maxCUWidth > 64 )
   {
-    Partitioner *chromaPartitioner = PartitionerFactory::get(*cs.slice);
-    chromaPartitioner->initCtu(area, CH_C, *cs.slice);
+    QTBTPartitioner chromaPartitioner;
+    chromaPartitioner.initCtu(area, CH_C, *cs.slice);
     CUCtx cuCtxChroma(qps[CH_C]);
-    isLast = coding_tree(cs, *partitioner, cuCtx, chromaPartitioner, &cuCtxChroma);
+    coding_tree(cs, partitioner, cuCtx, &chromaPartitioner, &cuCtxChroma);
     qps[CH_L] = cuCtx.qp;
     qps[CH_C] = cuCtxChroma.qp;
-    delete chromaPartitioner;
   }
   else
   {
-    isLast = coding_tree(cs, *partitioner, cuCtx);
+    coding_tree(cs, partitioner, cuCtx);
     qps[CH_L] = cuCtx.qp;
-    if( !isLast && CS::isDualITree( cs ) && cs.pcv->chrFormat != CHROMA_400 )
+    if( CS::isDualITree( cs ) && cs.pcv->chrFormat != CHROMA_400 )
     {
       CUCtx cuCtxChroma( qps[CH_C] );
-      partitioner->initCtu( area, CH_C, *cs.slice );
-      isLast = coding_tree( cs, *partitioner, cuCtxChroma );
+      partitioner.initCtu(area, CH_C, *cs.slice);
+      coding_tree(cs, partitioner, cuCtxChroma);
       qps[CH_C] = cuCtxChroma.qp;
     }
   }
@@ -209,10 +221,36 @@ bool CABACReader::coding_tree_unit( CodingStructure& cs, const UnitArea& area, i
   DTRACE_COND( ctuRsAddr == 0, g_trace_ctx, D_QP_PER_CTU, "\n%4d %2d", cs.picture->poc, cs.slice->getSliceQpBase() );
   DTRACE     (                 g_trace_ctx, D_QP_PER_CTU, " %3d",           qps[CH_L] - cs.slice->getSliceQpBase() );
 
-  delete partitioner;
-  return isLast;
 }
 
+void CABACReader::readAlfCtuFilterIndex(CodingStructure& cs, unsigned ctuRsAddr)
+{
+  short* alfCtbFilterSetIndex = cs.slice->getPic()->getAlfCtbFilterIndex();
+  unsigned numAps = cs.slice->getTileGroupNumAps();
+  unsigned numAvailableFiltSets = numAps + NUM_FIXED_FILTER_SETS;
+  uint32_t filtIndex = 0;
+  if (numAvailableFiltSets > NUM_FIXED_FILTER_SETS)
+  {
+    unsigned usePrevFilt = m_BinDecoder.decodeBin(Ctx::AlfUseTemporalFilt());
+    if (usePrevFilt)
+    {
+      if (numAps > 1)
+      {
+        xReadTruncBinCode(filtIndex, numAvailableFiltSets - NUM_FIXED_FILTER_SETS);
+      }
+      filtIndex += (unsigned)(NUM_FIXED_FILTER_SETS);
+    }
+    else
+    {
+      xReadTruncBinCode(filtIndex, NUM_FIXED_FILTER_SETS);
+    }
+  }
+  else
+  {
+    xReadTruncBinCode(filtIndex, NUM_FIXED_FILTER_SETS);
+  }
+  alfCtbFilterSetIndex[ctuRsAddr] = filtIndex;
+}
 //================================================================================
 //  clause 7.3.8.3
 //--------------------------------------------------------------------------------
@@ -250,22 +288,14 @@ void CABACReader::sao( CodingStructure& cs, unsigned ctuRsAddr )
 
   RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET( STATS__CABAC_BITS__SAO );
 
-#if HEVC_TILES_WPP
-  const unsigned  curTileIdx  = cs.picture->tileMap->getTileIdxMap( pos );
-  if( cs.getCURestricted( pos.offset(-(int)cs.pcv->maxCUWidth, 0), curSliceIdx, curTileIdx, CH_L ) )
-#else
-  if( cs.getCURestricted( pos.offset(-(int)cs.pcv->maxCUWidth, 0), curSliceIdx, CH_L ) )
-#endif
+  const unsigned  curTileIdx  = cs.pps->getTileIdx( pos );
+  if( cs.getCURestricted( pos.offset(-(int)cs.pcv->maxCUWidth, 0), pos, curSliceIdx, curTileIdx, CH_L ) )
   {
     // sao_merge_left_flag
     sao_merge_type  += int( m_BinDecoder.decodeBin( Ctx::SaoMergeFlag() ) );
   }
 
-#if HEVC_TILES_WPP
-  if( sao_merge_type < 0 && cs.getCURestricted( pos.offset(0, -(int)cs.pcv->maxCUHeight), curSliceIdx, curTileIdx, CH_L ) )
-#else
-  if( sao_merge_type < 0 && cs.getCURestricted( pos.offset(0, -(int)cs.pcv->maxCUHeight), curSliceIdx, CH_L ) )
-#endif
+  if( sao_merge_type < 0 && cs.getCURestricted( pos.offset(0, -(int)cs.pcv->maxCUHeight), pos, curSliceIdx, curTileIdx, CH_L ) )
   {
     // sao_merge_above_flag
     sao_merge_type  += int( m_BinDecoder.decodeBin( Ctx::SaoMergeFlag() ) ) << 1;
@@ -373,19 +403,19 @@ void CABACReader::sao( CodingStructure& cs, unsigned ctuRsAddr )
 //================================================================================
 //  clause 7.3.8.4
 //--------------------------------------------------------------------------------
-//    bool  coding_tree       ( cs, partitioner, cuCtx )
+//    void  coding_tree       ( cs, partitioner, cuCtx )
 //    bool  split_cu_flag     ( cs, partitioner )
 //    split split_cu_mode_mt  ( cs, partitioner )
 //================================================================================
 
-bool CABACReader::coding_tree( CodingStructure& cs, Partitioner& partitioner, CUCtx& cuCtx, Partitioner* pPartitionerChroma, CUCtx* pCuCtxChroma)
+void CABACReader::coding_tree( CodingStructure& cs, Partitioner& partitioner, CUCtx& cuCtx, Partitioner* pPartitionerChroma, CUCtx* pCuCtxChroma)
 {
   const PPS      &pps         = *cs.pps;
   const UnitArea &currArea    = partitioner.currArea();
-  bool           lastSegment  = false;
 
   // Reset delta QP coding flag and ChromaQPAdjustemt coding flag
-  if( pps.getUseDQP() && partitioner.currQgEnable() )
+  //Note: do not reset qg at chroma CU
+  if( pps.getUseDQP() && partitioner.currQgEnable() && !isChroma(partitioner.chType) )
   {
     cuCtx.qgStart    = true;
     cuCtx.isDQPCoded = false;
@@ -408,7 +438,6 @@ bool CABACReader::coding_tree( CodingStructure& cs, Partitioner& partitioner, CU
       pCuCtxChroma->isChromaQpAdjCoded = false;
     }
   }
-  int startShareThisLevel = 0;
 
   const PartSplit splitMode = split_cu_mode( cs, partitioner );
 
@@ -416,31 +445,6 @@ bool CABACReader::coding_tree( CodingStructure& cs, Partitioner& partitioner, CU
 
   if( splitMode != CU_DONT_SPLIT )
   {
-      const PartSplit split = splitMode;
-      int splitRatio = 1;
-      CHECK(!(split == CU_QUAD_SPLIT || split == CU_HORZ_SPLIT || split == CU_VERT_SPLIT
-        || split == CU_TRIH_SPLIT || split == CU_TRIV_SPLIT), "invalid split type");
-      splitRatio = (split == CU_HORZ_SPLIT || split == CU_VERT_SPLIT) ? 1 : 2;
-
-      bool isOneChildSmall = (((partitioner.currArea().lwidth())*(partitioner.currArea().lheight())) >> splitRatio) < MRG_SHARELIST_SHARSIZE;
-
-      if ((((partitioner.currArea().lwidth())*(partitioner.currArea().lheight())) > (MRG_SHARELIST_SHARSIZE * 1)))
-      {
-        shareStateDec = NO_SHARE;
-      }
-
-      if (shareStateDec == NO_SHARE)//init state
-      {
-        if (isOneChildSmall)
-        {
-          shareStateDec = SHARING;//share start state
-          startShareThisLevel = 1;
-
-          shareParentPos = partitioner.currArea().lumaPos();
-          shareParentSize.width = partitioner.currArea().lwidth();
-          shareParentSize.height = partitioner.currArea().lheight();
-        }
-      }
       if (CS::isDualITree(cs) && pPartitionerChroma != nullptr && (partitioner.currArea().lwidth() >= 64 || partitioner.currArea().lheight() >= 64))
       {
         partitioner.splitCurrArea(CU_QUAD_SPLIT, cs);
@@ -448,15 +452,14 @@ bool CABACReader::coding_tree( CodingStructure& cs, Partitioner& partitioner, CU
         bool beContinue = true;
         bool lumaContinue = true;
         bool chromaContinue = true;
-        bool lastSegmentC = false;
 
         while (beContinue)
         {
           if (partitioner.currArea().lwidth() > 64 || partitioner.currArea().lheight() > 64)
           {
-            if (!lastSegmentC && cs.area.blocks[partitioner.chType].contains(partitioner.currArea().blocks[partitioner.chType].pos()))
+            if (cs.area.blocks[partitioner.chType].contains(partitioner.currArea().blocks[partitioner.chType].pos()))
             {
-              lastSegmentC = coding_tree(cs, partitioner, cuCtx, pPartitionerChroma, pCuCtxChroma);
+              coding_tree(cs, partitioner, cuCtx, pPartitionerChroma, pCuCtxChroma);
             }
             lumaContinue = partitioner.nextPart(cs);
             chromaContinue = pPartitionerChroma->nextPart(cs);
@@ -466,18 +469,17 @@ bool CABACReader::coding_tree( CodingStructure& cs, Partitioner& partitioner, CU
           else
           {
             //dual tree coding under 64x64 block
-            if (!lastSegment && cs.area.blocks[partitioner.chType].contains(partitioner.currArea().blocks[partitioner.chType].pos()))
+            if (cs.area.blocks[partitioner.chType].contains(partitioner.currArea().blocks[partitioner.chType].pos()))
             {
-              lastSegment = coding_tree(cs, partitioner, cuCtx);
+              coding_tree(cs, partitioner, cuCtx);
             }
             lumaContinue = partitioner.nextPart(cs);
-            if (!lastSegmentC && cs.area.blocks[pPartitionerChroma->chType].contains(pPartitionerChroma->currArea().blocks[pPartitionerChroma->chType].pos()))
+            if (cs.area.blocks[pPartitionerChroma->chType].contains(pPartitionerChroma->currArea().blocks[pPartitionerChroma->chType].pos()))
             {
-              lastSegmentC = coding_tree(cs, *pPartitionerChroma, *pCuCtxChroma);
+              coding_tree(cs, *pPartitionerChroma, *pCuCtxChroma);
             }
             chromaContinue = pPartitionerChroma->nextPart(cs);
             CHECK(lumaContinue != chromaContinue, "luma chroma partition should be matched");
-            CHECK(lastSegment == true, "luma should not be the last segment");
             beContinue = lumaContinue;
           }
         }
@@ -516,33 +518,57 @@ bool CABACReader::coding_tree( CodingStructure& cs, Partitioner& partitioner, CU
         CodingUnit* chromaFirstCu = cs.getCU(pPartitionerChroma->currArea().chromaPos(), CHANNEL_TYPE_CHROMA);
         tempLastLumaCu->next = chromaFirstCu;
 
-        lastSegment = lastSegmentC;
       }
       else
       {
+        const ModeType modeTypeParent = partitioner.modeType;
+        cs.modeType = partitioner.modeType = mode_constraint( cs, partitioner, splitMode ); //change for child nodes
+        // chroma is left unsplit when the parent allowed all modes but the child nodes are constrained to intra
+        bool chromaNotSplit = modeTypeParent == MODE_TYPE_ALL && partitioner.modeType == MODE_TYPE_INTRA;
+        CHECK( chromaNotSplit && partitioner.chType != CHANNEL_TYPE_LUMA, "chType must be luma" );
+        if( partitioner.treeType == TREE_D )
+        {
+          cs.treeType = partitioner.treeType = chromaNotSplit ? TREE_L : TREE_D;
+        }
       partitioner.splitCurrArea( splitMode, cs );
       do
       {
-        if( !lastSegment && cs.area.blocks[partitioner.chType].contains( partitioner.currArea().blocks[partitioner.chType].pos() ) )
+        if( cs.area.blocks[partitioner.chType].contains( partitioner.currArea().blocks[partitioner.chType].pos() ) )
         {
-          lastSegment = coding_tree( cs, partitioner, cuCtx );
+          coding_tree( cs, partitioner, cuCtx );
         }
       } while( partitioner.nextPart( cs ) );
 
       partitioner.exitCurrSplit();
+      if( chromaNotSplit )
+      {
+        CHECK( partitioner.chType != CHANNEL_TYPE_LUMA, "must be luma status" );
+        partitioner.chType = CHANNEL_TYPE_CHROMA;
+        cs.treeType = partitioner.treeType = TREE_C;
+
+        if( cs.picture->blocks[partitioner.chType].contains( partitioner.currArea().blocks[partitioner.chType].pos() ) )
+        {
+          coding_tree( cs, partitioner, cuCtx );
+        }
+
+        // restore the luma channel type and TREE_D after the chroma pass
+        partitioner.chType = CHANNEL_TYPE_LUMA;
+        cs.treeType = partitioner.treeType = TREE_D;
+      }
+
+      //recover ModeType
+      cs.modeType = partitioner.modeType = modeTypeParent;
       }
-      if (startShareThisLevel == 1)
-        shareStateDec = NO_SHARE;
-      return lastSegment;
+      return;
   }
 
   CodingUnit& cu = cs.addCU( CS::getArea( cs, currArea, partitioner.chType ), partitioner.chType );
 
   partitioner.setCUData( cu );
   cu.slice   = cs.slice;
-#if HEVC_TILES_WPP
-  cu.tileIdx = cs.picture->tileMap->getTileIdxMap( currArea.lumaPos() );
-#endif
+  cu.tileIdx = cs.pps->getTileIdx( currArea.lumaPos() );
+  CHECK( cu.cs->treeType != partitioner.treeType, "treeType mismatch" );
+  int lumaQPinLocalDualTree = -1;
 
   // Predict QP on start of quantization group
   if( cuCtx.qgStart )
@@ -551,11 +577,15 @@ bool CABACReader::coding_tree( CodingStructure& cs, Partitioner& partitioner, CU
     cuCtx.qp = CU::predictQP( cu, cuCtx.qp );
   }
 
-  if (pps.getUseDQP() && CS::isDualITree(cs) && isChroma(cu.chType))
+  if (pps.getUseDQP() && partitioner.isSepTree(cs) && isChroma(cu.chType))
   {
     const Position chromaCentral(cu.chromaPos().offset(cu.chromaSize().width >> 1, cu.chromaSize().height >> 1));
     const Position lumaRefPos(chromaCentral.x << getComponentScaleX(COMPONENT_Cb, cu.chromaFormat), chromaCentral.y << getComponentScaleY(COMPONENT_Cb, cu.chromaFormat));
-    const CodingUnit* colLumaCu = cs.getCU(lumaRefPos, CHANNEL_TYPE_LUMA);
+    // Derive the chroma QP. It is stored in cuCtx.qp, which otherwise carries the luma QP,
+    // so cuCtx.qp is saved here and must be restored after the chroma CU is decoded, before the next luma CU's QP is decoded.
+    const CodingUnit* colLumaCu = cs.getLumaCU( lumaRefPos );
+    CHECK( colLumaCu == nullptr, "colLumaCU shall exist" );
+    lumaQPinLocalDualTree = cuCtx.qp;
 
     if (colLumaCu) cuCtx.qp = colLumaCu->qp;
   }
@@ -564,15 +594,69 @@ bool CABACReader::coding_tree( CodingStructure& cs, Partitioner& partitioner, CU
   cu.chromaQpAdj = cs.chromaQpAdj;  //NOTE: CU chroma QP adjustment can be changed by adjustment signaling at TU level
 
   // coding unit
-    cu.shareParentPos = (shareStateDec == SHARING) ? shareParentPos : partitioner.currArea().lumaPos();
-    cu.shareParentSize = (shareStateDec == SHARING) ? shareParentSize : partitioner.currArea().lumaSize();
 
-  bool isLastCtu = coding_unit( cu, partitioner, cuCtx );
+  coding_unit( cu, partitioner, cuCtx );
+  // restore cuCtx.qp to the luma QP that was saved before the chroma CU was decoded
+  if( pps.getUseDQP() && partitioner.isSepTree( cs ) && isChroma( cu.chType ) )
+  {
+    cuCtx.qp = lumaQPinLocalDualTree;
+  }
 
+  uint32_t compBegin;
+  uint32_t numComp;
+  bool jointPLT = false;
+  if (cu.isSepTree())
+  {
+    if (isLuma(partitioner.chType))
+    {
+      compBegin = COMPONENT_Y;
+      numComp = 1;
+    }
+    else
+    {
+      compBegin = COMPONENT_Cb;
+      numComp = 2;
+    }
+  }
+  else
+  {
+    compBegin = COMPONENT_Y;
+    numComp = 3;
+    jointPLT = true;
+  }
+  if (CU::isPLT(cu))
+  {
+    cs.reorderPrevPLT(cs.prevPLT, cu.curPLTSize, cu.curPLT, cu.reuseflag, compBegin, numComp, jointPLT);
+  }
+  if( cu.chType == CHANNEL_TYPE_CHROMA )
+  {
+    DTRACE( g_trace_ctx, D_QP, "[chroma CU]x=%d, y=%d, w=%d, h=%d, qp=%d\n", cu.Cb().x, cu.Cb().y, cu.Cb().width, cu.Cb().height, cu.qp );
+  }
+  else
+  {
   DTRACE( g_trace_ctx, D_QP, "x=%d, y=%d, w=%d, h=%d, qp=%d\n", cu.Y().x, cu.Y().y, cu.Y().width, cu.Y().height, cu.qp );
-  if (startShareThisLevel == 1)
-    shareStateDec = NO_SHARE;
-  return isLastCtu;
+  }
+}
+
+ModeType CABACReader::mode_constraint( CodingStructure& cs, Partitioner &partitioner, PartSplit splitMode )
+{
+  int val = cs.signalModeCons( splitMode, partitioner, partitioner.modeType );
+  if( val == LDT_MODE_TYPE_SIGNAL )
+  {
+    int ctxIdx = DeriveCtx::CtxModeConsFlag( cs, partitioner );
+    RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET_SIZE2( STATS__CABAC_BITS__MODE_CONSTRAINT_FLAG, partitioner.currArea().blocks[partitioner.chType].size(), partitioner.chType );
+    bool flag = m_BinDecoder.decodeBin( Ctx::ModeConsFlag( ctxIdx ) );
+    DTRACE( g_trace_ctx, D_SYNTAX, "mode_cons_flag() flag=%d\n", flag );
+    return flag ? MODE_TYPE_INTRA : MODE_TYPE_INTER;
+  }
+  else if( val == LDT_MODE_TYPE_INFER )
+  {
+    return MODE_TYPE_INTRA;
+  }
+  else
+  {
+    return partitioner.modeType;
+  }
 }
 
 PartSplit CABACReader::split_cu_mode( CodingStructure& cs, Partitioner &partitioner )
@@ -647,32 +731,25 @@ PartSplit CABACReader::split_cu_mode( CodingStructure& cs, Partitioner &partitio
 //================================================================================
 //  clause 7.3.8.5
 //--------------------------------------------------------------------------------
-//    bool  coding_unit               ( cu, partitioner, cuCtx )
-//    void  cu_transquant_bypass_flag ( cu )
+//    void  coding_unit               ( cu, partitioner, cuCtx )
 //    void  cu_skip_flag              ( cu )
 //    void  pred_mode                 ( cu )
 //    void  part_mode                 ( cu )
-//    void  pcm_flag                  ( cu )
-//    void  pcm_samples               ( tu )
 //    void  cu_pred_data              ( pus )
 //    void  cu_lic_flag               ( cu )
 //    void  intra_luma_pred_modes     ( pus )
 //    void  intra_chroma_pred_mode    ( pu )
 //    void  cu_residual               ( cu, partitioner, cuCtx )
 //    void  rqt_root_cbf              ( cu )
-//    bool  end_of_ctu                ( cu, cuCtx )
+//    void  end_of_ctu                ( cu, cuCtx )
 //================================================================================
 
-bool CABACReader::coding_unit( CodingUnit &cu, Partitioner &partitioner, CUCtx& cuCtx )
+void CABACReader::coding_unit( CodingUnit &cu, Partitioner &partitioner, CUCtx& cuCtx )
 {
   CodingStructure& cs = *cu.cs;
-  cs.chType = partitioner.chType;
-  // transquant bypass flag
-  if( cs.pps->getTransquantBypassEnabledFlag() )
-  {
-    cu_transquant_bypass_flag( cu );
-  }
-
+  CHECK( cu.treeType != partitioner.treeType || cu.modeType != partitioner.modeType, "treeType or modeType mismatch" );
+  DTRACE( g_trace_ctx, D_SYNTAX, "coding_unit() treeType=%d modeType=%d\n", cu.treeType, cu.modeType );
+  PredictionUnit&    pu = cs.addPU(cu, partitioner.chType);
   // skip flag
   if ((!cs.slice->isIntra() || cs.slice->getSPS()->getIBCFlag()) && cu.Y().valid())
   {
@@ -682,36 +759,47 @@ bool CABACReader::coding_unit( CodingUnit &cu, Partitioner &partitioner, CUCtx&
   // skip data
   if( cu.skip )
   {
+    cu.colorTransform = false;
     cs.addTU         ( cu, partitioner.chType );
-    PredictionUnit&    pu = cs.addPU( cu, partitioner.chType );
-    pu.shareParentPos = cu.shareParentPos;
-    pu.shareParentSize = cu.shareParentSize;
     MergeCtx           mrgCtx;
     prediction_unit  ( pu, mrgCtx );
-    return end_of_ctu( cu, cuCtx );
+    end_of_ctu( cu, cuCtx );
+    return;
   }
 
   // prediction mode and partitioning data
   pred_mode ( cu );
-
-  // --> create PUs
-  CU::addPUs( cu );
-
-  // pcm samples
-  if( CU::isIntra(cu) )
+  if (CU::isIntra(cu))
+  {
+    adaptive_color_transform(cu);
+  }
+  if (CU::isPLT(cu))
   {
-    pcm_flag( cu, partitioner );
-    if( cu.ipcm )
+    cu.colorTransform = false;
+    cs.addTU(cu, partitioner.chType);
+    if (cu.isSepTree())
     {
-      TransformUnit& tu = cs.addTU( cu, partitioner.chType );
-      pcm_samples( tu );
-      return end_of_ctu( cu, cuCtx );
+      if (isLuma(partitioner.chType))
+      {
+        cu_palette_info(cu, COMPONENT_Y, 1, cuCtx);
+      }
+      if (cu.chromaFormat != CHROMA_400 && (partitioner.chType == CHANNEL_TYPE_CHROMA))
+      {
+        cu_palette_info(cu, COMPONENT_Cb, 2, cuCtx);
+      }
     }
+    else
+    {
+      cu_palette_info(cu, COMPONENT_Y, 3, cuCtx);
+    }
+    end_of_ctu(cu, cuCtx);
+    return;
   }
+  bdpcm_mode( cu, ComponentID( partitioner.chType ) );
+  if (!CS::isDualITree(*cu.cs) && isLuma(partitioner.chType))
+      bdpcm_mode(cu, ComponentID(CHANNEL_TYPE_CHROMA));
 
-  extend_ref_line( cu );
-
-  isp_mode( cu );
+  // note: the PU was already created at function entry via cs.addPU()
 
   // prediction data ( intra prediction modes / reference indexes + motion vectors )
   cu_pred_data( cu );
@@ -720,28 +808,21 @@ bool CABACReader::coding_unit( CodingUnit &cu, Partitioner &partitioner, CUCtx&
   cu_residual( cu, partitioner, cuCtx );
 
   // check end of cu
-  return end_of_ctu( cu, cuCtx );
-}
-
-
-void CABACReader::cu_transquant_bypass_flag( CodingUnit& cu )
-{
-  RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET( STATS__CABAC_BITS__TQ_BYPASS_FLAG );
-
-  cu.transQuantBypass = ( m_BinDecoder.decodeBin( Ctx::TransquantBypassFlag() ) );
+  end_of_ctu( cu, cuCtx );
 }
 
-
 void CABACReader::cu_skip_flag( CodingUnit& cu )
 {
   RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET( STATS__CABAC_BITS__SKIP_FLAG );
 
-  if (cu.slice->isIntra() && cu.cs->slice->getSPS()->getIBCFlag())
+  if ((cu.slice->isIntra() || cu.isConsIntra()) && cu.cs->slice->getSPS()->getIBCFlag())
   {
     cu.skip = false;
     cu.rootCbf = false;
     cu.predMode = MODE_INTRA;
     cu.mmvdSkip = false;
+    if (cu.lwidth() < 128 && cu.lheight() < 128) // disable IBC mode larger than 64x64
+    {
     unsigned ctxId = DeriveCtx::CtxSkipFlag(cu);
     unsigned skip = m_BinDecoder.decodeBin(Ctx::SkipFlag(ctxId));
     DTRACE( g_trace_ctx, D_SYNTAX, "cu_skip_flag() ctx=%d skip=%d\n", ctxId, skip ? 1 : 0 );
@@ -752,10 +833,17 @@ void CABACReader::cu_skip_flag( CodingUnit& cu )
       cu.predMode = MODE_IBC;
       cu.mmvdSkip = false;
     }
-
+    }
+    return;
+  }
+  if ( !cu.cs->slice->getSPS()->getIBCFlag() && cu.lwidth() == 4 && cu.lheight() == 4 )
+  {
+    return;
+  }
+  if( !cu.cs->slice->getSPS()->getIBCFlag() && cu.isConsIntra() )
+  {
     return;
   }
-
   unsigned ctxId  = DeriveCtx::CtxSkipFlag(cu);
   unsigned skip   = m_BinDecoder.decodeBin( Ctx::SkipFlag(ctxId) );
 
@@ -763,6 +851,16 @@ void CABACReader::cu_skip_flag( CodingUnit& cu )
 
   if (skip && cu.cs->slice->getSPS()->getIBCFlag())
   {
+    if (cu.lwidth() < 128 && cu.lheight() < 128 && !cu.isConsInter()) // disable IBC mode larger than 64x64 and disable IBC when only allowing inter mode
+    {
+      if ( cu.lwidth() == 4 && cu.lheight() == 4 )
+      {
+        cu.skip     = true;
+        cu.rootCbf  = false;
+        cu.predMode = MODE_IBC;
+        cu.mmvdSkip = false;
+        return;
+      }
     unsigned ctxidx = DeriveCtx::CtxIBCFlag(cu);
     if (m_BinDecoder.decodeBin(Ctx::IBCFlag(ctxidx)))
     {
@@ -770,23 +868,22 @@ void CABACReader::cu_skip_flag( CodingUnit& cu )
       cu.rootCbf = false;
       cu.predMode = MODE_IBC;
       cu.mmvdSkip = false;
+      cu.firstPU->regularMergeFlag = false;
     }
     else
     {
       cu.predMode = MODE_INTER;
     }
     DTRACE(g_trace_ctx, D_SYNTAX, "ibc() ctx=%d cu.predMode=%d\n", ctxidx, cu.predMode);
+    }
+    else
+    {
+      cu.predMode = MODE_INTER;
+    }
   }
   if ((skip && CU::isInter(cu) && cu.cs->slice->getSPS()->getIBCFlag()) ||
     (skip && !cu.cs->slice->getSPS()->getIBCFlag()))
   {
-#if JVET_MMVD_OFF_MACRO
-    cu.mmvdSkip = false;
-#else
-    unsigned mmvdSkip = m_BinDecoder.decodeBin(Ctx::MmvdFlag(0));
-    cu.mmvdSkip = mmvdSkip;
-    DTRACE(g_trace_ctx, D_SYNTAX, "mmvd_cu_skip_flag() ctx=%d mmvd_skip=%d\n", 0, mmvdSkip ? 1 : 0);
-#endif
     cu.skip     = true;
     cu.rootCbf  = false;
     cu.predMode = MODE_INTER;
@@ -816,21 +913,30 @@ void CABACReader::imv_mode( CodingUnit& cu, MergeCtx& mrgCtx )
   const SPS *sps = cu.cs->sps;
 
   unsigned value = 0;
-  unsigned ctxId = DeriveCtx::CtxIMVFlag( cu );
   if (CU::isIBC(cu))
     value = 1;
   else
-    value = m_BinDecoder.decodeBin( Ctx::ImvFlag( ctxId ) );
-  DTRACE( g_trace_ctx, D_SYNTAX, "imv_mode() value=%d ctx=%d\n", value, ctxId );
+    value = m_BinDecoder.decodeBin( Ctx::ImvFlag( 0 ) );
+  DTRACE( g_trace_ctx, D_SYNTAX, "imv_mode() value=%d ctx=%d\n", value, 0 );
 
+    cu.imv = value;
   if( sps->getAMVREnabledFlag() && value )
   {
-    value = m_BinDecoder.decodeBin( Ctx::ImvFlag( 3 ) );
-    DTRACE( g_trace_ctx, D_SYNTAX, "imv_mode() value=%d ctx=%d\n", value, 3 );
+    if (!CU::isIBC(cu))
+    {
+      value = m_BinDecoder.decodeBin(Ctx::ImvFlag(4));
+      DTRACE(g_trace_ctx, D_SYNTAX, "imv_mode() value=%d ctx=%d\n", value, 4);
+      cu.imv = value ? 1 : IMV_HPEL;
+    }
+    if (value)
+    {
+    value = m_BinDecoder.decodeBin( Ctx::ImvFlag( 1 ) );
+    DTRACE( g_trace_ctx, D_SYNTAX, "imv_mode() value=%d ctx=%d\n", value, 1 );
     value++;
+      cu.imv = value;
+    }
   }
 
-  cu.imv = value;
   DTRACE( g_trace_ctx, D_SYNTAX, "imv_mode() IMVFlag=%d\n", cu.imv );
 }
 
@@ -851,13 +957,13 @@ void CABACReader::affine_amvr_mode( CodingUnit& cu, MergeCtx& mrgCtx )
   }
 
   unsigned value = 0;
-  value = m_BinDecoder.decodeBin( Ctx::ImvFlag( 4 ) );
-  DTRACE( g_trace_ctx, D_SYNTAX, "affine_amvr_mode() value=%d ctx=%d\n", value, 4 );
+  value = m_BinDecoder.decodeBin( Ctx::ImvFlag( 2 ) );
+  DTRACE( g_trace_ctx, D_SYNTAX, "affine_amvr_mode() value=%d ctx=%d\n", value, 2 );
 
   if( value )
   {
-    value = m_BinDecoder.decodeBin( Ctx::ImvFlag( 5 ) );
-    DTRACE( g_trace_ctx, D_SYNTAX, "affine_amvr_mode() value=%d ctx=%d\n", value, 5 );
+    value = m_BinDecoder.decodeBin( Ctx::ImvFlag( 3 ) );
+    DTRACE( g_trace_ctx, D_SYNTAX, "affine_amvr_mode() value=%d ctx=%d\n", value, 3 );
     value++;
   }
 
@@ -868,60 +974,135 @@ void CABACReader::affine_amvr_mode( CodingUnit& cu, MergeCtx& mrgCtx )
 void CABACReader::pred_mode( CodingUnit& cu )
 {
   RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET( STATS__CABAC_BITS__PRED_MODE );
-
-  if (cu.cs->slice->getSPS()->getIBCFlag())
+  if (cu.cs->slice->getSPS()->getIBCFlag() && cu.chType != CHANNEL_TYPE_CHROMA)
   {
-    if (cu.cs->slice->isIntra())
+    if( cu.isConsInter() )
+    {
+      cu.predMode = MODE_INTER;
+      return;
+    }
+
+    if ( cu.cs->slice->isIntra() || ( cu.lwidth() == 4 && cu.lheight() == 4 ) || cu.isConsIntra() )
     {
       cu.predMode = MODE_INTRA;
+      if (cu.lwidth() < 128 && cu.lheight() < 128) // disable IBC mode larger than 64x64
+      {
       unsigned ctxidx = DeriveCtx::CtxIBCFlag(cu);
       if (m_BinDecoder.decodeBin(Ctx::IBCFlag(ctxidx)))
       {
         cu.predMode = MODE_IBC;
       }
+      }
+      if (!CU::isIBC(cu) && cu.cs->slice->getSPS()->getPLTMode() && cu.lwidth() <= 64 && cu.lheight() <= 64)
+      {
+        if (m_BinDecoder.decodeBin(Ctx::PLTFlag(0)))
+        {
+          cu.predMode = MODE_PLT;
+        }
+      }
     }
     else
     {
       if (m_BinDecoder.decodeBin(Ctx::PredMode(DeriveCtx::CtxPredModeFlag(cu))))
       {
         cu.predMode = MODE_INTRA;
+        if (cu.cs->slice->getSPS()->getPLTMode() && cu.lwidth() <= 64 && cu.lheight() <= 64)
+        {
+          if (m_BinDecoder.decodeBin(Ctx::PLTFlag(0)))
+          {
+            cu.predMode = MODE_PLT;
+          }
+        }
       }
       else
       {
         cu.predMode = MODE_INTER;
+        if (cu.lwidth() < 128 && cu.lheight() < 128) // disable IBC mode larger than 64x64
+        {
         unsigned ctxidx = DeriveCtx::CtxIBCFlag(cu);
         if (m_BinDecoder.decodeBin(Ctx::IBCFlag(ctxidx)))
         {
           cu.predMode = MODE_IBC;
         }
+        }
       }
     }
   }
   else
   {
-    if (cu.cs->slice->isIntra() || m_BinDecoder.decodeBin(Ctx::PredMode(DeriveCtx::CtxPredModeFlag(cu))))
+    if( cu.isConsInter() )
+    {
+      cu.predMode = MODE_INTER;
+      return;
+    }
+
+    if ( cu.cs->slice->isIntra() || (cu.lwidth() == 4 && cu.lheight() == 4) || cu.isConsIntra() )
     {
       cu.predMode = MODE_INTRA;
+      if (cu.cs->slice->getSPS()->getPLTMode() && cu.lwidth() <= 64 && cu.lheight() <= 64)
+      {
+        if (m_BinDecoder.decodeBin(Ctx::PLTFlag(0)))
+        {
+          cu.predMode = MODE_PLT;
+        }
+      }
     }
     else
     {
-      cu.predMode = MODE_INTER;
+      cu.predMode = m_BinDecoder.decodeBin(Ctx::PredMode(DeriveCtx::CtxPredModeFlag(cu))) ? MODE_INTRA : MODE_INTER;
+      if (CU::isIntra(cu) && cu.cs->slice->getSPS()->getPLTMode() && cu.lwidth() <= 64 && cu.lheight() <= 64)
+      {
+        if (m_BinDecoder.decodeBin(Ctx::PLTFlag(0)))
+        {
+          cu.predMode = MODE_PLT;
+        }
+      }
     }
   }
 }
-
-void CABACReader::pcm_flag( CodingUnit& cu, Partitioner &partitioner )
+void CABACReader::bdpcm_mode( CodingUnit& cu, const ComponentID compID )
 {
-  const SPS& sps = *cu.cs->sps;
-  if( !sps.getPCMEnabledFlag() || partitioner.currArea().lwidth() > (1 << sps.getPCMLog2MaxSize()) || partitioner.currArea().lwidth() < (1 << sps.getPCMLog2MinSize())
-      || partitioner.currArea().lheight() > (1 << sps.getPCMLog2MaxSize()) || partitioner.currArea().lheight() < (1 << sps.getPCMLog2MinSize()) )
+
+  if (!CU::bdpcmAllowed(cu, compID))
   {
-    cu.ipcm = false;
-    return;
+     if (isLuma(compID))
+     {
+        cu.bdpcmMode = 0;
+         if (!CS::isDualITree(*cu.cs))
+             cu.bdpcmModeChroma = 0;
+     }
+     else
+     {
+         cu.bdpcmModeChroma = 0;
+     }
+     return;
   }
-  cu.ipcm = ( m_BinDecoder.decodeBinTrm() );
-}
 
+  RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET_SIZE2( STATS__CABAC_BITS__BDPCM_MODE, cu.block(compID).lumaSize(), compID );
+
+  int bdpcmMode;
+  bdpcmMode = m_BinDecoder.decodeBin(Ctx::BDPCMMode(0));
+  if (bdpcmMode)
+  {
+    bdpcmMode += m_BinDecoder.decodeBin(Ctx::BDPCMMode(1));
+  }
+  if (isLuma(compID))
+  {
+    cu.bdpcmMode = bdpcmMode;
+  }
+  else
+  {
+    cu.bdpcmModeChroma = bdpcmMode;
+  }
+  if (isLuma(compID))
+  {
+    DTRACE(g_trace_ctx, D_SYNTAX, "bdpcm_mode(%d) x=%d, y=%d, w=%d, h=%d, bdpcm=%d\n", CHANNEL_TYPE_LUMA, cu.lumaPos().x, cu.lumaPos().y, cu.lwidth(), cu.lheight(), cu.bdpcmMode);
+  }
+  else
+  {
+    DTRACE(g_trace_ctx, D_SYNTAX, "bdpcm_mode(%d) x=%d, y=%d, w=%d, h=%d, bdpcm=%d\n", CHANNEL_TYPE_CHROMA, cu.chromaPos().x, cu.chromaPos().y, cu.chromaSize().width, cu.chromaSize().height, cu.bdpcmModeChroma);
+  }
+}
 
 void CABACReader::cu_pred_data( CodingUnit &cu )
 {
@@ -940,59 +1121,53 @@ void CABACReader::cu_pred_data( CodingUnit &cu )
 
   for( auto &pu : CU::traversePUs( cu ) )
   {
-    pu.shareParentPos = cu.shareParentPos;
-    pu.shareParentSize = cu.shareParentSize;
     prediction_unit( pu, mrgCtx );
   }
 
   imv_mode   ( cu, mrgCtx );
   affine_amvr_mode( cu, mrgCtx );
-  cu_gbi_flag( cu );
+  cu_bcw_flag( cu );
 
 }
 
-void CABACReader::cu_gbi_flag(CodingUnit& cu)
+void CABACReader::cu_bcw_flag(CodingUnit& cu)
 {
-  if(!CU::isGBiIdxCoded(cu))
+  if(!CU::isBcwIdxCoded(cu))
   {
     return;
   }
 
-  CHECK(!(GBI_NUM > 1 && (GBI_NUM == 2 || (GBI_NUM & 0x01) == 1)), " !( GBI_NUM > 1 && ( GBI_NUM == 2 || ( GBI_NUM & 0x01 ) == 1 ) ) ");
+  CHECK(!(BCW_NUM > 1 && (BCW_NUM == 2 || (BCW_NUM & 0x01) == 1)), " !( BCW_NUM > 1 && ( BCW_NUM == 2 || ( BCW_NUM & 0x01 ) == 1 ) ) ");
 
-  RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET(STATS__CABAC_BITS__GBI_IDX);
+  RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET(STATS__CABAC_BITS__BCW_IDX);
 
   uint32_t idx = 0;
 
-  uint32_t symbol = m_BinDecoder.decodeBin(Ctx::GBiIdx(0));
-
-  int32_t numGBi = (cu.slice->getCheckLDC()) ? 5 : 3;
+  uint32_t symbol = m_BinDecoder.decodeBin(Ctx::BcwIdx(0));
 
-  if(symbol == 0)
+  int32_t numBcw = (cu.slice->getCheckLDC()) ? 5 : 3;
+  if(symbol == 1)
   {
-    uint32_t prefixNumBits = numGBi - 2;
+    uint32_t prefixNumBits = numBcw - 2;
     uint32_t step = 1;
 
-    unsigned ctxIdGBi = 4;
     idx = 1;
 
     for(int ui = 0; ui < prefixNumBits; ++ui)
     {
-      symbol = m_BinDecoder.decodeBin(Ctx::GBiIdx(ctxIdGBi));
-
-      if (symbol == 1)
+      symbol = m_BinDecoder.decodeBinEP();
+      if (symbol == 0)
       {
         break;
       }
-      ctxIdGBi += step;
       idx += step;
     }
   }
 
-  uint8_t gbiIdx = (uint8_t)g_GbiParsingOrder[idx];
-  CU::setGbiIdx(cu, gbiIdx);
+  uint8_t bcwIdx = (uint8_t)g_BcwParsingOrder[idx];
+  CU::setBcwIdx(cu, bcwIdx);
 
-  DTRACE(g_trace_ctx, D_SYNTAX, "cu_gbi_flag() gbi_idx=%d\n", cu.GBiIdx ? 1 : 0);
+  DTRACE(g_trace_ctx, D_SYNTAX, "cu_bcw_flag() bcw_idx=%d\n", cu.BcwIdx ? 1 : 0);
 }
 
 void CABACReader::xReadTruncBinCode(uint32_t& symbol, uint32_t maxSymbol)
@@ -1029,11 +1204,7 @@ void CABACReader::xReadTruncBinCode(uint32_t& symbol, uint32_t maxSymbol)
 
 void CABACReader::extend_ref_line(CodingUnit& cu)
 {
-#if !ENABLE_JVET_L0283_MRL
-  return;
-#endif
-
-  if (!cu.Y().valid() || cu.predMode != MODE_INTRA || !isLuma(cu.chType) || cu.ipcm)
+  if ( !cu.Y().valid() || cu.predMode != MODE_INTRA || !isLuma(cu.chType) || cu.bdpcmMode )
   {
     cu.firstPU->multiRefIdx = 0;
     return;
@@ -1045,6 +1216,12 @@ void CABACReader::extend_ref_line(CodingUnit& cu)
 
   for (int k = 0; k < numBlocks; k++)
   {
+    if( !cu.cs->sps->getUseMRL() )
+    {
+      pu->multiRefIdx = 0;
+      pu = pu->next;
+      continue;
+    }
     bool isFirstLineOfCtu = (((cu.block(COMPONENT_Y).y)&((cu.cs->sps)->getMaxCUWidth() - 1)) == 0);
     if (isFirstLineOfCtu)
     {
@@ -1059,10 +1236,6 @@ void CABACReader::extend_ref_line(CodingUnit& cu)
       if (MRL_NUM_REF_LINES > 2 && multiRefIdx != MULTI_REF_LINE_IDX[0])
       {
         multiRefIdx = m_BinDecoder.decodeBin(Ctx::MultiRefLineIdx(1)) == 1 ? MULTI_REF_LINE_IDX[2] : MULTI_REF_LINE_IDX[1];
-        if (MRL_NUM_REF_LINES > 3 && multiRefIdx != MULTI_REF_LINE_IDX[1])
-        {
-          multiRefIdx = m_BinDecoder.decodeBin(Ctx::MultiRefLineIdx(2)) == 1 ? MULTI_REF_LINE_IDX[3] : MULTI_REF_LINE_IDX[2];
-        }
       }
 
     }
@@ -1078,6 +1251,21 @@ void CABACReader::intra_luma_pred_modes( CodingUnit &cu )
     return;
   }
 
+  if( cu.bdpcmMode )
+  {
+    cu.firstPU->intraDir[0] = cu.bdpcmMode == 2? VER_IDX : HOR_IDX;
+    return;
+  }
+
+  mip_flag(cu);
+  if (cu.mipFlag)
+  {
+    mip_pred_modes(cu);
+    return;
+  }
+  extend_ref_line( cu );
+  isp_mode( cu );
+
   RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET_SIZE2( STATS__CABAC_BITS__INTRA_DIR_ANG, cu.lumaSize(), CHANNEL_TYPE_LUMA );
 
   // prev_intra_luma_pred_flag
@@ -1086,7 +1274,7 @@ void CABACReader::intra_luma_pred_modes( CodingUnit &cu )
   for( int k = 0; k < numBlocks; k++ )
   {
     CHECK(numBlocks != 1, "not supported yet");
-    if( cu.firstPU->multiRefIdx || ( cu.ispMode && isLuma( cu.chType ) ) )
+    if ( cu.firstPU->multiRefIdx )
     {
       mpmFlag[0] = true;
     }
@@ -1107,7 +1295,11 @@ void CABACReader::intra_luma_pred_modes( CodingUnit &cu )
     {
       uint32_t ipred_idx = 0;
       {
-        ipred_idx = m_BinDecoder.decodeBinEP();
+        unsigned ctx = (pu->cu->ispMode == NOT_INTRA_SUBPARTITIONS ? 1 : 0);
+        if (pu->multiRefIdx == 0)
+          ipred_idx = m_BinDecoder.decodeBin(Ctx::IntraLumaPlanarFlag(ctx));
+        else
+          ipred_idx = 1;
         if( ipred_idx )
         {
           ipred_idx += m_BinDecoder.decodeBinEP();
@@ -1152,7 +1344,7 @@ void CABACReader::intra_luma_pred_modes( CodingUnit &cu )
 
 void CABACReader::intra_chroma_pred_modes( CodingUnit& cu )
 {
-  if( cu.chromaFormat == CHROMA_400 || ( CS::isDualITree( *cu.cs ) && cu.chType == CHANNEL_TYPE_LUMA ) )
+  if( cu.chromaFormat == CHROMA_400 || ( cu.isSepTree() && cu.chType == CHANNEL_TYPE_LUMA ) )
   {
     return;
   }
@@ -1164,50 +1356,72 @@ void CABACReader::intra_chroma_pred_modes( CodingUnit& cu )
     intra_chroma_pred_mode( *pu );
   }
 }
-
-bool CABACReader::intra_chroma_lmc_mode( PredictionUnit& pu )
+bool CABACReader::intra_chroma_lmc_mode(PredictionUnit& pu)
 {
   int lmModeList[10];
-  int maxSymbol = PU::getLMSymbolList(pu, lmModeList);
-  int symbol    = unary_max_symbol(Ctx::IntraChromaPredMode(1), Ctx::IntraChromaPredMode(2), maxSymbol - 1);
-  if (lmModeList[symbol] != -1)
+  PU::getLMSymbolList(pu, lmModeList);
+
+  int symbol = m_BinDecoder.decodeBin(Ctx::CclmModeIdx(0));
+
+  if (symbol == 0)
   {
     pu.intraDir[1] = lmModeList[symbol];
-    return true;
+    CHECK(pu.intraDir[1] != LM_CHROMA_IDX, "should be LM_CHROMA");
   }
-  return false;
+  else
+  {
+    symbol += m_BinDecoder.decodeBinEP();
+    pu.intraDir[1] = lmModeList[symbol];
+  }
+  return true; //it will only enter this function for LMC modes, so always return true ;
 }
 
-void CABACReader::intra_chroma_pred_mode( PredictionUnit& pu )
+void CABACReader::intra_chroma_pred_mode(PredictionUnit& pu)
 {
-  RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET_SIZE2( STATS__CABAC_BITS__INTRA_DIR_ANG, pu.cu->blocks[pu.chType].lumaSize(), CHANNEL_TYPE_CHROMA );
-
-  if (m_BinDecoder.decodeBin(Ctx::IntraChromaPredMode(0)) == 0)
+  RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET_SIZE2(STATS__CABAC_BITS__INTRA_DIR_ANG, pu.cu->blocks[pu.chType].lumaSize(), CHANNEL_TYPE_CHROMA);
+  if (pu.cu->colorTransform)
   {
-    pu.intraDir[1] = DM_CHROMA_IDX;
+    pu.intraDir[CHANNEL_TYPE_CHROMA] = DM_CHROMA_IDX;
     return;
   }
 
   // LM chroma mode
-  if( pu.cs->sps->getUseLMChroma() )
+
+  if (pu.cu->bdpcmModeChroma)
+  {
+    unsigned chromaCandModes[NUM_CHROMA_MODE];
+    PU::getIntraChromaCandModes(pu, chromaCandModes);
+    pu.intraDir[1] = chromaCandModes[0];
+    return;
+  }
+
+  if (pu.cs->sps->getUseLMChroma() && pu.cu->checkCCLMAllowed())
   {
-    if( intra_chroma_lmc_mode( pu ) )
+    bool isLMCMode = m_BinDecoder.decodeBin(Ctx::CclmModeFlag(0)) ? true : false;
+    if (isLMCMode)
     {
+      intra_chroma_lmc_mode(pu);
       return;
     }
   }
-  unsigned candId = m_BinDecoder.decodeBinsEP( 2 );
 
-  unsigned chromaCandModes[ NUM_CHROMA_MODE ];
-  PU::getIntraChromaCandModes( pu, chromaCandModes );
+  if (m_BinDecoder.decodeBin(Ctx::IntraChromaPredMode(0)) == 0)
+  {
+    pu.intraDir[1] = DM_CHROMA_IDX;
+    return;
+  }
+
+  unsigned candId = m_BinDecoder.decodeBinsEP(2);
 
-  CHECK( candId >= NUM_CHROMA_MODE, "Chroma prediction mode index out of bounds" );
-  CHECK( PU::isLMCMode( chromaCandModes[ candId ] ), "The intra dir cannot be LM_CHROMA for this path" );
-  CHECK( chromaCandModes[ candId ] == DM_CHROMA_IDX, "The intra dir cannot be DM_CHROMA for this path" );
+  unsigned chromaCandModes[NUM_CHROMA_MODE];
+  PU::getIntraChromaCandModes(pu, chromaCandModes);
 
-  pu.intraDir[1] = chromaCandModes[ candId ];
-}
+  CHECK(candId >= NUM_CHROMA_MODE, "Chroma prediction mode index out of bounds");
+  CHECK(PU::isLMCMode(chromaCandModes[candId]), "The intra dir cannot be LM_CHROMA for this path");
+  CHECK(chromaCandModes[candId] == DM_CHROMA_IDX, "The intra dir cannot be DM_CHROMA for this path");
 
+  pu.intraDir[1] = chromaCandModes[candId];
+}
 void CABACReader::cu_residual( CodingUnit& cu, Partitioner &partitioner, CUCtx& cuCtx )
 {
   if (!CU::isIntra(cu))
@@ -1227,6 +1441,7 @@ void CABACReader::cu_residual( CodingUnit& cu, Partitioner &partitioner, CUCtx&
     }
     if( !cu.rootCbf )
     {
+      cu.colorTransform = false;
       TransformUnit& tu = cu.cs->addTU(cu, partitioner.chType);
       tu.depth = 0;
       for( unsigned c = 0; c < tu.blocks.size(); c++ )
@@ -1240,16 +1455,29 @@ void CABACReader::cu_residual( CodingUnit& cu, Partitioner &partitioner, CUCtx&
     }
   }
 
+  if (CU::isInter(cu) || CU::isIBC(cu))
+  {
+    adaptive_color_transform(cu);
+  }
+
+  cuCtx.violatesLfnstConstrained[CHANNEL_TYPE_LUMA]   = false;
+  cuCtx.violatesLfnstConstrained[CHANNEL_TYPE_CHROMA] = false;
+  cuCtx.lfnstLastScanPos                              = false;
+  cuCtx.violatesMtsCoeffConstraint                    = false;
+
   ChromaCbfs chromaCbfs;
   if( cu.ispMode && isLuma( partitioner.chType ) )
   {
     TUIntraSubPartitioner subTuPartitioner( partitioner );
-    transform_tree( *cu.cs, subTuPartitioner, cuCtx, chromaCbfs, CU::getISPType( cu, getFirstComponentOfChannel( partitioner.chType ) ), 0 );
+    transform_tree( *cu.cs, subTuPartitioner, cuCtx, CU::getISPType(cu, getFirstComponentOfChannel(partitioner.chType)), 0 );
   }
   else
   {
-    transform_tree( *cu.cs, partitioner, cuCtx, chromaCbfs );
+    transform_tree( *cu.cs, partitioner, cuCtx             );
   }
+
+  residual_lfnst_mode( cu, cuCtx );
+  mts_idx            ( cu, cuCtx );
 }
 
 void CABACReader::rqt_root_cbf( CodingUnit& cu )
@@ -1261,6 +1489,24 @@ void CABACReader::rqt_root_cbf( CodingUnit& cu )
   DTRACE( g_trace_ctx, D_SYNTAX, "rqt_root_cbf() ctx=0 root_cbf=%d pos=(%d,%d)\n", cu.rootCbf ? 1 : 0, cu.lumaPos().x, cu.lumaPos().y );
 }
 
+void CABACReader::adaptive_color_transform(CodingUnit& cu)
+{
+  if (!cu.slice->getSPS()->getUseColorTrans())
+  {
+    return;
+  }
+
+  if (cu.isSepTree())
+  {
+    return;
+  }
+
+  if (CU::isInter(cu) || CU::isIBC(cu) || CU::isIntra(cu))
+  {
+    cu.colorTransform = (m_BinDecoder.decodeBin(Ctx::ACTFlag()));
+  }
+}
+
 void CABACReader::sbt_mode( CodingUnit& cu )
 {
   const uint8_t sbtAllowed = cu.checkAllowedSbt();
@@ -1318,27 +1564,338 @@ void CABACReader::sbt_mode( CodingUnit& cu )
 }
 
 
-bool CABACReader::end_of_ctu( CodingUnit& cu, CUCtx& cuCtx )
+void CABACReader::end_of_ctu( CodingUnit& cu, CUCtx& cuCtx )
 {
-  const SPS     &sps   = *cu.cs->sps;
   const Position rbPos = recalcPosition( cu.chromaFormat, cu.chType, CHANNEL_TYPE_LUMA, cu.blocks[cu.chType].bottomRight().offset( 1, 1 ) );
 
-  if ( ( ( rbPos.x & cu.cs->pcv->maxCUWidthMask  ) == 0 || rbPos.x == sps.getPicWidthInLumaSamples () )
-    && ( ( rbPos.y & cu.cs->pcv->maxCUHeightMask ) == 0 || rbPos.y == sps.getPicHeightInLumaSamples() )
-    && ( !CS::isDualITree( *cu.cs ) || cu.chromaFormat == CHROMA_400 || isChroma( cu.chType ) )
+  if( ( ( rbPos.x & cu.cs->pcv->maxCUWidthMask ) == 0 || rbPos.x == cu.cs->pps->getPicWidthInLumaSamples() )
+  && ( ( rbPos.y & cu.cs->pcv->maxCUHeightMask ) == 0 || rbPos.y == cu.cs->pps->getPicHeightInLumaSamples() )
+    && ( !cu.isSepTree() || cu.chromaFormat == CHROMA_400 || isChroma( cu.chType ) )
       )
   {
     cuCtx.isDQPCoded = ( cu.cs->pps->getUseDQP() && !cuCtx.isDQPCoded );
 
-    return terminating_bit();
   }
-
-  return false;
 }
 
-//================================================================================
-//  clause 7.3.8.6
-//--------------------------------------------------------------------------------
+void CABACReader::cu_palette_info(CodingUnit& cu, ComponentID compBegin, uint32_t numComp, CUCtx& cuCtx)
+{
+  const SPS&      sps = *(cu.cs->sps);
+  TransformUnit&   tu = *cu.firstTU;
+  int curPLTidx = 0;
+
+  cu.lastPLTSize[compBegin] = cu.cs->prevPLT.curPLTSize[compBegin];
+
+  if (cu.lastPLTSize[compBegin])
+  {
+    xDecodePLTPredIndicator(cu, MAXPLTSIZE, compBegin);
+  }
+
+  for (int idx = 0; idx < cu.lastPLTSize[compBegin]; idx++)
+  {
+    if (cu.reuseflag[compBegin][idx])
+    {
+      for (int comp = compBegin; comp < (compBegin + numComp); comp++)
+      {
+        cu.curPLT[comp][curPLTidx] = cu.cs->prevPLT.curPLT[comp][idx];
+      }
+      curPLTidx++;
+    }
+  }
+
+  uint32_t recievedPLTnum = 0;
+
+  if (curPLTidx < MAXPLTSIZE)
+  {
+    recievedPLTnum = exp_golomb_eqprob(0);
+  }
+
+  cu.curPLTSize[compBegin] = curPLTidx + recievedPLTnum;
+  for (int comp = compBegin; comp < (compBegin + numComp); comp++)
+  {
+    for (int idx = curPLTidx; idx < cu.curPLTSize[compBegin]; idx++)
+    {
+      ComponentID compID = (ComponentID)comp;
+      const int  channelBitDepth = sps.getBitDepth(toChannelType(compID));
+      cu.curPLT[compID][idx] = m_BinDecoder.decodeBinsEP(channelBitDepth);
+    }
+  }
+  cu.useEscape[compBegin] = true;
+  if (cu.curPLTSize[compBegin] > 0)
+  {
+    uint32_t escCode = 0;
+    escCode = m_BinDecoder.decodeBinEP();
+    cu.useEscape[compBegin] = (escCode != 0);
+  }
+  uint32_t    indexMaxSize = cu.useEscape[compBegin] ? (cu.curPLTSize[compBegin] + 1) : cu.curPLTSize[compBegin];
+  //encode index map
+  uint32_t    height = cu.block(compBegin).height;
+  uint32_t    width = cu.block(compBegin).width;
+
+  uint32_t total = height * width;
+  if (indexMaxSize > 1)
+    parseScanRotationModeFlag(cu, compBegin);
+  else
+    cu.useRotation[compBegin] = false;
+
+  if (cu.useEscape[compBegin] && cu.cs->pps->getUseDQP() && !cuCtx.isDQPCoded)
+  {
+    if (!cu.isSepTree() || isLuma(tu.chType))
+    {
+      cu_qp_delta(cu, cuCtx.qp, cu.qp);
+      cuCtx.qp = cu.qp;
+      cuCtx.isDQPCoded = true;
+    }
+  }
+  if (cu.useEscape[compBegin] && cu.cs->slice->getUseChromaQpAdj() && !cuCtx.isChromaQpAdjCoded)
+  {
+    if (!cu.isSepTree() || isChroma(tu.chType))
+    {
+      cu_chroma_qp_offset(cu);
+      cuCtx.isChromaQpAdjCoded = true;
+    }
+  }
+
+  m_scanOrder = g_scanOrder[SCAN_UNGROUPED][(cu.useRotation[compBegin]) ? SCAN_TRAV_VER : SCAN_TRAV_HOR][gp_sizeIdxInfo->idxFrom(width)][gp_sizeIdxInfo->idxFrom(height)];
+  uint32_t prevRunPos = 0;
+  unsigned prevRunType = 0;
+  for (int subSetId = 0; subSetId <= (total - 1) >> LOG2_PALETTE_CG_SIZE; subSetId++)
+  {
+    cuPaletteSubblockInfo(cu, compBegin, numComp, subSetId, prevRunPos, prevRunType);
+  }
+}
+void CABACReader::cuPaletteSubblockInfo(CodingUnit& cu, ComponentID compBegin, uint32_t numComp, int subSetId, uint32_t& prevRunPos, unsigned& prevRunType)
+{
+  const SPS&      sps = *(cu.cs->sps);
+  TransformUnit&  tu = *cu.firstTU;
+  PLTtypeBuf      runType = tu.getrunType(compBegin);
+  PelBuf          curPLTIdx = tu.getcurPLTIdx(compBegin);
+  uint32_t        indexMaxSize = cu.useEscape[compBegin] ? (cu.curPLTSize[compBegin] + 1) : cu.curPLTSize[compBegin];
+  uint32_t        totalPel = cu.block(compBegin).height*cu.block(compBegin).width;
+
+  int minSubPos = subSetId << LOG2_PALETTE_CG_SIZE;
+  int maxSubPos = minSubPos + (1 << LOG2_PALETTE_CG_SIZE);
+  maxSubPos = (maxSubPos > totalPel) ? totalPel : maxSubPos; // if last position is out of the current CU size
+
+  unsigned runCopyFlag[(1 << LOG2_PALETTE_CG_SIZE)];
+  for (int i = 0; i < (1 << LOG2_PALETTE_CG_SIZE); i++)
+    runCopyFlag[i] = MAX_INT;
+  if (minSubPos == 0)
+    runCopyFlag[0] = 0;
+
+// PLT runCopy flag and runType - context coded
+  int curPos = minSubPos;
+  for (; curPos < maxSubPos && indexMaxSize > 1; curPos++)
+  {
+    uint32_t posy = m_scanOrder[curPos].y;
+    uint32_t posx = m_scanOrder[curPos].x;
+    uint32_t posyprev = (curPos == 0) ? 0 : m_scanOrder[curPos - 1].y;
+    uint32_t posxprev = (curPos == 0) ? 0 : m_scanOrder[curPos - 1].x;
+    unsigned identityFlag = 1;
+
+    const CtxSet&   ctxSet = (prevRunType == PLT_RUN_INDEX) ? Ctx::IdxRunModel : Ctx::CopyRunModel;
+    if (curPos > 0)
+    {
+      int dist = curPos - prevRunPos - 1;
+      const unsigned  ctxId = DeriveCtx::CtxPltCopyFlag(prevRunType, dist);
+      identityFlag = m_BinDecoder.decodeBin( ctxSet( ctxId ) );
+      DTRACE(g_trace_ctx, D_SYNTAX, "plt_copy_flag() bin=%d ctx=%d\n", identityFlag, ctxId);
+      runCopyFlag[curPos - minSubPos] = identityFlag;
+    }
+
+    if ( identityFlag == 0 || curPos == 0 )
+    {
+      if (((posy == 0) && !cu.useRotation[compBegin]) || ((posx == 0) && cu.useRotation[compBegin]))
+      {
+        runType.at(posx, posy) = PLT_RUN_INDEX;
+      }
+      else if (curPos != 0 && runType.at(posxprev, posyprev) == PLT_RUN_COPY)
+      {
+        runType.at(posx, posy) = PLT_RUN_INDEX;
+      }
+      else
+      {
+        runType.at(posx, posy) = (m_BinDecoder.decodeBin(Ctx::RunTypeFlag()));
+      }
+      DTRACE(g_trace_ctx, D_SYNTAX, "plt_type_flag() bin=%d sp=%d\n", runType.at(posx, posy), curPos);
+      prevRunType = runType.at(posx, posy);
+      prevRunPos  = curPos;
+    }
+    else //assign run information
+    {
+      runType.at(posx, posy) = runType.at(posxprev, posyprev);
+    }
+  }
+
+// PLT index values - bypass coded
+  uint32_t adjust;
+  uint32_t symbol = 0;
+  curPos = minSubPos;
+  if (indexMaxSize > 1)
+  {
+    for (; curPos < maxSubPos; curPos++)
+    {
+      if (curPos > 0)
+        adjust = 1;
+      else
+        adjust = 0;
+
+      uint32_t posy = m_scanOrder[curPos].y;
+      uint32_t posx = m_scanOrder[curPos].x;
+      uint32_t posyprev = (curPos == 0) ? 0 : m_scanOrder[curPos - 1].y;
+      uint32_t posxprev = (curPos == 0) ? 0 : m_scanOrder[curPos - 1].x;
+      if ( runCopyFlag[curPos - minSubPos] == 0 && runType.at(posx, posy) == PLT_RUN_INDEX )
+      {
+        xReadTruncBinCode(symbol, indexMaxSize - adjust);
+        xAdjustPLTIndex(cu, symbol, curPos, curPLTIdx, runType, indexMaxSize, compBegin);
+        DTRACE(g_trace_ctx, D_SYNTAX, "plt_idx_idc() value=%d sp=%d\n", curPLTIdx.at(posx, posy), curPos);
+      }
+      else if (runType.at(posx, posy) == PLT_RUN_INDEX)
+      {
+        curPLTIdx.at(posx, posy) = curPLTIdx.at(posxprev, posyprev);
+      }
+      else
+      {
+        curPLTIdx.at(posx, posy) = (cu.useRotation[compBegin]) ? curPLTIdx.at(posx - 1, posy) : curPLTIdx.at(posx, posy - 1);
+      }
+    }
+  }
+  else
+  {
+    for (; curPos < maxSubPos; curPos++)
+    {
+      uint32_t posy = m_scanOrder[curPos].y;
+      uint32_t posx = m_scanOrder[curPos].x;
+      uint32_t posyprev = (curPos == 0) ? 0 : m_scanOrder[curPos - 1].y;
+      uint32_t posxprev = (curPos == 0) ? 0 : m_scanOrder[curPos - 1].x;
+      runType.at(posx, posy) = PLT_RUN_INDEX;
+      if (runCopyFlag[curPos - minSubPos] == 0 && runType.at(posx, posy) == PLT_RUN_INDEX)
+      {
+        curPLTIdx.at(posx, posy) = 0;
+      }
+      else
+      {
+        curPLTIdx.at(posx, posy) = curPLTIdx.at(posxprev, posyprev);
+      }
+    }
+  }
+
+// Quantized escape colors - bypass coded
+  uint32_t scaleX = getComponentScaleX(COMPONENT_Cb, sps.getChromaFormatIdc());
+  uint32_t scaleY = getComponentScaleY(COMPONENT_Cb, sps.getChromaFormatIdc());
+  for (int comp = compBegin; comp < (compBegin + numComp); comp++)
+  {
+    ComponentID compID = (ComponentID)comp;
+    for (curPos = minSubPos; curPos < maxSubPos; curPos++)
+    {
+      uint32_t posy = m_scanOrder[curPos].y;
+      uint32_t posx = m_scanOrder[curPos].x;
+      if (curPLTIdx.at(posx, posy) == cu.curPLTSize[compBegin])
+      {
+          PLTescapeBuf    escapeValue = tu.getescapeValue((ComponentID)comp);
+          if (compID == COMPONENT_Y || compBegin != COMPONENT_Y)
+          {
+            escapeValue.at(posx, posy) = exp_golomb_eqprob(3);
+            assert(escapeValue.at(posx, posy) < (1 << (cu.cs->sps->getBitDepth(toChannelType((ComponentID)comp)) + 1)));
+            DTRACE(g_trace_ctx, D_SYNTAX, "plt_escape_val() value=%d etype=%d sp=%d\n", escapeValue.at(posx, posy), comp, curPos);
+          }
+          if (compBegin == COMPONENT_Y && compID != COMPONENT_Y && posy % (1 << scaleY) == 0 && posx % (1 << scaleX) == 0)
+          {
+            uint32_t posxC = posx >> scaleX;
+            uint32_t posyC = posy >> scaleY;
+            escapeValue.at(posxC, posyC) = exp_golomb_eqprob(3);
+            assert(escapeValue.at(posxC, posyC) < (1 << (cu.cs->sps->getBitDepth(toChannelType(compID)) + 1)));
+            DTRACE(g_trace_ctx, D_SYNTAX, "plt_escape_val() value=%d etype=%d sp=%d\n", escapeValue.at(posx, posy), comp, curPos);
+          }
+      }
+    }
+  }
+}
+void CABACReader::parseScanRotationModeFlag(CodingUnit& cu, ComponentID compBegin)
+{
+  cu.useRotation[compBegin] = m_BinDecoder.decodeBin(Ctx::RotationFlag());
+}
+void CABACReader::xDecodePLTPredIndicator(CodingUnit& cu, uint32_t maxPLTSize, ComponentID compBegin)
+{
+  uint32_t symbol, numPltPredicted = 0, idx = 0;
+
+  symbol = exp_golomb_eqprob(0);
+
+  if (symbol != 1)
+  {
+    while (idx < cu.lastPLTSize[compBegin] && numPltPredicted < maxPLTSize)
+    {
+      if (idx > 0)
+      {
+        symbol = exp_golomb_eqprob(0);
+      }
+      if (symbol == 1)
+      {
+        break;
+      }
+
+      if (symbol)
+      {
+        idx += symbol - 1;
+      }
+      cu.reuseflag[compBegin][idx] = 1;
+      numPltPredicted++;
+      idx++;
+    }
+  }
+}
+void CABACReader::xAdjustPLTIndex(CodingUnit& cu, Pel curLevel, uint32_t idx, PelBuf& paletteIdx, PLTtypeBuf& paletteRunType, int maxSymbol, ComponentID compBegin)
+{
+  uint32_t symbol;
+  int refLevel = MAX_INT;
+  uint32_t posy = m_scanOrder[idx].y;
+  uint32_t posx = m_scanOrder[idx].x;
+  if (idx)
+  {
+    uint32_t prevposy = m_scanOrder[idx - 1].y;
+    uint32_t prevposx = m_scanOrder[idx - 1].x;
+    if (paletteRunType.at(prevposx, prevposy) == PLT_RUN_INDEX)
+    {
+      refLevel = paletteIdx.at(prevposx, prevposy);
+      if (paletteIdx.at(prevposx, prevposy) == cu.curPLTSize[compBegin]) // escape
+      {
+        refLevel = maxSymbol - 1;
+      }
+    }
+    else
+    {
+      if (cu.useRotation[compBegin])
+      {
+        assert(prevposx > 0);
+        refLevel = paletteIdx.at(posx - 1, posy);
+        if (paletteIdx.at(posx - 1, posy) == cu.curPLTSize[compBegin]) // escape mode
+        {
+          refLevel = maxSymbol - 1;
+        }
+      }
+      else
+      {
+        assert(prevposy > 0);
+        refLevel = paletteIdx.at(posx, posy - 1);
+        if (paletteIdx.at(posx, posy - 1) == cu.curPLTSize[compBegin]) // escape mode
+        {
+          refLevel = maxSymbol - 1;
+        }
+      }
+    }
+    maxSymbol--;
+  }
+  symbol = curLevel;
+  if (curLevel >= refLevel) // include escape mode
+  {
+    symbol++;
+  }
+  paletteIdx.at(posx, posy) = symbol;
+}
+
+//================================================================================
+//  clause 7.3.8.6
+//--------------------------------------------------------------------------------
 //    void  prediction_unit ( pu, mrgCtx );
 //    void  merge_flag      ( pu );
 //    void  merge_data      ( pu, mrgCtx );
@@ -1360,27 +1917,7 @@ void CABACReader::prediction_unit( PredictionUnit& pu, MergeCtx& mrgCtx )
   }
   if( pu.mergeFlag )
   {
-    if (CU::isIBC(*pu.cu))
-    {
-      merge_idx(pu);
-    }
-    else
-    {
-    subblock_merge_flag( *pu.cu );
-    MHIntra_flag(pu);
-    if (pu.mhIntraFlag)
-    {
-      MHIntra_luma_pred_modes(*pu.cu);
-      pu.intraDir[1] = DM_CHROMA_IDX;
-    }
-    triangle_mode( *pu.cu );
-    if (pu.mmvdMergeFlag)
-    {
-      mmvd_merge_idx(pu);
-    }
-    else
-      merge_data   ( pu );
-    }
+    merge_data(pu);
   }
   else if (CU::isIBC(*pu.cu))
   {
@@ -1388,6 +1925,11 @@ void CABACReader::prediction_unit( PredictionUnit& pu, MergeCtx& mrgCtx )
     pu.cu->affine = false;
     pu.refIdx[REF_PIC_LIST_0] = MAX_NUM_REF;
     mvd_coding(pu.mvd[REF_PIC_LIST_0]);
+    if ( pu.cu->slice->getPicHeader()->getMaxNumIBCMergeCand() == 1 )
+    {
+      pu.mvpIdx[REF_PIC_LIST_0] = 0;
+    }
+    else
     mvp_flag(pu, REF_PIC_LIST_0);
   }
   else
@@ -1420,7 +1962,7 @@ void CABACReader::prediction_unit( PredictionUnit& pu, MergeCtx& mrgCtx )
       if ( pu.cu->smvdMode != 1 )
       {
       ref_idx     ( pu, REF_PIC_LIST_1 );
-      if( pu.cu->cs->slice->getMvdL1ZeroFlag() && pu.interDir == 3 /* PRED_BI */ )
+      if( pu.cu->cs->picHeader->getMvdL1ZeroFlag() && pu.interDir == 3 /* PRED_BI */ )
       {
         pu.mvd[ REF_PIC_LIST_1 ] = Mv();
         pu.mvdAffi[REF_PIC_LIST_1][0] = Mv();
@@ -1449,13 +1991,14 @@ void CABACReader::prediction_unit( PredictionUnit& pu, MergeCtx& mrgCtx )
     pu.mv    [REF_PIC_LIST_1] = Mv(0, 0);
     pu.refIdx[REF_PIC_LIST_1] = -1;
     pu.interDir               =  1;
-    pu.cu->GBiIdx = GBI_DEFAULT;
+    pu.cu->BcwIdx = BCW_DEFAULT;
   }
 
   if ( pu.cu->smvdMode )
   {
     RefPicList eCurRefList = (RefPicList)(pu.cu->smvdMode - 1);
     pu.mvd[1 - eCurRefList].set( -pu.mvd[eCurRefList].hor, -pu.mvd[eCurRefList].ver );
+    CHECK(!((pu.mvd[1 - eCurRefList].getHor() >= MVD_MIN) && (pu.mvd[1 - eCurRefList].getHor() <= MVD_MAX)) || !((pu.mvd[1 - eCurRefList].getVer() >= MVD_MIN) && (pu.mvd[1 - eCurRefList].getVer() <= MVD_MAX)), "Illegal MVD value");
     pu.refIdx[1 - eCurRefList] = pu.cs->slice->getSymRefIdx( 1 - eCurRefList );
   }
 
@@ -1484,17 +2027,14 @@ void CABACReader::smvd_mode( PredictionUnit& pu )
 
 void CABACReader::subblock_merge_flag( CodingUnit& cu )
 {
-  if ( cu.firstPU->mergeFlag && (cu.firstPU->mmvdMergeFlag || cu.mmvdSkip) )
-  {
-    return;
-  }
+  cu.affine = false;
 
-  if ( !cu.cs->slice->isIntra() && (cu.cs->sps->getUseAffine() || cu.cs->sps->getSBTMVPEnabledFlag()) && cu.lumaSize().width >= 8 && cu.lumaSize().height >= 8 )
+  if ( !cu.cs->slice->isIntra() && (cu.slice->getPicHeader()->getMaxNumAffineMergeCand() > 0) && cu.lumaSize().width >= 8 && cu.lumaSize().height >= 8 )
   {
     RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET( STATS__CABAC_BITS__AFFINE_FLAG );
 
     unsigned ctxId = DeriveCtx::CtxAffineFlag( cu );
-    cu.affine = m_BinDecoder.decodeBin( Ctx::AffineFlag( ctxId ) );
+    cu.affine = m_BinDecoder.decodeBin( Ctx::SubblockMergeFlag( ctxId ) );
     DTRACE( g_trace_ctx, D_SYNTAX, "subblock_merge_flag() subblock_merge_flag=%d ctx=%d pos=(%d,%d)\n", cu.affine ? 1 : 0, ctxId, cu.Y().x, cu.Y().y );
   }
 }
@@ -1533,23 +2073,85 @@ void CABACReader::merge_flag( PredictionUnit& pu )
   if (pu.mergeFlag && CU::isIBC(*pu.cu))
   {
     pu.mmvdMergeFlag = false;
+    pu.regularMergeFlag = false;
     return;
   }
-#if JVET_MMVD_OFF_MACRO
-  pu.mmvdMergeFlag = false;
-#else
-  if (pu.mergeFlag)
-  {
-    pu.mmvdMergeFlag = (m_BinDecoder.decodeBin(Ctx::MmvdFlag(0)));
-    DTRACE(g_trace_ctx, D_SYNTAX, "mmvd_merge_flag() mmvd_merge=%d pos=(%d,%d) size=%dx%d\n", pu.mmvdMergeFlag ? 1 : 0, pu.lumaPos().x, pu.lumaPos().y, pu.lumaSize().width, pu.lumaSize().height);
-  }
-#endif
 }
 
 
 void CABACReader::merge_data( PredictionUnit& pu )
 {
-  if (pu.cu->mmvdSkip)
+  if (CU::isIBC(*pu.cu))
+  {
+    merge_idx(pu);
+    return;
+  }
+  else
+  {
+    CodingUnit cu = *pu.cu;
+    subblock_merge_flag(*pu.cu);
+    if (pu.cu->affine)
+    {
+      merge_idx(pu);
+      cu.firstPU->regularMergeFlag = false;
+      return;
+    }
+
+    RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET( STATS__CABAC_BITS__MERGE_FLAG );
+    const bool triangleAvailable = pu.cu->cs->slice->getSPS()->getUseTriangle() && pu.cu->cs->slice->isInterB() && pu.cu->cs->picHeader->getMaxNumTriangleCand() > 1;
+    const bool ciipAvailable = pu.cs->sps->getUseCiip() && !pu.cu->skip && pu.cu->lwidth() < MAX_CU_SIZE && pu.cu->lheight() < MAX_CU_SIZE;
+    if (pu.cu->lwidth() * pu.cu->lheight() >= 64
+      && (triangleAvailable || ciipAvailable))
+    {
+      cu.firstPU->regularMergeFlag = m_BinDecoder.decodeBin(Ctx::RegularMergeFlag(cu.skip ? 0 : 1));
+    }
+    else
+    {
+      cu.firstPU->regularMergeFlag = true;
+    }
+    if (cu.firstPU->regularMergeFlag)
+    {
+      if (cu.cs->slice->getSPS()->getUseMMVD())
+      {
+        cu.firstPU->mmvdMergeFlag = m_BinDecoder.decodeBin(Ctx::MmvdFlag(0));
+      }
+      else
+      {
+        cu.firstPU->mmvdMergeFlag = false;
+      }
+      if (cu.skip)
+      {
+        cu.mmvdSkip = cu.firstPU->mmvdMergeFlag;
+      }
+    }
+    else
+    {
+      pu.mmvdMergeFlag = false;
+      pu.cu->mmvdSkip = false;
+      if (triangleAvailable && ciipAvailable)
+      {
+        Ciip_flag(pu);
+      }
+      else if (ciipAvailable)
+      {
+        pu.ciipFlag = true;
+      }
+      else
+      {
+        pu.ciipFlag = false;
+      }
+      if (pu.ciipFlag)
+      {
+        pu.intraDir[0] = PLANAR_IDX;
+        pu.intraDir[1] = DM_CHROMA_IDX;
+      }
+      else
+      {
+        pu.cu->triangle = true;
+      }
+    }
+  }
+  if (pu.mmvdMergeFlag || pu.cu->mmvdSkip)
   {
     mmvd_merge_idx(pu);
   }
@@ -1566,7 +2168,7 @@ void CABACReader::merge_idx( PredictionUnit& pu )
 
   if ( pu.cu->affine )
   {
-    int numCandminus1 = int( pu.cs->slice->getMaxNumAffineMergeCand() ) - 1;
+    int numCandminus1 = int( pu.cs->picHeader->getMaxNumAffineMergeCand() ) - 1;
     pu.mergeIdx = 0;
     if ( numCandminus1 > 0 )
     {
@@ -1586,7 +2188,7 @@ void CABACReader::merge_idx( PredictionUnit& pu )
   }
   else
   {
-  int numCandminus1 = int( pu.cs->slice->getMaxNumMergeCand() ) - 1;
+  int numCandminus1 = int( pu.cs->picHeader->getMaxNumMergeCand() ) - 1;
   pu.mergeIdx       = 0;
 
   if( pu.cu->triangle )
@@ -1613,8 +2215,10 @@ void CABACReader::merge_idx( PredictionUnit& pu )
       }
       return decIdx;
     };
-    candIdx0 = decodeOneIdx(TRIANGLE_MAX_NUM_UNI_CANDS - 1);
-    candIdx1 = decodeOneIdx(TRIANGLE_MAX_NUM_UNI_CANDS - 2);
+    const int maxNumTriangleCand = pu.cs->picHeader->getMaxNumTriangleCand();
+    CHECK(maxNumTriangleCand < 2, "Incorrect max number of triangle candidates");
+    candIdx0 = decodeOneIdx(maxNumTriangleCand - 1);
+    candIdx1 = decodeOneIdx(maxNumTriangleCand - 2);
     candIdx1 += candIdx1 >= candIdx0 ? 1 : 0;
     DTRACE( g_trace_ctx, D_SYNTAX, "merge_idx() triangle_split_dir=%d\n", splitDir );
     DTRACE( g_trace_ctx, D_SYNTAX, "merge_idx() triangle_idx0=%d\n", candIdx0 );
@@ -1625,6 +2229,10 @@ void CABACReader::merge_idx( PredictionUnit& pu )
     return;
   }
 
+  if (pu.cu->predMode == MODE_IBC)
+  {
+    numCandminus1 = int(pu.cs->picHeader->getMaxNumIBCMergeCand()) - 1;
+  }
   if( numCandminus1 > 0 )
   {
     if( m_BinDecoder.decodeBin( Ctx::MergeIdx() ) )
@@ -1646,50 +2254,29 @@ void CABACReader::merge_idx( PredictionUnit& pu )
 void CABACReader::mmvd_merge_idx(PredictionUnit& pu)
 {
   RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET(STATS__CABAC_BITS__MERGE_INDEX);
-  int var0, var1, var2;
-  int dir0 = 0;
-  int var = 0;
-  int mvpIdx = 0;
-
-  pu.mmvdMergeIdx = 0;
 
-  mvpIdx = (var + dir0)*(MMVD_MAX_REFINE_NUM*MMVD_BASE_MV_NUM);
-
-  int numCandminus1_base = MMVD_BASE_MV_NUM - 1;
-  var0 = 0;
-  if (numCandminus1_base > 0)
+  int var0 = 0;
+  if (pu.cs->picHeader->getMaxNumMergeCand() > 1)
   {
-    if (m_BinDecoder.decodeBin(Ctx::MmvdMergeIdx()))
-    {
-      var0++;
-      for (; var0 < numCandminus1_base; var0++)
-      {
-        if (!m_BinDecoder.decodeBinEP())
-        {
-          break;
-        }
-      }
-    }
+    static_assert(MMVD_BASE_MV_NUM == 2, "");
+    var0 = m_BinDecoder.decodeBin(Ctx::MmvdMergeIdx());
   }
   DTRACE(g_trace_ctx, D_SYNTAX, "base_mvp_idx() base_mvp_idx=%d\n", var0);
   int numCandminus1_step = MMVD_REFINE_STEP - 1;
-  var1 = 0;
-  if (numCandminus1_step > 0)
+  int var1 = 0;
+  if (m_BinDecoder.decodeBin(Ctx::MmvdStepMvpIdx()))
   {
-    if (m_BinDecoder.decodeBin(Ctx::MmvdStepMvpIdx()))
+    var1++;
+    for (; var1 < numCandminus1_step; var1++)
     {
-      var1++;
-      for (; var1 < numCandminus1_step; var1++)
+      if (!m_BinDecoder.decodeBinEP())
       {
-        if (!m_BinDecoder.decodeBinEP())
-        {
-          break;
-        }
+        break;
       }
     }
   }
   DTRACE(g_trace_ctx, D_SYNTAX, "MmvdStepMvpIdx() MmvdStepMvpIdx=%d\n", var1);
-  var2 = 0;
+  int var2 = 0;
   if (m_BinDecoder.decodeBinEP())
   {
     var2 += 2;
@@ -1707,7 +2294,7 @@ void CABACReader::mmvd_merge_idx(PredictionUnit& pu)
     }
   }
   DTRACE(g_trace_ctx, D_SYNTAX, "pos() pos=%d\n", var2);
-  mvpIdx += (var0 * MMVD_MAX_REFINE_NUM + var1 * 4 + var2);
+  int mvpIdx = (var0 * MMVD_MAX_REFINE_NUM + var1 * 4 + var2);
   pu.mmvdMergeIdx = mvpIdx;
   DTRACE(g_trace_ctx, D_SYNTAX, "mmvd_merge_idx() mmvd_merge_idx=%d\n", pu.mmvdMergeIdx);
 }
@@ -1731,13 +2318,13 @@ void CABACReader::inter_pred_idc( PredictionUnit& pu )
       return;
     }
   }
-  if( m_BinDecoder.decodeBin( Ctx::InterDir(4) ) )
+  if( m_BinDecoder.decodeBin( Ctx::InterDir(5) ) )
   {
-    DTRACE( g_trace_ctx, D_SYNTAX, "inter_pred_idc() ctx=4 value=%d pos=(%d,%d)\n", 2, pu.lumaPos().x, pu.lumaPos().y );
+    DTRACE( g_trace_ctx, D_SYNTAX, "inter_pred_idc() ctx=5 value=%d pos=(%d,%d)\n", 2, pu.lumaPos().x, pu.lumaPos().y );
     pu.interDir = 2;
     return;
   }
-  DTRACE( g_trace_ctx, D_SYNTAX, "inter_pred_idc() ctx=4 value=%d pos=(%d,%d)\n", 1, pu.lumaPos().x, pu.lumaPos().y );
+  DTRACE( g_trace_ctx, D_SYNTAX, "inter_pred_idc() ctx=5 value=%d pos=(%d,%d)\n", 1, pu.lumaPos().x, pu.lumaPos().y );
   pu.interDir = 1;
   return;
 }
@@ -1794,192 +2381,27 @@ void CABACReader::mvp_flag( PredictionUnit& pu, RefPicList eRefList )
 }
 
 
-void CABACReader::MHIntra_flag(PredictionUnit& pu)
+void CABACReader::Ciip_flag(PredictionUnit& pu)
 {
-  if (!pu.cs->sps->getUseMHIntra())
+  if (!pu.cs->sps->getUseCiip())
   {
-    pu.mhIntraFlag = false;
+    pu.ciipFlag = false;
     return;
   }
   if (pu.cu->skip)
   {
-    pu.mhIntraFlag = false;
+    pu.ciipFlag = false;
     return;
   }
 
-  if (pu.mmvdMergeFlag)
-  {
-    pu.mhIntraFlag = false;
-    return;
-  }
-  if (pu.cu->affine)
-  {
-    pu.mhIntraFlag = false;
-    return;
-  }
-  if (pu.cu->lwidth() * pu.cu->lheight() < 64 || pu.cu->lwidth() >= MAX_CU_SIZE || pu.cu->lheight() >= MAX_CU_SIZE)
-  {
-    pu.mhIntraFlag = false;
-    return;
-  }
   RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET(STATS__CABAC_BITS__MH_INTRA_FLAG);
 
-  pu.mhIntraFlag = (m_BinDecoder.decodeBin(Ctx::MHIntraFlag()));
-  DTRACE(g_trace_ctx, D_SYNTAX, "MHIntra_flag() MHIntra=%d pos=(%d,%d) size=%dx%d\n", pu.mhIntraFlag ? 1 : 0, pu.lumaPos().x, pu.lumaPos().y, pu.lumaSize().width, pu.lumaSize().height);
-}
-
-void CABACReader::MHIntra_luma_pred_modes(CodingUnit &cu)
-{
-  if (!cu.Y().valid())
-  {
-    return;
-  }
-
-  RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET_SIZE2(STATS__CABAC_BITS__INTRA_DIR_ANG, cu.lumaSize(), CHANNEL_TYPE_LUMA);
-
-  const int numMPMs = 3;  // Multi-hypothesis intra uses only 3 MPM
-
-  // prev_intra_luma_pred_flag
-  int numBlocks = CU::getNumPUs(cu);
-  int mpmFlag[4];
-  PredictionUnit *pu = cu.firstPU;
-  for (int k = 0; k < numBlocks; k++)
-  {
-    if (PU::getNarrowShape(pu->lwidth(), pu->lheight()) == 0)
-    {
-      mpmFlag[k] = m_BinDecoder.decodeBin(Ctx::MHIntraPredMode());
-    }
-    else
-    {
-      mpmFlag[k] = 1;
-    }
-  }
-
-  unsigned mpm_pred[numMPMs];
-  for (int k = 0; k < numBlocks; k++)
-  {
-    PU::getMHIntraMPMs(*pu, mpm_pred);
-
-    if (mpmFlag[k])
-    {
-      unsigned pred_idx = 0;
-
-      pred_idx = m_BinDecoder.decodeBinEP();
-      if (pred_idx)
-      {
-        pred_idx += m_BinDecoder.decodeBinEP();
-      }
-      pu->intraDir[0] = mpm_pred[pred_idx];
-    }
-    else
-    {
-      unsigned pred_mode = 0;
-
-      bool isMPMCand[4];
-      for (unsigned i = 0; i < 4; i++)
-      {
-        isMPMCand[i] = false;
-      }
-      for (unsigned i = 0; i < 3; i++)
-      {
-        if (mpm_pred[i] == PLANAR_IDX)
-        {
-          isMPMCand[0] = true;
-        }
-        else if (mpm_pred[i] == DC_IDX)
-        {
-          isMPMCand[1] = true;
-        }
-        else if (mpm_pred[i] == HOR_IDX)
-        {
-          isMPMCand[2] = true;
-        }
-        else if (mpm_pred[i] == VER_IDX)
-        {
-          isMPMCand[3] = true;
-        }
-      }
-      if (!isMPMCand[0])
-      {
-        pred_mode = PLANAR_IDX;
-      }
-      if (!isMPMCand[1])
-      {
-        pred_mode = DC_IDX;
-      }
-      if (!isMPMCand[2])
-      {
-        pred_mode = HOR_IDX;
-      }
-      if (!isMPMCand[3])
-      {
-        pred_mode = VER_IDX;
-      }
-      pu->intraDir[0] = pred_mode;
-    }
-    DTRACE(g_trace_ctx, D_SYNTAX, "intra_luma_pred_modes() idx=%d pos=(%d,%d) mode=%d\n", k, pu->lumaPos().x, pu->lumaPos().y, pu->intraDir[0]);
-    pu = pu->next;
-  }
+  pu.ciipFlag = (m_BinDecoder.decodeBin(Ctx::CiipFlag()));
+  DTRACE(g_trace_ctx, D_SYNTAX, "Ciip_flag() Ciip=%d pos=(%d,%d) size=%dx%d\n", pu.ciipFlag ? 1 : 0, pu.lumaPos().x, pu.lumaPos().y, pu.lumaSize().width, pu.lumaSize().height);
 }
 
-void CABACReader::triangle_mode( CodingUnit& cu )
-{
-  RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET( STATS__CABAC_BITS__TRIANGLE_FLAG );
-
-  if( !cu.cs->slice->getSPS()->getUseTriangle() || !cu.cs->slice->isInterB() || cu.lwidth() * cu.lheight() < TRIANGLE_MIN_SIZE || cu.affine )
-  {
-    return;
-  }
 
-  if ( cu.firstPU->mmvdMergeFlag || cu.mmvdSkip )
-  {
-    return;
-  }
 
-  if ( cu.firstPU->mhIntraFlag )
-  {
-    return;
-  }
-
-  unsigned flag_idx = DeriveCtx::CtxTriangleFlag( cu );
-  cu.triangle = m_BinDecoder.decodeBin( Ctx::TriangleFlag(flag_idx) );
-
-
-  DTRACE( g_trace_ctx, D_SYNTAX, "triangle_mode() triangle_mode=%d pos=(%d,%d) size: %dx%d\n", cu.triangle, cu.Y().x, cu.Y().y, cu.lumaSize().width, cu.lumaSize().height );
-}
-
-//================================================================================
-//  clause 7.3.8.7
-//--------------------------------------------------------------------------------
-//    void  pcm_samples( tu )
-//================================================================================
-
-void CABACReader::pcm_samples( TransformUnit& tu )
-{
-  CHECK( !tu.cu->ipcm, "pcm mode expected" );
-
-  const CodingStructure *cs = tu.cs;
-  const ChannelType chType = tu.chType;
-
-  const SPS&        sps       = *tu.cu->cs->sps;
-  tu.depth                    = 0;
-
-  ComponentID compStr = (CS::isDualITree(*cs) && !isLuma(chType)) ? COMPONENT_Cb: COMPONENT_Y;
-  ComponentID compEnd = (CS::isDualITree(*cs) && isLuma(chType)) ? COMPONENT_Y : COMPONENT_Cr;
-  for( ComponentID compID = compStr; compID <= compEnd; compID = ComponentID(compID+1) )
-  {
-    PelBuf          samples     = tu.getPcmbuf( compID );
-    const unsigned  sampleBits  = sps.getPCMBitDepth( toChannelType(compID) );
-    for( unsigned y = 0; y < samples.height; y++ )
-    {
-      for( unsigned x = 0; x < samples.width; x++ )
-      {
-        samples.at(x, y) = m_BinDecoder.decodeBinsPCM( sampleBits );
-      }
-    }
-  }
-  m_BinDecoder.start();
-}
 
 //================================================================================
 //  clause 7.3.8.8
@@ -1989,21 +2411,16 @@ void CABACReader::pcm_samples( TransformUnit& tu )
 //    bool  cbf_comp            ( area, depth )
 //================================================================================
 
-void CABACReader::transform_tree( CodingStructure &cs, Partitioner &partitioner, CUCtx& cuCtx, ChromaCbfs& chromaCbfs, const PartSplit ispType, const int subTuIdx )
+void CABACReader::transform_tree( CodingStructure &cs, Partitioner &partitioner, CUCtx& cuCtx,                         const PartSplit ispType, const int subTuIdx )
 {
-  ChromaCbfs chromaCbfsLastDepth;
-  chromaCbfsLastDepth.Cb        = chromaCbfs.Cb;
-  chromaCbfsLastDepth.Cr        = chromaCbfs.Cr;
-  const UnitArea& area          = partitioner.currArea();
-
-  CodingUnit&     cu            = *cs.getCU( area.blocks[partitioner.chType], partitioner.chType );
-  const unsigned  trDepth       = partitioner.currTrDepth;
-        int       subTuCounter  = subTuIdx;
+  const UnitArea&   area = partitioner.currArea();
+  CodingUnit&         cu = *cs.getCU(area.blocks[partitioner.chType], partitioner.chType);
+  int       subTuCounter = subTuIdx;
 
   // split_transform_flag
-  bool split = false;
+  bool split = partitioner.canSplit(TU_MAX_TR_SPLIT, cs);
+  const unsigned  trDepth = partitioner.currTrDepth;
 
-  split = partitioner.canSplit( TU_MAX_TR_SPLIT, cs );
   if( cu.sbtInfo && partitioner.canSplit( PartSplit( cu.getSbtTuSplit() ), cs ) )
   {
     split = true;
@@ -2013,27 +2430,6 @@ void CABACReader::transform_tree( CodingStructure &cs, Partitioner &partitioner,
   {
     split = partitioner.canSplit( ispType, cs );
   }
-  const bool chromaCbfISP = area.blocks[COMPONENT_Cb].valid() && cu.ispMode && !split;
-
-  // cbf_cb & cbf_cr
-  if( area.chromaFormat != CHROMA_400 && area.blocks[COMPONENT_Cb].valid() && ( !CS::isDualITree( cs ) || partitioner.chType == CHANNEL_TYPE_CHROMA ) && ( !cu.ispMode || chromaCbfISP ) )
-  {
-    const int cbfDepth = chromaCbfISP ? trDepth - 1 : trDepth;
-    if (chromaCbfs.Cb)
-    {
-      if (!(cu.sbtInfo && trDepth == 1))
-        chromaCbfs.Cb &= cbf_comp(cs, area.blocks[COMPONENT_Cb], cbfDepth);
-    }
-    if (chromaCbfs.Cr)
-    {
-      if (!(cu.sbtInfo && trDepth == 1))
-        chromaCbfs.Cr &= cbf_comp(cs, area.blocks[COMPONENT_Cr], cbfDepth, chromaCbfs.Cb);
-    }
-  }
-  else if( CS::isDualITree( cs ) )
-  {
-    chromaCbfs = ChromaCbfs( false );
-  }
 
   if( split )
   {
@@ -2062,45 +2458,18 @@ void CABACReader::transform_tree( CodingStructure &cs, Partitioner &partitioner,
 
     do
     {
-      ChromaCbfs subCbfs = chromaCbfs;
-      transform_tree( cs, partitioner, cuCtx, subCbfs, ispType, subTuCounter );
+      transform_tree( cs, partitioner, cuCtx,          ispType, subTuCounter );
       subTuCounter += subTuCounter != -1 ? 1 : 0;
     } while( partitioner.nextPart( cs ) );
 
     partitioner.exitCurrSplit();
 
-    const UnitArea &currArea  = partitioner.currArea();
-    const unsigned  currDepth = partitioner.currTrDepth;
-    const unsigned numTBlocks = getNumberValidTBlocks( *cs.pcv );
-
-    unsigned        compCbf[3] = { 0, 0, 0 };
-    unsigned        cbfDepth   = 0;
-    for( auto &currTU : cs.traverseTUs( currArea, partitioner.chType ) )
-    {
-      for( unsigned ch = 0; ch < numTBlocks; ch++ )
-      {
-        cbfDepth     = !isLuma( ComponentID( ch ) ) && cu.ispMode ? currDepth : currDepth + 1;
-        compCbf[ch] |= ( TU::getCbfAtDepth( currTU, ComponentID( ch ), cbfDepth ) ? 1 : 0 );
-      }
-    }
-
-    for (auto &currTU: cs.traverseTUs(currArea, partitioner.chType))
-    {
-      TU::setCbfAtDepth(currTU, COMPONENT_Y, currDepth, compCbf[COMPONENT_Y]);
-      if (currArea.chromaFormat != CHROMA_400)
-      {
-        TU::setCbfAtDepth(currTU, COMPONENT_Cb, currDepth, compCbf[COMPONENT_Cb]);
-        TU::setCbfAtDepth(currTU, COMPONENT_Cr, currDepth, compCbf[COMPONENT_Cr]);
-      }
-    }
   }
   else
   {
     TransformUnit &tu = cs.addTU( CS::getArea( cs, area, partitioner.chType ), partitioner.chType );
     unsigned numBlocks = ::getNumberValidTBlocks( *cs.pcv );
     tu.checkTuNoResidual( partitioner.currPartIdx() );
-    chromaCbfs.Cb &= !tu.noResidual;
-    chromaCbfs.Cr &= !tu.noResidual;
 
     for( unsigned compID = COMPONENT_Y; compID < numBlocks; compID++ )
     {
@@ -2113,70 +2482,33 @@ void CABACReader::transform_tree( CodingStructure &cs, Partitioner &partitioner,
     tu.depth = trDepth;
     DTRACE( g_trace_ctx, D_SYNTAX, "transform_unit() pos=(%d,%d) size=%dx%d depth=%d trDepth=%d\n", tu.blocks[tu.chType].x, tu.blocks[tu.chType].y, tu.blocks[tu.chType].width, tu.blocks[tu.chType].height, cu.depth, partitioner.currTrDepth );
 
-    if( !isChroma( partitioner.chType ) )
-    {
-      if( !CU::isIntra( cu ) && trDepth == 0 && !chromaCbfs.sigChroma( area.chromaFormat ) )
-      {
-        TU::setCbfAtDepth( tu, COMPONENT_Y, trDepth, 1 );
-      }
-      else if( cu.sbtInfo && tu.noResidual )
-      {
-        TU::setCbfAtDepth( tu, COMPONENT_Y, trDepth, 0 );
-      }
-      else if( cu.sbtInfo && !chromaCbfsLastDepth.sigChroma( area.chromaFormat ) )
-      {
-        assert( !tu.noResidual );
-        TU::setCbfAtDepth( tu, COMPONENT_Y, trDepth, 1 );
-      }
-      else
-      {
-        bool previousCbf       = false;
-        bool rootCbfSoFar      = false;
-        bool lastCbfIsInferred = false;
-        if( cu.ispMode )
-        {
-          uint32_t nTus = cu.ispMode == HOR_INTRA_SUBPARTITIONS ? cu.lheight() >> g_aucLog2[tu.lheight()] : cu.lwidth() >> g_aucLog2[tu.lwidth()];
-          if( subTuCounter == nTus - 1 )
-          {
-            TransformUnit* tuPointer = cu.firstTU;
-            for( int tuIdx = 0; tuIdx < nTus - 1; tuIdx++ )
-            {
-              rootCbfSoFar |= TU::getCbfAtDepth( *tuPointer, COMPONENT_Y, trDepth );
-              tuPointer = tuPointer->next;
-            }
-            if( !rootCbfSoFar )
-            {
-              lastCbfIsInferred = true;
-            }
-          }
-          if( !lastCbfIsInferred )
-          {
-            previousCbf = TU::getPrevTuCbfAtDepth( tu, COMPONENT_Y, trDepth );
-          }
-        }
-        bool cbfY = lastCbfIsInferred ? true : cbf_comp( cs, tu.Y(), trDepth, previousCbf, cu.ispMode );
-        TU::setCbfAtDepth( tu, COMPONENT_Y, trDepth, ( cbfY ? 1 : 0 ) );
-      }
-    }
-    if( area.chromaFormat != CHROMA_400 && ( !cu.ispMode || chromaCbfISP ) )
-    {
-      TU::setCbfAtDepth( tu, COMPONENT_Cb, trDepth, ( chromaCbfs.Cb ? 1 : 0 ) );
-      TU::setCbfAtDepth( tu, COMPONENT_Cr, trDepth, ( chromaCbfs.Cr ? 1 : 0 ) );
-    }
-
-
-    transform_unit( tu, cuCtx, chromaCbfs );
+    transform_unit(tu, cuCtx, partitioner, subTuCounter);
   }
 }
 
 bool CABACReader::cbf_comp( CodingStructure& cs, const CompArea& area, unsigned depth, const bool prevCbf, const bool useISP )
 {
-  const unsigned  ctxId = DeriveCtx::CtxQtCbf( area.compID, depth, prevCbf, useISP && isLuma( area.compID ) );
+  unsigned  ctxId = DeriveCtx::CtxQtCbf(area.compID, prevCbf, useISP && isLuma(area.compID));
   const CtxSet&   ctxSet  = Ctx::QtCbf[ area.compID ];
 
   RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET_SIZE2(STATS__CABAC_BITS__QT_CBF, area.size(), area.compID);
 
-  const unsigned  cbf = m_BinDecoder.decodeBin( ctxSet( ctxId ) );
+  // A CU coded with BDPCM in this component's channel uses a fixed cbf context (1 for Y and Cb,
+  // 2 for Cr) in place of the context derived above.
+  //
+  // The CU lookup goes through toChannelType() so that Cb and Cr both resolve to the chroma channel;
+  // a plain ChannelType( compID ) cast does not map COMPONENT_Cr onto CHANNEL_TYPE_CHROMA.
+  const CodingUnit* const cuAtPos = cs.getCU( area.pos(), toChannelType( area.compID ) );
+  const bool              isBdpcm = ( area.compID == COMPONENT_Y ) ? ( cuAtPos->bdpcmMode != 0 )
+                                                                   : ( cuAtPos->bdpcmModeChroma != 0 );
+
+  if( isBdpcm )
+  {
+    ctxId = ( area.compID == COMPONENT_Cr ) ? 2 : 1;
+  }
+
+  // The bin is decoded with whichever context was selected above.
+  const unsigned cbf = m_BinDecoder.decodeBin( ctxSet( ctxId ) );
 
   DTRACE( g_trace_ctx, D_SYNTAX, "cbf_comp() etype=%d pos=(%d,%d) ctx=%d cbf=%d\n", area.compID, area.x, area.y, ctxId, cbf );
   return cbf;
@@ -2218,7 +2550,7 @@ void CABACReader::mvd_coding( Mv &rMvd )
   {
     if (horAbs > 1)
     {
-      horAbs += exp_golomb_eqprob(1 );
+      horAbs += m_BinDecoder.decodeRemAbsEP(1, 0, MV_BITS - 1);
     }
     if (m_BinDecoder.decodeBinEP())
     {
@@ -2229,51 +2561,139 @@ void CABACReader::mvd_coding( Mv &rMvd )
   {
     if (verAbs > 1)
     {
-      verAbs += exp_golomb_eqprob(1 );
+      verAbs += m_BinDecoder.decodeRemAbsEP(1, 0, MV_BITS - 1);
     }
     if (m_BinDecoder.decodeBinEP())
     {
       verAbs = -verAbs;
     }
   }
-  rMvd = Mv(horAbs, verAbs);
-}
-
-
-//================================================================================
-//  clause 7.3.8.10
-//--------------------------------------------------------------------------------
-//    void  transform_unit      ( tu, cuCtx, chromaCbfs )
-//    void  cu_qp_delta         ( cu )
-//    void  cu_chroma_qp_offset ( cu )
-//================================================================================
-
-void CABACReader::transform_unit( TransformUnit& tu, CUCtx& cuCtx, ChromaCbfs& chromaCbfs )
-{
-  CodingUnit& cu         = *tu.cu;
+  rMvd = Mv(horAbs, verAbs);
+  CHECK(!((horAbs >= MVD_MIN) && (horAbs <= MVD_MAX)) || !((verAbs >= MVD_MIN) && (verAbs <= MVD_MAX)), "Illegal MVD value");
+}
+
+
+//================================================================================
+//  clause 7.3.8.10
+//--------------------------------------------------------------------------------
+//    void  transform_unit      ( tu, cuCtx, chromaCbfs )
+//    void  cu_qp_delta         ( cu )
+//    void  cu_chroma_qp_offset ( cu )
+//================================================================================
+void CABACReader::transform_unit( TransformUnit& tu, CUCtx& cuCtx, Partitioner& partitioner, const int subTuCounter)
+{
+  const UnitArea&         area = partitioner.currArea();
+  const unsigned          trDepth = partitioner.currTrDepth;
+
+  CodingStructure&  cs = *tu.cs;
+  CodingUnit&       cu = *tu.cu;
+  ChromaCbfs        chromaCbfs;
+  chromaCbfs.Cb = chromaCbfs.Cr = false;
+
+  const bool chromaCbfISP = area.blocks[COMPONENT_Cb].valid() && cu.ispMode;
+
+  // cbf_cb & cbf_cr
+  if (area.chromaFormat != CHROMA_400 && area.blocks[COMPONENT_Cb].valid() && (!cu.isSepTree() || partitioner.chType == CHANNEL_TYPE_CHROMA) && (!cu.ispMode || chromaCbfISP))
+  {
+    const int cbfDepth = chromaCbfISP ? trDepth - 1 : trDepth;
+    {
+      if (!(cu.sbtInfo && tu.noResidual))
+        chromaCbfs.Cb = cbf_comp(cs, area.blocks[COMPONENT_Cb], cbfDepth);
+
+      if (!(cu.sbtInfo && tu.noResidual))
+        chromaCbfs.Cr = cbf_comp(cs, area.blocks[COMPONENT_Cr], cbfDepth, chromaCbfs.Cb);
+    }
+  }
+  else if (cu.isSepTree())
+  {
+    chromaCbfs = ChromaCbfs(false);
+  }
+
+  if (!isChroma(partitioner.chType))
+  {
+    if (!CU::isIntra(cu) && trDepth == 0 && !chromaCbfs.sigChroma(area.chromaFormat))
+    {
+      TU::setCbfAtDepth(tu, COMPONENT_Y, trDepth, 1);
+    }
+    else if (cu.sbtInfo && tu.noResidual)
+    {
+      TU::setCbfAtDepth(tu, COMPONENT_Y, trDepth, 0);
+    }
+    else if (cu.sbtInfo && !chromaCbfs.sigChroma(area.chromaFormat))
+    {
+      assert(!tu.noResidual);
+      TU::setCbfAtDepth(tu, COMPONENT_Y, trDepth, 1);
+    }
+    else
+    {
+      bool lumaCbfIsInferredACT = (cu.colorTransform && cu.predMode == MODE_INTRA && trDepth == 0 && !chromaCbfs.sigChroma(area.chromaFormat));
+      bool lastCbfIsInferred    = lumaCbfIsInferredACT; // ISP and ACT are mutually exclusive
+      bool previousCbf          = false;
+      bool rootCbfSoFar         = false;
+      if (cu.ispMode)
+      {
+        uint32_t nTus = cu.ispMode == HOR_INTRA_SUBPARTITIONS ? cu.lheight() >> floorLog2(tu.lheight()) : cu.lwidth() >> floorLog2(tu.lwidth());
+        if (subTuCounter == nTus - 1)
+        {
+          TransformUnit* tuPointer = cu.firstTU;
+          for (int tuIdx = 0; tuIdx < nTus - 1; tuIdx++)
+          {
+            rootCbfSoFar |= TU::getCbfAtDepth(*tuPointer, COMPONENT_Y, trDepth);
+            tuPointer = tuPointer->next;
+          }
+          if (!rootCbfSoFar)
+          {
+            lastCbfIsInferred = true;
+          }
+        }
+        if (!lastCbfIsInferred)
+        {
+          previousCbf = TU::getPrevTuCbfAtDepth(tu, COMPONENT_Y, trDepth);
+        }
+      }
+      bool cbfY = lastCbfIsInferred ? true : cbf_comp(cs, tu.Y(), trDepth, previousCbf, cu.ispMode);
+      TU::setCbfAtDepth(tu, COMPONENT_Y, trDepth, (cbfY ? 1 : 0));
+    }
+  }
+  if (area.chromaFormat != CHROMA_400 && (!cu.ispMode || chromaCbfISP))
+  {
+    TU::setCbfAtDepth(tu, COMPONENT_Cb, trDepth, (chromaCbfs.Cb ? 1 : 0));
+    TU::setCbfAtDepth(tu, COMPONENT_Cr, trDepth, (chromaCbfs.Cr ? 1 : 0));
+  }
   bool        lumaOnly   = ( cu.chromaFormat == CHROMA_400 || !tu.blocks[COMPONENT_Cb].valid() );
   bool        cbfLuma    = ( tu.cbf[ COMPONENT_Y ] != 0 );
   bool        cbfChroma  = ( lumaOnly ? false : ( chromaCbfs.Cb || chromaCbfs.Cr ) );
 
-  if( cbfLuma || cbfChroma )
+  if( ( cu.lwidth() > 64 || cu.lheight() > 64 || cbfLuma || cbfChroma ) &&
+    (!tu.cu->isSepTree() || isLuma(tu.chType)) )
   {
     if( cu.cs->pps->getUseDQP() && !cuCtx.isDQPCoded )
     {
-      if (!CS::isDualITree(*tu.cs) || isLuma(tu.chType))
-      {
-        cu_qp_delta(cu, cuCtx.qp, cu.qp);
-        cuCtx.qp = cu.qp;
-        cuCtx.isDQPCoded = true;
-      }
+      cu_qp_delta(cu, cuCtx.qp, cu.qp);
+      cuCtx.qp = cu.qp;
+      cuCtx.isDQPCoded = true;
     }
-    if( cu.cs->slice->getUseChromaQpAdj() && cbfChroma && !cu.transQuantBypass && !cuCtx.isChromaQpAdjCoded )
+  }
+  if (!cu.isSepTree() || isChroma(tu.chType))   // !DUAL_TREE_LUMA
+  {
+    SizeType channelWidth = !cu.isSepTree() ? cu.lwidth() : cu.chromaSize().width;
+    SizeType channelHeight = !cu.isSepTree() ? cu.lheight() : cu.chromaSize().height;
+
+    if (cu.cs->slice->getUseChromaQpAdj() && (channelWidth > 64 || channelHeight > 64 || cbfChroma) && !cuCtx.isChromaQpAdjCoded)
     {
-      cu_chroma_qp_offset( cu );
+      cu_chroma_qp_offset(cu);
       cuCtx.isChromaQpAdjCoded = true;
     }
+  }
+
+  if( !lumaOnly )
+  {
+    joint_cb_cr( tu, ( tu.cbf[COMPONENT_Cb] ? 2 : 0 ) + ( tu.cbf[COMPONENT_Cr] ? 1 : 0 ) );
+  }
+
     if( cbfLuma )
     {
-      residual_coding( tu, COMPONENT_Y );
+      residual_coding( tu, COMPONENT_Y, cuCtx );
     }
     if( !lumaOnly )
     {
@@ -2285,8 +2705,7 @@ void CABACReader::transform_unit( TransformUnit& tu, CUCtx& cuCtx, ChromaCbfs& c
         }
         if( tu.cbf[ compID ] )
         {
-          residual_coding( tu, compID );
-        }
+          residual_coding( tu, compID, cuCtx );
       }
     }
   }
@@ -2323,7 +2742,7 @@ void CABACReader::cu_chroma_qp_offset( CodingUnit& cu )
   RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET_SIZE2( STATS__CABAC_BITS__CHROMA_QP_ADJUSTMENT, cu.blocks[cu.chType].lumaSize(), CHANNEL_TYPE_CHROMA );
 
   // cu_chroma_qp_offset_flag
-  int       length  = cu.cs->pps->getPpsRangeExtension().getChromaQpOffsetListLen();
+  int       length  = cu.cs->pps->getChromaQpOffsetListLen();
   unsigned  qpAdj   = m_BinDecoder.decodeBin( Ctx::ChromaQpAdjFlag() );
   if( qpAdj && length > 1 )
   {
@@ -2346,20 +2765,41 @@ void CABACReader::cu_chroma_qp_offset( CodingUnit& cu )
 //    void        residual_coding_subblock( coeffCtx )
 //================================================================================
 
-void CABACReader::residual_coding( TransformUnit& tu, ComponentID compID )
+void CABACReader::joint_cb_cr( TransformUnit& tu, const int cbfMask )
+{
+  // Parsed only if the SPS enables joint Cb-Cr coding and either both chroma cbfs are set (mask 3)
+  // or the CU is intra with at least one chroma cbf set; otherwise tu.jointCbCr is left untouched.
+  const bool toolEnabled = tu.cu->slice->getSPS()->getJointCbCrEnabledFlag();
+  const bool flagPresent = toolEnabled && ( cbfMask == 3 || ( cbfMask != 0 && CU::isIntra( *tu.cu ) ) );
+  if( flagPresent )
+  {
+    RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET_SIZE2( STATS__CABAC_BITS__JOINT_CB_CR, tu.blocks[COMPONENT_Cr].lumaSize(), CHANNEL_TYPE_CHROMA );
+    const bool jointFlag = m_BinDecoder.decodeBin( Ctx::JointCbCrFlag( cbfMask-1 ) ) != 0;
+    tu.jointCbCr         = jointFlag ? cbfMask : 0;
+  }
+}
+
+void CABACReader::residual_coding( TransformUnit& tu, ComponentID compID, CUCtx& cuCtx )
 {
   const CodingUnit& cu = *tu.cu;
   DTRACE( g_trace_ctx, D_SYNTAX, "residual_coding() etype=%d pos=(%d,%d) size=%dx%d predMode=%d\n", tu.blocks[compID].compID, tu.blocks[compID].x, tu.blocks[compID].y, tu.blocks[compID].width, tu.blocks[compID].height, cu.predMode );
 
+  if( compID == COMPONENT_Cr && tu.jointCbCr == 3 )
+    return;
+
   // parse transform skip and explicit rdpcm mode
-  mts_coding         ( tu, compID );
+  ts_flag            ( tu, compID );
   explicit_rdpcm_mode( tu, compID );
 
+  if (tu.mtsIdx[compID] == MTS_SKIP)
+  {
+    residual_codingTS( tu, compID );
+    return;
+  }
 
-#if HEVC_USE_SIGN_HIDING
   // determine sign hiding
-  bool signHiding  = ( cu.cs->slice->getSignDataHidingEnabledFlag() && !cu.transQuantBypass && tu.rdpcm[compID] == RDPCM_OFF );
-  if(  signHiding && CU::isIntra(cu) && CU::isRDPCMEnabled(cu) && tu.mtsIdx==1 )
+  bool signHiding  = ( cu.cs->picHeader->getSignDataHidingEnabledFlag() && tu.rdpcm[compID] == RDPCM_OFF );
+  if(  signHiding && CU::isIntra(cu) && CU::isRDPCMEnabled(cu) && tu.mtsIdx[compID] == MTS_SKIP )
   {
     const ChannelType chType    = toChannelType( compID );
     const unsigned    intraMode = PU::getFinalIntraMode( *cu.cs->getPU( tu.blocks[compID].pos(), chType ), chType );
@@ -2368,28 +2808,42 @@ void CABACReader::residual_coding( TransformUnit& tu, ComponentID compID )
       signHiding = false;
     }
   }
-#endif
 
   // init coeff coding context
-#if HEVC_USE_SIGN_HIDING
   CoeffCodingContext  cctx    ( tu, compID, signHiding );
-#else
-  CoeffCodingContext  cctx    ( tu, compID );
-#endif
   TCoeff*             coeff   = tu.getCoeffs( compID ).buf;
 
   // parse last coeff position
   cctx.setScanPosLast( last_sig_coeff( cctx, tu, compID ) );
+  if (tu.mtsIdx[compID] != MTS_SKIP && tu.blocks[compID].height >= 4 && tu.blocks[compID].width >= 4 )
+  {
+    const int maxLfnstPos = ((tu.blocks[compID].height == 4 && tu.blocks[compID].width == 4) || (tu.blocks[compID].height == 8 && tu.blocks[compID].width == 8)) ? 7 : 15;
+    cuCtx.violatesLfnstConstrained[ toChannelType(compID) ] |= cctx.scanPosLast() > maxLfnstPos;
+  }
+  if( tu.mtsIdx[compID] != MTS_SKIP && tu.blocks[compID].height >= 4 && tu.blocks[compID].width >= 4 )
+  {
+    const int lfnstLastScanPosTh = isLuma( compID ) ? LFNST_LAST_SIG_LUMA : LFNST_LAST_SIG_CHROMA;
+    cuCtx.lfnstLastScanPos |= cctx.scanPosLast() >= lfnstLastScanPosTh;
+  }
+#if !JVET_Q0055_MTS_SIGNALLING
+  if( isLuma(compID) && ( cctx.posX(cctx.scanPosLast()) >= 16 || cctx.posY(cctx.scanPosLast()) >= 16 ) )
+  {
+    cuCtx.violatesMtsCoeffConstraint = true;
+  }
+#endif
 
   // parse subblocks
-  const int stateTransTab = ( tu.cs->slice->getDepQuantEnabledFlag() ? 32040 : 0 );
+  const int stateTransTab = ( tu.cs->picHeader->getDepQuantEnabledFlag() ? 32040 : 0 );
   int       state         = 0;
 
+  int ctxBinSampleRatio = (compID == COMPONENT_Y) ? MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT_LUMA : MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT_CHROMA;
+  cctx.regBinLimit = (tu.getTbAreaAfterCoefZeroOut(compID) * ctxBinSampleRatio) >> 4;
 
     for( int subSetId = ( cctx.scanPosLast() >> cctx.log2CGSize() ); subSetId >= 0; subSetId--)
     {
       cctx.initSubblock       ( subSetId );
-      if( ( tu.mtsIdx > 1 || ( tu.cu->sbtInfo != 0 && tu.blocks[ compID ].height <= 32 && tu.blocks[ compID ].width <= 32 ) ) && !tu.cu->transQuantBypass && compID == COMPONENT_Y )
+
+      if( tu.cs->sps->getUseMTS() && tu.cu->sbtInfo != 0 && tu.blocks[ compID ].height <= 32 && tu.blocks[ compID ].width <= 32 && compID == COMPONENT_Y )
       {
         if( ( tu.blocks[ compID ].height == 32 && cctx.cgPosY() >= ( 16 >> cctx.log2CGHeight() ) ) || ( tu.blocks[ compID ].width == 32 && cctx.cgPosX() >= ( 16 >> cctx.log2CGWidth() ) ) )
         {
@@ -2397,91 +2851,83 @@ void CABACReader::residual_coding( TransformUnit& tu, ComponentID compID )
         }
       }
       residual_coding_subblock( cctx, coeff, stateTransTab, state );
+
+#if JVET_Q0055_MTS_SIGNALLING
+      if ( isLuma(compID) && cctx.isSigGroup() && ( cctx.cgPosY() > 3 || cctx.cgPosX() > 3 ) )
+      {
+        cuCtx.violatesMtsCoeffConstraint = true;
+      }
+#endif
     }
 
 }
 
-void CABACReader::mts_coding( TransformUnit& tu, ComponentID compID )
+void CABACReader::ts_flag( TransformUnit& tu, ComponentID compID ) // reads the transform-skip flag of one component into tu.mtsIdx[compID]
 {
-  const CodingUnit  &cu = *tu.cu;
-  const bool  tsAllowed = TU::isTSAllowed ( tu, compID );
-  const bool mtsAllowed = TU::isMTSAllowed( tu, compID );
-
-  if( !mtsAllowed && !tsAllowed ) return;
+  int tsFlag = ( (tu.cu->bdpcmMode && isLuma(compID)) || (tu.cu->bdpcmModeChroma && isChroma(compID)) ) ? 1 : tu.mtsIdx[compID] == MTS_SKIP ? 1 : 0; // value used when the flag is not parsed: 1 for BDPCM in this channel or if mtsIdx is already MTS_SKIP
+  int ctxIdx = isLuma(compID) ? 0 : 1; // context 0 for luma, 1 for chroma
 
-  RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET_SIZE2( STATS__CABAC_BITS__MTS_FLAGS, tu.blocks[compID], compID );
-
-  int symbol = 0;
-  int ctxIdx = 0;
-
-  if( tsAllowed )
+  if( TU::isTSAllowed ( tu, compID ) )
   {
-    ctxIdx = 6;
-    symbol = m_BinDecoder.decodeBin( Ctx::MTSIndex( ctxIdx ) );
-    tu.mtsIdx = 1-symbol; // 1 = TS
+    RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET_SIZE2( STATS__CABAC_BITS__MTS_FLAGS, tu.blocks[compID], compID );
+    tsFlag = m_BinDecoder.decodeBin( Ctx::TransformSkipFlag( ctxIdx ) );
   }
+  // Store the result: MTS_SKIP when the flag (parsed or defaulted) is set, otherwise MTS_DCT2_DCT2.
+  tu.mtsIdx[compID] = tsFlag ? MTS_SKIP : MTS_DCT2_DCT2;
+  // NOTE(review): the trace prints COMPONENT_Y as etype for every compID and labels tsFlag as mtsIdx - confirm this is intended.
+  DTRACE(g_trace_ctx, D_SYNTAX, "ts_flag() etype=%d pos=(%d,%d) mtsIdx=%d\n", COMPONENT_Y, tu.cu->lx(), tu.cu->ly(), tsFlag);
+}
 
-  if( tu.mtsIdx != 1 )
-  {
-    if( mtsAllowed )
+void CABACReader::mts_idx( CodingUnit& cu, CUCtx& cuCtx ) // reads the luma MTS index of a CU; it is kept in the CU's first TU
+{
+  TransformUnit &tu = *cu.firstTU;
+  int        mtsIdx = tu.mtsIdx[COMPONENT_Y]; // Transform skip flag has already been decoded
+  // Parse only if MTS is allowed, no MTS coefficient constraint was violated, LFNST is off, TS is off and the luma cbf is set.
+  if( CU::isMTSAllowed( cu, COMPONENT_Y ) && !cuCtx.violatesMtsCoeffConstraint &&
+      cu.lfnstIdx == 0 && mtsIdx != MTS_SKIP && TU::getCbf(tu, COMPONENT_Y) )
+  {
+    RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET_SIZE2( STATS__CABAC_BITS__MTS_FLAGS, tu.blocks[COMPONENT_Y], COMPONENT_Y );
+    int ctxIdx = 0;
+    int symbol = m_BinDecoder.decodeBin( Ctx::MTSIdx(ctxIdx));
+    // A 0-bin keeps the value read above; a 1-bin switches to MTS_DST7_DST7 plus a prefix of up to three more bins.
+    if( symbol )
     {
-      ctxIdx = std::min( (int)cu.qtDepth, 5 );
-      symbol = m_BinDecoder.decodeBin( Ctx::MTSIndex( ctxIdx ) );
-
-      if( symbol )
+      ctxIdx = 1;
+      mtsIdx = MTS_DST7_DST7; // starting value; the loop below can add up to 3
+      for( int i = 0; i < 3; i++, ctxIdx++ )
       {
-        ctxIdx    = 7;
-        tu.mtsIdx = 2; // mtsIdx = 2 -- 4
-        for( int i = 0; i < 3; i++, ctxIdx++ )
+        symbol  = m_BinDecoder.decodeBin( Ctx::MTSIdx(ctxIdx));
+        mtsIdx += symbol;
+        // a 0-bin terminates the prefix
+        if( !symbol )
         {
-          symbol = m_BinDecoder.decodeBin( Ctx::MTSIndex( ctxIdx ) );
-          tu.mtsIdx += symbol;
-
-          if( !symbol )
-          {
-            break;
-          }
+          break;
         }
       }
     }
   }
-  DTRACE( g_trace_ctx, D_SYNTAX, "mts_coding() etype=%d pos=(%d,%d) mtsIdx=%d\n", COMPONENT_Y, cu.lx(), cu.ly(), tu.mtsIdx );
+  // Write the (possibly unchanged) index back to the first TU.
+  tu.mtsIdx[COMPONENT_Y] = mtsIdx;
+  // Trace the final value.
+  DTRACE(g_trace_ctx, D_SYNTAX, "mts_idx() etype=%d pos=(%d,%d) mtsIdx=%d\n", COMPONENT_Y, tu.cu->lx(), tu.cu->ly(), mtsIdx);
 }
-
+  
 void CABACReader::isp_mode( CodingUnit& cu )
 {
-  if( !CU::isIntra( cu ) || !isLuma( cu.chType ) || cu.firstPU->multiRefIdx || cu.ipcm )
+  if( !CU::isIntra( cu ) || !isLuma( cu.chType ) || cu.firstPU->multiRefIdx || !cu.cs->sps->getUseISP() || cu.bdpcmMode || !CU::canUseISP( cu, getFirstComponentOfChannel( cu.chType ) ) || cu.colorTransform ) // any of these conditions means nothing is parsed
   {
     cu.ispMode = NOT_INTRA_SUBPARTITIONS;
     return;
   }
 
-  const ISPType allowedSplits = CU::canUseISPSplit( cu, getFirstComponentOfChannel( cu.chType ) );
-  if( allowedSplits == NOT_INTRA_SUBPARTITIONS )
-  {
-    cu.ispMode = NOT_INTRA_SUBPARTITIONS;
-    return;
-  }
+  RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET(STATS__CABAC_BITS__ISP_MODE_FLAG);
 
-  RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET( STATS__CABAC_BITS__ISP_MODE_FLAG );
-  cu.ispMode = NOT_INTRA_SUBPARTITIONS;
-  int symbol = m_BinDecoder.decodeBin( Ctx::ISPMode( 0 ) );
+  int symbol = m_BinDecoder.decodeBin(Ctx::ISPMode(0)); // NOTE(review): when this bin is 0, cu.ispMode is not written here - presumably it already holds NOT_INTRA_SUBPARTITIONS; confirm
 
   if( symbol )
   {
-    if( allowedSplits == HOR_INTRA_SUBPARTITIONS )
-    {
-      cu.ispMode = HOR_INTRA_SUBPARTITIONS;
-    }
-    else if( allowedSplits == VER_INTRA_SUBPARTITIONS )
-    {
-      cu.ispMode = VER_INTRA_SUBPARTITIONS;
-    }
-    else
-    {
-      RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET( STATS__CABAC_BITS__ISP_SPLIT_FLAG );
-      cu.ispMode = 1 + m_BinDecoder.decodeBin( Ctx::ISPMode( 1 ) );
-    }
+    RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET( STATS__CABAC_BITS__ISP_SPLIT_FLAG );
+    cu.ispMode = 1 + m_BinDecoder.decodeBin( Ctx::ISPMode( 1 ) ); // a second bin is always read and selects between the values 1 and 2
   }
   DTRACE( g_trace_ctx, D_SYNTAX, "intra_subPartitions() etype=%d pos=(%d,%d) ispIdx=%d\n", cu.chType, cu.blocks[cu.chType].x, cu.blocks[cu.chType].y, (int)cu.ispMode );
 }
@@ -2492,7 +2938,7 @@ void CABACReader::explicit_rdpcm_mode( TransformUnit& tu, ComponentID compID )
 
   tu.rdpcm[compID] = RDPCM_OFF;
 
-  if( !CU::isIntra(cu) && CU::isRDPCMEnabled(cu) && ( tu.mtsIdx==1 || cu.transQuantBypass ) )
+  if (!CU::isIntra(cu) && CU::isRDPCMEnabled(cu) && ( tu.mtsIdx[compID] == MTS_SKIP))
   {
     RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET_SIZE( STATS__EXPLICIT_RDPCM_BITS, tu.blocks[tu.chType].lumaSize() );
 
@@ -2511,6 +2957,51 @@ void CABACReader::explicit_rdpcm_mode( TransformUnit& tu, ComponentID compID )
   }
 }
 
+void CABACReader::residual_lfnst_mode( CodingUnit& cu,  CUCtx& cuCtx  ) // reads the LFNST index of a CU into cu.lfnstIdx
+{
+  int chIdx = cu.isSepTree() && cu.chType == CHANNEL_TYPE_CHROMA ? 1 : 0; // 1 only for the chroma tree of a separate-tree CU
+  if ( (cu.ispMode && !CU::canUseLfnstWithISP( cu, cu.chType ) ) ||
+      (cu.cs->sps->getUseLFNST() && CU::isIntra(cu) && cu.mipFlag && !allowLfnstWithMip(cu.firstPU->lumaSize())) ||
+    ( cu.isSepTree() && cu.chType == CHANNEL_TYPE_CHROMA && std::min( cu.blocks[ 1 ].width, cu.blocks[ 1 ].height ) < 4 )
+    || ( cu.blocks[ chIdx ].lumaSize().width > cu.cs->sps->getMaxTbSize() || cu.blocks[ chIdx ].lumaSize().height > cu.cs->sps->getMaxTbSize() )
+    )
+  {
+    return; // NOTE(review): cu.lfnstIdx is not written on this path, unlike the early returns below - confirm it is already 0
+  }
+
+  RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET( STATS__CABAC_BITS__LFNST );
+
+  if( cu.cs->sps->getUseLFNST() && CU::isIntra( cu ) )
+  {
+    const bool lumaFlag              = cu.isSepTree() ? (   isLuma( cu.chType ) ? true : false ) : true;
+    const bool chromaFlag            = cu.isSepTree() ? ( isChroma( cu.chType ) ? true : false ) : true;
+    bool nonZeroCoeffNonTsCorner8x8 = ( lumaFlag && cuCtx.violatesLfnstConstrained[CHANNEL_TYPE_LUMA] ) || (chromaFlag && cuCtx.violatesLfnstConstrained[CHANNEL_TYPE_CHROMA] );
+    const bool isTrSkip = TU::getCbf(*cu.firstTU, COMPONENT_Y) && cu.firstTU->mtsIdx[COMPONENT_Y] == MTS_SKIP; // first TU has luma residual coded with transform skip
+    if ((!cuCtx.lfnstLastScanPos && !cu.ispMode) || nonZeroCoeffNonTsCorner8x8 || isTrSkip)
+    {
+      cu.lfnstIdx = 0;
+      return;
+    }
+  }
+  else
+  {
+    cu.lfnstIdx = 0;
+    return;
+  }
+  // The index is read with up to two bins: the first uses context 0 (1 for separate-tree CUs);
+  // the second, read only when the first bin is 1, uses context 2.
+  unsigned cctx = 0;
+  if ( cu.isSepTree() ) cctx++;
+
+  uint32_t idxLFNST = m_BinDecoder.decodeBin( Ctx::LFNSTIdx( cctx ) );
+  if( idxLFNST )
+  {
+    idxLFNST += m_BinDecoder.decodeBin(Ctx::LFNSTIdx(2));
+  }
+  cu.lfnstIdx = idxLFNST;
+
+  DTRACE( g_trace_ctx, D_SYNTAX, "residual_lfnst_mode() etype=%d pos=(%d,%d) mode=%d\n", COMPONENT_Y, cu.lx(), cu.ly(), ( int ) cu.lfnstIdx );
+}
 
 int CABACReader::last_sig_coeff( CoeffCodingContext& cctx, TransformUnit& tu, ComponentID compID )
 {
@@ -2520,7 +3011,7 @@ int CABACReader::last_sig_coeff( CoeffCodingContext& cctx, TransformUnit& tu, Co
   unsigned maxLastPosX = cctx.maxLastPosX();
   unsigned maxLastPosY = cctx.maxLastPosY();
 
-  if( ( tu.mtsIdx > 1 || ( tu.cu->sbtInfo != 0 && tu.blocks[ compID ].width <= 32 && tu.blocks[ compID ].height <= 32 ) ) && !tu.cu->transQuantBypass && compID == COMPONENT_Y )
+  if( tu.cs->sps->getUseMTS() && tu.cu->sbtInfo != 0 && tu.blocks[ compID ].width <= 32 && tu.blocks[ compID ].height <= 32 && compID == COMPONENT_Y )
   {
     maxLastPosX = ( tu.blocks[ compID ].width  == 32 ) ? g_uiGroupIdx[ 15 ] : maxLastPosX;
     maxLastPosY = ( tu.blocks[ compID ].height == 32 ) ? g_uiGroupIdx[ 15 ] : maxLastPosY;
@@ -2562,13 +3053,6 @@ int CABACReader::last_sig_coeff( CoeffCodingContext& cctx, TransformUnit& tu, Co
   }
 
   int blkPos;
-#if HEVC_USE_MDCS
-  if( cctx.scanType() == SCAN_VER )
-  {
-    blkPos = PosLastY + ( PosLastX * cctx.width() );
-  }
-  else
-#endif
   {
     blkPos = PosLastX + ( PosLastY * cctx.width() );
   }
@@ -2624,13 +3108,10 @@ void CABACReader::residual_coding_subblock( CoeffCodingContext& cctx, TCoeff* co
 
   //===== decode absolute values =====
   const int inferSigPos   = nextSigPos != cctx.scanPosLast() ? ( cctx.isNotFirst() ? minSubPos : -1 ) : nextSigPos;
-#if HEVC_USE_SIGN_HIDING
   int       firstNZPos    = nextSigPos;
   int       lastNZPos     = -1;
-#endif
   int       numNonZero    =  0;
-  bool      is2x2subblock = ( cctx.log2CGSize() == 2 );
-  int       remRegBins    = ( is2x2subblock ? MAX_NUM_REG_BINS_2x2SUBBLOCK : MAX_NUM_REG_BINS_4x4SUBBLOCK );
+  int       remRegBins    = cctx.regBinLimit;
   int       firstPosMode2 = minSubPos - 1;
   int       sigBlkPos[ 1 << MLS_CG_SIZE ];
 
@@ -2646,16 +3127,18 @@ void CABACReader::residual_coding_subblock( CoeffCodingContext& cctx, TCoeff* co
       DTRACE( g_trace_ctx, D_SYNTAX_RESI, "sig_bin() bin=%d ctx=%d\n", sigFlag, sigCtxId );
       remRegBins--;
     }
+    else if( nextSigPos != cctx.scanPosLast() )
+    {
+      cctx.sigCtxIdAbs( nextSigPos, coeff, state ); // required for setting variables that are needed for gtx/par context selection
+    }
 
     if( sigFlag )
     {
       uint8_t&  ctxOff = ctxOffset[ nextSigPos - minSubPos ];
       ctxOff           = cctx.ctxOffsetAbs();
       sigBlkPos[ numNonZero++ ] = blkPos;
-#if HEVC_USE_SIGN_HIDING
       firstNZPos = nextSigPos;
       lastNZPos  = std::max<int>( lastNZPos, nextSigPos );
-#endif
 
       RExt__DECODER_DEBUG_BIT_STATISTICS_SET( ctype_gt1 );
       unsigned gt1Flag = m_BinDecoder.decodeBin( cctx.greater1CtxIdAbs(ctxOff) );
@@ -2682,33 +3165,33 @@ void CABACReader::residual_coding_subblock( CoeffCodingContext& cctx, TCoeff* co
     state = ( stateTransTable >> ((state<<2)+((coeff[blkPos]&1)<<1)) ) & 3;
   }
   firstPosMode2 = nextSigPos;
+  cctx.regBinLimit = remRegBins;
 
 
   //===== 2nd PASS: Go-rice codes =====
   unsigned ricePar = 0;
   for( int scanPos = firstSigPos; scanPos > firstPosMode2; scanPos-- )
   {
+    int       sumAll = cctx.templateAbsSum(scanPos, coeff, 4);
+    ricePar = g_auiGoRiceParsCoeff[sumAll];
     TCoeff& tcoeff = coeff[ cctx.blockPos( scanPos ) ];
     if( tcoeff >= 4 )
     {
       RExt__DECODER_DEBUG_BIT_STATISTICS_SET( ctype_escs );
-      int       rem     = m_BinDecoder.decodeRemAbsEP( ricePar, cctx.extPrec(), cctx.maxLog2TrDRange() );
+      int       rem     = m_BinDecoder.decodeRemAbsEP( ricePar, COEF_REMAIN_BIN_REDUCTION, cctx.maxLog2TrDRange() );
       DTRACE( g_trace_ctx, D_SYNTAX_RESI, "rem_val() bin=%d ctx=%d\n", rem, ricePar );
       tcoeff += (rem<<1);
-      if( ricePar < 3 && rem > (3<<ricePar)-1 )
-      {
-        ricePar++;
-      }
     }
   }
 
   //===== coeff bypass ====
   for( int scanPos = firstPosMode2; scanPos >= minSubPos; scanPos-- )
   {
-    int       sumAll    = cctx.templateAbsSum(scanPos, coeff);
+    int       sumAll = cctx.templateAbsSum(scanPos, coeff, 0);
     int       rice      = g_auiGoRiceParsCoeff                        [sumAll];
-    int       pos0      = g_auiGoRicePosCoeff0[std::max(0, state - 1)][sumAll];
-    int       rem       = m_BinDecoder.decodeRemAbsEP( rice, cctx.extPrec(), cctx.maxLog2TrDRange() );
+    int       pos0      = g_auiGoRicePosCoeff0(state, rice);
+    RExt__DECODER_DEBUG_BIT_STATISTICS_SET(ctype_escs);
+    int       rem       = m_BinDecoder.decodeRemAbsEP( rice, COEF_REMAIN_BIN_REDUCTION, cctx.maxLog2TrDRange() );
     DTRACE( g_trace_ctx, D_SYNTAX_RESI, "rem_val() bin=%d ctx=%d\n", rem, rice );
     TCoeff    tcoeff  = ( rem == pos0 ? 0 : rem < pos0 ? rem+1 : rem );
     state = ( stateTransTable >> ((state<<2)+((tcoeff&1)<<1)) ) & 3;
@@ -2716,36 +3199,26 @@ void CABACReader::residual_coding_subblock( CoeffCodingContext& cctx, TCoeff* co
     {
       int        blkPos         = cctx.blockPos( scanPos );
       sigBlkPos[ numNonZero++ ] = blkPos;
-#if HEVC_USE_SIGN_HIDING
+      firstNZPos = scanPos;
       lastNZPos  = std::max<int>( lastNZPos, scanPos );
-#endif
       coeff[blkPos] = tcoeff;
     }
   }
 
   //===== decode sign's =====
   RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET_SIZE2( STATS__CABAC_BITS__SIGN_BIT, Size( cctx.width(), cctx.height() ), cctx.compID() );
-#if HEVC_USE_SIGN_HIDING
   const unsigned  numSigns    = ( cctx.hideSign( firstNZPos, lastNZPos ) ? numNonZero - 1 : numNonZero );
   unsigned        signPattern = m_BinDecoder.decodeBinsEP( numSigns ) << ( 32 - numSigns );
-#else
-  unsigned        signPattern = m_BinDecoder.decodeBinsEP( numNonZero ) << ( 32 - numNonZero );
-#endif
 
   //===== set final coefficents =====
   int sumAbs = 0;
-#if HEVC_USE_SIGN_HIDING
   for( unsigned k = 0; k < numSigns; k++ )
-#else
-  for( unsigned k = 0; k < numNonZero; k++ )
-#endif
   {
     int AbsCoeff          = coeff[ sigBlkPos[ k ] ];
     sumAbs               += AbsCoeff;
     coeff[ sigBlkPos[k] ] = ( signPattern & ( 1u << 31 ) ? -AbsCoeff : AbsCoeff );
     signPattern         <<= 1;
   }
-#if HEVC_USE_SIGN_HIDING
   if( numNonZero > numSigns )
   {
     int k                 = numSigns;
@@ -2753,9 +3226,199 @@ void CABACReader::residual_coding_subblock( CoeffCodingContext& cctx, TCoeff* co
     sumAbs               += AbsCoeff;
     coeff[ sigBlkPos[k] ] = ( sumAbs & 1 ? -AbsCoeff : AbsCoeff );
   }
+}
+
+void CABACReader::residual_codingTS( TransformUnit& tu, ComponentID compID )
+{
+  DTRACE( g_trace_ctx, D_SYNTAX, "residual_codingTS() etype=%d pos=(%d,%d) size=%dx%d\n", tu.blocks[compID].compID, tu.blocks[compID].x, tu.blocks[compID].y, tu.blocks[compID].width, tu.blocks[compID].height );
+
+  // Set up the coefficient coding context; the BDPCM mode passed in is the luma or the chroma one, depending on compID.
+  CoeffCodingContext  cctx( tu, compID, false, isLuma( compID ) ? tu.cu->bdpcmMode : tu.cu->bdpcmModeChroma );
+  TCoeff* const       coeffBuf = tu.getCoeffs( compID ).buf;
+  // Bin budget handed to the context: ( maxNumCoeff * 7 ) >> 2.
+  cctx.setNumCtxBins( ( cctx.maxNumCoeff() * 7 ) >> 2 );
+
+  for( int sbIdx = 0; sbIdx <= ( cctx.maxNumCoeff() - 1 ) >> cctx.log2CGSize(); ++sbIdx )
+  {
+    cctx.initSubblock( sbIdx );
+    residual_coding_subblockTS( cctx, coeffBuf );
+  }
+}
+
+void CABACReader::residual_coding_subblockTS( CoeffCodingContext& cctx, TCoeff* coeff )
+{
+  // Decodes one sub-block of the TS residual syntax into coeff. NOTE: All coefficients of the subblock must be set to zero before calling this function
+#if RExt__DECODER_DEBUG_BIT_STATISTICS
+  CodingStatisticsClassType ctype_group ( STATS__CABAC_BITS__SIG_COEFF_GROUP_FLAG,  cctx.width(), cctx.height(), cctx.compID() );
+#if TR_ONLY_COEFF_STATS
+  CodingStatisticsClassType ctype_map   ( STATS__CABAC_BITS__SIG_COEFF_MAP_FLAG_TS, cctx.width(), cctx.height(), cctx.compID() );
+  CodingStatisticsClassType ctype_par   ( STATS__CABAC_BITS__PAR_FLAG_TS,           cctx.width(), cctx.height(), cctx.compID() );
+  CodingStatisticsClassType ctype_gt1   ( STATS__CABAC_BITS__GT1_FLAG_TS,           cctx.width(), cctx.height(), cctx.compID() );
+  CodingStatisticsClassType ctype_gt2   ( STATS__CABAC_BITS__GT2_FLAG_TS,           cctx.width(), cctx.height(), cctx.compID() );
+  CodingStatisticsClassType ctype_escs  ( STATS__CABAC_BITS__ESCAPE_BITS_TS,        cctx.width(), cctx.height(), cctx.compID() );
+#else
+  CodingStatisticsClassType ctype_map   ( STATS__CABAC_BITS__SIG_COEFF_MAP_FLAG,    cctx.width(), cctx.height(), cctx.compID() );
+  CodingStatisticsClassType ctype_par   ( STATS__CABAC_BITS__PAR_FLAG,              cctx.width(), cctx.height(), cctx.compID() );
+  CodingStatisticsClassType ctype_gt1   ( STATS__CABAC_BITS__GT1_FLAG,              cctx.width(), cctx.height(), cctx.compID() );
+  CodingStatisticsClassType ctype_gt2   ( STATS__CABAC_BITS__GT2_FLAG,              cctx.width(), cctx.height(), cctx.compID() );
+  CodingStatisticsClassType ctype_escs  ( STATS__CABAC_BITS__ESCAPE_BITS,           cctx.width(), cctx.height(), cctx.compID() );
+#endif
+
+#endif
+
+  //===== init =====
+  const int   minSubPos   = cctx.maxSubPos(); // NOTE: despite its name this holds cctx.maxSubPos(), the upper bound of every scan loop below
+  int         firstSigPos = cctx.minSubPos(); // first scan position visited by each of the three passes
+  int         nextSigPos  = firstSigPos;
+  unsigned    signPattern = 0;                // bit k = sign of the k-th entry of sigBlkPos (1 = negative)
+
+  //===== decode significant_coeffgroup_flag =====
+  RExt__DECODER_DEBUG_BIT_STATISTICS_SET( ctype_group );
+  bool sigGroup = cctx.isLastSubSet() && cctx.noneSigGroup(); // when both hold the flag is taken as set without decoding a bin
+  if( !sigGroup )
+  {
+      sigGroup = m_BinDecoder.decodeBin( cctx.sigGroupCtxId( true ) );
+      DTRACE( g_trace_ctx, D_SYNTAX_RESI, "ts_sigGroup() bin=%d ctx=%d\n", sigGroup, cctx.sigGroupCtxId() ); // NOTE(review): traced ctx omits the 'true' argument used for decoding - confirm this is intended
+  }
+  if( sigGroup )
+  {
+    cctx.setSigGroup();
+  }
+  else
+  {
+    return; // sub-block not coded: coeff is not written here and keeps the zeros supplied by the caller
+  }
+
+  //===== decode absolute values =====
+  const int inferSigPos   = minSubPos; // at this last position the sig flag is inferred if no earlier coefficient was non-zero
+  int       numNonZero    =  0;
+  int       sigBlkPos[ 1 << MLS_CG_SIZE ]; // block positions whose sign is kept in signPattern, in decoding order
+
+  int lastScanPosPass1 = -1; // last scan position processed by the 1st pass (-1: none)
+  int lastScanPosPass2 = -1; // last scan position processed by the 2nd pass (-1: none)
+  for (; nextSigPos <= minSubPos && cctx.numCtxBins() >= 4; nextSigPos++) // 1st PASS (sig, sign, gt1, parity): up to 4 context-coded bins per position
+  {
+    int      blkPos     = cctx.blockPos( nextSigPos );
+    unsigned sigFlag    = ( !numNonZero && nextSigPos == inferSigPos );
+    if( !sigFlag )
+    {
+      RExt__DECODER_DEBUG_BIT_STATISTICS_SET( ctype_map );
+        const unsigned sigCtxId = cctx.sigCtxIdAbsTS( nextSigPos, coeff );
+        sigFlag = m_BinDecoder.decodeBin( sigCtxId );
+        DTRACE( g_trace_ctx, D_SYNTAX_RESI, "ts_sig_bin() bin=%d ctx=%d\n", sigFlag, sigCtxId );
+        cctx.decimateNumCtxBins(1);
+    }
+
+    if( sigFlag )
+    {
+      //===== decode sign =====
+#if TR_ONLY_COEFF_STATS
+      RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET_SIZE2(STATS__CABAC_BITS__SIGN_BIT_TS, Size(cctx.width(), cctx.height()), cctx.compID());
+#else
+      RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET_SIZE2( STATS__CABAC_BITS__SIGN_BIT, Size( cctx.width(), cctx.height() ), cctx.compID() );
 #endif
+      int sign;
+        const unsigned signCtxId = cctx.signCtxIdAbsTS(nextSigPos, coeff, cctx.bdpcm());
+        sign = m_BinDecoder.decodeBin(signCtxId); // in this pass the sign is a context-coded bin
+        cctx.decimateNumCtxBins(1);
+
+      signPattern += ( sign << numNonZero ); // store the sign at the bit index matching its sigBlkPos slot
+
+      sigBlkPos[numNonZero++] = blkPos;
+
+      RExt__DECODER_DEBUG_BIT_STATISTICS_SET( ctype_gt1 );
+      unsigned gt1Flag;
+      const unsigned gt1CtxId = cctx.lrg1CtxIdAbsTS(nextSigPos, coeff, cctx.bdpcm());
+        gt1Flag = m_BinDecoder.decodeBin(gt1CtxId);
+        DTRACE( g_trace_ctx, D_SYNTAX_RESI, "ts_gt1_flag() bin=%d ctx=%d\n", gt1Flag, gt1CtxId );
+        cctx.decimateNumCtxBins(1);
+
+      unsigned parFlag = 0;
+      if( gt1Flag )
+      {
+        RExt__DECODER_DEBUG_BIT_STATISTICS_SET( ctype_par );
+          parFlag = m_BinDecoder.decodeBin( cctx.parityCtxIdAbsTS() );
+          DTRACE( g_trace_ctx, D_SYNTAX_RESI, "ts_par_flag() bin=%d ctx=%d\n", parFlag, cctx.parityCtxIdAbsTS() );
+          cctx.decimateNumCtxBins(1);
+      }
+      coeff[ blkPos ] = (sign ? -1 : 1 ) * (1 + parFlag + gt1Flag); // signed value after pass 1: magnitude 1, 2 or 3
+    }
+    lastScanPosPass1 = nextSigPos;
+  }
+
+  int cutoffVal = 2;
+  const int numGtBins = 4; // number of "greater than" flags tried per coefficient in the 2nd pass (cutoffs 2, 4, 6, 8)
+
+  //===== 2nd PASS: gt2 =====
+  for (int scanPos = firstSigPos; scanPos <= minSubPos && cctx.numCtxBins() >= 4; scanPos++)
+  {
+    TCoeff& tcoeff = coeff[cctx.blockPos(scanPos)];
+    cutoffVal = 2;
+    for (int i = 0; i < numGtBins; i++)
+    {
+      if( tcoeff < 0)
+      {
+        tcoeff = -tcoeff; // keep the magnitude in place; the sign was already recorded in signPattern during pass 1
+      }
+       if (tcoeff >= cutoffVal)
+       {
+          RExt__DECODER_DEBUG_BIT_STATISTICS_SET(ctype_gt2);
+          unsigned gt2Flag;
+            gt2Flag = m_BinDecoder.decodeBin(cctx.greaterXCtxIdAbsTS(cutoffVal >> 1));
+            tcoeff += (gt2Flag << 1); // a set flag raises the magnitude by 2
+            DTRACE(g_trace_ctx, D_SYNTAX_RESI, "ts_gt%d_flag() bin=%d ctx=%d sp=%d coeff=%d\n", i, gt2Flag, cctx.greaterXCtxIdAbsTS(cutoffVal >> 1), scanPos, tcoeff);
+            cctx.decimateNumCtxBins(1);
+       }
+       cutoffVal += 2;
+    }
+    lastScanPosPass2 = scanPos;
+  }
+  //===== 3rd PASS: Go-rice codes =====
+  for( int scanPos = firstSigPos; scanPos <= minSubPos; scanPos++ )
+  {
+    TCoeff& tcoeff = coeff[ cctx.blockPos( scanPos ) ];
+    RExt__DECODER_DEBUG_BIT_STATISTICS_SET( ctype_escs );
+
+    cutoffVal = (scanPos <= lastScanPosPass2 ? 10 : (scanPos <= lastScanPosPass1 ? 2 : 0)); // 10: passes 1+2 ran here, 2: pass 1 only, 0: neither
+    if (tcoeff < 0)
+    {
+      tcoeff = -tcoeff;
+    }
+    if( tcoeff >= cutoffVal )
+    {
+      int       rice = cctx.templateAbsSumTS( scanPos, coeff );
+      int       rem  = m_BinDecoder.decodeRemAbsEP( rice, COEF_REMAIN_BIN_REDUCTION, cctx.maxLog2TrDRange() );
+      DTRACE( g_trace_ctx, D_SYNTAX_RESI, "ts_rem_val() bin=%d ctx=%d sp=%d\n", rem, rice, scanPos );
+      tcoeff += (scanPos <= lastScanPosPass1) ? (rem << 1) : rem; // remainder counts in steps of 2 after pass 1, otherwise it is added as-is
+      if (tcoeff && scanPos > lastScanPosPass1)
+      {
+        int      blkPos = cctx.blockPos(scanPos);
+        int sign = m_BinDecoder.decodeBinEP(); // pass 1 did not reach this position, so its sign is read here via decodeBinEP()
+        signPattern += (sign << numNonZero);
+        sigBlkPos[numNonZero++] = blkPos;
+      }
+    }
+    if (!cctx.bdpcm() && cutoffVal) // only for positions that went through pass 1 (cutoffVal != 0) and only when cctx.bdpcm() is false
+    {
+      if (tcoeff > 0)
+      {
+        int rightPixel, belowPixel;
+        cctx.neighTS(rightPixel, belowPixel, scanPos, coeff);
+        tcoeff = cctx.decDeriveModCoeff(rightPixel, belowPixel, tcoeff); // NOTE(review): presumably remaps the level using the two neighbours - confirm in decDeriveModCoeff
+      }
+    }
+  }
+
+  //===== set final coefficients =====
+  for( unsigned k = 0; k < numNonZero; k++ )
+  {
+    int AbsCoeff          = coeff[ sigBlkPos[ k ] ];
+    coeff[ sigBlkPos[k] ] = ( signPattern & 1 ? -AbsCoeff : AbsCoeff ); // signs are consumed LSB first, the order in which they were stored
+    signPattern         >>= 1;
+  }
 }
 
+
 //================================================================================
 //  clause 7.3.8.12
 //--------------------------------------------------------------------------------
@@ -2839,3 +3502,64 @@ unsigned CABACReader::exp_golomb_eqprob( unsigned count )
   return symbol;
 }
 
+unsigned CABACReader::code_unary_fixed( unsigned ctxId, unsigned unary_max, unsigned fixed )
+{
+  // One context-coded bin (ctxId) selects how the rest of the symbol is read:
+  //   set     -> the value is whatever unary_max_eqprob( unary_max ) returns
+  //   not set -> 'fixed' bins read via decodeBinsEP(), offset by unary_max + 1
+  const bool unaryBranch = m_BinDecoder.decodeBin( ctxId );
+  if( !unaryBranch )
+  {
+    const unsigned fixedPart = m_BinDecoder.decodeBinsEP( fixed );
+    return unary_max + 1 + fixedPart;
+  }
+
+  return unary_max_eqprob( unary_max );
+}
+
+void CABACReader::mip_flag( CodingUnit& cu )
+{
+  RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET( STATS__CABAC_BITS__OTHER );
+
+  // A CU without a valid luma block carries no MIP flag; cu.mipFlag is left as it is.
+  if( !cu.Y().valid() )
+  {
+    return;
+  }
+
+  if( cu.cs->sps->getUseMIP() )
+  {
+    const unsigned mipCtx = DeriveCtx::CtxMipFlag( cu );
+    cu.mipFlag            = m_BinDecoder.decodeBin( Ctx::MipFlag( mipCtx ) );
+    DTRACE( g_trace_ctx, D_SYNTAX, "mip_flag() pos=(%d,%d) mode=%d\n", cu.lumaPos().x, cu.lumaPos().y, cu.mipFlag ? 1 : 0 );
+    return;
+  }
+  cu.mipFlag = false;   // SPS reports MIP as unused: no bin is decoded and the flag is set to false
+}
+
+void CABACReader::mip_pred_modes( CodingUnit &cu )
+{
+  RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET( STATS__CABAC_BITS__OTHER );
+
+  // MIP modes are parsed only for a CU with a valid luma block: one mip_pred_mode() call per PU.
+  if( cu.Y().valid() )
+  {
+    for( auto &predUnit : CU::traversePUs( cu ) )
+    {
+      mip_pred_mode( predUnit );
+    }
+  }
+}
+
+void CABACReader::mip_pred_mode( PredictionUnit &pu )
+{
+  // Parsing order: the transpose flag (one decodeBinEP() bin) first, then the mode index via xReadTruncBinCode().
+  pu.mipTransposedFlag = ( m_BinDecoder.decodeBinEP() != 0 );
+
+  const int numModes = getNumModesMip( pu.Y() );
+  uint32_t  modeIdx;
+  xReadTruncBinCode( modeIdx, numModes );
+  pu.intraDir[CHANNEL_TYPE_LUMA] = modeIdx;
+  CHECKD( pu.intraDir[CHANNEL_TYPE_LUMA] < 0 || pu.intraDir[CHANNEL_TYPE_LUMA] >= numModes, "Invalid MIP mode" );
+  DTRACE( g_trace_ctx, D_SYNTAX, "mip_pred_mode() pos=(%d,%d) mode=%d transposed=%d\n", pu.lumaPos().x, pu.lumaPos().y, pu.intraDir[CHANNEL_TYPE_LUMA], pu.mipTransposedFlag ? 1 : 0 );
+}
diff --git a/source/Lib/DecoderLib/CABACReader.h b/source/Lib/DecoderLib/CABACReader.h
index 34559ae1cef1c92edd6119465c3f0ae0d20c8ffe..132c50232a229395cbcde5d257acb71b7f99eec0 100644
--- a/source/Lib/DecoderLib/CABACReader.h
+++ b/source/Lib/DecoderLib/CABACReader.h
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2019, ITU/ISO/IEC
+* Copyright (c) 2010-2020, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -48,7 +48,7 @@
 class CABACReader
 {
 public:
-  CABACReader(BinDecoderBase& binDecoder) : shareStateDec(0), m_BinDecoder(binDecoder), m_Bitstream(0) {}
+  CABACReader(BinDecoderBase& binDecoder) : m_BinDecoder(binDecoder), m_Bitstream(0), m_scanOrder(nullptr) {} // m_scanOrder is a raw pointer member: start it at nullptr instead of leaving it indeterminate
   virtual ~CABACReader() {}
 
 public:
@@ -63,23 +63,25 @@ public:
   void        remaining_bytes           ( bool                          noTrailingBytesExpected );
 
   // coding tree unit (clause 7.3.8.2)
-  bool        coding_tree_unit          ( CodingStructure&              cs,     const UnitArea& area,     int (&qps)[2],   unsigned  ctuRsAddr );
+  void        coding_tree_unit          ( CodingStructure&              cs,     const UnitArea& area,     int (&qps)[2],   unsigned  ctuRsAddr );
 
   // sao (clause 7.3.8.3)
   void        sao                       ( CodingStructure&              cs,     unsigned        ctuRsAddr );
 
+  void        readAlfCtuFilterIndex(CodingStructure&              cs, unsigned        ctuRsAddr);
+
   // coding (quad)tree (clause 7.3.8.4)
-  bool        coding_tree               ( CodingStructure&              cs,     Partitioner&    pm,       CUCtx& cuCtx, Partitioner* pPartitionerChroma = nullptr, CUCtx* pCuCtxChroma = nullptr);
+  void        coding_tree               ( CodingStructure&              cs,     Partitioner&    pm,       CUCtx& cuCtx, Partitioner* pPartitionerChroma = nullptr, CUCtx* pCuCtxChroma = nullptr);
   PartSplit   split_cu_mode             ( CodingStructure&              cs,     Partitioner&    pm );
+  ModeType    mode_constraint           ( CodingStructure&              cs,     Partitioner&    pm,       const PartSplit splitMode );
 
   // coding unit (clause 7.3.8.5)
-  bool        coding_unit               ( CodingUnit&                   cu,     Partitioner&    pm,       CUCtx& cuCtx );
-  void        cu_transquant_bypass_flag ( CodingUnit&                   cu );
+  void        coding_unit               ( CodingUnit&                   cu,     Partitioner&    pm,       CUCtx& cuCtx );
   void        cu_skip_flag              ( CodingUnit&                   cu );
   void        pred_mode                 ( CodingUnit&                   cu );
-  void        pcm_flag                  ( CodingUnit&                   cu,     Partitioner&    pm );
+  void        bdpcm_mode                ( CodingUnit&                   cu,     const ComponentID compID );
   void        cu_pred_data              ( CodingUnit&                   cu );
-  void        cu_gbi_flag               ( CodingUnit&                   cu );
+  void        cu_bcw_flag               ( CodingUnit&                   cu );
   void        extend_ref_line           (CodingUnit&                     cu);
   void        intra_luma_pred_modes     ( CodingUnit&                   cu );
   void        intra_chroma_pred_modes   ( CodingUnit&                   cu );
@@ -87,9 +89,14 @@ public:
   void        intra_chroma_pred_mode    ( PredictionUnit&               pu );
   void        cu_residual               ( CodingUnit&                   cu,     Partitioner&    pm,       CUCtx& cuCtx );
   void        rqt_root_cbf              ( CodingUnit&                   cu );
+  void        adaptive_color_transform(CodingUnit&             cu);
   void        sbt_mode                  ( CodingUnit&                   cu );
-  bool        end_of_ctu                ( CodingUnit&                   cu,     CUCtx&          cuCtx );
-
+  void        end_of_ctu                ( CodingUnit&                   cu,     CUCtx&          cuCtx );
+  void        mip_flag                  ( CodingUnit&                   cu );
+  void        mip_pred_modes            ( CodingUnit&                   cu );
+  void        mip_pred_mode             ( PredictionUnit&               pu );
+  void        cu_palette_info           ( CodingUnit&                   cu,     ComponentID     compBegin, uint32_t numComp, CUCtx& cuCtx );
+  void        cuPaletteSubblockInfo     ( CodingUnit&                   cu,     ComponentID     compBegin, uint32_t numComp, int subSetId, uint32_t& prevRunPos, unsigned& prevRunType );
   // prediction unit (clause 7.3.8.6)
   void        prediction_unit           ( PredictionUnit&               pu,     MergeCtx&       mrgCtx );
   void        merge_flag                ( PredictionUnit&               pu );
@@ -103,33 +110,34 @@ public:
   void        inter_pred_idc            ( PredictionUnit&               pu );
   void        ref_idx                   ( PredictionUnit&               pu,     RefPicList      eRefList );
   void        mvp_flag                  ( PredictionUnit&               pu,     RefPicList      eRefList );
-  void        MHIntra_flag              ( PredictionUnit&               pu );
-  void        MHIntra_luma_pred_modes   ( CodingUnit&                   cu );
-  void        triangle_mode             ( CodingUnit&                   cu );
+  void        Ciip_flag              ( PredictionUnit&               pu );
   void        smvd_mode              ( PredictionUnit&               pu );
 
-  // pcm samples (clause 7.3.8.7)
-  void        pcm_samples               ( TransformUnit&                tu );
 
   // transform tree (clause 7.3.8.8)
-  void        transform_tree            ( CodingStructure&              cs,     Partitioner&    pm,       CUCtx& cuCtx,  ChromaCbfs& chromaCbfs, const PartSplit ispType = TU_NO_ISP, const int subTuIdx = -1 );
-  bool        cbf_comp                  ( CodingStructure&              cs,     const CompArea& area,     unsigned depth, const bool prevCbCbf = false, const bool useISP = false );
+  void        transform_tree            ( CodingStructure&              cs, Partitioner&    pm, CUCtx& cuCtx, const PartSplit ispType = TU_NO_ISP, const int subTuIdx = -1 );
+  bool        cbf_comp                  ( CodingStructure&              cs,     const CompArea& area,     unsigned depth, const bool prevCbf = false, const bool useISP = false );
 
   // mvd coding (clause 7.3.8.9)
   void        mvd_coding                ( Mv &rMvd );
 
   // transform unit (clause 7.3.8.10)
-  void        transform_unit            ( TransformUnit&                tu,     CUCtx&          cuCtx,  ChromaCbfs& chromaCbfs );
+  void        transform_unit            ( TransformUnit&                tu,     CUCtx&          cuCtx, Partitioner& pm,        const int subTuCounter = -1 );
   void        cu_qp_delta               ( CodingUnit&                   cu,     int             predQP, int8_t& qp );
   void        cu_chroma_qp_offset       ( CodingUnit&                   cu );
 
   // residual coding (clause 7.3.8.11)
-  void        residual_coding           ( TransformUnit&                tu,     ComponentID     compID );
-  void        mts_coding                ( TransformUnit&                tu,     ComponentID     compID );
+  void        residual_coding           ( TransformUnit&                tu,     ComponentID     compID, CUCtx& cuCtx );
+  void        ts_flag                   ( TransformUnit&                tu,     ComponentID     compID );
+  void        mts_idx                   ( CodingUnit&                   cu,     CUCtx&          cuCtx  );
+  void        residual_lfnst_mode       ( CodingUnit&                   cu,     CUCtx&          cuCtx  );
   void        isp_mode                  ( CodingUnit&                   cu );
   void        explicit_rdpcm_mode       ( TransformUnit&                tu,     ComponentID     compID );
   int         last_sig_coeff            ( CoeffCodingContext&           cctx,   TransformUnit& tu, ComponentID   compID );
   void        residual_coding_subblock  ( CoeffCodingContext&           cctx,   TCoeff*         coeff, const int stateTransTable, int& state );
+  void        residual_codingTS         ( TransformUnit&                tu,     ComponentID     compID );
+  void        residual_coding_subblockTS( CoeffCodingContext&           cctx,   TCoeff*         coeff  );
+  void        joint_cb_cr               ( TransformUnit&                tu,     const int cbfMask );
 
   // cross component prediction (clause 7.3.8.12)
   void        cross_comp_pred           ( TransformUnit&                tu,     ComponentID     compID );
@@ -139,15 +147,17 @@ private:
   unsigned    unary_max_eqprob          (                                   unsigned maxSymbol );
   unsigned    exp_golomb_eqprob         ( unsigned count );
   unsigned    get_num_bits_read         () { return m_BinDecoder.getNumBitsRead(); }
+  unsigned    code_unary_fixed          ( unsigned ctxId, unsigned unary_max, unsigned fixed );
 
   void        xReadTruncBinCode(uint32_t& symbol, uint32_t maxSymbol);
+  void        parseScanRotationModeFlag ( CodingUnit& cu,           ComponentID compBegin );
+  void        xDecodePLTPredIndicator   ( CodingUnit& cu,           uint32_t maxPLTSize,   ComponentID compBegin );
+  void        xAdjustPLTIndex           ( CodingUnit& cu,           Pel curLevel,          uint32_t idx, PelBuf& paletteIdx, PLTtypeBuf& paletteRunType, int maxSymbol, ComponentID compBegin );
 public:
-  int         shareStateDec;
-  Position    shareParentPos;
-  Size        shareParentSize;
 private:
   BinDecoderBase& m_BinDecoder;
   InputBitstream* m_Bitstream;
+  ScanElement*    m_scanOrder;
 };
 
 
diff --git a/source/Lib/DecoderLib/DecCu.cpp b/source/Lib/DecoderLib/DecCu.cpp
index 04b10286a175f9b0967cbd2e742ee4cb0414fc6e..3b96128195c8623b5fff067cdcecb0486b61cb18 100644
--- a/source/Lib/DecoderLib/DecCu.cpp
+++ b/source/Lib/DecoderLib/DecCu.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -103,12 +103,12 @@ void DecCu::decompressCtu( CodingStructure& cs, const UnitArea& ctuArea )
 {
 
   const int maxNumChannelType = cs.pcv->chrFormat != CHROMA_400 && CS::isDualITree( cs ) ? 2 : 1;
-  if (!cs.pcv->isEncoder)
+
+  if (cs.resetIBCBuffer)
   {
-    m_shareStateDec = NO_SHARE;
+    m_pcInterPred->resetIBCBuffer(cs.pcv->chrFormat, cs.slice->getSPS()->getMaxCUHeight());
+    cs.resetIBCBuffer = false;
   }
-  bool sharePrepareCondition = ((!cs.pcv->isEncoder) && (!(cs.slice->isIntra()) || cs.slice->getSPS()->getIBCFlag()));
-
   for( int ch = 0; ch < maxNumChannelType; ch++ )
   {
     const ChannelType chType = ChannelType( ch );
@@ -117,23 +117,15 @@ void DecCu::decompressCtu( CodingStructure& cs, const UnitArea& ctuArea )
 
     for( auto &currCU : cs.traverseCUs( CS::getArea( cs, ctuArea, chType ), chType ) )
     {
-      if(sharePrepareCondition)
+      if(currCU.Y().valid())
       {
-        if ((currCU.shareParentPos.x >= 0) && (!(currCU.shareParentPos.x == prevTmpPos.x && currCU.shareParentPos.y == prevTmpPos.y)))
-        {
-          m_shareStateDec = GEN_ON_SHARED_BOUND;
-          cs.motionLut.lutShare = cs.motionLut.lut;
-          cs.motionLut.lutShareIbc = cs.motionLut.lutIbc;
-        }
-
-        if (currCU.shareParentPos.x < 0)
+        const int vSize = cs.slice->getSPS()->getMaxCUHeight() > 64 ? 64 : cs.slice->getSPS()->getMaxCUHeight();
+        if((currCU.Y().x % vSize) == 0 && (currCU.Y().y % vSize) == 0)
         {
-          m_shareStateDec = 0;
+          m_pcInterPred->resetVPDUforIBC(cs.pcv->chrFormat, cs.slice->getSPS()->getMaxCUHeight(), vSize, currCU.Y().x  + g_IBCBufferSize / cs.slice->getSPS()->getMaxCUHeight() / 2, currCU.Y().y);
         }
-        prevTmpPos = currCU.shareParentPos;
       }
-      cs.chType = chType;
-      if (currCU.predMode != MODE_INTRA && currCU.Y().valid())
+      if (currCU.predMode != MODE_INTRA && currCU.predMode != MODE_PLT && currCU.Y().valid())
       {
         xDeriveCUMV(currCU);
       }
@@ -143,6 +135,7 @@ void DecCu::decompressCtu( CodingStructure& cs, const UnitArea& ctuArea )
       case MODE_IBC:
         xReconInter( currCU );
         break;
+      case MODE_PLT:
       case MODE_INTRA:
         xReconIntraQT( currCU );
         break;
@@ -151,10 +144,7 @@ void DecCu::decompressCtu( CodingStructure& cs, const UnitArea& ctuArea )
         break;
       }
 
-      if( CU::isLosslessCoded( currCU ) && !currCU.ipcm )
-      {
-        xFillPCMBuffer( currCU );
-      }
+      m_pcInterPred->xFillIBCBuffer(currCU);
 
       DTRACE_BLOCK_REC( cs.picture->getRecoBuf( currCU ), currCU, currCU.predMode );
     }
@@ -184,11 +174,31 @@ void DecCu::xIntraRecBlk( TransformUnit& tu, const ComponentID compID )
 
   const PredictionUnit &pu  = *tu.cs->getPU( area.pos(), chType );
   const uint32_t uiChFinalMode  = PU::getFinalIntraMode( pu, chType );
+  PelBuf pReco              = cs.getRecoBuf(area);
 
   //===== init availability pattern =====
-
-  const bool bUseFilteredPredictions = IntraPrediction::useFilteredIntraRefSamples( compID, pu, true, tu );
-  m_pcIntraPred->initIntraPatternChType( *tu.cu, area, bUseFilteredPredictions );
+  bool predRegDiffFromTB = CU::isPredRegDiffFromTB(*tu.cu, compID);
+  bool firstTBInPredReg = CU::isFirstTBInPredReg(*tu.cu, compID, area);
+  CompArea areaPredReg(COMPONENT_Y, tu.chromaFormat, area);
+  if (tu.cu->ispMode && isLuma(compID))
+  {
+    if (predRegDiffFromTB)
+    {
+      if (firstTBInPredReg)
+      {
+        CU::adjustPredArea(areaPredReg);
+        m_pcIntraPred->initIntraPatternChTypeISP(*tu.cu, areaPredReg, pReco);
+      }
+    }
+    else
+    {
+      m_pcIntraPred->initIntraPatternChTypeISP(*tu.cu, area, pReco);
+    }
+  }
+  else
+  {
+    m_pcIntraPred->initIntraPatternChType(*tu.cu, area);
+  }
 
   //===== get prediction signal =====
   if( compID != COMPONENT_Y && PU::isLMCMode( uiChFinalMode ) )
@@ -199,18 +209,32 @@ void DecCu::xIntraRecBlk( TransformUnit& tu, const ComponentID compID )
   }
   else
   {
-    m_pcIntraPred->predIntraAng( compID, piPred, pu, bUseFilteredPredictions );
+    if( PU::isMIP( pu, chType ) )
+    {
+      m_pcIntraPred->initIntraMip( pu, area );
+      m_pcIntraPred->predIntraMip( compID, piPred, pu );
+    }
+    else
+    {
+      if (predRegDiffFromTB)
+      {
+        if (firstTBInPredReg)
+        {
+          PelBuf piPredReg = cs.getPredBuf(areaPredReg);
+          m_pcIntraPred->predIntraAng(compID, piPredReg, pu);
+        }
+      }
+      else
+        m_pcIntraPred->predIntraAng(compID, piPred, pu);
+    }
   }
   const Slice           &slice = *cs.slice;
-  bool flag = slice.getReshapeInfo().getUseSliceReshaper() && (slice.isIntra() || (!slice.isIntra() && m_pcReshape->getCTUFlag()));
-  if (flag && slice.getReshapeInfo().getSliceReshapeChromaAdj() && (compID != COMPONENT_Y))
+  bool flag = slice.getPicHeader()->getLmcsEnabledFlag() && (slice.isIntra() || (!slice.isIntra() && m_pcReshape->getCTUFlag()));
+  if (flag && slice.getPicHeader()->getLmcsChromaResidualScaleFlag() && (compID != COMPONENT_Y) && (tu.cbf[COMPONENT_Cb] || tu.cbf[COMPONENT_Cr]))
   {
     const Area area = tu.Y().valid() ? tu.Y() : Area(recalcPosition(tu.chromaFormat, tu.chType, CHANNEL_TYPE_LUMA, tu.blocks[tu.chType].pos()), recalcSize(tu.chromaFormat, tu.chType, CHANNEL_TYPE_LUMA, tu.blocks[tu.chType].size()));
     const CompArea &areaY = CompArea(COMPONENT_Y, tu.chromaFormat, area);
-    PelBuf piPredY;
-    piPredY = cs.picture->getPredBuf(areaY);
-    const Pel avgLuma = piPredY.computeAvg();
-    int adj = m_pcReshape->calculateChromaAdj(avgLuma);
+    int adj = m_pcReshape->calculateChromaAdjVpduNei(tu, areaY);
     tu.setChromaAdj(adj);
   }
   //===== inverse transform =====
@@ -218,6 +242,24 @@ void DecCu::xIntraRecBlk( TransformUnit& tu, const ComponentID compID )
 
   const QpParam cQP( tu, compID );
 
+  if( tu.jointCbCr && isChroma(compID) )
+  {
+    if( compID == COMPONENT_Cb )
+    {
+      PelBuf resiCr = cs.getResiBuf( tu.blocks[ COMPONENT_Cr ] );
+      if( tu.jointCbCr >> 1 )
+      {
+        m_pcTrQuant->invTransformNxN( tu, COMPONENT_Cb, piResi, cQP );
+      }
+      else
+      {
+        const QpParam qpCr( tu, COMPONENT_Cr );
+        m_pcTrQuant->invTransformNxN( tu, COMPONENT_Cr, resiCr, qpCr );
+      }
+      m_pcTrQuant->invTransformICT( tu, piResi, resiCr );
+    }
+  }
+  else
   if( TU::getCbf( tu, compID ) )
   {
     m_pcTrQuant->invTransformNxN( tu, compID, piResi, cQP );
@@ -229,7 +271,7 @@ void DecCu::xIntraRecBlk( TransformUnit& tu, const ComponentID compID )
 
   //===== reconstruction =====
   flag = flag && (tu.blocks[compID].width*tu.blocks[compID].height > 4);
-  if (flag && TU::getCbf(tu, compID) && isChroma(compID) && slice.getReshapeInfo().getSliceReshapeChromaAdj())
+  if (flag && (TU::getCbf(tu, compID) || tu.jointCbCr) && isChroma(compID) && slice.getPicHeader()->getLmcsChromaResidualScaleFlag())
   {
     piResi.scaleSignal(tu.getChromaAdj(), 0, tu.cu->cs->slice->clpRng(compID));
   }
@@ -238,7 +280,6 @@ void DecCu::xIntraRecBlk( TransformUnit& tu, const ComponentID compID )
     CrossComponentPrediction::crossComponentPrediction( tu, compID, cs.getResiBuf( tu.Y() ), piResi, piResi, true );
   }
 
-  PelBuf pReco = cs.getRecoBuf( area );
 
   if( !tu.cu->ispMode || !isLuma( compID ) )
   {
@@ -253,7 +294,7 @@ void DecCu::xIntraRecBlk( TransformUnit& tu, const ComponentID compID )
   CompArea    tmpArea(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size());
   PelBuf tmpPred;
 #endif
-  if (slice.getReshapeInfo().getUseSliceReshaper() && (m_pcReshape->getCTUFlag() || slice.isIntra()) && compID == COMPONENT_Y)
+  if (slice.getPicHeader()->getLmcsEnabledFlag() && (m_pcReshape->getCTUFlag() || slice.isIntra()) && compID == COMPONENT_Y)
   {
 #if REUSE_CU_RESULTS
     {
@@ -270,7 +311,7 @@ void DecCu::xIntraRecBlk( TransformUnit& tu, const ComponentID compID )
 #if !KEEP_PRED_AND_RESI_SIGNALS
   pReco.copyFrom( piPred );
 #endif
-  if (slice.getReshapeInfo().getUseSliceReshaper() && (m_pcReshape->getCTUFlag() || slice.isIntra()) && compID == COMPONENT_Y)
+  if (slice.getPicHeader()->getLmcsEnabledFlag() && (m_pcReshape->getCTUFlag() || slice.isIntra()) && compID == COMPONENT_Y)
   {
 #if REUSE_CU_RESULTS
     {
@@ -287,72 +328,248 @@ void DecCu::xIntraRecBlk( TransformUnit& tu, const ComponentID compID )
 #endif
 }
 
-void DecCu::xReconIntraQT( CodingUnit &cu )
+void DecCu::xIntraRecACTBlk(TransformUnit& tu)
 {
-  if( cu.ipcm )
+  CodingStructure      &cs = *tu.cs;
+  const PredictionUnit &pu = *tu.cs->getPU(tu.blocks[COMPONENT_Y], CHANNEL_TYPE_LUMA);
+  const Slice          &slice = *cs.slice;
+
+  CHECK(!tu.Y().valid() || !tu.Cb().valid() || !tu.Cr().valid(), "Invalid TU");
+  CHECK(&pu != tu.cu->firstPU, "wrong PU fetch");
+  CHECK(tu.cu->ispMode, "adaptive color transform cannot be applied to ISP");
+  CHECK(pu.intraDir[CHANNEL_TYPE_CHROMA] != DM_CHROMA_IDX, "chroma should use DM mode for adaptive color transform");
+
+  bool flag = slice.getPicHeader()->getLmcsEnabledFlag() && (slice.isIntra() || (!slice.isIntra() && m_pcReshape->getCTUFlag()));
+  if (flag && slice.getPicHeader()->getLmcsChromaResidualScaleFlag() && (tu.cbf[COMPONENT_Cb] || tu.cbf[COMPONENT_Cr]))
   {
-    xReconPCM( *cu.firstTU );
-    return;
+    const Area      area = tu.Y().valid() ? tu.Y() : Area(recalcPosition(tu.chromaFormat, tu.chType, CHANNEL_TYPE_LUMA, tu.blocks[tu.chType].pos()), recalcSize(tu.chromaFormat, tu.chType, CHANNEL_TYPE_LUMA, tu.blocks[tu.chType].size()));
+    const CompArea &areaY = CompArea(COMPONENT_Y, tu.chromaFormat, area);
+    int            adj = m_pcReshape->calculateChromaAdjVpduNei(tu, areaY);
+    tu.setChromaAdj(adj);
   }
 
-  const uint32_t numChType = ::getNumberValidChannels( cu.chromaFormat );
+  for (int i = 0; i < getNumberValidComponents(tu.chromaFormat); i++)
+  {
+    ComponentID          compID = (ComponentID)i;
+    const CompArea       &area = tu.blocks[compID];
+    const ChannelType    chType = toChannelType(compID);
 
-  for( uint32_t chType = CHANNEL_TYPE_LUMA; chType < numChType; chType++ )
+    PelBuf piPred = cs.getPredBuf(area);
+    m_pcIntraPred->initIntraPatternChType(*tu.cu, area);
+    if (PU::isMIP(pu, chType))
+    {
+      m_pcIntraPred->initIntraMip(pu, area);
+      m_pcIntraPred->predIntraMip(compID, piPred, pu);
+    }
+    else
+    {
+      m_pcIntraPred->predIntraAng(compID, piPred, pu);
+    }
+
+    PelBuf piResi = cs.getResiBuf(area);
+
+    QpParam cQP(tu, compID);
+    for (int qpIdx = 0; qpIdx < 2; qpIdx++)
+    {
+      cQP.Qps[qpIdx] = cQP.Qps[qpIdx] + (compID == COMPONENT_Cr ? DELTA_QP_FOR_Co : DELTA_QP_FOR_Y_Cg);
+      cQP.pers[qpIdx] = cQP.Qps[qpIdx] / 6;
+      cQP.rems[qpIdx] = cQP.Qps[qpIdx] % 6;
+    }
+
+    if (tu.jointCbCr && isChroma(compID))
+    {
+      if (compID == COMPONENT_Cb)
+      {
+        PelBuf resiCr = cs.getResiBuf(tu.blocks[COMPONENT_Cr]);
+        if (tu.jointCbCr >> 1)
+        {
+          m_pcTrQuant->invTransformNxN(tu, COMPONENT_Cb, piResi, cQP);
+        }
+        else
+        {
+          QpParam qpCr(tu, COMPONENT_Cr);
+          for (int qpIdx = 0; qpIdx < 2; qpIdx++)
+          {
+            qpCr.Qps[qpIdx] = qpCr.Qps[qpIdx] + DELTA_QP_FOR_Co;
+            qpCr.pers[qpIdx] = qpCr.Qps[qpIdx] / 6;
+            qpCr.rems[qpIdx] = qpCr.Qps[qpIdx] % 6;
+          }
+
+          m_pcTrQuant->invTransformNxN(tu, COMPONENT_Cr, resiCr, qpCr);
+        }
+        m_pcTrQuant->invTransformICT(tu, piResi, resiCr);
+      }
+    }
+    else
+    {
+      if (TU::getCbf(tu, compID))
+      {
+        m_pcTrQuant->invTransformNxN(tu, compID, piResi, cQP);
+      }
+      else
+      {
+        piResi.fill(0);
+      }
+    }
+
+    flag = flag && (tu.blocks[compID].width*tu.blocks[compID].height > 4);
+    if (flag && (TU::getCbf(tu, compID) || tu.jointCbCr) && isChroma(compID) && slice.getPicHeader()->getLmcsChromaResidualScaleFlag())
+    {
+      piResi.scaleSignal(tu.getChromaAdj(), 0, tu.cu->cs->slice->clpRng(compID));
+    }
+
+    cs.setDecomp(area);
+  }
+
+  cs.getResiBuf(tu).colorSpaceConvert(cs.getResiBuf(tu), false);
+
+  for (int i = 0; i < getNumberValidComponents(tu.chromaFormat); i++)
   {
-    if( cu.blocks[chType].valid() )
+    ComponentID          compID = (ComponentID)i;
+    const CompArea       &area = tu.blocks[compID];
+
+    PelBuf piPred = cs.getPredBuf(area);
+    PelBuf piResi = cs.getResiBuf(area);
+    PelBuf piReco = cs.getRecoBuf(area);
+
+    PelBuf tmpPred;
+    if (slice.getPicHeader()->getLmcsEnabledFlag() && (m_pcReshape->getCTUFlag() || slice.isIntra()) && compID == COMPONENT_Y)
     {
-      xIntraRecQT( cu, ChannelType( chType ) );
+      CompArea tmpArea(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size());
+      tmpPred = m_tmpStorageLCU->getBuf(tmpArea);
+      tmpPred.copyFrom(piPred);
+    }
+
+    piPred.reconstruct(piPred, piResi, tu.cu->cs->slice->clpRng(compID));
+    piReco.copyFrom(piPred);
+
+    if (slice.getPicHeader()->getLmcsEnabledFlag() && (m_pcReshape->getCTUFlag() || slice.isIntra()) && compID == COMPONENT_Y)
+    {
+      piPred.copyFrom(tmpPred);
+    }
+
+    if (cs.pcv->isEncoder)
+    {
+      cs.picture->getRecoBuf(area).copyFrom(piReco);
+      cs.picture->getPredBuf(area).copyFrom(piPred);
     }
   }
 }
 
-/** Function for deriving reconstructed luma/chroma samples of a PCM mode CU.
-* \param pcCU pointer to current CU
-* \param uiPartIdx part index
-* \param piPCM pointer to PCM code arrays
-* \param piReco pointer to reconstructed sample arrays
-* \param uiStride stride of reconstructed sample arrays
-* \param uiWidth CU width
-* \param uiHeight CU height
-* \param compID colour component ID
-* \returns void
-*/
-void DecCu::xDecodePCMTexture(TransformUnit &tu, const ComponentID compID)
+void DecCu::xReconIntraQT( CodingUnit &cu )
 {
-  const CompArea &area         = tu.blocks[compID];
-        PelBuf piPicReco       = tu.cs->getRecoBuf( area );
-  const CPelBuf piPicPcm       = tu.getPcmbuf(compID);
-  const SPS &sps               = *tu.cs->sps;
-  const uint32_t uiPcmLeftShiftBit = sps.getBitDepth(toChannelType(compID)) - sps.getPCMBitDepth(toChannelType(compID));
 
-  for (uint32_t uiY = 0; uiY < area.height; uiY++)
+  if (CU::isPLT(cu))
   {
-    for (uint32_t uiX = 0; uiX < area.width; uiX++)
+    if (cu.isSepTree())
+    {
+      if (cu.chType == CHANNEL_TYPE_LUMA)
+      {
+        xReconPLT(cu, COMPONENT_Y, 1);
+      }
+      if (cu.chromaFormat != CHROMA_400 && (cu.chType == CHANNEL_TYPE_CHROMA))
+      {
+        xReconPLT(cu, COMPONENT_Cb, 2);
+      }
+    }
+    else
     {
-      piPicReco.at(uiX, uiY) = (piPicPcm.at(uiX, uiY) << uiPcmLeftShiftBit);
+      xReconPLT(cu, COMPONENT_Y, 3);
     }
+    return;
+  }
+
+  if (cu.colorTransform)
+  {
+    xIntraRecACTQT(cu);
   }
+  else
+  {
+  const uint32_t numChType = ::getNumberValidChannels( cu.chromaFormat );
 
-  tu.cs->picture->getRecoBuf( area ).copyFrom( piPicReco );
-  tu.cs->setDecomp( area );
+  for( uint32_t chType = CHANNEL_TYPE_LUMA; chType < numChType; chType++ )
+  {
+    if( cu.blocks[chType].valid() )
+    {
+      xIntraRecQT( cu, ChannelType( chType ) );
+    }
+  }
+  }
 }
 
-/** Function for reconstructing a PCM mode CU.
-* \param pcCU pointer to current CU
-* \param uiDepth CU Depth
-* \returns void
-*/
-void DecCu::xReconPCM(TransformUnit &tu)
+void DecCu::xReconPLT(CodingUnit &cu, ComponentID compBegin, uint32_t numComp)
 {
-  const CodingStructure *cs = tu.cs;
-  const ChannelType chType = tu.chType;
+  const SPS&       sps = *(cu.cs->sps);
+  TransformUnit&   tu = *cu.firstTU;
+  PelBuf    curPLTIdx = tu.getcurPLTIdx(compBegin);
 
-  ComponentID compStr = (CS::isDualITree(*cs) && !isLuma(chType)) ? COMPONENT_Cb: COMPONENT_Y;
-  ComponentID compEnd = (CS::isDualITree(*cs) && isLuma(chType)) ? COMPONENT_Y : COMPONENT_Cr;
-  for( ComponentID compID = compStr; compID <= compEnd; compID = ComponentID(compID+1) )
+  uint32_t height = cu.block(compBegin).height;
+  uint32_t width = cu.block(compBegin).width;
+
+  //recon. pixels
+  uint32_t scaleX = getComponentScaleX(COMPONENT_Cb, sps.getChromaFormatIdc());
+  uint32_t scaleY = getComponentScaleY(COMPONENT_Cb, sps.getChromaFormatIdc());
+  for (uint32_t y = 0; y < height; y++)
   {
+    for (uint32_t x = 0; x < width; x++)
+    {
+      for (uint32_t compID = compBegin; compID < (compBegin + numComp); compID++)
+      {
+        const int  channelBitDepth = cu.cs->sps->getBitDepth(toChannelType((ComponentID)compID));
+        const CompArea &area = cu.blocks[compID];
+
+        PelBuf       picReco   = cu.cs->getRecoBuf(area);
+        PLTescapeBuf escapeValue = tu.getescapeValue((ComponentID)compID);
+        if (curPLTIdx.at(x, y) == cu.curPLTSize[compBegin])
+        {
+          Pel value;
+          QpParam cQP(tu, (ComponentID)compID);
+          int qp = cQP.Qp(true);
+          int qpRem = qp % 6;
+          int qpPer = qp / 6;
+          if (compBegin != COMPONENT_Y || compID == COMPONENT_Y)
+          {
+            int invquantiserRightShift = IQUANT_SHIFT;
+            int add = 1 << (invquantiserRightShift - 1);
+            value = ((((escapeValue.at(x, y)*g_invQuantScales[0][qpRem]) << qpPer) + add) >> invquantiserRightShift);
+            value = Pel(ClipBD<int>(value, channelBitDepth));
+            picReco.at(x, y) = value;
+          }
+          else if (compBegin == COMPONENT_Y && compID != COMPONENT_Y && y % (1 << scaleY) == 0 && x % (1 << scaleX) == 0)
+          {
+            uint32_t posYC = y >> scaleY;
+            uint32_t posXC = x >> scaleX;
+            int invquantiserRightShift = IQUANT_SHIFT;
+            int add = 1 << (invquantiserRightShift - 1);
+            value = ((((escapeValue.at(posXC, posYC)*g_invQuantScales[0][qpRem]) << qpPer) + add) >> invquantiserRightShift);
+            value = Pel(ClipBD<int>(value, channelBitDepth));
+            picReco.at(posXC, posYC) = value;
 
-    xDecodePCMTexture(tu, compID);
+          }
+        }
+        else
+        {
+          uint32_t curIdx = curPLTIdx.at(x, y);
+          if (compBegin != COMPONENT_Y || compID == COMPONENT_Y)
+          {
+            picReco.at(x, y) = cu.curPLT[compID][curIdx];
+          }
+          else if (compBegin == COMPONENT_Y && compID != COMPONENT_Y && y % (1 << scaleY) == 0 && x % (1 << scaleX) == 0)
+          {
+            uint32_t posYC = y >> scaleY;
+            uint32_t posXC = x >> scaleX;
+            picReco.at(posXC, posYC) = cu.curPLT[compID][curIdx];
+          }
+        }
+      }
+    }
+  }
+  for (uint32_t compID = compBegin; compID < (compBegin + numComp); compID++)
+  {
+    const CompArea &area = cu.blocks[compID];
+    PelBuf picReco = cu.cs->getRecoBuf(area);
+    cu.cs->picture->getRecoBuf(area).copyFrom(picReco);
+    cu.cs->setDecomp(area);
   }
 }
 
@@ -387,6 +604,14 @@ DecCu::xIntraRecQT(CodingUnit &cu, const ChannelType chType)
   }
 }
 
+void DecCu::xIntraRecACTQT(CodingUnit &cu)
+{
+  for (auto &currTU : CU::traverseTUs(cu))
+  {
+    xIntraRecACTBlk(currTU);
+  }
+}
+
 /** Function for filling the PCM buffer of a CU using its reconstructed sample array
 * \param pCU   pointer to current CU
 * \param depth CU Depth
@@ -424,7 +649,7 @@ void DecCu::xReconInter(CodingUnit &cu)
   m_pcIntraPred->geneIntrainterPred(cu);
 
   // inter prediction
-  CHECK(CU::isIBC(cu) && cu.firstPU->mhIntraFlag, "IBC and MHIntra cannot be used together");
+  CHECK(CU::isIBC(cu) && cu.firstPU->ciipFlag, "IBC and Ciip cannot be used together");
   CHECK(CU::isIBC(cu) && cu.affine, "IBC and Affine cannot be used together");
   CHECK(CU::isIBC(cu) && cu.triangle, "IBC and triangle cannot be used together");
   CHECK(CU::isIBC(cu) && cu.firstPU->mmvdMergeFlag, "IBC and MMVD cannot be used together");
@@ -441,24 +666,22 @@ void DecCu::xReconInter(CodingUnit &cu)
   }
   if (cu.Y().valid())
   {
-    const PredictionUnit &pu = *cu.firstPU;
-    if (!cu.affine && !cu.triangle)
-    {
-      MotionInfo mi = pu.getMotionInfo();
-      mi.GBiIdx = (mi.interDir == 3) ? cu.GBiIdx : GBI_DEFAULT;
-      cu.cs->addMiToLut(CU::isIBC(cu) ? cu.cs->motionLut.lutIbc : cu.cs->motionLut.lut, mi );
-    }
+    bool isIbcSmallBlk = CU::isIBC(cu) && (cu.lwidth() * cu.lheight() <= 16);
+    CU::saveMotionInHMVP( cu, isIbcSmallBlk );
   }
 
-  if (cu.firstPU->mhIntraFlag)
+  if (cu.firstPU->ciipFlag)
   {
-    if (cu.cs->slice->getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag())
+    if (cu.cs->picHeader->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag())
     {
       cu.cs->getPredBuf(*cu.firstPU).Y().rspSignal(m_pcReshape->getFwdLUT());
     }
     m_pcIntraPred->geneWeightedPred(COMPONENT_Y, cu.cs->getPredBuf(*cu.firstPU).Y(), *cu.firstPU, m_pcIntraPred->getPredictorPtr2(COMPONENT_Y, 0));
-    m_pcIntraPred->geneWeightedPred(COMPONENT_Cb, cu.cs->getPredBuf(*cu.firstPU).Cb(), *cu.firstPU, m_pcIntraPred->getPredictorPtr2(COMPONENT_Cb, 0));
-    m_pcIntraPred->geneWeightedPred(COMPONENT_Cr, cu.cs->getPredBuf(*cu.firstPU).Cr(), *cu.firstPU, m_pcIntraPred->getPredictorPtr2(COMPONENT_Cr, 0));
+    if (cu.chromaSize().width > 2)
+    {
+      m_pcIntraPred->geneWeightedPred(COMPONENT_Cb, cu.cs->getPredBuf(*cu.firstPU).Cb(), *cu.firstPU, m_pcIntraPred->getPredictorPtr2(COMPONENT_Cb, 0));
+      m_pcIntraPred->geneWeightedPred(COMPONENT_Cr, cu.cs->getPredBuf(*cu.firstPU).Cr(), *cu.firstPU, m_pcIntraPred->getPredictorPtr2(COMPONENT_Cr, 0));
+    }
   }
 
   DTRACE    ( g_trace_ctx, D_TMP, "pred " );
@@ -472,12 +695,16 @@ void DecCu::xReconInter(CodingUnit &cu)
 
   if (cu.rootCbf)
   {
+    if (cu.colorTransform)
+    {
+      cs.getResiBuf(cu).colorSpaceConvert(cs.getResiBuf(cu), false);
+    }
 #if REUSE_CU_RESULTS
     const CompArea &area = cu.blocks[COMPONENT_Y];
     CompArea    tmpArea(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size());
     PelBuf tmpPred;
 #endif
-    if (cs.slice->getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag())
+    if (cs.picHeader->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag())
     {
 #if REUSE_CU_RESULTS
       if (cs.pcv->isEncoder)
@@ -486,7 +713,7 @@ void DecCu::xReconInter(CodingUnit &cu)
         tmpPred.copyFrom(cs.getPredBuf(cu).get(COMPONENT_Y));
       }
 #endif
-      if (!cu.firstPU->mhIntraFlag && !CU::isIBC(cu))
+      if (!cu.firstPU->ciipFlag && !CU::isIBC(cu))
         cs.getPredBuf(cu).get(COMPONENT_Y).rspSignal(m_pcReshape->getFwdLUT());
     }
 #if KEEP_PRED_AND_RESI_SIGNALS
@@ -495,7 +722,7 @@ void DecCu::xReconInter(CodingUnit &cu)
     cs.getResiBuf( cu ).reconstruct( cs.getPredBuf( cu ), cs.getResiBuf( cu ), cs.slice->clpRngs() );
     cs.getRecoBuf( cu ).copyFrom   (                      cs.getResiBuf( cu ) );
 #endif
-    if (cs.slice->getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag())
+    if (cs.picHeader->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag())
     {
 #if REUSE_CU_RESULTS
       if (cs.pcv->isEncoder)
@@ -508,7 +735,7 @@ void DecCu::xReconInter(CodingUnit &cu)
   else
   {
     cs.getRecoBuf(cu).copyClip(cs.getPredBuf(cu), cs.slice->clpRngs());
-    if (cs.slice->getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag() && !cu.firstPU->mhIntraFlag && !CU::isIBC(cu))
+    if (cs.picHeader->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag() && !cu.firstPU->ciipFlag && !CU::isIBC(cu))
     {
       cs.getRecoBuf(cu).get(COMPONENT_Y).rspSignal(m_pcReshape->getFwdLUT());
     }
@@ -531,8 +758,44 @@ void DecCu::xDecodeInterTU( TransformUnit & currTU, const ComponentID compID )
   //===== inverse transform =====
   PelBuf resiBuf  = cs.getResiBuf(area);
 
-  const QpParam cQP(currTU, compID);
+  QpParam cQP(currTU, compID);
+  if (currTU.cu->colorTransform)
+  {
+    for (int qpIdx = 0; qpIdx < 2; qpIdx++)
+    {
+      cQP.Qps[qpIdx] = cQP.Qps[qpIdx] + (compID == COMPONENT_Cr ? DELTA_QP_FOR_Co : DELTA_QP_FOR_Y_Cg);
+      cQP.pers[qpIdx] = cQP.Qps[qpIdx] / 6;
+      cQP.rems[qpIdx] = cQP.Qps[qpIdx] % 6;
+    }
+  }
 
+  if( currTU.jointCbCr && isChroma(compID) )
+  {
+    if( compID == COMPONENT_Cb )
+    {
+      PelBuf resiCr = cs.getResiBuf( currTU.blocks[ COMPONENT_Cr ] );
+      if( currTU.jointCbCr >> 1 )
+      {
+        m_pcTrQuant->invTransformNxN( currTU, COMPONENT_Cb, resiBuf, cQP );
+      }
+      else
+      {
+        QpParam qpCr(currTU, COMPONENT_Cr);
+        if (currTU.cu->colorTransform)
+        {
+          for (int qpIdx = 0; qpIdx < 2; qpIdx++)
+          {
+            qpCr.Qps[qpIdx] = qpCr.Qps[qpIdx] + DELTA_QP_FOR_Co;
+            qpCr.pers[qpIdx] = qpCr.Qps[qpIdx] / 6;
+            qpCr.rems[qpIdx] = qpCr.Qps[qpIdx] % 6;
+          }
+        }
+        m_pcTrQuant->invTransformNxN( currTU, COMPONENT_Cr, resiCr, qpCr );
+      }
+      m_pcTrQuant->invTransformICT( currTU, resiBuf, resiCr );
+    }
+  }
+  else
   if( TU::getCbf( currTU, compID ) )
   {
     m_pcTrQuant->invTransformNxN( currTU, compID, resiBuf, cQP );
@@ -544,7 +807,8 @@ void DecCu::xDecodeInterTU( TransformUnit & currTU, const ComponentID compID )
 
   //===== reconstruction =====
   const Slice           &slice = *cs.slice;
-  if ( slice.getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag() && isChroma(compID) && TU::getCbf(currTU, compID) && slice.getReshapeInfo().getSliceReshapeChromaAdj() && currTU.blocks[compID].width*currTU.blocks[compID].height > 4 )
+  if (slice.getPicHeader()->getLmcsEnabledFlag() && isChroma(compID) && (TU::getCbf(currTU, compID) || currTU.jointCbCr)
+   && slice.getPicHeader()->getLmcsChromaResidualScaleFlag() && currTU.blocks[compID].width * currTU.blocks[compID].height > 4)
   {
     resiBuf.scaleSignal(currTU.getChromaAdj(), 0, currTU.cu->cs->slice->clpRng(compID));
   }
@@ -571,17 +835,10 @@ void DecCu::xDecodeInterTexture(CodingUnit &cu)
     {
       CodingStructure  &cs = *cu.cs;
       const Slice &slice = *cs.slice;
-      if (slice.getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag() && slice.getReshapeInfo().getSliceReshapeChromaAdj() && (compID == COMPONENT_Y))
+      if (slice.getPicHeader()->getLmcsEnabledFlag() && slice.getPicHeader()->getLmcsChromaResidualScaleFlag() && (compID == COMPONENT_Y) && (currTU.cbf[COMPONENT_Cb] || currTU.cbf[COMPONENT_Cr]))
       {
         const CompArea &areaY = currTU.blocks[COMPONENT_Y];
-        PelBuf predY = cs.getPredBuf(areaY);
-        CompArea tmpArea(COMPONENT_Y, areaY.chromaFormat, Position(0, 0), areaY.size());
-        PelBuf tmpPred = m_tmpStorageLCU->getBuf(tmpArea);
-        tmpPred.copyFrom(predY);
-      if (!cu.firstPU->mhIntraFlag && !CU::isIBC(cu))
-          tmpPred.rspSignal(m_pcReshape->getFwdLUT());
-        const Pel avgLuma = tmpPred.computeAvg();
-        int adj = m_pcReshape->calculateChromaAdj(avgLuma);
+        int adj = m_pcReshape->calculateChromaAdjVpduNei(currTU, areaY);
         currTU.setChromaAdj(adj);
     }
       xDecodeInterTU( currTU, compID );
@@ -607,7 +864,7 @@ void DecCu::xDeriveCUMV( CodingUnit &cu )
     {
       if (pu.mmvdMergeFlag || pu.cu->mmvdSkip)
       {
-        CHECK(pu.mhIntraFlag == true, "invalid MHIntra");
+        CHECK(pu.ciipFlag == true, "invalid Ciip");
         if (pu.cs->sps->getSBTMVPEnabledFlag())
         {
           Size bufSize = g_miScaling.scale(pu.lumaSize());
@@ -615,8 +872,6 @@ void DecCu::xDeriveCUMV( CodingUnit &cu )
         }
 
         int   fPosBaseIdx = pu.mmvdMergeIdx / MMVD_MAX_REFINE_NUM;
-          pu.shareParentPos = cu.shareParentPos;
-          pu.shareParentSize = cu.shareParentSize;
         PU::getInterMergeCandidates(pu, mrgCtx, 1, fPosBaseIdx + 1);
         PU::getInterMMVDMergeCandidates(pu, mrgCtx,
           pu.mmvdMergeIdx
@@ -646,7 +901,7 @@ void DecCu::xDeriveCUMV( CodingUnit &cu )
           PU::getAffineMergeCand( pu, affineMergeCtx, pu.mergeIdx );
           pu.interDir = affineMergeCtx.interDirNeighbours[pu.mergeIdx];
           pu.cu->affineType = affineMergeCtx.affineType[pu.mergeIdx];
-          pu.cu->GBiIdx = affineMergeCtx.GBiIdx[pu.mergeIdx];
+          pu.cu->BcwIdx = affineMergeCtx.BcwIdx[pu.mergeIdx];
           pu.mergeType = affineMergeCtx.mergeType[pu.mergeIdx];
           if ( pu.mergeType == MRG_TYPE_SUBPU_ATMVP )
           {
@@ -671,8 +926,6 @@ void DecCu::xDeriveCUMV( CodingUnit &cu )
         }
         else
         {
-          pu.shareParentPos = cu.shareParentPos;
-          pu.shareParentSize = cu.shareParentSize;
           if (CU::isIBC(*pu.cu))
             PU::getIBCMergeCandidates(pu, mrgCtx, pu.mergeIdx);
           else
@@ -713,31 +966,27 @@ void DecCu::xDeriveCUMV( CodingUnit &cu )
 
               //    Mv mv[3];
               CHECK( pu.refIdx[eRefList] < 0, "Unexpected negative refIdx." );
-              const int imvShift = ( !cu.cs->pcv->isEncoder && pu.cu->imv == 2 ) ? MV_FRACTIONAL_BITS_DIFF : 0;
-              pu.mvdAffi[eRefList][0] <<= imvShift;
-              pu.mvdAffi[eRefList][1] <<= imvShift;
+              if (!cu.cs->pcv->isEncoder)
+              {
+                pu.mvdAffi[eRefList][0].changeAffinePrecAmvr2Internal(pu.cu->imv);
+                pu.mvdAffi[eRefList][1].changeAffinePrecAmvr2Internal(pu.cu->imv);
+                if (cu.affineType == AFFINEMODEL_6PARAM)
+                {
+                  pu.mvdAffi[eRefList][2].changeAffinePrecAmvr2Internal(pu.cu->imv);
+                }
+              }
 
               Mv mvLT = affineAMVPInfo.mvCandLT[mvp_idx] + pu.mvdAffi[eRefList][0];
               Mv mvRT = affineAMVPInfo.mvCandRT[mvp_idx] + pu.mvdAffi[eRefList][1];
               mvRT += pu.mvdAffi[eRefList][0];
-              if ( pu.cu->imv != 1 )
-              {
-                mvLT.changePrecision( MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL );
-                mvRT.changePrecision( MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL );
-              }
 
               Mv mvLB;
               if ( cu.affineType == AFFINEMODEL_6PARAM )
               {
-                pu.mvdAffi[eRefList][2] <<= imvShift;
                 mvLB = affineAMVPInfo.mvCandLB[mvp_idx] + pu.mvdAffi[eRefList][2];
                 mvLB += pu.mvdAffi[eRefList][0];
-                if ( pu.cu->imv != 1 )
-                {
-                  mvLB.changePrecision( MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL );
-                }
               }
-              PU::setAllAffineMv( pu, mvLT, mvRT, mvLB, eRefList );
+              PU::setAllAffineMv(pu, mvLT, mvRT, mvLB, eRefList, true);
             }
           }
         }
@@ -750,9 +999,15 @@ void DecCu::xDeriveCUMV( CodingUnit &cu )
 #if REUSE_CU_RESULTS
           if (!cu.cs->pcv->isEncoder)
 #endif
-            mvd <<= 2;
+          {
+            mvd.changeIbcPrecAmvr2Internal(pu.cu->imv);
+          }
+          if ( pu.cu->slice->getPicHeader()->getMaxNumIBCMergeCand() == 1 )
+          {
+            CHECK( pu.mvpIdx[REF_PIC_LIST_0], "mvpIdx for IBC mode should be 0" );
+          }
           pu.mv[REF_PIC_LIST_0] = amvpInfo.mvCand[pu.mvpIdx[REF_PIC_LIST_0]] + mvd;
-          pu.mv[REF_PIC_LIST_0].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);
+          pu.mv[REF_PIC_LIST_0].mvCliptoStorageBitDepth();
         }
         else
         {
@@ -764,8 +1019,12 @@ void DecCu::xDeriveCUMV( CodingUnit &cu )
               AMVPInfo amvpInfo;
               PU::fillMvpCand(pu, eRefList, pu.refIdx[eRefList], amvpInfo);
               pu.mvpNum [eRefList] = amvpInfo.numCand;
+              if (!cu.cs->pcv->isEncoder)
+              {
+                pu.mvd[eRefList].changeTransPrecAmvr2Internal(pu.cu->imv);
+              }
               pu.mv[eRefList] = amvpInfo.mvCand[pu.mvpIdx[eRefList]] + pu.mvd[eRefList];
-              pu.mv[eRefList].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);
+              pu.mv[eRefList].mvCliptoStorageBitDepth();
             }
           }
         }
@@ -779,6 +1038,17 @@ void DecCu::xDeriveCUMV( CodingUnit &cu )
         printf( "DECODER: pu motion vector across tile boundaries (%d,%d,%d,%d)\n", pu.lx(), pu.ly(), pu.lwidth(), pu.lheight() );
       }
     }
+    if (CU::isIBC(cu))
+    {
+      const int cuPelX = pu.Y().x;
+      const int cuPelY = pu.Y().y;
+      int roiWidth = pu.lwidth();
+      int roiHeight = pu.lheight();
+      const unsigned int  lcuWidth = pu.cs->slice->getSPS()->getMaxCUWidth();
+      int xPred = pu.mv[0].getHor() >> MV_FRACTIONAL_BITS_INTERNAL;
+      int yPred = pu.mv[0].getVer() >> MV_FRACTIONAL_BITS_INTERNAL;
+      CHECK(!m_pcInterPred->isLumaBvValid(lcuWidth, cuPelX, cuPelY, roiWidth, roiHeight, xPred, yPred), "invalid block vector for IBC detected.");
+    }
   }
 }
 //! \}
diff --git a/source/Lib/DecoderLib/DecCu.h b/source/Lib/DecoderLib/DecCu.h
index 8d70b5f06275284f8d0ba6b91b8726e262c17fbf..2cbb597a9b9a48a09675b02746ef44840e9384df 100644
--- a/source/Lib/DecoderLib/DecCu.h
+++ b/source/Lib/DecoderLib/DecCu.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -73,13 +73,10 @@ public:
   void initDecCuReshaper  ( Reshape* pcReshape, ChromaFormat chromaFormatIDC) ;
   void destoryDecCuReshaprBuf();
 
-  void setShareStateDec (int shareStateDecIn)  { m_shareStateDec = shareStateDecIn; }
-#if ENABLE_SPLIT_PARALLELISM
-  int  getShareStateDec () const { return m_shareStateDec; }
-#endif
   /// reconstruct Ctu information
 protected:
   void xIntraRecQT        ( CodingUnit&      cu, const ChannelType chType );
+  void xIntraRecACTQT(CodingUnit&      cu);
 
   void xReconInter        ( CodingUnit&      cu );
   void xDecodeInterTexture( CodingUnit&      cu );
@@ -87,18 +84,17 @@ protected:
   void xFillPCMBuffer     ( CodingUnit&      cu );
 
   void xIntraRecBlk       ( TransformUnit&   tu, const ComponentID compID );
-  void xReconPCM          ( TransformUnit&   tu);
-  void xDecodePCMTexture  ( TransformUnit&   tu, const ComponentID compID );
+  void xIntraRecACTBlk(TransformUnit&   tu);
   void xDecodeInterTU     ( TransformUnit&   tu, const ComponentID compID );
 
   void xDeriveCUMV        ( CodingUnit&      cu );
+  void xReconPLT          ( CodingUnit&      cu,       ComponentID compBegin, uint32_t numComp );
   PelStorage        *m_tmpStorageLCU;
 private:
   TrQuant*          m_pcTrQuant;
   IntraPrediction*  m_pcIntraPred;
   InterPrediction*  m_pcInterPred;
 
-  int               m_shareStateDec;
 
   MotionInfo        m_SubPuMiBuf[(MAX_CU_SIZE * MAX_CU_SIZE) >> (MIN_CU_LOG2 << 1)];
 
diff --git a/source/Lib/DecoderLib/DecLib.cpp b/source/Lib/DecoderLib/DecLib.cpp
index b3ecdee4e51e98498763894205abc054eb390e97..fdfe5456da4f4617b21e8a18fdbfdb3ca186066d 100644
--- a/source/Lib/DecoderLib/DecLib.cpp
+++ b/source/Lib/DecoderLib/DecLib.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -168,6 +168,7 @@ bool tryDecodePicture( Picture* pcEncPic, const int expectedPoc, const std::stri
                     pcEncPic->slices.back()->initSlice();
                     pcEncPic->slices.back()->setPPS( pcEncPic->slices[0]->getPPS() );
                     pcEncPic->slices.back()->setSPS( pcEncPic->slices[0]->getSPS() );
+                    pcEncPic->slices.back()->setVPS( pcEncPic->slices[0]->getVPS() );
                     pcEncPic->slices.back()->setPic( pcEncPic->slices[0]->getPic() );
                   }
                   pcEncPic->slices[i]->copySliceInfo( pic->slices[i], false );
@@ -201,16 +202,26 @@ bool tryDecodePicture( Picture* pcEncPic, const int expectedPoc, const std::stri
 
                 if( pic->cs->sps->getALFEnabledFlag() )
                 {
+                  std::copy(pic->getAlfCtbFilterIndexVec().begin(), pic->getAlfCtbFilterIndexVec().end(), pcEncPic->getAlfCtbFilterIndexVec().begin());
                   for( int compIdx = 0; compIdx < MAX_NUM_COMPONENT; compIdx++ )
                   {
                     std::copy( pic->getAlfCtuEnableFlag()[compIdx].begin(), pic->getAlfCtuEnableFlag()[compIdx].end(), pcEncPic->getAlfCtuEnableFlag()[compIdx].begin() );
                   }
+                  pcEncPic->resizeAlfCtbFilterIndex(pic->cs->pcv->sizeInCtus);
+                  memcpy( pcEncPic->getAlfCtbFilterIndex(), pic->getAlfCtbFilterIndex(), sizeof(short)*pic->cs->pcv->sizeInCtus );
+
+                  std::copy( pic->getAlfCtuAlternative(COMPONENT_Cb).begin(), pic->getAlfCtuAlternative(COMPONENT_Cb).end(), pcEncPic->getAlfCtuAlternative(COMPONENT_Cb).begin() );
+                  std::copy( pic->getAlfCtuAlternative(COMPONENT_Cr).begin(), pic->getAlfCtuAlternative(COMPONENT_Cr).end(), pcEncPic->getAlfCtuAlternative(COMPONENT_Cr).begin() );
 
                   for( int i = 0; i < pic->slices.size(); i++ )
                   {
-                    pcEncPic->slices[i]->setAPSId(pic->slices[i]->getAPSId());
-                    pcEncPic->slices[i]->setAPS( pic->slices[i]->getAPS());
-                    pcEncPic->slices[i]->setTileGroupAlfEnabledFlag( pic->slices[i]->getTileGroupAlfEnabledFlag());
+                    pcEncPic->slices[i]->setTileGroupNumAps(pic->slices[i]->getTileGroupNumAps());
+                    pcEncPic->slices[i]->setAlfAPSs(pic->slices[i]->getTileGroupApsIdLuma());
+                    pcEncPic->slices[i]->setAlfAPSs(pic->slices[i]->getAlfAPSs());
+                    pcEncPic->slices[i]->setTileGroupApsIdChroma(pic->slices[i]->getTileGroupApsIdChroma());
+                    pcEncPic->slices[i]->setTileGroupAlfEnabledFlag(COMPONENT_Y,  pic->slices[i]->getTileGroupAlfEnabledFlag(COMPONENT_Y));
+                    pcEncPic->slices[i]->setTileGroupAlfEnabledFlag(COMPONENT_Cb, pic->slices[i]->getTileGroupAlfEnabledFlag(COMPONENT_Cb));
+                    pcEncPic->slices[i]->setTileGroupAlfEnabledFlag(COMPONENT_Cr, pic->slices[i]->getTileGroupAlfEnabledFlag(COMPONENT_Cr));
                   }
                 }
 
@@ -375,6 +386,7 @@ DecLib::DecLib()
   , m_cInterPred()
   , m_cTrQuant()
   , m_cSliceDecoder()
+  , m_cTrQuantScalingList()
   , m_cCuDecoder()
   , m_HLSReader()
   , m_seiReader()
@@ -385,6 +397,7 @@ DecLib::DecLib()
   , m_cacheModel()
 #endif
   , m_pcPic(NULL)
+  , m_prevLayerID(MAX_INT)
   , m_prevPOC(MAX_INT)
   , m_prevTid0POC(0)
   , m_bFirstSliceInPicture(true)
@@ -394,7 +407,8 @@ DecLib::DecLib()
   , m_bFirstSliceInBitstream(true)
   , m_lastPOCNoOutputPriorPics(-1)
   , m_isNoOutputPriorPics(false)
-  , m_craNoRaslOutputFlag(false)
+  , m_lastNoIncorrectPicOutputFlag(false)
+  , m_sliceLmcsApsId(-1)
   , m_pDecodedSEIOutputStream(NULL)
   , m_decodedPictureHashSEIEnabled(false)
   , m_numberOfChecksumErrorsDetected(0)
@@ -402,6 +416,9 @@ DecLib::DecLib()
   , m_prefixSEINALUs()
   , m_debugPOC( -1 )
   , m_debugCTU( -1 )
+  , m_vps( nullptr )
+  , m_scalingListUpdateFlag(true)
+  , m_PreScalingListAPSId(-1)
 {
 #if ENABLE_SIMD_OPT_BUFFER
   g_pelBufOP.initPelBufOpsX86();
@@ -415,6 +432,7 @@ DecLib::~DecLib()
     delete m_prefixSEINALUs.front();
     m_prefixSEINALUs.pop_front();
   }
+
 }
 
 void DecLib::create()
@@ -470,7 +488,7 @@ void DecLib::deletePicBuffer ( )
   m_cReshaper.destroy();
 }
 
-Picture* DecLib::xGetNewPicBuffer ( const SPS &sps, const PPS &pps, const uint32_t temporalLayer )
+Picture* DecLib::xGetNewPicBuffer( const SPS &sps, const PPS &pps, const uint32_t temporalLayer, const int layerId )
 {
   Picture * pcPic = nullptr;
   m_iMaxRefPicNum = sps.getMaxDecPicBuffering(temporalLayer);     // m_uiMaxDecPicBuffering has the space for the picture currently being decoded
@@ -478,7 +496,7 @@ Picture* DecLib::xGetNewPicBuffer ( const SPS &sps, const PPS &pps, const uint32
   {
     pcPic = new Picture();
 
-    pcPic->create( sps.getChromaFormatIdc(), Size( sps.getPicWidthInLumaSamples(), sps.getPicHeightInLumaSamples() ), sps.getMaxCUWidth(), sps.getMaxCUWidth() + 16, true );
+    pcPic->create( sps.getChromaFormatIdc(), Size( pps.getPicWidthInLumaSamples(), pps.getPicHeightInLumaSamples() ), sps.getMaxCUWidth(), sps.getMaxCUWidth() + 16, true, layerId );
 
     m_cListPic.push_back( pcPic );
 
@@ -514,14 +532,14 @@ Picture* DecLib::xGetNewPicBuffer ( const SPS &sps, const PPS &pps, const uint32
 
     m_cListPic.push_back( pcPic );
 
-    pcPic->create( sps.getChromaFormatIdc(), Size( sps.getPicWidthInLumaSamples(), sps.getPicHeightInLumaSamples() ), sps.getMaxCUWidth(), sps.getMaxCUWidth() + 16, true );
+    pcPic->create( sps.getChromaFormatIdc(), Size( pps.getPicWidthInLumaSamples(), pps.getPicHeightInLumaSamples() ), sps.getMaxCUWidth(), sps.getMaxCUWidth() + 16, true, layerId );
   }
   else
   {
-    if( !pcPic->Y().Size::operator==( Size( sps.getPicWidthInLumaSamples(), sps.getPicHeightInLumaSamples() ) ) || pcPic->cs->pcv->maxCUWidth != sps.getMaxCUWidth() || pcPic->cs->pcv->maxCUHeight != sps.getMaxCUHeight() )
+    if( !pcPic->Y().Size::operator==( Size( pps.getPicWidthInLumaSamples(), pps.getPicHeightInLumaSamples() ) ) || pps.pcv->maxCUWidth != sps.getMaxCUWidth() || pps.pcv->maxCUHeight != sps.getMaxCUHeight() )
     {
       pcPic->destroy();
-      pcPic->create( sps.getChromaFormatIdc(), Size( sps.getPicWidthInLumaSamples(), sps.getPicHeightInLumaSamples() ), sps.getMaxCUWidth(), sps.getMaxCUWidth() + 16, true );
+      pcPic->create( sps.getChromaFormatIdc(), Size( pps.getPicWidthInLumaSamples(), pps.getPicHeightInLumaSamples() ), sps.getMaxCUWidth(), sps.getMaxCUWidth() + 16, true, layerId );
     }
   }
 
@@ -540,9 +558,11 @@ void DecLib::executeLoopFilters()
     return; // nothing to deblock
   }
 
+  m_pcPic->cs->slice->startProcessingTimer();
+
   CodingStructure& cs = *m_pcPic->cs;
 
-  if (cs.sps->getUseReshaper() && m_cReshaper.getSliceReshaperInfo().getUseSliceReshaper())
+  if (cs.sps->getUseLmcs() && m_cReshaper.getSliceReshaperInfo().getUseSliceReshaper())
   {
       CHECK((m_cReshaper.getRecReshaped() == false), "Rec picture is not reshaped!");
       m_pcPic->getRecoBuf(COMPONENT_Y).rspSignal(m_cReshaper.getInvLUT());
@@ -559,24 +579,21 @@ void DecLib::executeLoopFilters()
 
   if( cs.sps->getALFEnabledFlag() )
   {
-    if (cs.slice->getTileGroupAlfEnabledFlag())
-    {
       // ALF decodes the differentially coded coefficients and stores them in the parameters structure.
       // Code could be restructured to do directly after parsing. So far we just pass a fresh non-const
       // copy in case the APS gets used more than once.
-
-      AlfSliceParam alfParamCopy = cs.aps->getAlfAPSParam();
-      m_cALF.ALFProcess(cs, alfParamCopy);
-    }
+      m_cALF.ALFProcess(cs);
 
   }
+
+  m_pcPic->cs->slice->stopProcessingTimer();
 }
 
 void DecLib::finishPictureLight(int& poc, PicList*& rpcListPic )
 {
   Slice*  pcSlice = m_pcPic->cs->slice;
 
-  m_pcPic->neededForOutput = (pcSlice->getPicOutputFlag() ? true : false);
+  m_pcPic->neededForOutput = (pcSlice->getPicHeader()->getPicOutputFlag() ? true : false);
   m_pcPic->reconstructed = true;
 
   Slice::sortPicList( m_cListPic ); // sorting for application output
@@ -600,8 +617,10 @@ void DecLib::finishPicture(int& poc, PicList*& rpcListPic, MsgLevel msgl )
     c += 32;  // tolower
   }
 
+  if (pcSlice->isDRAP()) c = 'D';
+
   //-- For time output for each slice
-  msg( msgl, "POC %4d TId: %1d ( %c-SLICE, QP%3d ) ", pcSlice->getPOC(),
+  msg( msgl, "POC %4d LId: %2d TId: %1d ( %c-SLICE, QP%3d ) ", pcSlice->getPOC(), pcSlice->getPic()->layerId,
          pcSlice->getTLayer(),
          c,
          pcSlice->getSliceQp() );
@@ -612,7 +631,29 @@ void DecLib::finishPicture(int& poc, PicList*& rpcListPic, MsgLevel msgl )
     msg( msgl, "[L%d ", iRefList);
     for (int iRefIndex = 0; iRefIndex < pcSlice->getNumRefIdx(RefPicList(iRefList)); iRefIndex++)
     {
-      msg( msgl, "%d ", pcSlice->getRefPOC(RefPicList(iRefList), iRefIndex));
+      const std::pair<int, int>& scaleRatio = pcSlice->getScalingRatio( RefPicList( iRefList ), iRefIndex );
+
+      if( pcSlice->getPicHeader()->getEnableTMVPFlag() && pcSlice->getColFromL0Flag() == bool(1 - iRefList) && pcSlice->getColRefIdx() == iRefIndex )
+      {
+        if ( scaleRatio.first != 1 << SCALE_RATIO_BITS || scaleRatio.second != 1 << SCALE_RATIO_BITS )
+          msg( msgl, "%dc(%1.2lfx, %1.2lfx) ", pcSlice->getRefPOC( RefPicList( iRefList ), iRefIndex ), double( scaleRatio.first ) / ( 1 << SCALE_RATIO_BITS ), double( scaleRatio.second ) / ( 1 << SCALE_RATIO_BITS ) );
+        else
+          msg( msgl, "%dc ", pcSlice->getRefPOC( RefPicList( iRefList ), iRefIndex ) );
+      }
+      else
+      {
+        if ( scaleRatio.first != 1 << SCALE_RATIO_BITS || scaleRatio.second != 1 << SCALE_RATIO_BITS )
+          msg( msgl, "%d(%1.2lfx, %1.2lfx) ", pcSlice->getRefPOC( RefPicList( iRefList ), iRefIndex ), double( scaleRatio.first ) / ( 1 << SCALE_RATIO_BITS ), double( scaleRatio.second ) / ( 1 << SCALE_RATIO_BITS ) );
+        else
+          msg( msgl, "%d ", pcSlice->getRefPOC( RefPicList( iRefList ), iRefIndex ) );
+      }
+
+      if( pcSlice->getRefPOC( RefPicList( iRefList ), iRefIndex ) == pcSlice->getPOC() )
+      {
+        msg( msgl, ".%d", pcSlice->getRefPic( RefPicList( iRefList ), iRefIndex )->layerId );
+      }   
+
+      msg( msgl, " " );
     }
     msg( msgl, "] ");
   }
@@ -629,7 +670,13 @@ void DecLib::finishPicture(int& poc, PicList*& rpcListPic, MsgLevel msgl )
 
   msg( msgl, "\n");
 
-  m_pcPic->neededForOutput = (pcSlice->getPicOutputFlag() ? true : false);
+#if JVET_J0090_MEMORY_BANDWITH_MEASURE
+    m_cacheModel.reportFrame();
+    m_cacheModel.accumulateFrame();
+    m_cacheModel.clear();
+#endif
+
+  m_pcPic->neededForOutput = (pcSlice->getPicHeader()->getPicOutputFlag() ? true : false);
   m_pcPic->reconstructed = true;
 
 
@@ -641,6 +688,7 @@ void DecLib::finishPicture(int& poc, PicList*& rpcListPic, MsgLevel msgl )
   m_pcPic->destroyTempBuffers();
   m_pcPic->cs->destroyCoeffs();
   m_pcPic->cs->releaseIntermediateData();
+  m_pcPic->cs->picHeader->initPicHeader();
 }
 
 void DecLib::checkNoOutputPriorPics (PicList* pcListPic)
@@ -676,10 +724,10 @@ void DecLib::xUpdateRasInit(Slice* slice)
   }
 }
 
-void DecLib::xCreateLostPicture(int iLostPoc)
+void DecLib::xCreateLostPicture( int iLostPoc, const int layerId )
 {
   msg( INFO, "\ninserting lost poc : %d\n",iLostPoc);
-  Picture *cFillPic = xGetNewPicBuffer(*(m_parameterSetManager.getFirstSPS()), *(m_parameterSetManager.getFirstPPS()), 0);
+  Picture *cFillPic = xGetNewPicBuffer( *( m_parameterSetManager.getFirstSPS() ), *( m_parameterSetManager.getFirstPPS() ), 0, layerId );
 
   CHECK( !cFillPic->slices.size(), "No slices in picture" );
 
@@ -720,42 +768,197 @@ void DecLib::xCreateLostPicture(int iLostPoc)
 
 }
 
+void DecLib::xCreateUnavailablePicture(int iUnavailablePoc, bool longTermFlag, const int layerId, const bool interLayerRefPicFlag)
+{ // Builds a stand-in picture for POC iUnavailablePoc: constant-filled samples, flagged as referenced and reconstructed, not flagged for output.
+  msg(INFO, "\ninserting unavailable poc : %d\n", iUnavailablePoc);
+  Picture* cFillPic = xGetNewPicBuffer( *( m_parameterSetManager.getFirstSPS() ), *( m_parameterSetManager.getFirstPPS() ), 0, layerId ); // buffer is requested with getFirstSPS()/getFirstPPS() and temporal layer 0
+
+  CHECK(!cFillPic->slices.size(), "No slices in picture");
+
+  cFillPic->slices[0]->initSlice();
+
+  uint32_t yFill = 1 << (m_parameterSetManager.getFirstSPS()->getBitDepth(CHANNEL_TYPE_LUMA) - 1); // 1 << (bitDepth - 1): midpoint of the luma value range
+  uint32_t cFill = 1 << (m_parameterSetManager.getFirstSPS()->getBitDepth(CHANNEL_TYPE_CHROMA) - 1); // 1 << (bitDepth - 1): midpoint of the chroma value range
+  cFillPic->getRecoBuf().Y().fill(yFill);
+  cFillPic->getRecoBuf().Cb().fill(cFill);
+  cFillPic->getRecoBuf().Cr().fill(cFill);
+
+  //  for(int ctuRsAddr=0; ctuRsAddr<cFillPic->getNumberOfCtusInFrame(); ctuRsAddr++)  { cFillPic->getCtu(ctuRsAddr)->initCtu(cFillPic, ctuRsAddr); }
+  cFillPic->referenced = true;
+  cFillPic->interLayerRefPicFlag = interLayerRefPicFlag;
+  cFillPic->longTerm = longTermFlag;
+  cFillPic->slices[0]->setPOC(iUnavailablePoc);
+  xUpdatePreviousTid0POC(cFillPic->slices[0]);
+  cFillPic->reconstructed = true;
+  cFillPic->neededForOutput = false; // the stand-in picture is not flagged for output
+  if (m_pocRandomAccess == MAX_INT) // NOTE(review): MAX_INT presumably means "random-access POC not set yet" — confirm
+  {
+    m_pocRandomAccess = iUnavailablePoc;
+  }
+
+}
 
-void DecLib::xActivateParameterSets()
+/**
+ - Determine whether the first VCL NAL unit of a picture is also the first VCL NAL unit of an access unit (AU); returns true if it is
+ */
+bool DecLib::isSliceNaluFirstInAU( bool newPicture, InputNALUnit &nalu )
 {
-  if (m_bFirstSliceInPicture)
+  // can only be the start of an AU if this is the start of a new picture
+  if( newPicture == false )
+  {
+    return false;
+  }
+
+  // should only be called for slice NALU types; any other NAL unit type is reported as "not first in AU"
+  if( nalu.m_nalUnitType != NAL_UNIT_CODED_SLICE_TRAIL &&
+      nalu.m_nalUnitType != NAL_UNIT_CODED_SLICE_STSA &&
+      nalu.m_nalUnitType != NAL_UNIT_CODED_SLICE_RASL &&
+      nalu.m_nalUnitType != NAL_UNIT_CODED_SLICE_RADL &&
+      nalu.m_nalUnitType != NAL_UNIT_CODED_SLICE_IDR_W_RADL &&
+      nalu.m_nalUnitType != NAL_UNIT_CODED_SLICE_IDR_N_LP &&
+      nalu.m_nalUnitType != NAL_UNIT_CODED_SLICE_CRA &&
+      nalu.m_nalUnitType != NAL_UNIT_CODED_SLICE_GDR )
+  {
+    return false;
+  }
+  
+  // check for valid picture header; without one the slice POC below cannot be parsed, so report false
+  if(m_picHeader.isValid() == false)
+  {
+    return false;
+  }
+  
+  // check for layer ID less than or equal to previous picture's layer ID; such a slice is treated as starting a new AU
+  if( nalu.m_nuhLayerId <= m_prevLayerID )
+  {
+    return true;
+  }
+
+  // get slice POC (side effect: the shared slice pilot m_apcSlicePilot is re-initialised and partially parsed here)
+  m_apcSlicePilot->setPicHeader( &m_picHeader );
+  m_apcSlicePilot->initSlice(); 
+  m_HLSReader.setBitstream( &nalu.getBitstream() );
+  m_HLSReader.parseSliceHeaderToPoc( m_apcSlicePilot, &m_picHeader, &m_parameterSetManager, m_prevTid0POC );
+
+  // check for different POC: a higher layer ID with a POC that differs from m_prevPOC also starts a new AU
+  return (m_apcSlicePilot->getPOC() != m_prevPOC);
+}
+
+void activateAPS(PicHeader* picHeader, Slice* pSlice, ParameterSetManager& parameterSetManager, APS** apss, APS* lmcsAPS, APS* scalingListAPS)
+{ // Activates the ALF APSs referenced by pSlice (stored into apss[], indexed by APS id) and the LMCS / scaling-list APSs referenced by picHeader.
+  // luma ALF APSs: every id listed for the slice is looked up and activated; ids with no stored APS are skipped without an error
+  if (pSlice->getTileGroupAlfEnabledFlag(COMPONENT_Y))
+  {
+    for (int i = 0; i < pSlice->getTileGroupApsIdLuma().size(); i++)
+    {
+      int apsId = pSlice->getTileGroupApsIdLuma()[i];
+      APS* aps = parameterSetManager.getAPS(apsId, ALF_APS);
+
+      if (aps)
+      {
+        apss[apsId] = aps;
+        if (false == parameterSetManager.activateAPS(apsId, ALF_APS))
+        {
+          THROW("APS activation failed!");
+        }
+
+        CHECK( aps->getTemporalId() > pSlice->getTLayer(), "TemporalId shall be less than or equal to the TemporalId of the coded slice NAL unit" );
+        // TODO: APS NAL unit containing the APS RBSP shall have nuh_layer_id either equal to the nuh_layer_id of a coded slice NAL unit that refers to it, or equal to the nuh_layer_id of a direct dependent layer of the layer containing a coded slice NAL unit that refers to it.
+      }
+    }
+  }
+  if (pSlice->getTileGroupAlfEnabledFlag(COMPONENT_Cb)||pSlice->getTileGroupAlfEnabledFlag(COMPONENT_Cr) )
+  {
+    // chroma ALF APS: a single APS id serves both Cb and Cr; a missing APS is skipped without an error
+    int apsId = pSlice->getTileGroupApsIdChroma();
+    APS* aps = parameterSetManager.getAPS(apsId, ALF_APS);
+    if (aps)
+    {
+      apss[apsId] = aps;
+      if (false == parameterSetManager.activateAPS(apsId, ALF_APS))
+      {
+        THROW("APS activation failed!");
+      }
+
+      CHECK( aps->getTemporalId() > pSlice->getTLayer(), "TemporalId shall be less than or equal to the TemporalId of the coded slice NAL unit" );
+      // TODO: APS NAL unit containing the APS RBSP shall have nuh_layer_id either equal to the nuh_layer_id of a coded slice NAL unit that refers to it, or equal to the nuh_layer_id of a direct dependent layer of the layer containing a coded slice NAL unit that refers to it.
+    }
+  }
+  // NOTE(review): lmcsAPS and scalingListAPS are by-value pointer parameters, so the assignments below never reach the caller's variables; the results are published only through picHeader->setLmcsAPS() / setScalingListAPS().
+  if (picHeader->getLmcsEnabledFlag() && lmcsAPS == nullptr)
+  {
+    lmcsAPS = parameterSetManager.getAPS(picHeader->getLmcsAPSId(), LMCS_APS);
+    CHECK(lmcsAPS == nullptr, "No LMCS APS present");
+    if (lmcsAPS)
+    {
+      parameterSetManager.clearAPSChangedFlag(picHeader->getLmcsAPSId(), LMCS_APS);
+      if (false == parameterSetManager.activateAPS(picHeader->getLmcsAPSId(), LMCS_APS))
+      {
+        THROW("LMCS APS activation failed!");
+      }
+
+      CHECK( lmcsAPS->getTemporalId() > pSlice->getTLayer(), "TemporalId shall be less than or equal to the TemporalId of the coded slice NAL unit" );
+      // TODO: APS NAL unit containing the APS RBSP shall have nuh_layer_id either equal to the nuh_layer_id of a coded slice NAL unit that refers to it, or equal to the nuh_layer_id of a direct dependent layer of the layer containing a coded slice NAL unit that refers to it.
+    }
+  }
+  picHeader->setLmcsAPS(lmcsAPS); // stores nullptr when LMCS is disabled and the caller passed nullptr
+
+  if( picHeader->getScalingListPresentFlag() && scalingListAPS == nullptr)
   {
-    APS *aps = m_parameterSetManager.getAPS(m_apcSlicePilot->getAPSId()); // this is a temporary APS object. Do not store this value
-    if (m_apcSlicePilot->getAPSId() != -1)
+    scalingListAPS = parameterSetManager.getAPS( picHeader->getScalingListAPSId(), SCALING_LIST_APS );
+    CHECK( scalingListAPS == nullptr, "No SCALING LIST APS present" );
+    if( scalingListAPS )
     {
-      CHECK(aps == 0, "No APS present");
+      parameterSetManager.clearAPSChangedFlag( picHeader->getScalingListAPSId(), SCALING_LIST_APS );
+      if( false == parameterSetManager.activateAPS( picHeader->getScalingListAPSId(), SCALING_LIST_APS ) )
+      {
+        THROW( "SCALING LIST APS activation failed!" );
+      }
+
+      CHECK( scalingListAPS->getTemporalId() > pSlice->getTLayer(), "TemporalId shall be less than or equal to the TemporalId of the coded slice NAL unit" );
+      // TODO: APS NAL unit containing the APS RBSP shall have nuh_layer_id either equal to the nuh_layer_id of a coded slice NAL unit that refers to it, or equal to the nuh_layer_id of a direct dependent layer of the layer containing a coded slice NAL unit that refers to it.
     }
-    const PPS *pps = m_parameterSetManager.getPPS(m_apcSlicePilot->getPPSId()); // this is a temporary PPS object. Do not store this value
+  }
+  picHeader->setScalingListAPS(scalingListAPS); // stores nullptr when no scaling list is signalled and the caller passed nullptr
+}
+
+void DecLib::xActivateParameterSets( const int layerId )
+{
+  if (m_bFirstSliceInPicture)
+  {
+    APS** apss = m_parameterSetManager.getAPSs();
+    memset(apss, 0, sizeof(*apss) * ALF_CTB_MAX_NUM_APS);
+    const PPS *pps = m_parameterSetManager.getPPS(m_picHeader.getPPSId()); // this is a temporary PPS object. Do not store this value
     CHECK(pps == 0, "No PPS present");
 
     const SPS *sps = m_parameterSetManager.getSPS(pps->getSPSId());             // this is a temporary SPS object. Do not store this value
     CHECK(sps == 0, "No SPS present");
 
+    const VPS *vps = sps->getVPSId() ? m_parameterSetManager.getVPS( sps->getVPSId() ) : nullptr;
+
     if (NULL == pps->pcv)
     {
-      m_parameterSetManager.getPPS( m_apcSlicePilot->getPPSId() )->pcv = new PreCalcValues( *sps, *pps, false );
+      m_parameterSetManager.getPPS( m_picHeader.getPPSId() )->pcv = new PreCalcValues( *sps, *pps, false );
     }
     m_parameterSetManager.clearSPSChangedFlag(sps->getSPSId());
     m_parameterSetManager.clearPPSChangedFlag(pps->getPPSId());
 
-    if (false == m_parameterSetManager.activatePPS(m_apcSlicePilot->getPPSId(),m_apcSlicePilot->isIRAP()))
+    if (false == m_parameterSetManager.activatePPS(m_picHeader.getPPSId(),m_apcSlicePilot->isIRAP()))
     {
       THROW("Parameter set activation failed!");
     }
-
-    if (aps)
+    m_parameterSetManager.getApsMap()->clear();
+    for (int i = 0; i < ALF_CTB_MAX_NUM_APS; i++)
     {
-      m_parameterSetManager.clearAPSChangedFlag(aps->getAPSId());
-      if (false == m_parameterSetManager.activateAPS(m_apcSlicePilot->getAPSId()))
+      APS* aps = m_parameterSetManager.getAPS(i, ALF_APS);
+      if (aps)
       {
-        THROW("APS activation failed!");
+        m_parameterSetManager.clearAPSChangedFlag(i, ALF_APS);
       }
     }
+    APS* lmcsAPS = nullptr;
+    APS* scalinglistAPS = nullptr;
+    activateAPS(&m_picHeader, m_apcSlicePilot, m_parameterSetManager, apss, lmcsAPS, scalinglistAPS);
 
     xParsePrefixSEImessages();
 
@@ -767,14 +970,12 @@ void DecLib::xActivateParameterSets()
 #endif
 
     //  Get a new picture buffer. This will also set up m_pcPic, and therefore give us a SPS and PPS pointer that we can use.
-    m_pcPic = xGetNewPicBuffer (*sps, *pps, m_apcSlicePilot->getTLayer());
-
-    m_apcSlicePilot->applyReferencePictureSet(m_cListPic, m_apcSlicePilot->getRPS());
-
-    m_pcPic->finalInit(*sps, *pps, *aps);
+    m_pcPic = xGetNewPicBuffer( *sps, *pps, m_apcSlicePilot->getTLayer(), layerId );
 
+    m_apcSlicePilot->applyReferencePictureListBasedMarking( m_cListPic, m_apcSlicePilot->getRPL0(), m_apcSlicePilot->getRPL1(), layerId );
+    m_pcPic->finalInit( vps, *sps, *pps, &m_picHeader, apss, lmcsAPS, scalinglistAPS );
     m_pcPic->createTempBuffers( m_pcPic->cs->pps->pcv->maxCUWidth );
-    m_pcPic->cs->createCoeffs();
+    m_pcPic->cs->createCoeffs((bool)m_pcPic->cs->sps->getPLTMode());
 
     m_pcPic->allocateNewSlice();
     // make the slice-pilot a real slice, and set up the slice-pilot for the next slice
@@ -785,7 +986,6 @@ void DecLib::xActivateParameterSets()
     Slice *pSlice = m_pcPic->slices[m_uiSliceSegmentIdx];
 
     // Update the PPS and SPS pointers with the ones of the picture.
-    aps= pSlice->getAPS();
     pps=pSlice->getPPS();
     sps=pSlice->getSPS();
 
@@ -793,18 +993,20 @@ void DecLib::xActivateParameterSets()
     m_pcPic->cs->slice = pSlice;
     m_pcPic->cs->sps   = sps;
     m_pcPic->cs->pps   = pps;
-    m_pcPic->cs->aps   = aps;
-#if HEVC_VPS
-    m_pcPic->cs->vps   = pSlice->getVPS();
-#endif
+    m_pcPic->cs->vps = vps;
+
+    memcpy(m_pcPic->cs->alfApss, apss, sizeof(m_pcPic->cs->alfApss));
+    m_pcPic->cs->lmcsAps = lmcsAPS;
+    m_pcPic->cs->scalinglistAps = scalinglistAPS;
+
     m_pcPic->cs->pcv   = pps->pcv;
 
     // Initialise the various objects for the new set of settings
-    m_cSAO.create( sps->getPicWidthInLumaSamples(), sps->getPicHeightInLumaSamples(), sps->getChromaFormatIdc(), sps->getMaxCUWidth(), sps->getMaxCUHeight(), sps->getMaxCodingDepth(), pps->getPpsRangeExtension().getLog2SaoOffsetScale(CHANNEL_TYPE_LUMA), pps->getPpsRangeExtension().getLog2SaoOffsetScale(CHANNEL_TYPE_CHROMA) );
+    m_cSAO.create( pps->getPicWidthInLumaSamples(), pps->getPicHeightInLumaSamples(), sps->getChromaFormatIdc(), sps->getMaxCUWidth(), sps->getMaxCUHeight(), sps->getMaxCodingDepth(), pps->getPpsRangeExtension().getLog2SaoOffsetScale( CHANNEL_TYPE_LUMA ), pps->getPpsRangeExtension().getLog2SaoOffsetScale( CHANNEL_TYPE_CHROMA ) );
     m_cLoopFilter.create( sps->getMaxCodingDepth() );
     m_cIntraPred.init( sps->getChromaFormatIdc(), sps->getBitDepth( CHANNEL_TYPE_LUMA ) );
-    m_cInterPred.init( &m_cRdCost, sps->getChromaFormatIdc() );
-    if (sps->getUseReshaper())
+    m_cInterPred.init( &m_cRdCost, sps->getChromaFormatIdc(), sps->getMaxCUHeight() );
+    if (sps->getUseLmcs())
     {
       m_cReshaper.createDec(sps->getBitDepth(CHANNEL_TYPE_LUMA));
     }
@@ -814,13 +1016,13 @@ void DecLib::xActivateParameterSets()
 
     if(!m_SEIs.empty())
     {
-      // Check if any new Picture Timing SEI has arrived
-      SEIMessages pictureTimingSEIs = getSeisByType(m_SEIs, SEI::PICTURE_TIMING);
-      if (pictureTimingSEIs.size()>0)
+      // Check if any new Frame Field Info SEI has arrived
+      SEIMessages frameFieldSEIs = getSeisByType(m_SEIs, SEI::FRAME_FIELD_INFO);
+      if (frameFieldSEIs.size()>0)
       {
-        SEIPictureTiming* pictureTiming = (SEIPictureTiming*) *(pictureTimingSEIs.begin());
-        isField    = (pictureTiming->m_picStruct == 1) || (pictureTiming->m_picStruct == 2) || (pictureTiming->m_picStruct == 9) || (pictureTiming->m_picStruct == 10) || (pictureTiming->m_picStruct == 11) || (pictureTiming->m_picStruct == 12);
-        isTopField = (pictureTiming->m_picStruct == 1) || (pictureTiming->m_picStruct == 9) || (pictureTiming->m_picStruct == 11);
+        SEIFrameFieldInfo* ff = (SEIFrameFieldInfo*) *(frameFieldSEIs.begin());
+        isField    = ff->m_fieldPicFlag;
+        isTopField = isField && (!ff->m_bottomFieldFlag);
       }
     }
 
@@ -834,15 +1036,11 @@ void DecLib::xActivateParameterSets()
 
     // Recursive structure
     m_cCuDecoder.init( &m_cTrQuant, &m_cIntraPred, &m_cInterPred );
-    if (sps->getUseReshaper())
+    if (sps->getUseLmcs())
     {
       m_cCuDecoder.initDecCuReshaper(&m_cReshaper, sps->getChromaFormatIdc());
     }
-#if MAX_TB_SIZE_SIGNALLING
-    m_cTrQuant.init( nullptr, sps->getMaxTbSize(), false, false, false, false, false );
-#else
-    m_cTrQuant.init( nullptr, MAX_TB_SIZEY, false, false, false, false, false );
-#endif
+    m_cTrQuant.init(m_cTrQuantScalingList.getQuant(), sps->getMaxTbSize(), false, false, false, false);
 
     // RdCost
     m_cRdCost.setCostMode ( COST_STANDARD_LOSSY ); // not used in decoder side RdCost stuff -> set to default
@@ -851,7 +1049,7 @@ void DecLib::xActivateParameterSets()
 
     if( sps->getALFEnabledFlag() )
     {
-      m_cALF.create( sps->getPicWidthInLumaSamples(), sps->getPicHeightInLumaSamples(), sps->getChromaFormatIdc(), sps->getMaxCUWidth(), sps->getMaxCUHeight(), sps->getMaxCodingDepth(), sps->getBitDepths().recon );
+      m_cALF.create( pps->getPicWidthInLumaSamples(), pps->getPicHeightInLumaSamples(), sps->getChromaFormatIdc(), sps->getMaxCUWidth(), sps->getMaxCUHeight(), sps->getMaxCodingDepth(), sps->getBitDepths().recon );
     }
   }
   else
@@ -865,15 +1063,18 @@ void DecLib::xActivateParameterSets()
 
     const SPS *sps = pSlice->getSPS();
     const PPS *pps = pSlice->getPPS();
-    APS *aps = pSlice->getAPS();
+    APS** apss = pSlice->getAlfAPSs();
+    APS *lmcsAPS = m_picHeader.getLmcsAPS();
+    APS *scalinglistAPS = m_picHeader.getScalingListAPS();
+
     // fix Parameter Sets, now that we have the real slice
     m_pcPic->cs->slice = pSlice;
     m_pcPic->cs->sps   = sps;
     m_pcPic->cs->pps   = pps;
-    m_pcPic->cs->aps   = aps;
-#if HEVC_VPS
-    m_pcPic->cs->vps   = pSlice->getVPS();
-#endif
+    memcpy(m_pcPic->cs->alfApss, apss, sizeof(m_pcPic->cs->alfApss));
+    m_pcPic->cs->lmcsAps = lmcsAPS;
+    m_pcPic->cs->scalinglistAps = scalinglistAPS;
+
     m_pcPic->cs->pcv   = pps->pcv;
 
     // check that the current active PPS has not changed...
@@ -885,11 +1086,29 @@ void DecLib::xActivateParameterSets()
     {
       EXIT("Error - a new PPS has been decoded while processing a picture");
     }
-    if (aps && m_parameterSetManager.getAPSChangedFlag(aps->getAPSId()))
+    for (int i = 0; i < ALF_CTB_MAX_NUM_APS; i++)
+    {
+      APS* aps = m_parameterSetManager.getAPS(i, ALF_APS);
+      if (aps && m_parameterSetManager.getAPSChangedFlag(i, ALF_APS))
+      {
+        EXIT("Error - a new APS has been decoded while processing a picture");
+      }
+    }
+
+    if (lmcsAPS && m_parameterSetManager.getAPSChangedFlag(lmcsAPS->getAPSId(), LMCS_APS) )
+    {
+      EXIT("Error - a new LMCS APS has been decoded while processing a picture");
+    }
+    if( scalinglistAPS && m_parameterSetManager.getAPSChangedFlag( scalinglistAPS->getAPSId(), SCALING_LIST_APS ) )
     {
-      EXIT("Error - a new APS has been decoded while processing a picture");
+      EXIT( "Error - a new SCALING LIST APS has been decoded while processing a picture" );
     }
 
+    activateAPS(&m_picHeader, pSlice, m_parameterSetManager, apss, lmcsAPS, scalinglistAPS);
+
+    m_pcPic->cs->lmcsAps = lmcsAPS;
+    m_pcPic->cs->scalinglistAps = scalinglistAPS;
+
     xParsePrefixSEImessages();
 
     // Check if any new SEI has arrived
@@ -902,6 +1121,36 @@ void DecLib::xActivateParameterSets()
        deleteSEIs(m_SEIs);
      }
   }
+
+  // Conformance checks
+  Slice *pSlice = m_pcPic->slices[m_uiSliceSegmentIdx];
+  const SPS *sps = pSlice->getSPS();
+  const PPS *pps = pSlice->getPPS();
+
+  if( !sps->getUseWP() )
+  {
+    CHECK( pps->getUseWP(), "When sps_weighted_pred_flag is equal to 0, the value of pps_weighted_pred_flag shall be equal to 0." );
+  }
+
+  if( !sps->getUseWPBiPred() )
+  {
+    CHECK( pps->getWPBiPred(), "When sps_weighted_bipred_flag is equal to 0, the value of pps_weighted_bipred_flag shall be equal to 0." );
+  }
+
+  CHECK( ( pps->getPicWidthInLumaSamples() % ( std::max( 8, int( sps->getMaxCUWidth() >> ( sps->getMaxCodingDepth() - 1 ) ) ) ) ) != 0, "Coded frame width must be a multiple of Max(8, the minimum unit size)" );
+  CHECK( ( pps->getPicHeightInLumaSamples() % ( std::max( 8, int( sps->getMaxCUHeight() >> ( sps->getMaxCodingDepth() - 1 ) ) ) ) ) != 0, "Coded frame height must be a multiple of Max(8, the minimum unit size)" );
+  if( !sps->getRprEnabledFlag() ) // subpics_present_flag is equal to 1 condition shall be added
+  {
+    CHECK( pps->getPicWidthInLumaSamples() != sps->getMaxPicWidthInLumaSamples(), "When subpics_present_flag is equal to 1 or ref_pic_resampling_enabled_flag equal to 0, the value of pic_width_in_luma_samples shall be equal to pic_width_max_in_luma_samples." );
+    CHECK( pps->getPicHeightInLumaSamples() != sps->getMaxPicHeightInLumaSamples(), "When subpics_present_flag is equal to 1 or ref_pic_resampling_enabled_flag equal to 0, the value of pic_height_in_luma_samples shall be equal to pic_height_max_in_luma_samples." );
+  }
+
+  CHECK( !sps->getRprEnabledFlag() && pps->getScalingWindow().getWindowEnabledFlag(), "When ref_pic_resampling_enabled_flag is equal to 0, the value of scaling_window_flag shall be equal to 0." );
+
+  if( sps->getCTUSize() + 2 * ( 1 << sps->getLog2MinCodingBlockSize() ) > pps->getPicWidthInLumaSamples() )
+  {
+    CHECK( sps->getWrapAroundEnabledFlag(), "Wraparound shall be disabled when the value of ( CtbSizeY / MinCbSizeY + 1) is less than or equal to ( pic_width_in_luma_samples / MinCbSizeY - 1 )" );
+  }
 }
 
 
@@ -922,62 +1171,72 @@ void DecLib::xParsePrefixSEImessages()
   while (!m_prefixSEINALUs.empty())
   {
     InputNALUnit &nalu=*m_prefixSEINALUs.front();
-    m_seiReader.parseSEImessage( &(nalu.getBitstream()), m_SEIs, nalu.m_nalUnitType, m_parameterSetManager.getActiveSPS(), m_pDecodedSEIOutputStream );
+    m_seiReader.parseSEImessage( &(nalu.getBitstream()), m_SEIs, nalu.m_nalUnitType, nalu.m_temporalId, m_parameterSetManager.getActiveSPS(), m_HRD, m_pDecodedSEIOutputStream );
     delete m_prefixSEINALUs.front();
     m_prefixSEINALUs.pop_front();
   }
 }
 
+void DecLib::xDecodePicHeader( InputNALUnit& nalu )
+{ // Parses the picture header carried by nalu into m_picHeader and then calls setValid() on it.
+  m_HLSReader.setBitstream( &nalu.getBitstream() ); // point the HLS reader at this NAL unit's bitstream
+  m_HLSReader.parsePictureHeader( &m_picHeader, &m_parameterSetManager);
+  m_picHeader.setValid(); // xDecodeSlice() and isSliceNaluFirstInAU() test m_picHeader.isValid() before using the header
+}
 
 bool DecLib::xDecodeSlice(InputNALUnit &nalu, int &iSkipFrame, int iPOCLastDisplay )
 {
+  if(m_picHeader.isValid() == false) {
+    return false;
+  }
+  m_apcSlicePilot->setPicHeader( &m_picHeader );
   m_apcSlicePilot->initSlice(); // the slice pilot is an object to prepare for a new slice
                                 // it is not associated with picture, sps or pps structures.
 
+  Picture* scaledRefPic[MAX_NUM_REF] = {};
+
   if (m_bFirstSliceInPicture)
   {
     m_uiSliceSegmentIdx = 0;
   }
   else
   {
+      CHECK(nalu.m_nalUnitType != m_pcPic->slices[m_uiSliceSegmentIdx - 1]->getNalUnitType(), "The value of NAL unit type shall be the same for all coded slice NAL units of a picture");
     m_apcSlicePilot->copySliceInfo( m_pcPic->slices[m_uiSliceSegmentIdx-1] );
   }
-#if HEVC_DEPENDENT_SLICES
-  m_apcSlicePilot->setSliceSegmentIdx(m_uiSliceSegmentIdx);
-#endif
 
   m_apcSlicePilot->setNalUnitType(nalu.m_nalUnitType);
-#if !JVET_M0101_HLS
-  bool nonReferenceFlag = (m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_TRAIL_N ||
-                           m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_TSA_N   ||
-                           m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_STSA_N  ||
-                           m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_RADL_N  ||
-                           m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_RASL_N);
-  m_apcSlicePilot->setTemporalLayerNonReferenceFlag(nonReferenceFlag);
-#endif
   m_apcSlicePilot->setTLayer(nalu.m_temporalId);
 
+  for( auto& naluTemporalId : m_accessUnitNals )
+  {
+    if( naluTemporalId.first != NAL_UNIT_DPS
+      && naluTemporalId.first != NAL_UNIT_VPS
+      && naluTemporalId.first != NAL_UNIT_SPS
+      && naluTemporalId.first != NAL_UNIT_EOS
+      && naluTemporalId.first != NAL_UNIT_EOB )
+    {
+      CHECK( naluTemporalId.second < nalu.m_temporalId, "TemporalId shall be greater than or equal to the TemporalId of the layer access unit containing the NAL unit" );
+    }
+  }
+
+  if (nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_GDR)
+    CHECK(nalu.m_temporalId != 0, "Current GDR picture has TemporalId not equal to 0");
+
   m_HLSReader.setBitstream( &nalu.getBitstream() );
-  m_HLSReader.parseSliceHeader( m_apcSlicePilot, &m_parameterSetManager, m_prevTid0POC );
+  m_HLSReader.parseSliceHeader( m_apcSlicePilot, &m_picHeader, &m_parameterSetManager, m_prevTid0POC );
 
   // update independent slice index
   uint32_t uiIndependentSliceIdx = 0;
   if (!m_bFirstSliceInPicture)
   {
     uiIndependentSliceIdx = m_pcPic->slices[m_uiSliceSegmentIdx-1]->getIndependentSliceIdx();
-#if HEVC_DEPENDENT_SLICES
-    if (!m_apcSlicePilot->getDependentSliceSegmentFlag())
-    {
-#endif
       uiIndependentSliceIdx++;
-#if HEVC_DEPENDENT_SLICES
-    }
-#endif
   }
   m_apcSlicePilot->setIndependentSliceIdx(uiIndependentSliceIdx);
 
 #if K0149_BLOCK_STATISTICS
-  PPS *pps = m_parameterSetManager.getPPS(m_apcSlicePilot->getPPSId());
+  PPS *pps = m_parameterSetManager.getPPS(m_picHeader.getPPSId());
   CHECK(pps == 0, "No PPS present");
   SPS *sps = m_parameterSetManager.getSPS(pps->getSPSId());
   CHECK(sps == 0, "No SPS present");
@@ -987,13 +1246,14 @@ bool DecLib::xDecodeSlice(InputNALUnit &nalu, int &iSkipFrame, int iPOCLastDispl
 
   DTRACE_UPDATE( g_trace_ctx, std::make_pair( "poc", m_apcSlicePilot->getPOC() ) );
 
-#if HEVC_DEPENDENT_SLICES
-  // set POC for dependent slices in skipped pictures
-  if(m_apcSlicePilot->getDependentSliceSegmentFlag() && m_prevSliceSkipped)
-  {
-    m_apcSlicePilot->setPOC(m_skippedPOC);
-  }
-#endif
+  if ((m_bFirstSliceInPicture ||
+        m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA ||
+        m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_GDR) &&
+      getNoOutputPriorPicsFlag())
+    {
+      checkNoOutputPriorPics(&m_cListPic);
+      setNoOutputPriorPicsFlag (false);
+    }
 
   xUpdatePreviousTid0POC(m_apcSlicePilot);
 
@@ -1001,38 +1261,35 @@ bool DecLib::xDecodeSlice(InputNALUnit &nalu, int &iSkipFrame, int iPOCLastDispl
   m_apcSlicePilot->setAssociatedIRAPType(m_associatedIRAPType);
 
   //For inference of NoOutputOfPriorPicsFlag
-  if (m_apcSlicePilot->getRapPicFlag())
+  if (m_apcSlicePilot->getRapPicFlag() || m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_GDR)
   {
-#if !JVET_M0101_HLS
-    if ((m_apcSlicePilot->getNalUnitType() >= NAL_UNIT_CODED_SLICE_BLA_W_LP && m_apcSlicePilot->getNalUnitType() <= NAL_UNIT_CODED_SLICE_IDR_N_LP) ||
-        (m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA && m_bFirstSliceInSequence) ||
-        (m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA && m_apcSlicePilot->getHandleCraAsCvsStartFlag()))
-#else
     if ((m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA && m_bFirstSliceInSequence) ||
-        (m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA && m_apcSlicePilot->getHandleCraAsCvsStartFlag()))
-#endif
+        (m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA && m_apcSlicePilot->getHandleCraAsCvsStartFlag()) ||
+        (m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_GDR && m_bFirstSliceInSequence))
     {
-      m_apcSlicePilot->setNoRaslOutputFlag(true);
+      m_apcSlicePilot->setNoIncorrectPicOutputFlag(true);
     }
     //the inference for NoOutputPriorPicsFlag
-    if (!m_bFirstSliceInBitstream && m_apcSlicePilot->getRapPicFlag() && m_apcSlicePilot->getNoRaslOutputFlag())
+    if (!m_bFirstSliceInBitstream &&
+        (m_apcSlicePilot->getRapPicFlag() || m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_GDR) &&
+        m_apcSlicePilot->getNoIncorrectPicOutputFlag())
     {
-      if (m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA)
+      if (m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA || m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_GDR)
       {
-        m_apcSlicePilot->setNoOutputPriorPicsFlag(true);
+        m_picHeader.setNoOutputOfPriorPicsFlag(true);
       }
     }
     else
     {
-      m_apcSlicePilot->setNoOutputPriorPicsFlag(false);
+      m_picHeader.setNoOutputOfPriorPicsFlag(false);
     }
 
-    if(m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA)
+    if (m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA || m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_GDR)
     {
-      m_craNoRaslOutputFlag = m_apcSlicePilot->getNoRaslOutputFlag();
+      m_lastNoIncorrectPicOutputFlag = m_apcSlicePilot->getNoIncorrectPicOutputFlag();
     }
   }
-  if (m_apcSlicePilot->getRapPicFlag() && m_apcSlicePilot->getNoOutputPriorPicsFlag())
+  if ((m_apcSlicePilot->getRapPicFlag() || m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_GDR) && m_picHeader.getNoOutputOfPriorPicsFlag())
   {
     m_lastPOCNoOutputPriorPics = m_apcSlicePilot->getPOC();
     m_isNoOutputPriorPics = true;
@@ -1043,32 +1300,24 @@ bool DecLib::xDecodeSlice(InputNALUnit &nalu, int &iSkipFrame, int iPOCLastDispl
   }
 
   //For inference of PicOutputFlag
-#if !JVET_M0101_HLS
-  if (m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_RASL_N || m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_RASL_R)
-#else
   if (m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_RASL)
-#endif
   {
-    if ( m_craNoRaslOutputFlag )
+    if (m_lastNoIncorrectPicOutputFlag)
     {
-      m_apcSlicePilot->setPicOutputFlag(false);
+      m_picHeader.setPicOutputFlag(false);
     }
   }
 
-  if (m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA && m_craNoRaslOutputFlag) //Reset POC MSB when CRA has NoRaslOutputFlag equal to 1
+  if ((m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA || m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_GDR) &&
+      m_lastNoIncorrectPicOutputFlag)                     //Reset POC MSB when CRA or GDR has NoIncorrectPicOutputFlag equal to 1
   {
-    PPS *pps = m_parameterSetManager.getPPS(m_apcSlicePilot->getPPSId());
+    PPS *pps = m_parameterSetManager.getPPS(m_picHeader.getPPSId());
     CHECK(pps == 0, "No PPS present");
     SPS *sps = m_parameterSetManager.getSPS(pps->getSPSId());
     CHECK(sps == 0, "No SPS present");
     int iMaxPOClsb = 1 << sps->getBitsForPOC();
     m_apcSlicePilot->setPOC( m_apcSlicePilot->getPOC() & (iMaxPOClsb - 1) );
     xUpdatePreviousTid0POC(m_apcSlicePilot);
-    if (m_apcSlicePilot->getAPSId() != -1)
-    {
-      APS *aps = m_parameterSetManager.getAPS(m_apcSlicePilot->getAPSId());
-      CHECK(aps == 0, "No APS present");
-    }
   }
 
   // Skip pictures due to random access
@@ -1080,34 +1329,19 @@ bool DecLib::xDecodeSlice(InputNALUnit &nalu, int &iSkipFrame, int iPOCLastDispl
     return false;
   }
   // Skip TFD pictures associated with BLA/BLANT pictures
-#if !JVET_M0101_HLS
-  if (isSkipPictureForBLA(iPOCLastDisplay))
-  {
-    m_prevSliceSkipped = true;
-    m_skippedPOC = m_apcSlicePilot->getPOC();
-    return false;
-  }
-#endif
 
   // clear previous slice skipped flag
   m_prevSliceSkipped = false;
 
   //we should only get a different poc for a new picture (with CTU address==0)
-#if HEVC_DEPENDENT_SLICES
-  if (!m_apcSlicePilot->getDependentSliceSegmentFlag() && m_apcSlicePilot->getPOC()!=m_prevPOC && !m_bFirstSliceInSequence && (m_apcSlicePilot->getSliceCurStartCtuTsAddr() != 0))
-#else
-  if(m_apcSlicePilot->getPOC() != m_prevPOC && !m_bFirstSliceInSequence && (m_apcSlicePilot->getSliceCurStartCtuTsAddr() != 0))
-#endif
+  if(m_apcSlicePilot->getPOC() != m_prevPOC && !m_bFirstSliceInSequence && (m_apcSlicePilot->getFirstCtuRsAddrInSlice() != 0))
   {
     msg( WARNING, "Warning, the first slice of a picture might have been lost!\n");
   }
+  m_prevLayerID = nalu.m_nuhLayerId;
 
   // leave when a new picture is found
-#if HEVC_DEPENDENT_SLICES
-  if (!m_apcSlicePilot->getDependentSliceSegmentFlag() && (m_apcSlicePilot->getSliceCurStartCtuTsAddr() == 0 && !m_bFirstSliceInPicture) )
-#else
-  if(m_apcSlicePilot->getSliceCurStartCtuTsAddr() == 0 && !m_bFirstSliceInPicture)
-#endif
+  if(m_apcSlicePilot->getFirstCtuRsAddrInSlice() == 0 && !m_bFirstSliceInPicture)
   {
     if (m_prevPOC >= m_pocRandomAccess)
     {
@@ -1125,20 +1359,38 @@ bool DecLib::xDecodeSlice(InputNALUnit &nalu, int &iSkipFrame, int iPOCLastDispl
   //detect lost reference picture and insert copy of earlier frame.
   {
     int lostPoc;
-    while((lostPoc=m_apcSlicePilot->checkThatAllRefPicsAreAvailable(m_cListPic, m_apcSlicePilot->getRPS(), true, m_pocRandomAccess)) > 0)
+    int refPicIndex;
+    while ((lostPoc = m_apcSlicePilot->checkThatAllRefPicsAreAvailable(m_cListPic, m_apcSlicePilot->getRPL0(), 0, true, &refPicIndex)) > 0)
     {
-      xCreateLostPicture(lostPoc-1);
+      if ( ( (m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_GDR) || (m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA) ) && m_apcSlicePilot->getNoIncorrectPicOutputFlag() )
+      {
+        if (m_apcSlicePilot->getRPL0()->isInterLayerRefPic(refPicIndex) == 0)
+        {
+          xCreateUnavailablePicture(lostPoc - 1, m_apcSlicePilot->getRPL0()->isRefPicLongterm(refPicIndex), m_apcSlicePilot->getPic()->layerId, m_apcSlicePilot->getRPL0()->isInterLayerRefPic(refPicIndex));
+        }
+      }
+      else
+      {
+        xCreateLostPicture( lostPoc - 1, m_apcSlicePilot->getPic()->layerId );
+      }
+    }
+    while ((lostPoc = m_apcSlicePilot->checkThatAllRefPicsAreAvailable(m_cListPic, m_apcSlicePilot->getRPL1(), 0, true, &refPicIndex)) > 0)
+    {
+      if (((m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_GDR) || (m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA)) && m_apcSlicePilot->getNoIncorrectPicOutputFlag())
+      {
+        if (m_apcSlicePilot->getRPL1()->isInterLayerRefPic(refPicIndex) == 0)
+        {
+          xCreateUnavailablePicture(lostPoc - 1, m_apcSlicePilot->getRPL1()->isRefPicLongterm(refPicIndex), m_apcSlicePilot->getPic()->layerId, m_apcSlicePilot->getRPL1()->isInterLayerRefPic(refPicIndex));
+        }
+      }
+      else
+      {
+        xCreateLostPicture( lostPoc - 1, m_apcSlicePilot->getPic()->layerId );
+      }
     }
   }
 
-#if HEVC_DEPENDENT_SLICES
-  if (!m_apcSlicePilot->getDependentSliceSegmentFlag())
-  {
-#endif
     m_prevPOC = m_apcSlicePilot->getPOC();
-#if HEVC_DEPENDENT_SLICES
-  }
-#endif
 
   if (m_bFirstSliceInPicture)
   {
@@ -1146,7 +1398,7 @@ bool DecLib::xDecodeSlice(InputNALUnit &nalu, int &iSkipFrame, int iPOCLastDispl
   }
 
   // actual decoding starts here
-  xActivateParameterSets();
+  xActivateParameterSets( nalu.m_nuhLayerId );
 
   m_bFirstSliceInSequence = false;
   m_bFirstSliceInBitstream  = false;
@@ -1158,36 +1410,15 @@ bool DecLib::xDecodeSlice(InputNALUnit &nalu, int &iSkipFrame, int iPOCLastDispl
   m_pcPic->layer       = pcSlice->getTLayer();
   m_pcPic->referenced  = true;
   m_pcPic->layer       = nalu.m_temporalId;
+  m_pcPic->layerId    = nalu.m_nuhLayerId;
+    m_pcPic->subLayerNonReferencePictureDueToSTSA = false;
 
-  // When decoding the slice header, the stored start and end addresses were actually RS addresses, not TS addresses.
-  // Now, having set up the maps, convert them to the correct form.
-#if HEVC_TILES_WPP
-  const TileMap& tileMap = *(m_pcPic->tileMap);
-#endif
-#if HEVC_DEPENDENT_SLICES
-#if HEVC_TILES_WPP
-  pcSlice->setSliceSegmentCurStartCtuTsAddr( tileMap.getCtuRsToTsAddrMap(pcSlice->getSliceSegmentCurStartCtuTsAddr()) );
-  pcSlice->setSliceSegmentCurEndCtuTsAddr( tileMap.getCtuRsToTsAddrMap(pcSlice->getSliceSegmentCurEndCtuTsAddr()) );
-#endif
-  if(!pcSlice->getDependentSliceSegmentFlag())
-  {
-#endif
-#if HEVC_TILES_WPP
-    pcSlice->setSliceCurStartCtuTsAddr( tileMap.getCtuRsToTsAddrMap(pcSlice->getSliceCurStartCtuTsAddr()) );
-    pcSlice->setSliceCurEndCtuTsAddr( tileMap.getCtuRsToTsAddrMap(pcSlice->getSliceCurEndCtuTsAddr()) );
-#endif
-#if HEVC_DEPENDENT_SLICES
-  }
-#endif
 
+  pcSlice->checkCRA(pcSlice->getRPL0(), pcSlice->getRPL1(), m_pocCRA, m_associatedIRAPType, m_cListPic);
+  pcSlice->constructRefPicList(m_cListPic);
+  pcSlice->checkSTSA(m_cListPic);
 
-#if HEVC_DEPENDENT_SLICES
-  if (!pcSlice->getDependentSliceSegmentFlag())
-  {
-#endif
-    pcSlice->checkCRA(pcSlice->getRPS(), m_pocCRA, m_associatedIRAPType, m_cListPic );
-    // Set reference list
-    pcSlice->setRefPicList( m_cListPic, true, true );
+  pcSlice->scaleRefPicList( scaledRefPic, m_pcPic->cs->picHeader, m_parameterSetManager.getAPSs(), m_picHeader.getLmcsAPS(), m_picHeader.getScalingListAPS(), true );
 
     if (!pcSlice->isIntra())
     {
@@ -1216,7 +1447,9 @@ bool DecLib::xDecodeSlice(InputNALUnit &nalu, int &iSkipFrame, int iPOCLastDispl
       pcSlice->setCheckLDC(bLowDelay);
     }
 
-    if ( pcSlice->getCheckLDC() == false && pcSlice->getMvdL1ZeroFlag() == false )
+    if (pcSlice->getSPS()->getUseSMVD() && pcSlice->getCheckLDC() == false
+      && pcSlice->getPicHeader()->getMvdL1ZeroFlag() == false
+      )
     {
       int currPOC = pcSlice->getPOC();
 
@@ -1230,7 +1463,8 @@ bool DecLib::xDecodeSlice(InputNALUnit &nalu, int &iSkipFrame, int iPOCLastDispl
       for ( ref = 0; ref < pcSlice->getNumRefIdx( REF_PIC_LIST_0 ); ref++ )
       {
         int poc = pcSlice->getRefPic( REF_PIC_LIST_0, ref )->getPOC();
-        if ( poc < currPOC && (poc > forwardPOC || refIdx0 == -1) )
+        const bool isRefLongTerm = pcSlice->getRefPic(REF_PIC_LIST_0, ref)->longTerm;
+        if ( poc < currPOC && (poc > forwardPOC || refIdx0 == -1) && !isRefLongTerm )
         {
           forwardPOC = poc;
           refIdx0 = ref;
@@ -1241,7 +1475,8 @@ bool DecLib::xDecodeSlice(InputNALUnit &nalu, int &iSkipFrame, int iPOCLastDispl
       for ( ref = 0; ref < pcSlice->getNumRefIdx( REF_PIC_LIST_1 ); ref++ )
       {
         int poc = pcSlice->getRefPic( REF_PIC_LIST_1, ref )->getPOC();
-        if ( poc > currPOC && (poc < backwardPOC || refIdx1 == -1) )
+        const bool isRefLongTerm = pcSlice->getRefPic(REF_PIC_LIST_1, ref)->longTerm;
+        if ( poc > currPOC && (poc < backwardPOC || refIdx1 == -1) && !isRefLongTerm )
         {
           backwardPOC = poc;
           refIdx1 = ref;
@@ -1259,7 +1494,8 @@ bool DecLib::xDecodeSlice(InputNALUnit &nalu, int &iSkipFrame, int iPOCLastDispl
         for ( ref = 0; ref < pcSlice->getNumRefIdx( REF_PIC_LIST_0 ); ref++ )
         {
           int poc = pcSlice->getRefPic( REF_PIC_LIST_0, ref )->getPOC();
-          if ( poc > currPOC && (poc < backwardPOC || refIdx0 == -1) )
+          const bool isRefLongTerm = pcSlice->getRefPic(REF_PIC_LIST_0, ref)->longTerm;
+          if ( poc > currPOC && (poc < backwardPOC || refIdx0 == -1) && !isRefLongTerm )
           {
             backwardPOC = poc;
             refIdx0 = ref;
@@ -1270,7 +1506,8 @@ bool DecLib::xDecodeSlice(InputNALUnit &nalu, int &iSkipFrame, int iPOCLastDispl
         for ( ref = 0; ref < pcSlice->getNumRefIdx( REF_PIC_LIST_1 ); ref++ )
         {
           int poc = pcSlice->getRefPic( REF_PIC_LIST_1, ref )->getPOC();
-          if ( poc < currPOC && (poc > forwardPOC || refIdx1 == -1) )
+          const bool isRefLongTerm = pcSlice->getRefPic(REF_PIC_LIST_1, ref)->longTerm;
+          if ( poc < currPOC && (poc > forwardPOC || refIdx1 == -1) && !isRefLongTerm )
           {
             forwardPOC = poc;
             refIdx1 = ref;
@@ -1295,42 +1532,78 @@ bool DecLib::xDecodeSlice(InputNALUnit &nalu, int &iSkipFrame, int iPOCLastDispl
     //---------------
     pcSlice->setRefPOCList();
 
-#if HEVC_DEPENDENT_SLICES
-  }
-#endif
+    SEIMessages drapSEIs = getSeisByType(m_pcPic->SEIs, SEI::DEPENDENT_RAP_INDICATION );
+    if (!drapSEIs.empty())
+    {
+      msg( NOTICE, "Dependent RAP indication SEI decoded\n");
+      pcSlice->setDRAP(true);
+      pcSlice->setLatestDRAPPOC(pcSlice->getPOC());
+    }
+    pcSlice->checkConformanceForDRAP(nalu.m_temporalId);
 
-#if HEVC_USE_SCALING_LISTS
   Quant *quant = m_cTrQuant.getQuant();
 
-  if(pcSlice->getSPS()->getScalingListFlag())
+  if( pcSlice->getSPS()->getScalingListFlag() )
   {
     ScalingList scalingList;
-    if(pcSlice->getPPS()->getScalingListPresentFlag())
-    {
-      scalingList = pcSlice->getPPS()->getScalingList();
-    }
-    else if (pcSlice->getSPS()->getScalingListPresentFlag())
+    if( pcSlice->getPicHeader()->getScalingListPresentFlag() )
     {
-      scalingList = pcSlice->getSPS()->getScalingList();
+      APS* scalingListAPS = pcSlice->getPicHeader()->getScalingListAPS();
+      scalingList = scalingListAPS->getScalingList();
     }
     else
     {
       scalingList.setDefaultScalingList();
     }
-    quant->setScalingListDec(scalingList);
-    quant->setUseScalingList(true);
+    int scalingListAPSId = pcSlice->getPicHeader()->getScalingListAPSId();
+    if (getScalingListUpdateFlag() || (scalingListAPSId != getPreScalingListAPSId()))
+    {
+      quant->setScalingListDec(scalingList);
+      setScalingListUpdateFlag(false);
+      setPreScalingListAPSId(scalingListAPSId);
+    }
+    quant->setUseScalingList( true );
   }
   else
   {
-    quant->setUseScalingList(false);
+    quant->setUseScalingList( false );
   }
-#endif
 
 
-  if (pcSlice->getSPS()->getUseReshaper())
+  if (pcSlice->getSPS()->getUseLmcs())
   {
-    m_cReshaper.copySliceReshaperInfo(m_cReshaper.getSliceReshaperInfo(), pcSlice->getReshapeInfo());
-    if (pcSlice->getReshapeInfo().getSliceReshapeModelPresentFlag())
+    if (m_bFirstSliceInPicture)
+      m_sliceLmcsApsId = -1;
+    if (pcSlice->getPicHeader()->getLmcsEnabledFlag())
+    {
+      APS* lmcsAPS = pcSlice->getPicHeader()->getLmcsAPS();
+      if (m_sliceLmcsApsId == -1)
+      {
+        m_sliceLmcsApsId = lmcsAPS->getAPSId();
+      }
+      else
+      {
+        CHECK(lmcsAPS->getAPSId() != m_sliceLmcsApsId, "same APS ID shall be used for all slices in one picture");
+      }
+      SliceReshapeInfo& sInfo = lmcsAPS->getReshaperAPSInfo();
+      SliceReshapeInfo& tInfo = m_cReshaper.getSliceReshaperInfo();
+      tInfo.reshaperModelMaxBinIdx = sInfo.reshaperModelMaxBinIdx;
+      tInfo.reshaperModelMinBinIdx = sInfo.reshaperModelMinBinIdx;
+      memcpy(tInfo.reshaperModelBinCWDelta, sInfo.reshaperModelBinCWDelta, sizeof(int)*(PIC_CODE_CW_BINS));
+      tInfo.maxNbitsNeededDeltaCW = sInfo.maxNbitsNeededDeltaCW;
+      tInfo.chrResScalingOffset = sInfo.chrResScalingOffset;
+      tInfo.setUseSliceReshaper(pcSlice->getPicHeader()->getLmcsEnabledFlag());
+      tInfo.setSliceReshapeChromaAdj(pcSlice->getPicHeader()->getLmcsChromaResidualScaleFlag());
+      tInfo.setSliceReshapeModelPresentFlag(true);
+    }
+    else
+    {
+      SliceReshapeInfo& tInfo = m_cReshaper.getSliceReshaperInfo();
+      tInfo.setUseSliceReshaper(false);
+      tInfo.setSliceReshapeChromaAdj(false);
+      tInfo.setSliceReshapeModelPresentFlag(false);
+    }
+    if (pcSlice->getPicHeader()->getLmcsEnabledFlag())
     {
       m_cReshaper.constructReshaper();
     }
@@ -1356,6 +1629,7 @@ bool DecLib::xDecodeSlice(InputNALUnit &nalu, int &iSkipFrame, int iPOCLastDispl
         m_cReshaper.setRecReshaped(false);
       }
     }
+    m_cReshaper.setVPDULoc(-1, -1);
   }
   else
   {
@@ -1369,34 +1643,52 @@ bool DecLib::xDecodeSlice(InputNALUnit &nalu, int &iSkipFrame, int iPOCLastDispl
   m_bFirstSliceInPicture = false;
   m_uiSliceSegmentIdx++;
 
+  pcSlice->freeScaledRefPicList( scaledRefPic );
+
   return false;
 }
 
-#if HEVC_VPS
 void DecLib::xDecodeVPS( InputNALUnit& nalu )
 {
-  VPS* vps = new VPS();
+  m_vps = new VPS();   // the new VPS is kept in the m_vps member; NOTE(review): the previous m_vps value is overwritten here without being freed -- confirm who owns it
   m_HLSReader.setBitstream( &nalu.getBitstream() );
-  m_HLSReader.parseVPS( vps );
-  m_parameterSetManager.storeVPS( vps, nalu.getBitstream().getFifo() );
+  // Conformance check on the NAL unit header: the CHECK fires when TemporalId is non-zero.
+  CHECK( nalu.m_temporalId, "The value of TemporalId of VPS NAL units shall be equal to 0" );
+  // Parse the VPS from the NAL unit's bitstream, then pass it to the parameter set manager along with the bitstream FIFO.
+  m_HLSReader.parseVPS( m_vps );
+  m_parameterSetManager.storeVPS( m_vps, nalu.getBitstream().getFifo());
+}
+
+void DecLib::xDecodeDPS( InputNALUnit& nalu )   // parses a DPS from the given NAL unit and stores it in the parameter set manager
+{
+  DPS* dps = new DPS();
+  m_HLSReader.setBitstream( &nalu.getBitstream() );
+  // Conformance check on the NAL unit header: the CHECK fires when TemporalId is non-zero.
+  CHECK( nalu.m_temporalId, "The value of TemporalId of DPS NAL units shall be equal to 0" );
+  // Parse the DPS from the NAL unit's bitstream, then pass it to the parameter set manager along with the bitstream FIFO.
+  m_HLSReader.parseDPS( dps );
+  m_parameterSetManager.storeDPS( dps, nalu.getBitstream().getFifo() );
 }
-#endif
 
 void DecLib::xDecodeSPS( InputNALUnit& nalu )
 {
   SPS* sps = new SPS();
   m_HLSReader.setBitstream( &nalu.getBitstream() );
-  m_HLSReader.parseSPS( sps );
-  m_parameterSetManager.storeSPS( sps, nalu.getBitstream().getFifo() );
 
+  CHECK( nalu.m_temporalId, "The value of TemporalId of SPS NAL units shall be equal to 0" );   // fires when TemporalId is non-zero
+  // Parse the SPS first; the trace below reads it, and it is stored only afterwards.
+  m_HLSReader.parseSPS( sps );
   DTRACE( g_trace_ctx, D_QP_PER_CTU, "CTU Size: %dx%d", sps->getMaxCUWidth(), sps->getMaxCUHeight() );
+  m_parameterSetManager.storeSPS( sps, nalu.getBitstream().getFifo() );   // NOTE(review): stored last, after sps has been read -- presumably storeSPS() may free an unchanged duplicate (cf. the comment in xDecodeAPS); confirm
 }
 
 void DecLib::xDecodePPS( InputNALUnit& nalu )
 {
   PPS* pps = new PPS();
   m_HLSReader.setBitstream( &nalu.getBitstream() );
-  m_HLSReader.parsePPS( pps );
+  m_HLSReader.parsePPS( pps, &m_parameterSetManager );   // the parser is now also given the parameter set manager
+  pps->setLayerId( nalu.m_nuhLayerId );                  // record the layer id taken from the NAL unit header
+  pps->setTemporalId( nalu.m_temporalId );               // record the TemporalId taken from the NAL unit header
   m_parameterSetManager.storePPS( pps, nalu.getBitstream().getFifo() );
 }
 
@@ -1405,25 +1697,34 @@ void DecLib::xDecodeAPS(InputNALUnit& nalu)
   APS* aps = new APS();
   m_HLSReader.setBitstream(&nalu.getBitstream());
   m_HLSReader.parseAPS(aps);
+  aps->setTemporalId(nalu.m_temporalId);
+  aps->setLayerId( nalu.m_nuhLayerId );
+  m_parameterSetManager.checkAuApsContent( aps, m_accessUnitApsNals );
+  if (aps->getAPSType() == SCALING_LIST_APS)
+  {
+    setScalingListUpdateFlag(true);
+  }
+
+  // aps will be deleted if it was already stored (and did not change),
+  // thus storing it must be the last action.
   m_parameterSetManager.storeAPS(aps, nalu.getBitstream().getFifo());
 }
 bool DecLib::decode(InputNALUnit& nalu, int& iSkipFrame, int& iPOCLastDisplay)
 {
   bool ret;
   // ignore all NAL units of layers > 0
-  if (nalu.m_nuhLayerId > 0)
-  {
-    msg( WARNING, "Warning: found NAL unit with nuh_layer_id equal to %d. Ignoring.\n", nalu.m_nuhLayerId);
-    return false;
-  }
+
+  m_accessUnitNals.push_back( std::pair<NalUnitType, int>( nalu.m_nalUnitType, nalu.m_temporalId ) );
 
   switch (nalu.m_nalUnitType)
   {
-#if HEVC_VPS
     case NAL_UNIT_VPS:
       xDecodeVPS( nalu );
       return false;
-#endif
+
+    case NAL_UNIT_DPS:
+      xDecodeDPS( nalu );
+      return false;
 
     case NAL_UNIT_SPS:
       xDecodeSPS( nalu );
@@ -1432,7 +1733,13 @@ bool DecLib::decode(InputNALUnit& nalu, int& iSkipFrame, int& iPOCLastDisplay)
     case NAL_UNIT_PPS:
       xDecodePPS( nalu );
       return false;
-    case NAL_UNIT_APS:
+
+    case NAL_UNIT_PH:
+      xDecodePicHeader(nalu);
+      return !m_bFirstSliceInPicture;
+
+    case NAL_UNIT_PREFIX_APS:
+    case NAL_UNIT_SUFFIX_APS:
       xDecodeAPS(nalu);
       return false;
 
@@ -1444,7 +1751,7 @@ bool DecLib::decode(InputNALUnit& nalu, int& iSkipFrame, int& iPOCLastDisplay)
     case NAL_UNIT_SUFFIX_SEI:
       if (m_pcPic)
       {
-        m_seiReader.parseSEImessage( &(nalu.getBitstream()), m_pcPic->SEIs, nalu.m_nalUnitType, m_parameterSetManager.getActiveSPS(), m_pDecodedSEIOutputStream );
+        m_seiReader.parseSEImessage( &(nalu.getBitstream()), m_pcPic->SEIs, nalu.m_nalUnitType, nalu.m_temporalId, m_parameterSetManager.getActiveSPS(), m_HRD, m_pDecodedSEIOutputStream );
       }
       else
       {
@@ -1452,47 +1759,22 @@ bool DecLib::decode(InputNALUnit& nalu, int& iSkipFrame, int& iPOCLastDisplay)
       }
       return false;
 
-#if !JVET_M0101_HLS
-    case NAL_UNIT_CODED_SLICE_TRAIL_R:
-    case NAL_UNIT_CODED_SLICE_TRAIL_N:
-    case NAL_UNIT_CODED_SLICE_TSA_R:
-    case NAL_UNIT_CODED_SLICE_TSA_N:
-    case NAL_UNIT_CODED_SLICE_STSA_R:
-    case NAL_UNIT_CODED_SLICE_STSA_N:
-    case NAL_UNIT_CODED_SLICE_BLA_W_LP:
-    case NAL_UNIT_CODED_SLICE_BLA_W_RADL:
-    case NAL_UNIT_CODED_SLICE_BLA_N_LP:
-    case NAL_UNIT_CODED_SLICE_IDR_W_RADL:
-    case NAL_UNIT_CODED_SLICE_IDR_N_LP:
-    case NAL_UNIT_CODED_SLICE_CRA:
-    case NAL_UNIT_CODED_SLICE_RADL_N:
-    case NAL_UNIT_CODED_SLICE_RADL_R:
-    case NAL_UNIT_CODED_SLICE_RASL_N:
-    case NAL_UNIT_CODED_SLICE_RASL_R:
-#else
     case NAL_UNIT_CODED_SLICE_TRAIL:
     case NAL_UNIT_CODED_SLICE_STSA:
     case NAL_UNIT_CODED_SLICE_IDR_W_RADL:
     case NAL_UNIT_CODED_SLICE_IDR_N_LP:
     case NAL_UNIT_CODED_SLICE_CRA:
+    case NAL_UNIT_CODED_SLICE_GDR:
     case NAL_UNIT_CODED_SLICE_RADL:
     case NAL_UNIT_CODED_SLICE_RASL:
-#endif
       ret = xDecodeSlice(nalu, iSkipFrame, iPOCLastDisplay);
-#if JVET_J0090_MEMORY_BANDWITH_MEASURE
-      if ( ret )
-      {
-        m_cacheModel.reportFrame( );
-        m_cacheModel.accumulateFrame( );
-        m_cacheModel.clear( );
-      }
-#endif
       return ret;
 
     case NAL_UNIT_EOS:
       m_associatedIRAPType = NAL_UNIT_INVALID;
       m_pocCRA = 0;
       m_pocRandomAccess = MAX_INT;
+      m_prevLayerID = MAX_INT;
       m_prevPOC = MAX_INT;
       m_prevSliceSkipped = false;
       m_skippedPOC = 0;
@@ -1503,99 +1785,28 @@ bool DecLib::decode(InputNALUnit& nalu, int& iSkipFrame, int& iPOCLastDisplay)
         AUDReader audReader;
         uint32_t picType;
         audReader.parseAccessUnitDelimiter(&(nalu.getBitstream()),picType);
-        msg( NOTICE, "Note: found NAL_UNIT_ACCESS_UNIT_DELIMITER\n");
-        return false;
+        return !m_bFirstSliceInPicture;
       }
 
     case NAL_UNIT_EOB:
       return false;
 
-    case NAL_UNIT_FILLER_DATA:
-      {
-        FDReader fdReader;
-        uint32_t size;
-        fdReader.parseFillerData(&(nalu.getBitstream()),size);
-        msg( NOTICE, "Note: found NAL_UNIT_FILLER_DATA with %u bytes payload.\n", size);
-        return false;
-      }
-#if !JVET_M0101_HLS
-    case NAL_UNIT_RESERVED_VCL_N10:
-    case NAL_UNIT_RESERVED_VCL_R11:
-    case NAL_UNIT_RESERVED_VCL_N12:
-    case NAL_UNIT_RESERVED_VCL_R13:
-    case NAL_UNIT_RESERVED_VCL_N14:
-    case NAL_UNIT_RESERVED_VCL_R15:
-
-    case NAL_UNIT_RESERVED_IRAP_VCL22:
-    case NAL_UNIT_RESERVED_IRAP_VCL23:
-
-    case NAL_UNIT_RESERVED_VCL24:
-    case NAL_UNIT_RESERVED_VCL25:
-    case NAL_UNIT_RESERVED_VCL26:
-    case NAL_UNIT_RESERVED_VCL27:
-    case NAL_UNIT_RESERVED_VCL28:
-    case NAL_UNIT_RESERVED_VCL29:
-    case NAL_UNIT_RESERVED_VCL30:
-    case NAL_UNIT_RESERVED_VCL31:
-#if !HEVC_VPS
-    case NAL_UNIT_RESERVED_32:
-#endif
-#else
-    case NAL_UNIT_RESERVED_VCL_4:
-    case NAL_UNIT_RESERVED_VCL_5:
-    case NAL_UNIT_RESERVED_VCL_6:
-    case NAL_UNIT_RESERVED_VCL_7:
-
-    case NAL_UNIT_RESERVED_IRAP_VCL11:
-    case NAL_UNIT_RESERVED_IRAP_VCL12:
-    case NAL_UNIT_RESERVED_IRAP_VCL13:
-
-    case NAL_UNIT_RESERVED_VCL14:
-#if !HEVC_VPS
-    case NAL_UNIT_RESERVED_VCL15:
-#endif
-#endif
+    case NAL_UNIT_RESERVED_IRAP_VCL_11:
+    case NAL_UNIT_RESERVED_IRAP_VCL_12:
       msg( NOTICE, "Note: found reserved VCL NAL unit.\n");
       xParsePrefixSEIsForUnknownVCLNal();
       return false;
-#if !JVET_M0101_HLS
-    case NAL_UNIT_RESERVED_NVCL41:
-    case NAL_UNIT_RESERVED_NVCL42:
-    case NAL_UNIT_RESERVED_NVCL43:
-    case NAL_UNIT_RESERVED_NVCL44:
-    case NAL_UNIT_RESERVED_NVCL45:
-    case NAL_UNIT_RESERVED_NVCL46:
-    case NAL_UNIT_RESERVED_NVCL47:
-#else
-    case NAL_UNIT_RESERVED_NVCL16:
-    case NAL_UNIT_RESERVED_NVCL26:
-    case NAL_UNIT_RESERVED_NVCL27:
-#endif
+    case NAL_UNIT_RESERVED_VCL_4:
+    case NAL_UNIT_RESERVED_VCL_5:
+    case NAL_UNIT_RESERVED_VCL_6:
+    case NAL_UNIT_RESERVED_NVCL_26:
+    case NAL_UNIT_RESERVED_NVCL_27:
       msg( NOTICE, "Note: found reserved NAL unit.\n");
       return false;
-#if !JVET_M0101_HLS
-    case NAL_UNIT_UNSPECIFIED_48:
-    case NAL_UNIT_UNSPECIFIED_49:
-    case NAL_UNIT_UNSPECIFIED_50:
-    case NAL_UNIT_UNSPECIFIED_51:
-    case NAL_UNIT_UNSPECIFIED_52:
-    case NAL_UNIT_UNSPECIFIED_53:
-    case NAL_UNIT_UNSPECIFIED_54:
-    case NAL_UNIT_UNSPECIFIED_55:
-    case NAL_UNIT_UNSPECIFIED_56:
-    case NAL_UNIT_UNSPECIFIED_57:
-    case NAL_UNIT_UNSPECIFIED_58:
-    case NAL_UNIT_UNSPECIFIED_59:
-    case NAL_UNIT_UNSPECIFIED_60:
-    case NAL_UNIT_UNSPECIFIED_61:
-    case NAL_UNIT_UNSPECIFIED_62:
-    case NAL_UNIT_UNSPECIFIED_63:
-#else
     case NAL_UNIT_UNSPECIFIED_28:
     case NAL_UNIT_UNSPECIFIED_29:
     case NAL_UNIT_UNSPECIFIED_30:
     case NAL_UNIT_UNSPECIFIED_31:
-#endif
       msg( NOTICE, "Note: found unspecified NAL unit.\n");
       return false;
     default:
@@ -1606,21 +1817,6 @@ bool DecLib::decode(InputNALUnit& nalu, int& iSkipFrame, int& iPOCLastDisplay)
   return false;
 }
 
-#if !JVET_M0101_HLS
-/** Function for checking if picture should be skipped because of association with a previous BLA picture
- *  This function skips all TFD pictures that follow a BLA picture in decoding order and precede it in output order.
- */
-bool DecLib::isSkipPictureForBLA( int& iPOCLastDisplay )
-{
-  if( ( m_associatedIRAPType == NAL_UNIT_CODED_SLICE_BLA_N_LP || m_associatedIRAPType == NAL_UNIT_CODED_SLICE_BLA_W_LP || m_associatedIRAPType == NAL_UNIT_CODED_SLICE_BLA_W_RADL ) &&
-        m_apcSlicePilot->getPOC() < m_pocCRA && ( m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_RASL_R || m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_RASL_N ) )
-  {
-    iPOCLastDisplay++;
-    return true;
-  }
-  return false;
-}
-#endif
 
 /** Function for checking if picture should be skipped because of random access. This function checks the skipping of pictures in the case of -s option random access.
  *  All pictures prior to the random access point indicated by the counter iSkipFrame are skipped.
@@ -1638,24 +1834,17 @@ bool DecLib::isRandomAccessSkipPicture( int& iSkipFrame, int& iPOCLastDisplay )
     iSkipFrame--;   // decrement the counter
     return true;
   }
+  else if ( m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_W_RADL || m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_N_LP )
+  {
+    m_pocRandomAccess = -MAX_INT; // no need to skip the reordered pictures in IDR, they are decodable.
+  }
   else if (m_pocRandomAccess == MAX_INT) // start of random access point, m_pocRandomAccess has not been set yet.
   {
-#if !JVET_M0101_HLS
-    if (   m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA
-        || m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_W_LP
-        || m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_N_LP
-        || m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_W_RADL )
-#else
     if (m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA )
-#endif
     {
       // set the POC random access since we need to skip the reordered pictures in the case of CRA/CRANT/BLA/BLANT.
       m_pocRandomAccess = m_apcSlicePilot->getPOC();
     }
-    else if ( m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_W_RADL || m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_N_LP )
-    {
-      m_pocRandomAccess = -MAX_INT; // no need to skip the reordered pictures in IDR, they are decodable.
-    }
     else
     {
       if(!m_warningMessageSkipPicture)
@@ -1667,11 +1856,7 @@ bool DecLib::isRandomAccessSkipPicture( int& iSkipFrame, int& iPOCLastDisplay )
     }
   }
   // skip the reordered pictures, if necessary
-#if !JVET_M0101_HLS
-  else if (m_apcSlicePilot->getPOC() < m_pocRandomAccess && (m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_RASL_R || m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_RASL_N))
-#else
   else if (m_apcSlicePilot->getPOC() < m_pocRandomAccess && (m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_RASL))
-#endif
   {
     iPOCLastDisplay++;
     return true;
@@ -1680,7 +1865,49 @@ bool DecLib::isRandomAccessSkipPicture( int& iSkipFrame, int& iPOCLastDisplay )
   return false;
 }
 
-
-
+/** Function for checking a NAL unit type against the NAL unit constraint flags of the active parameter sets.
+ *  The active SPS (when it has a profile/tier/level) is checked first, then every profile/tier/level entry of the active DPS. */
+void DecLib::checkNalUnitConstraints( uint32_t naluType )
+{
+  // constraint info reachable through the active SPS, when both the SPS and its profile/tier/level exist
+  const SPS *activeSps = m_parameterSetManager.getActiveSPS();
+  if (activeSps != NULL && activeSps->getProfileTierLevel() != NULL)
+  {
+    xCheckNalUnitConstraintFlags( activeSps->getProfileTierLevel()->getConstraintInfo(), naluType );
+  }
+  // constraint info of each profile/tier/level entry of the active DPS; the loop is skipped without an active DPS
+  const DPS *activeDps = m_parameterSetManager.getActiveDPS();
+  for (int ptlIdx = 0; activeDps != NULL && ptlIdx < activeDps->getNumPTLs(); ptlIdx++)
+  {
+    ProfileTierLevel ptlEntry = activeDps->getProfileTierLevel(ptlIdx);
+    xCheckNalUnitConstraintFlags( ptlEntry.getConstraintInfo(), naluType );
+  }
+}
+void DecLib::xCheckNalUnitConstraintFlags( const ConstraintInfo *cInfo, uint32_t naluType )
+{
+  // Each CHECK below fires when a constraint flag is set and the NAL unit is of the type that flag rules out.
+  if (cInfo == NULL)
+    return;   // no constraint info available, nothing to verify
+  CHECK(cInfo->getNoTrailConstraintFlag() && naluType == NAL_UNIT_CODED_SLICE_TRAIL,
+    "Non-conforming bitstream. no_trail_constraint_flag is equal to 1 but bitstream contains NAL unit of type TRAIL_NUT.");
+  CHECK(cInfo->getNoStsaConstraintFlag() && naluType == NAL_UNIT_CODED_SLICE_STSA,
+    "Non-conforming bitstream. no_stsa_constraint_flag is equal to 1 but bitstream contains NAL unit of type STSA_NUT.");
+  CHECK(cInfo->getNoRaslConstraintFlag() && naluType == NAL_UNIT_CODED_SLICE_RASL,
+    "Non-conforming bitstream. no_rasl_constraint_flag is equal to 1 but bitstream contains NAL unit of type RASL_NUT.");
+  CHECK(cInfo->getNoRadlConstraintFlag() && naluType == NAL_UNIT_CODED_SLICE_RADL,
+    "Non-conforming bitstream. no_radl_constraint_flag is equal to 1 but bitstream contains NAL unit of type RADL_NUT.");
+  CHECK(cInfo->getNoIdrConstraintFlag() && naluType == NAL_UNIT_CODED_SLICE_IDR_W_RADL,
+    "Non-conforming bitstream. no_idr_constraint_flag is equal to 1 but bitstream contains NAL unit of type IDR_W_RADL.");
+  CHECK(cInfo->getNoIdrConstraintFlag() && naluType == NAL_UNIT_CODED_SLICE_IDR_N_LP,
+    "Non-conforming bitstream. no_idr_constraint_flag is equal to 1 but bitstream contains NAL unit of type IDR_N_LP.");
+  CHECK(cInfo->getNoCraConstraintFlag() && naluType == NAL_UNIT_CODED_SLICE_CRA,
+    "Non-conforming bitstream. no_cra_constraint_flag is equal to 1 but bitstream contains NAL unit of type CRA_NUT.");
+  CHECK(cInfo->getNoGdrConstraintFlag() && naluType == NAL_UNIT_CODED_SLICE_GDR,
+    "Non-conforming bitstream. no_gdr_constraint_flag is equal to 1 but bitstream contains NAL unit of type GDR_NUT.");
+  CHECK(cInfo->getNoApsConstraintFlag() && naluType == NAL_UNIT_PREFIX_APS,
+    "Non-conforming bitstream. no_aps_constraint_flag is equal to 1 but bitstream contains NAL unit of type APS_PREFIX_NUT.");
+  CHECK(cInfo->getNoApsConstraintFlag() && naluType == NAL_UNIT_SUFFIX_APS,
+    "Non-conforming bitstream. no_aps_constraint_flag is equal to 1 but bitstream contains NAL unit of type APS_SUFFIX_NUT.");
+}
 
 //! \}
diff --git a/source/Lib/DecoderLib/DecLib.h b/source/Lib/DecoderLib/DecLib.h
index 1ea3aacf47749403410a6fed3a3c3c4640871773..d9490513428e302c9b3d7d8a3593acff15a07594 100644
--- a/source/Lib/DecoderLib/DecLib.h
+++ b/source/Lib/DecoderLib/DecLib.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -77,16 +77,19 @@ private:
 
   PicList                 m_cListPic;         //  Dynamic buffer
   ParameterSetManager     m_parameterSetManager;  // storage for parameter sets
+  PicHeader               m_picHeader;            // picture header
   Slice*                  m_apcSlicePilot;
 
 
   SEIMessages             m_SEIs; ///< List of SEI messages that have been received before the first slice and between slices, excluding prefix SEIs...
 
+
   // functional classes
   IntraPrediction         m_cIntraPred;
   InterPrediction         m_cInterPred;
   TrQuant                 m_cTrQuant;
   DecSlice                m_cSliceDecoder;
+  TrQuant                 m_cTrQuantScalingList;
   DecCu                   m_cCuDecoder;
   HLSyntaxReader          m_HLSReader;
   CABACDecoder            m_CABACDecoder;
@@ -95,17 +98,16 @@ private:
   SampleAdaptiveOffset    m_cSAO;
   AdaptiveLoopFilter      m_cALF;
   Reshape                 m_cReshaper;                        ///< reshaper class
+  HRD                     m_HRD;
   // decoder side RD cost computation
   RdCost                  m_cRdCost;                      ///< RD cost computation class
 #if JVET_J0090_MEMORY_BANDWITH_MEASURE
   CacheModel              m_cacheModel;
-#endif
-#if !JVET_M0101_HLS
-  bool isSkipPictureForBLA(int& iPOCLastDisplay);
 #endif
   bool isRandomAccessSkipPicture(int& iSkipFrame,  int& iPOCLastDisplay);
   Picture*                m_pcPic;
   uint32_t                    m_uiSliceSegmentIdx;
+  uint32_t                m_prevLayerID;
   int                     m_prevPOC;
   int                     m_prevTid0POC;
   bool                    m_bFirstSliceInPicture;
@@ -115,7 +117,8 @@ private:
   bool                    m_bFirstSliceInBitstream;
   int                     m_lastPOCNoOutputPriorPics;
   bool                    m_isNoOutputPriorPics;
-  bool                    m_craNoRaslOutputFlag;    //value of variable NoRaslOutputFlag of the last CRA pic
+  bool                    m_lastNoIncorrectPicOutputFlag;    //value of variable NoIncorrectPicOutputFlag of the last CRA / GDR pic
+  int                     m_sliceLmcsApsId;         //value of LmcsApsId, constraint is same id for all slices in one picture
   std::ostream           *m_pDecodedSEIOutputStream;
 
   int                     m_decodedPictureHashSEIEnabled;  ///< Checksum(3)/CRC(2)/MD5(1)/disable(0) acting on decoded picture hash SEI message
@@ -126,6 +129,14 @@ private:
   std::list<InputNALUnit*> m_prefixSEINALUs; /// Buffered up prefix SEI NAL Units.
   int                     m_debugPOC;
   int                     m_debugCTU;
+
+  std::vector<std::pair<NalUnitType, int>> m_accessUnitNals;
+  std::vector<int> m_accessUnitApsNals;
+
+  VPS*                    m_vps;
+  bool                    m_scalingListUpdateFlag;
+  int                     m_PreScalingListAPSId;
+
 public:
   DecLib();
   virtual ~DecLib();
@@ -147,10 +158,13 @@ public:
   void  finishPicture(int& poc, PicList*& rpcListPic, MsgLevel msgl = INFO);
   void  finishPictureLight(int& poc, PicList*& rpcListPic );
   void  checkNoOutputPriorPics (PicList* rpcListPic);
+  void  checkNalUnitConstraints( uint32_t naluType );
+
 
   bool  getNoOutputPriorPicsFlag () const   { return m_isNoOutputPriorPics; }
   void  setNoOutputPriorPicsFlag (bool val) { m_isNoOutputPriorPics = val; }
   void  setFirstSliceInPicture (bool val)  { m_bFirstSliceInPicture = val; }
+  bool  getFirstSliceInPicture () const  { return m_bFirstSliceInPicture; }
   bool  getFirstSliceInSequence () const   { return m_bFirstSliceInSequence; }
   void  setFirstSliceInSequence (bool val) { m_bFirstSliceInSequence = val; }
   void  setDecodedSEIMessageOutputStream(std::ostream *pOpStream) { m_pDecodedSEIOutputStream = pOpStream; }
@@ -160,28 +174,40 @@ public:
   void setDebugCTU( int debugCTU )        { m_debugCTU = debugCTU; }
   int  getDebugPOC( )               const { return m_debugPOC; };
   void setDebugPOC( int debugPOC )        { m_debugPOC = debugPOC; };
+  void resetAccessUnitNals()              { m_accessUnitNals.clear();    }
+  void resetAccessUnitApsNals()           { m_accessUnitApsNals.clear(); }
+  bool isSliceNaluFirstInAU( bool newPicture, InputNALUnit &nalu );
+
+  const VPS* getVPS()                     { return m_vps; }
+  void  initScalingList()
+  {
+    m_cTrQuantScalingList.init(nullptr, MAX_TB_SIZEY, false, false, false, false);
+  }
+  bool  getScalingListUpdateFlag() { return m_scalingListUpdateFlag; }
+  void  setScalingListUpdateFlag(bool b) { m_scalingListUpdateFlag = b; }
+  int   getPreScalingListAPSId() { return m_PreScalingListAPSId; }
+  void  setPreScalingListAPSId(int id) { m_PreScalingListAPSId = id; }
+
 protected:
   void  xUpdateRasInit(Slice* slice);
 
-  Picture * xGetNewPicBuffer(const SPS &sps, const PPS &pps, const uint32_t temporalLayer);
-  void  xCreateLostPicture (int iLostPOC);
-
-  void      xActivateParameterSets();
+  Picture * xGetNewPicBuffer( const SPS &sps, const PPS &pps, const uint32_t temporalLayer, const int layerId );
+  void  xCreateLostPicture( int iLostPOC, const int layerId );
+  void  xCreateUnavailablePicture(int iUnavailablePoc, bool longTermFlag, const int layerId, const bool interLayerRefPicFlag);
+  void  xActivateParameterSets( const int layerId );
+  void      xDecodePicHeader( InputNALUnit& nalu );
   bool      xDecodeSlice(InputNALUnit &nalu, int &iSkipFrame, int iPOCLastDisplay);
-#if HEVC_VPS
   void      xDecodeVPS( InputNALUnit& nalu );
-#endif
+  void      xDecodeDPS( InputNALUnit& nalu );
   void      xDecodeSPS( InputNALUnit& nalu );
   void      xDecodePPS( InputNALUnit& nalu );
   void      xDecodeAPS(InputNALUnit& nalu);
-#if !JVET_M0101_HLS
-  void      xUpdatePreviousTid0POC( Slice *pSlice ) { if ((pSlice->getTLayer()==0) && (pSlice->isReferenceNalu() && (pSlice->getNalUnitType()!=NAL_UNIT_CODED_SLICE_RASL_R)&& (pSlice->getNalUnitType()!=NAL_UNIT_CODED_SLICE_RADL_R))) { m_prevTid0POC=pSlice->getPOC(); } }
-#else
   void      xUpdatePreviousTid0POC(Slice *pSlice) { if ((pSlice->getTLayer() == 0) && (pSlice->getNalUnitType()!=NAL_UNIT_CODED_SLICE_RASL) && (pSlice->getNalUnitType()!=NAL_UNIT_CODED_SLICE_RADL))  { m_prevTid0POC = pSlice->getPOC(); }  }
-#endif
   void      xParsePrefixSEImessages();
   void      xParsePrefixSEIsForUnknownVCLNal();
 
+  void  xCheckNalUnitConstraintFlags( const ConstraintInfo *cInfo, uint32_t naluType );
+
 };// END CLASS DEFINITION DecLib
 
 
diff --git a/source/Lib/DecoderLib/DecSlice.cpp b/source/Lib/DecoderLib/DecSlice.cpp
index 9e3b90fb681a25ecf7b4abe357ae7302b70d4372..8adadb3a22e641dc8db273f76428105e7b0f7380 100644
--- a/source/Lib/DecoderLib/DecSlice.cpp
+++ b/source/Lib/DecoderLib/DecSlice.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -77,9 +77,6 @@ void DecSlice::decompressSlice( Slice* slice, InputBitstream* bitstream, int deb
 
   const SPS*     sps          = slice->getSPS();
   Picture*       pic          = slice->getPic();
-#if HEVC_TILES_WPP
-  const TileMap& tileMap      = *pic->tileMap;
-#endif
   CABACReader&   cabacReader  = *m_CABACDecoder->getCABACReader( 0 );
 
   // setup coding structure
@@ -87,18 +84,23 @@ void DecSlice::decompressSlice( Slice* slice, InputBitstream* bitstream, int deb
   cs.slice            = slice;
   cs.sps              = sps;
   cs.pps              = slice->getPPS();
-  cs.aps              = slice->getAPS();
-#if HEVC_VPS
-  cs.vps              = slice->getVPS();
-#endif
+  memcpy(cs.alfApss, slice->getAlfAPSs(), sizeof(cs.alfApss));
+
+  cs.lmcsAps          = slice->getPicHeader()->getLmcsAPS();
+  cs.scalinglistAps   = slice->getPicHeader()->getScalingListAPS();
+
   cs.pcv              = slice->getPPS()->pcv;
   cs.chromaQpAdj      = 0;
 
   cs.picture->resizeSAO(cs.pcv->sizeInCtus, 0);
 
-  if (slice->getSliceCurStartCtuTsAddr() == 0)
+  cs.resetPrevPLT(cs.prevPLT);
+
+  if (slice->getFirstCtuRsAddrInSlice() == 0)
   {
     cs.picture->resizeAlfCtuEnableFlag( cs.pcv->sizeInCtus );
+    cs.picture->resizeAlfCtbFilterIndex(cs.pcv->sizeInCtus);
+    cs.picture->resizeAlfCtuAlternative( cs.pcv->sizeInCtus );
   }
 
   const unsigned numSubstreams = slice->getNumberOfSubstreamSizes() + 1;
@@ -111,85 +113,33 @@ void DecSlice::decompressSlice( Slice* slice, InputBitstream* bitstream, int deb
     ppcSubstreams[idx] = bitstream->extractSubstream( idx+1 < numSubstreams ? ( slice->getSubstreamSize(idx) << 3 ) : bitstream->getNumBitsLeft() );
   }
 
-#if HEVC_DEPENDENT_SLICES
-  const int       startCtuTsAddr          = slice->getSliceSegmentCurStartCtuTsAddr();
-#else
-  const int       startCtuTsAddr          = slice->getSliceCurStartCtuTsAddr();
-#endif
-#if HEVC_DEPENDENT_SLICES
-  const int       startCtuRsAddr          = startCtuTsAddr;
-#elif HEVC_TILES_WPP
-  const int       startCtuRsAddr          = tileMap.getCtuTsToRsAddrMap(startCtuTsAddr);
-#endif
-  const unsigned  numCtusInFrame          = cs.pcv->sizeInCtus;
   const unsigned  widthInCtus             = cs.pcv->widthInCtus;
-#if HEVC_DEPENDENT_SLICES
-  const bool      depSliceSegmentsEnabled = cs.pps->getDependentSliceSegmentsEnabledFlag();
-#endif
-#if HEVC_TILES_WPP
   const bool      wavefrontsEnabled       = cs.pps->getEntropyCodingSyncEnabledFlag();
-#endif
 
   cabacReader.initBitstream( ppcSubstreams[0] );
   cabacReader.initCtxModels( *slice );
 
   // Quantization parameter
-#if HEVC_DEPENDENT_SLICES
-  if(!slice->getDependentSliceSegmentFlag())
-  {
-#endif
     pic->m_prevQP[0] = pic->m_prevQP[1] = slice->getSliceQp();
-#if HEVC_DEPENDENT_SLICES
-  }
-#endif
   CHECK( pic->m_prevQP[0] == std::numeric_limits<int>::max(), "Invalid previous QP" );
 
   DTRACE( g_trace_ctx, D_HEADER, "=========== POC: %d ===========\n", slice->getPOC() );
 
-  // The first CTU of the slice is the first coded substream, but the global substream number, as calculated by getSubstreamForCtuAddr may be higher.
-  // This calculates the common offset for all substreams in this slice.
-#if HEVC_DEPENDENT_SLICES
-  const unsigned subStreamOffset = tileMap.getSubstreamForCtuAddr( startCtuRsAddr, true, slice );
-#elif HEVC_TILES_WPP
-  const unsigned  subStreamOffset         = tileMap.getSubstreamForCtuAddr(startCtuRsAddr, true, slice);
-#endif
 
-#if HEVC_DEPENDENT_SLICES
-  if( depSliceSegmentsEnabled )
-  {
-    // modify initial contexts with previous slice segment if this is a dependent slice.
-    const unsigned  startTileIdx          = tileMap.getTileIdxMap(startCtuRsAddr);
-    const Tile&     currentTile           = tileMap.tiles[startTileIdx];
-    const unsigned  firstCtuRsAddrOfTile  = currentTile.getFirstCtuRsAddr();
-    if( slice->getDependentSliceSegmentFlag() && startCtuRsAddr != firstCtuRsAddrOfTile )
-    {
-      if( currentTile.getTileWidthInCtus() >= 2 || !wavefrontsEnabled )
-      {
-        cabacReader.getCtx() = m_lastSliceSegmentEndContextState;
-      }
-    }
-  }
-#endif
   // for every CTU in the slice segment...
-  bool isLastCtuOfSliceSegment = false;
-  for( unsigned ctuTsAddr = startCtuTsAddr; !isLastCtuOfSliceSegment && ctuTsAddr < numCtusInFrame; ctuTsAddr++ )
+  unsigned subStrmId = 0;
+  for( unsigned ctuIdx = 0; ctuIdx < slice->getNumCtuInSlice(); ctuIdx++ )
   {
-#if HEVC_TILES_WPP
-    const unsigned  ctuRsAddr             = tileMap.getCtuTsToRsAddrMap(ctuTsAddr);
-    const Tile&     currentTile           = tileMap.tiles[ tileMap.getTileIdxMap(ctuRsAddr) ];
-    const unsigned  firstCtuRsAddrOfTile  = currentTile.getFirstCtuRsAddr();
-    const unsigned  tileXPosInCtus        = firstCtuRsAddrOfTile % widthInCtus;
-    const unsigned  tileYPosInCtus        = firstCtuRsAddrOfTile / widthInCtus;
-#else
-    const unsigned  ctuRsAddr             = ctuTsAddr;
-#endif
-    const unsigned  ctuXPosInCtus         = ctuRsAddr % widthInCtus;
-    const unsigned  ctuYPosInCtus         = ctuRsAddr / widthInCtus;
-#if HEVC_TILES_WPP
-    const unsigned  subStrmId             = tileMap.getSubstreamForCtuAddr( ctuRsAddr, true, slice ) - subStreamOffset;
-#else
-    const unsigned  subStrmId             = 0;
-#endif
+    const unsigned  ctuRsAddr       = slice->getCtuAddrInSlice(ctuIdx);
+    const unsigned  ctuXPosInCtus   = ctuRsAddr % widthInCtus;
+    const unsigned  ctuYPosInCtus   = ctuRsAddr / widthInCtus;    
+    const unsigned  tileColIdx      = slice->getPPS()->ctuToTileCol( ctuXPosInCtus );
+    const unsigned  tileRowIdx      = slice->getPPS()->ctuToTileRow( ctuYPosInCtus );
+    const unsigned  tileXPosInCtus  = slice->getPPS()->getTileColumnBd( tileColIdx );
+    const unsigned  tileYPosInCtus  = slice->getPPS()->getTileRowBd( tileRowIdx );
+    const unsigned  tileColWidth    = slice->getPPS()->getTileColumnWidth( tileColIdx );
+    const unsigned  tileRowHeight   = slice->getPPS()->getTileRowHeight( tileRowIdx );
+    const unsigned  tileIdx         = slice->getPPS()->getTileIdx( ctuXPosInCtus, ctuYPosInCtus);
     const unsigned  maxCUSize             = sps->getMaxCUWidth();
     Position pos( ctuXPosInCtus*maxCUSize, ctuYPosInCtus*maxCUSize) ;
     UnitArea ctuArea(cs.area.chromaFormat, Area( pos.x, pos.y, maxCUSize, maxCUSize ) );
@@ -198,44 +148,43 @@ void DecSlice::decompressSlice( Slice* slice, InputBitstream* bitstream, int deb
 
     cabacReader.initBitstream( ppcSubstreams[subStrmId] );
 
-#if HEVC_TILES_WPP
     // set up CABAC contexts' state for this CTU
-    if( ctuRsAddr == firstCtuRsAddrOfTile )
+    if( ctuXPosInCtus == tileXPosInCtus && ctuYPosInCtus == tileYPosInCtus )
     {
-      if( ctuTsAddr != startCtuTsAddr ) // if it is the first CTU, then the entropy coder has already been reset
+      if( ctuIdx != 0 ) // if it is the first CTU, then the entropy coder has already been reset
       {
         cabacReader.initCtxModels( *slice );
+        cs.resetPrevPLT(cs.prevPLT);
       }
       pic->m_prevQP[0] = pic->m_prevQP[1] = slice->getSliceQp();
     }
     else if( ctuXPosInCtus == tileXPosInCtus && wavefrontsEnabled )
     {
-      // Synchronize cabac probabilities with upper-right CTU if it's available and at the start of a line.
-      if( ctuTsAddr != startCtuTsAddr ) // if it is the first CTU, then the entropy coder has already been reset
+      // Synchronize cabac probabilities with top CTU if it's available and at the start of a line.
+      if( ctuIdx != 0 ) // if it is the first CTU, then the entropy coder has already been reset
       {
         cabacReader.initCtxModels( *slice );
+        cs.resetPrevPLT(cs.prevPLT);
       }
-      if( cs.getCURestricted( pos.offset(maxCUSize, -1), slice->getIndependentSliceIdx(), tileMap.getTileIdxMap( pos ), CH_L ) )
+      if( cs.getCURestricted( pos.offset(0, -1), pos, slice->getIndependentSliceIdx(), tileIdx, CH_L ) )
       {
-        // Top-right is available, so use it.
+        // Top is available, so use it.
         cabacReader.getCtx() = m_entropyCodingSyncContextState;
       }
       pic->m_prevQP[0] = pic->m_prevQP[1] = slice->getSliceQp();
     }
-#endif
 
-    bool updateGbiCodingOrder = cs.slice->getSliceType() == B_SLICE && ctuTsAddr == startCtuTsAddr;
-    if(updateGbiCodingOrder)
+    bool updateBcwCodingOrder = cs.slice->getSliceType() == B_SLICE && ctuIdx == 0;
+    if(updateBcwCodingOrder)
     {
-      resetGbiCodingOrder(true, cs);
+      resetBcwCodingOrder(true, cs);
     }
 
-    if ((cs.slice->getSliceType() != I_SLICE || cs.sps->getIBCFlag()) && ctuXPosInCtus == 0)
+    if ((cs.slice->getSliceType() != I_SLICE || cs.sps->getIBCFlag()) && ctuXPosInCtus == tileXPosInCtus)
     {
       cs.motionLut.lut.resize(0);
       cs.motionLut.lutIbc.resize(0);
-      cs.motionLut.lutShare.resize(0);
-      cs.motionLut.lutShareIbc.resize(0);
+      cs.resetIBCBuffer = true;
     }
 
     if( !cs.slice->isIntra() )
@@ -245,39 +194,28 @@ void DecSlice::decompressSlice( Slice* slice, InputBitstream* bitstream, int deb
 
     if( ctuRsAddr == debugCTU )
     {
-      isLastCtuOfSliceSegment = true; // get out here
       break;
     }
-    isLastCtuOfSliceSegment = cabacReader.coding_tree_unit( cs, ctuArea, pic->m_prevQP, ctuRsAddr );
+    cabacReader.coding_tree_unit( cs, ctuArea, pic->m_prevQP, ctuRsAddr );
 
     m_pcCuDecoder->decompressCtu( cs, ctuArea );
 
-#if HEVC_TILES_WPP
-    if( ctuXPosInCtus == tileXPosInCtus+1 && wavefrontsEnabled )
+    if( ctuXPosInCtus == tileXPosInCtus && wavefrontsEnabled )
     {
       m_entropyCodingSyncContextState = cabacReader.getCtx();
     }
-#endif
 
 
-    if( isLastCtuOfSliceSegment )
+    if( ctuIdx == slice->getNumCtuInSlice()-1 )
     {
+      unsigned binVal = cabacReader.terminating_bit();
+      CHECK( !binVal, "Expecting a terminating bit" );
 #if DECODER_CHECK_SUBSTREAM_AND_SLICE_TRAILING_BYTES
       cabacReader.remaining_bytes( false );
-#endif
-#if HEVC_DEPENDENT_SLICES
-      if( !slice->getDependentSliceSegmentFlag() )
-      {
-#endif
-        slice->setSliceCurEndCtuTsAddr( ctuTsAddr+1 );
-#if HEVC_DEPENDENT_SLICES
-      }
-      slice->setSliceSegmentCurEndCtuTsAddr( ctuTsAddr+1 );
 #endif
     }
-#if HEVC_TILES_WPP
-    else if( ( ctuXPosInCtus + 1 == tileXPosInCtus + currentTile.getTileWidthInCtus () ) &&
-             ( ctuYPosInCtus + 1 == tileYPosInCtus + currentTile.getTileHeightInCtus() || wavefrontsEnabled ) )
+    else if( ( ctuXPosInCtus + 1 == tileXPosInCtus + tileColWidth ) &&
+             ( ctuYPosInCtus + 1 == tileYPosInCtus + tileRowHeight || wavefrontsEnabled ) )
     {
       // The sub-stream/stream should be terminated after this CTU.
       // (end of slice-segment, end of tile, end of wavefront-CTU-row)
@@ -286,17 +224,10 @@ void DecSlice::decompressSlice( Slice* slice, InputBitstream* bitstream, int deb
 #if DECODER_CHECK_SUBSTREAM_AND_SLICE_TRAILING_BYTES
       cabacReader.remaining_bytes( true );
 #endif
+      subStrmId++;
     }
-#endif
   }
-  CHECK( !isLastCtuOfSliceSegment, "Last CTU of slice segment not signalled as such" );
 
-#if HEVC_DEPENDENT_SLICES
-  if( depSliceSegmentsEnabled )
-  {
-    m_lastSliceSegmentEndContextState = cabacReader.getCtx();  //ctx end of dep.slice
-  }
-#endif
   // deallocate all created substreams, including internal buffers.
   for( auto substr: ppcSubstreams )
   {
diff --git a/source/Lib/DecoderLib/DecSlice.h b/source/Lib/DecoderLib/DecSlice.h
index c34b8a4d570f32472b89dc9a95d367b80d6f24e4..1ff2a2282be31abb227e326a9167d515ffffe64d 100644
--- a/source/Lib/DecoderLib/DecSlice.h
+++ b/source/Lib/DecoderLib/DecSlice.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -62,12 +62,7 @@ private:
   CABACDecoder*   m_CABACDecoder;
   DecCu*          m_pcCuDecoder;
 
-#if HEVC_DEPENDENT_SLICES
-  Ctx             m_lastSliceSegmentEndContextState;    ///< context storage for state at the end of the previous slice-segment (used for dependent slices only).
-#endif
-#if HEVC_TILES_WPP
   Ctx             m_entropyCodingSyncContextState;      ///< context storage for state of contexts at the wavefront/WPP/entropy-coding-sync second CTU of tile-row
-#endif
 
 public:
   DecSlice();
diff --git a/source/Lib/DecoderLib/NALread.cpp b/source/Lib/DecoderLib/NALread.cpp
index 7b507b99fbe67d13581ea4bdcf2b7bc48f749907..07dddd03f67e8834f26b88a790f14d2d247b4b5b 100644
--- a/source/Lib/DecoderLib/NALread.cpp
+++ b/source/Lib/DecoderLib/NALread.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -110,11 +110,15 @@ static void convertPayloadToRBSP(vector<uint8_t>& nalUnitBuf, InputBitstream *bi
 static void xTraceNalUnitHeader(InputNALUnit& nalu)
 {
   DTRACE( g_trace_ctx, D_NALUNITHEADER, "*********** NAL UNIT (%s) ***********\n", nalUnitTypeToString(nalu.m_nalUnitType) );
-
-  DTRACE( g_trace_ctx, D_NALUNITHEADER, "%-50s u(%d)  : %u\n", "forbidden_zero_bit", 1, 0 );
-  DTRACE( g_trace_ctx, D_NALUNITHEADER, "%-50s u(%d)  : %u\n", "nal_unit_type", 6, nalu.m_nalUnitType );
-  DTRACE( g_trace_ctx, D_NALUNITHEADER, "%-50s u(%d)  : %u\n", "nuh_layer_id", 6, nalu.m_nuhLayerId );
+  bool zeroTidRequiredFlag = 0;
+  if((nalu.m_nalUnitType >= 16) && (nalu.m_nalUnitType <= 31)) {
+    zeroTidRequiredFlag = 1;
+  }
+  DTRACE( g_trace_ctx, D_NALUNITHEADER, "%-50s u(%d)  : %u\n", "zero_tid_required_flag", 1, zeroTidRequiredFlag );
   DTRACE( g_trace_ctx, D_NALUNITHEADER, "%-50s u(%d)  : %u\n", "nuh_temporal_id_plus1", 3, nalu.m_temporalId + 1 );
+  DTRACE( g_trace_ctx, D_NALUNITHEADER, "%-50s u(%d)  : %u\n", "nal_unit_type_lsb", 4, (nalu.m_nalUnitType) - (zeroTidRequiredFlag << 4));
+  DTRACE( g_trace_ctx, D_NALUNITHEADER, "%-50s u(%d)  : %u\n", "nuh_layer_id_plus1", 7, nalu.m_nuhLayerId+1);
+  DTRACE( g_trace_ctx, D_NALUNITHEADER, "%-50s u(%d)  : %u\n", "nuh_reserved_zero_bit", 1, 0 );
 }
 #endif
 
@@ -122,13 +126,15 @@ void readNalUnitHeader(InputNALUnit& nalu)
 {
   InputBitstream& bs = nalu.getBitstream();
 
-  bool forbidden_zero_bit = bs.read(1);           // forbidden_zero_bit
-  if(forbidden_zero_bit != 0) { THROW( "Forbidden zero-bit not '0'" );}
-  nalu.m_nalUnitType = (NalUnitType) bs.read(6);  // nal_unit_type
-  nalu.m_nuhLayerId = bs.read(6);                 // nuh_layer_id
-  nalu.m_temporalId = bs.read(3) - 1;             // nuh_temporal_id_plus1
+  nalu.m_forbiddenZeroBit   = bs.read(1);                 // forbidden zero bit
+  nalu.m_nuhReservedZeroBit = bs.read(1);                 // nuh_reserved_zero_bit
+  nalu.m_nuhLayerId         = bs.read(6);                 // nuh_layer_id
+  CHECK(nalu.m_nuhLayerId > 55, "The value of nuh_layer_id shall be in the range of 0 to 55, inclusive");
+  nalu.m_nalUnitType        = (NalUnitType) bs.read(5);   // nal_unit_type
+  nalu.m_temporalId         = bs.read(3) - 1;             // nuh_temporal_id_plus1
+
 #if RExt__DECODER_DEBUG_BIT_STATISTICS
-  CodingStatistics::IncrementStatisticEP(STATS__NAL_UNIT_HEADER_BITS, 1+6+6+3, 0);
+  CodingStatistics::IncrementStatisticEP(STATS__NAL_UNIT_HEADER_BITS, 1+3+4+7+1, 0);
 #endif
 
 #if ENABLE_TRACING
@@ -140,65 +146,11 @@ void readNalUnitHeader(InputNALUnit& nalu)
   {
     if ( nalu.m_temporalId )
     {
-#if HEVC_VPS
-#if !JVET_M0101_HLS
-      CHECK(  nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_BLA_W_LP
-           || nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_BLA_W_RADL
-           || nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_BLA_N_LP
-           || nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_W_RADL
-           || nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_N_LP
-           || nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_CRA
-           || nalu.m_nalUnitType == NAL_UNIT_VPS
-           || nalu.m_nalUnitType == NAL_UNIT_SPS
-           || nalu.m_nalUnitType == NAL_UNIT_EOS
-           || nalu.m_nalUnitType == NAL_UNIT_EOB
-            , "Invalid NAL type" );
-#else
-      CHECK(  nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_W_RADL
-           || nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_N_LP
-           || nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_CRA
-           || nalu.m_nalUnitType == NAL_UNIT_VPS
-           || nalu.m_nalUnitType == NAL_UNIT_SPS
-           || nalu.m_nalUnitType == NAL_UNIT_EOS
-           || nalu.m_nalUnitType == NAL_UNIT_EOB
-           , "Invalid NAL type" );
-#endif
-#else
-#if !JVET_M0101_HLS
-      CHECK(nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_BLA_W_LP
-           || nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_BLA_W_RADL
-           || nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_BLA_N_LP
-           || nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_W_RADL
-           || nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_N_LP
-           || nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_CRA
-           || nalu.m_nalUnitType == NAL_UNIT_SPS
-           || nalu.m_nalUnitType == NAL_UNIT_EOS
-           || nalu.m_nalUnitType == NAL_UNIT_EOB
-           , "Invalid NAL type");
-#else
-      CHECK(nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_W_RADL
-         || nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_N_LP
-         || nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_CRA
-         || nalu.m_nalUnitType == NAL_UNIT_SPS
-         || nalu.m_nalUnitType == NAL_UNIT_EOS
-         || nalu.m_nalUnitType == NAL_UNIT_EOB
-         , "Invalid NAL type");
-#endif
-#endif
-
     }
     else
     {
-#if !JVET_M0101_HLS
-      CHECK(  nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_TSA_R
-           || nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_TSA_N
-           || nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_STSA_R
-           || nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_STSA_N
-            , "Invalid NAL type" );
-#else
       CHECK(nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_STSA
-         , "Invalid NAL type");
-#endif
+        , "When NAL unit type is equal to STSA_NUT, TemporalId shall not be equal to 0"); 
     }
   }
 }
diff --git a/source/Lib/DecoderLib/NALread.h b/source/Lib/DecoderLib/NALread.h
index 1b0ea21b90b26bcb62750b1c4ba0c52177b67a10..1778dc5055e3013313ed48bd88c6cc9bacad224f 100644
--- a/source/Lib/DecoderLib/NALread.h
+++ b/source/Lib/DecoderLib/NALread.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/DecoderLib/SEIread.cpp b/source/Lib/DecoderLib/SEIread.cpp
index 1495228f5d6cece183edb5f59a264976927dcbc1..f69dff292f77bae5f6b8918a117893ab5d3be481 100644
--- a/source/Lib/DecoderLib/SEIread.cpp
+++ b/source/Lib/DecoderLib/SEIread.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -50,6 +50,15 @@
 //! \ingroup DecoderLib
 //! \{
 
+void SEIReader::sei_read_scode(std::ostream *pOS, uint32_t length, int& code, const char *pSymbolName)
+{
+  READ_SCODE(length, code, pSymbolName);
+  if (pOS)
+  {
+    (*pOS) << "  " << std::setw(55) << pSymbolName << ": " << code << "\n";
+  }
+}
+
 void SEIReader::sei_read_code(std::ostream *pOS, uint32_t uiLength, uint32_t& ruiCode, const char *pSymbolName)
 {
   READ_CODE(uiLength, ruiCode, pSymbolName);
@@ -96,6 +105,7 @@ static inline void output_sei_message_header(SEI &sei, std::ostream *pDecodedMes
 }
 
 #undef READ_CODE
+#undef READ_SCODE
 #undef READ_SVLC
 #undef READ_UVLC
 #undef READ_FLAG
@@ -104,14 +114,15 @@ static inline void output_sei_message_header(SEI &sei, std::ostream *pDecodedMes
 /**
  * unmarshal a single SEI message from bitstream bs
  */
-void SEIReader::parseSEImessage(InputBitstream* bs, SEIMessages& seis, const NalUnitType nalUnitType, const SPS *sps, std::ostream *pDecodedMessageOutputStream)
+ // note: for independent parsing, no parameter set should be required here
+void SEIReader::parseSEImessage(InputBitstream* bs, SEIMessages& seis, const NalUnitType nalUnitType, const uint32_t temporalId, const SPS *sps, HRD &hrd, std::ostream *pDecodedMessageOutputStream)
 {
   setBitstream(bs);
 
   CHECK(m_pcBitstream->getNumBitsUntilByteAligned(), "Bitstream not aligned");
   do
   {
-    xReadSEImessage(seis, nalUnitType, sps, pDecodedMessageOutputStream);
+    xReadSEImessage(seis, nalUnitType, temporalId, sps, hrd, pDecodedMessageOutputStream);
 
     /* SEI messages are an integer number of bytes, something has failed
     * in the parsing if bitstream not byte-aligned */
@@ -122,7 +133,7 @@ void SEIReader::parseSEImessage(InputBitstream* bs, SEIMessages& seis, const Nal
   xReadRbspTrailingBits();
 }
 
-void SEIReader::xReadSEImessage(SEIMessages& seis, const NalUnitType nalUnitType, const SPS *sps, std::ostream *pDecodedMessageOutputStream)
+void SEIReader::xReadSEImessage(SEIMessages& seis, const NalUnitType nalUnitType, const uint32_t temporalId, const SPS *sps, HRD &hrd, std::ostream *pDecodedMessageOutputStream)
 {
 #if ENABLE_TRACING
   xTraceSEIHeader();
@@ -157,6 +168,7 @@ void SEIReader::xReadSEImessage(SEIMessages& seis, const NalUnitType nalUnitType
   setBitstream(bs->extractSubstream(payloadSize * 8));
 
   SEI *sei = NULL;
+  const SEIBufferingPeriod *bp = NULL;
 
   if(nalUnitType == NAL_UNIT_PREFIX_SEI)
   {
@@ -166,116 +178,107 @@ void SEIReader::xReadSEImessage(SEIMessages& seis, const NalUnitType nalUnitType
       sei = new SEIuserDataUnregistered;
       xParseSEIuserDataUnregistered((SEIuserDataUnregistered&) *sei, payloadSize, pDecodedMessageOutputStream);
       break;
-    case SEI::ACTIVE_PARAMETER_SETS:
-      sei = new SEIActiveParameterSets;
-      xParseSEIActiveParameterSets((SEIActiveParameterSets&) *sei, payloadSize, pDecodedMessageOutputStream);
-      break;
     case SEI::DECODING_UNIT_INFO:
-      if (!sps)
+      bp = hrd.getBufferingPeriodSEI();
+      if (!bp)
       {
-        msg( WARNING, "Warning: Found Decoding unit SEI message, but no active SPS is available. Ignoring.");
+        msg( WARNING, "Warning: Found Decoding unit information SEI message, but no active buffering period is available. Ignoring.");
       }
       else
       {
         sei = new SEIDecodingUnitInfo;
-        xParseSEIDecodingUnitInfo((SEIDecodingUnitInfo&) *sei, payloadSize, sps, pDecodedMessageOutputStream);
+        xParseSEIDecodingUnitInfo((SEIDecodingUnitInfo&) *sei, payloadSize, *bp, temporalId, pDecodedMessageOutputStream);
       }
       break;
     case SEI::BUFFERING_PERIOD:
-      if (!sps)
-      {
-        msg( WARNING, "Warning: Found Buffering period SEI message, but no active SPS is available. Ignoring.");
-      }
-      else
-      {
-        sei = new SEIBufferingPeriod;
-        xParseSEIBufferingPeriod((SEIBufferingPeriod&) *sei, payloadSize, sps, pDecodedMessageOutputStream);
-      }
+      sei = new SEIBufferingPeriod;
+      xParseSEIBufferingPeriod((SEIBufferingPeriod&) *sei, payloadSize, pDecodedMessageOutputStream);
+      hrd.setBufferingPeriodSEI((SEIBufferingPeriod*) sei);
       break;
     case SEI::PICTURE_TIMING:
-      if (!sps)
       {
-        msg( WARNING, "Warning: Found Picture timing SEI message, but no active SPS is available. Ignoring.");
-      }
-      else
-      {
-        sei = new SEIPictureTiming;
-        xParseSEIPictureTiming((SEIPictureTiming&)*sei, payloadSize, sps, pDecodedMessageOutputStream);
+        bp = hrd.getBufferingPeriodSEI();
+        if (!bp)
+        {
+          msg( WARNING, "Warning: Found Picture timing SEI message, but no active buffering period is available. Ignoring.");
+        }
+        else
+        {
+          sei = new SEIPictureTiming;
+          xParseSEIPictureTiming((SEIPictureTiming&)*sei, payloadSize, temporalId, *bp, pDecodedMessageOutputStream);
+        }
       }
       break;
-    case SEI::RECOVERY_POINT:
-      sei = new SEIRecoveryPoint;
-      xParseSEIRecoveryPoint((SEIRecoveryPoint&) *sei, payloadSize, pDecodedMessageOutputStream);
+    case SEI::FRAME_FIELD_INFO:
+      sei = new SEIFrameFieldInfo;
+      xParseSEIFrameFieldinfo((SEIFrameFieldInfo&) *sei, payloadSize, pDecodedMessageOutputStream);
+      break;
+    case SEI::DEPENDENT_RAP_INDICATION:
+      sei = new SEIDependentRAPIndication;
+      xParseSEIDependentRAPIndication((SEIDependentRAPIndication&) *sei, payloadSize, pDecodedMessageOutputStream);
       break;
     case SEI::FRAME_PACKING:
       sei = new SEIFramePacking;
       xParseSEIFramePacking((SEIFramePacking&) *sei, payloadSize, pDecodedMessageOutputStream);
       break;
-    case SEI::SEGM_RECT_FRAME_PACKING:
-      sei = new SEISegmentedRectFramePacking;
-      xParseSEISegmentedRectFramePacking((SEISegmentedRectFramePacking&) *sei, payloadSize, pDecodedMessageOutputStream);
-      break;
-    case SEI::DISPLAY_ORIENTATION:
-      sei = new SEIDisplayOrientation;
-      xParseSEIDisplayOrientation((SEIDisplayOrientation&) *sei, payloadSize, pDecodedMessageOutputStream);
+    case SEI::MASTERING_DISPLAY_COLOUR_VOLUME:
+      sei = new SEIMasteringDisplayColourVolume;
+      xParseSEIMasteringDisplayColourVolume((SEIMasteringDisplayColourVolume&) *sei, payloadSize, pDecodedMessageOutputStream);
       break;
-    case SEI::TEMPORAL_LEVEL0_INDEX:
-      sei = new SEITemporalLevel0Index;
-      xParseSEITemporalLevel0Index((SEITemporalLevel0Index&) *sei, payloadSize, pDecodedMessageOutputStream);
+#if U0033_ALTERNATIVE_TRANSFER_CHARACTERISTICS_SEI
+    case SEI::ALTERNATIVE_TRANSFER_CHARACTERISTICS:
+      sei = new SEIAlternativeTransferCharacteristics;
+      xParseSEIAlternativeTransferCharacteristics((SEIAlternativeTransferCharacteristics&) *sei, payloadSize, pDecodedMessageOutputStream);
       break;
-    case SEI::REGION_REFRESH_INFO:
-      sei = new SEIGradualDecodingRefreshInfo;
-      xParseSEIRegionRefreshInfo((SEIGradualDecodingRefreshInfo&) *sei, payloadSize, pDecodedMessageOutputStream);
+#endif
+    case SEI::EQUIRECTANGULAR_PROJECTION:
+      sei = new SEIEquirectangularProjection;
+      xParseSEIEquirectangularProjection((SEIEquirectangularProjection&) *sei, payloadSize, pDecodedMessageOutputStream);
       break;
-    case SEI::NO_DISPLAY:
-      sei = new SEINoDisplay;
-      xParseSEINoDisplay((SEINoDisplay&) *sei, payloadSize, pDecodedMessageOutputStream);
+    case SEI::SPHERE_ROTATION:
+      sei = new SEISphereRotation;
+      xParseSEISphereRotation((SEISphereRotation&) *sei, payloadSize, pDecodedMessageOutputStream);
       break;
-    case SEI::TONE_MAPPING_INFO:
-      sei = new SEIToneMappingInfo;
-      xParseSEIToneMappingInfo((SEIToneMappingInfo&) *sei, payloadSize, pDecodedMessageOutputStream);
+    case SEI::OMNI_VIEWPORT:
+      sei = new SEIOmniViewport;
+      xParseSEIOmniViewport((SEIOmniViewport&) *sei, payloadSize, pDecodedMessageOutputStream);
       break;
-    case SEI::SOP_DESCRIPTION:
-      sei = new SEISOPDescription;
-      xParseSEISOPDescription((SEISOPDescription&) *sei, payloadSize, pDecodedMessageOutputStream);
+    case SEI::REGION_WISE_PACKING:
+      sei = new SEIRegionWisePacking;
+      xParseSEIRegionWisePacking((SEIRegionWisePacking&) *sei, payloadSize, pDecodedMessageOutputStream);
       break;
-    case SEI::SCALABLE_NESTING:
-      sei = new SEIScalableNesting;
-      xParseSEIScalableNesting((SEIScalableNesting&) *sei, nalUnitType, payloadSize, sps, pDecodedMessageOutputStream);
+    case SEI::GENERALIZED_CUBEMAP_PROJECTION:
+      sei = new SEIGeneralizedCubemapProjection;
+      xParseSEIGeneralizedCubemapProjection((SEIGeneralizedCubemapProjection&) *sei, payloadSize, pDecodedMessageOutputStream);
       break;
-#if HEVC_TILES_WPP
-    case SEI::TEMP_MOTION_CONSTRAINED_TILE_SETS:
-      sei = new SEITempMotionConstrainedTileSets;
-      xParseSEITempMotionConstraintsTileSets((SEITempMotionConstrainedTileSets&) *sei, payloadSize, pDecodedMessageOutputStream);
+    case SEI::SUBPICTURE_LEVEL_INFO:
+      sei = new SEISubpicureLevelInfo;
+      xParseSEISubpictureLevelInfo((SEISubpicureLevelInfo&) *sei, sps, payloadSize, pDecodedMessageOutputStream);
       break;
-#endif
-    case SEI::TIME_CODE:
-      sei = new SEITimeCode;
-      xParseSEITimeCode((SEITimeCode&) *sei, payloadSize, pDecodedMessageOutputStream);
+    case SEI::SAMPLE_ASPECT_RATIO_INFO:
+      sei = new SEISampleAspectRatioInfo;
+      xParseSEISampleAspectRatioInfo((SEISampleAspectRatioInfo&) *sei, payloadSize, pDecodedMessageOutputStream);
       break;
-    case SEI::CHROMA_RESAMPLING_FILTER_HINT:
-      sei = new SEIChromaResamplingFilterHint;
-      xParseSEIChromaResamplingFilterHint((SEIChromaResamplingFilterHint&) *sei, payloadSize, pDecodedMessageOutputStream);
-      //}
+    case SEI::USER_DATA_REGISTERED_ITU_T_T35:
+      sei = new SEIUserDataRegistered;
+      xParseSEIUserDataRegistered((SEIUserDataRegistered&)*sei, payloadSize, pDecodedMessageOutputStream);
       break;
-    case SEI::KNEE_FUNCTION_INFO:
-      sei = new SEIKneeFunctionInfo;
-      xParseSEIKneeFunctionInfo((SEIKneeFunctionInfo&) *sei, payloadSize, pDecodedMessageOutputStream);
+    case SEI::FILM_GRAIN_CHARACTERISTICS:
+      sei = new SEIFilmGrainCharacteristics;
+      xParseSEIFilmGrainCharacteristics((SEIFilmGrainCharacteristics&)*sei, payloadSize, pDecodedMessageOutputStream);
       break;
-    case SEI::COLOUR_REMAPPING_INFO:
-      sei = new SEIColourRemappingInfo;
-      xParseSEIColourRemappingInfo((SEIColourRemappingInfo&) *sei, payloadSize, pDecodedMessageOutputStream);
+    case SEI::CONTENT_LIGHT_LEVEL_INFO:
+      sei = new SEIContentLightLevelInfo;
+      xParseSEIContentLightLevelInfo((SEIContentLightLevelInfo&)*sei, payloadSize, pDecodedMessageOutputStream);
       break;
-    case SEI::MASTERING_DISPLAY_COLOUR_VOLUME:
-      sei = new SEIMasteringDisplayColourVolume;
-      xParseSEIMasteringDisplayColourVolume((SEIMasteringDisplayColourVolume&) *sei, payloadSize, pDecodedMessageOutputStream);
+    case SEI::AMBIENT_VIEWING_ENVIRONMENT:
+      sei = new SEIAmbientViewingEnvironment;
+      xParseSEIAmbientViewingEnvironment((SEIAmbientViewingEnvironment&)*sei, payloadSize, pDecodedMessageOutputStream);
       break;
-#if U0033_ALTERNATIVE_TRANSFER_CHARACTERISTICS_SEI
-    case SEI::ALTERNATIVE_TRANSFER_CHARACTERISTICS:
-      sei = new SEIAlternativeTransferCharacteristics;
-      xParseSEIAlternativeTransferCharacteristics((SEIAlternativeTransferCharacteristics&) *sei, payloadSize, pDecodedMessageOutputStream);
+    case SEI::CONTENT_COLOUR_VOLUME:
+      sei = new SEIContentColourVolume;
+      xParseSEIContentColourVolume((SEIContentColourVolume&)*sei, payloadSize, pDecodedMessageOutputStream);
       break;
-#endif
     default:
       for (uint32_t i = 0; i < payloadSize; i++)
       {
@@ -294,18 +297,22 @@ void SEIReader::xReadSEImessage(SEIMessages& seis, const NalUnitType nalUnitType
   {
     switch (payloadType)
     {
+#if HEVC_SEI
       case SEI::USER_DATA_UNREGISTERED:
         sei = new SEIuserDataUnregistered;
         xParseSEIuserDataUnregistered((SEIuserDataUnregistered&) *sei, payloadSize, pDecodedMessageOutputStream);
         break;
+#endif
       case SEI::DECODED_PICTURE_HASH:
         sei = new SEIDecodedPictureHash;
         xParseSEIDecodedPictureHash((SEIDecodedPictureHash&) *sei, payloadSize, pDecodedMessageOutputStream);
         break;
+#if HEVC_SEI
       case SEI::GREEN_METADATA:
         sei = new SEIGreenMetadataInfo;
         xParseSEIGreenMetadataInfo((SEIGreenMetadataInfo&) *sei, payloadSize, pDecodedMessageOutputStream);
         break;
+#endif
       default:
         for (uint32_t i = 0; i < payloadSize; i++)
         {
@@ -450,14 +457,12 @@ void SEIReader::xParseSEIDecodedPictureHash(SEIDecodedPictureHash& sei, uint32_t
   }
 }
 
+#if HEVC_SEI
 void SEIReader::xParseSEIActiveParameterSets(SEIActiveParameterSets& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream)
 {
   uint32_t val;
   output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize);
 
-#if HEVC_VPS
-  sei_read_code( pDecodedMessageOutputStream, 4, val, "active_video_parameter_set_id");   sei.activeVPSId = val;
-#endif
   sei_read_flag( pDecodedMessageOutputStream,    val, "self_contained_cvs_flag");         sei.m_selfContainedCvsFlag     = (val != 0);
   sei_read_flag( pDecodedMessageOutputStream,    val, "no_parameter_set_update_flag");    sei.m_noParameterSetUpdateFlag = (val != 0);
   sei_read_uvlc( pDecodedMessageOutputStream,    val, "num_sps_ids_minus1");              sei.numSpsIdsMinus1 = val;
@@ -465,153 +470,276 @@ void SEIReader::xParseSEIActiveParameterSets(SEIActiveParameterSets& sei, uint32
   sei.activeSeqParameterSetId.resize(sei.numSpsIdsMinus1 + 1);
   for (int i=0; i < (sei.numSpsIdsMinus1 + 1); i++)
   {
-    sei_read_uvlc( pDecodedMessageOutputStream, val, "active_seq_parameter_set_id[i]");    sei.activeSeqParameterSetId[i] = val;
+    sei_read_code( pDecodedMessageOutputStream, 4, val, "active_seq_parameter_set_id[i]" ); sei.activeSeqParameterSetId[i] = val;
   }
 }
+#endif
 
-void SEIReader::xParseSEIDecodingUnitInfo(SEIDecodingUnitInfo& sei, uint32_t payloadSize, const SPS *sps, std::ostream *pDecodedMessageOutputStream)
+void SEIReader::xParseSEIDecodingUnitInfo(SEIDecodingUnitInfo& sei, uint32_t payloadSize, const SEIBufferingPeriod& bp, const uint32_t temporalId, std::ostream *pDecodedMessageOutputStream) // Parses a decoding unit information SEI into `sei`; field widths and loop bounds come from the buffering period SEI `bp`, the loop start from `temporalId`.
 {
   uint32_t val;
   output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize);
   sei_read_uvlc( pDecodedMessageOutputStream, val, "decoding_unit_idx");
   sei.m_decodingUnitIdx = val;
 
-  const VUI *vui = sps->getVuiParameters();
-  if(vui->getHrdParameters()->getSubPicCpbParamsInPicTimingSEIFlag())
+  if(!bp.m_decodingUnitCpbParamsInPicTimingSeiFlag) // the per-sub-layer increments are read from this message only when this bp flag is unset
   {
-    sei_read_code( pDecodedMessageOutputStream, ( vui->getHrdParameters()->getDuCpbRemovalDelayLengthMinus1() + 1 ), val, "du_spt_cpb_removal_delay_increment");
-    sei.m_duSptCpbRemovalDelay = val;
+    for( int i = temporalId; i < bp.m_bpMaxSubLayers - 1; i ++ ) // i covers temporalId .. m_bpMaxSubLayers - 2; NOTE(review): index m_bpMaxSubLayers - 1 is never visited - confirm against the SEI writer
+    {
+      sei_read_flag( pDecodedMessageOutputStream, val, "dui_sub_layer_delays_present_flag[i]" );
+      sei.m_duiSubLayerDelaysPresentFlag[i] = val;
+      if( sei.m_duiSubLayerDelaysPresentFlag[i] )
+      {
+        sei_read_code( pDecodedMessageOutputStream, bp.getDuCpbRemovalDelayIncrementLength(), val, "du_spt_cpb_removal_delay_increment[i]"); // bit width supplied by bp
+        sei.m_duSptCpbRemovalDelayIncrement[i] = val;
+      }
+      else
+      {
+        sei.m_duSptCpbRemovalDelayIncrement[i] = 0; // nothing signalled for this sub-layer: store 0
+      }
+    }
   }
   else
   {
-    sei.m_duSptCpbRemovalDelay = 0;
+    for( int i = temporalId; i < bp.m_bpMaxSubLayers - 1; i ++ ) // same index range as the parsing branch, but nothing is read from the bitstream
+    {
+      sei.m_duSptCpbRemovalDelayIncrement[i] = 0;
+    }
   }
   sei_read_flag( pDecodedMessageOutputStream, val, "dpb_output_du_delay_present_flag"); sei.m_dpbOutputDuDelayPresentFlag = (val != 0);
   if(sei.m_dpbOutputDuDelayPresentFlag)
   {
-    sei_read_code( pDecodedMessageOutputStream, vui->getHrdParameters()->getDpbOutputDelayDuLengthMinus1() + 1, val, "pic_spt_dpb_output_du_delay");
+    sei_read_code( pDecodedMessageOutputStream, bp.getDpbOutputDelayDuLength(), val, "pic_spt_dpb_output_du_delay"); // bit width supplied by bp instead of the SPS HRD parameters
     sei.m_picSptDpbOutputDuDelay = val;
   }
 }
 
-void SEIReader::xParseSEIBufferingPeriod(SEIBufferingPeriod& sei, uint32_t payloadSize, const SPS *sps, std::ostream *pDecodedMessageOutputStream)
+void SEIReader::xParseSEIBufferingPeriod(SEIBufferingPeriod& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream) // Parses a buffering period SEI into `sei`; all field widths are read from the message itself, so no SPS is needed.
 {
   int i, nalOrVcl;
   uint32_t code;
 
-  const VUI *pVUI = sps->getVuiParameters();
-  const HRD *pHRD = pVUI->getHrdParameters();
 
   output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize);
 
-  sei_read_uvlc( pDecodedMessageOutputStream, code, "bp_seq_parameter_set_id" );                         sei.m_bpSeqParameterSetId     = code;
-  if( !pHRD->getSubPicCpbParamsPresentFlag() )
+  sei_read_flag( pDecodedMessageOutputStream, code, "bp_nal_hrd_parameters_present_flag" );               sei.m_bpNalCpbParamsPresentFlag = code;
+  sei_read_flag( pDecodedMessageOutputStream, code, "bp_vcl_hrd_parameters_present_flag" );               sei.m_bpVclCpbParamsPresentFlag = code;
+
+  sei_read_code( pDecodedMessageOutputStream, 5, code, "initial_cpb_removal_delay_length_minus1" );     sei.m_initialCpbRemovalDelayLength = code + 1; // the *_minus1 fields are stored as actual lengths (+1)
+  sei_read_code( pDecodedMessageOutputStream, 5, code, "cpb_removal_delay_length_minus1" );             sei.m_cpbRemovalDelayLength        = code + 1;
+  sei_read_code( pDecodedMessageOutputStream, 5, code, "dpb_output_delay_length_minus1" );              sei.m_dpbOutputDelayLength         = code + 1;
+  sei_read_flag( pDecodedMessageOutputStream, code, "alt_cpb_params_present_flag");                     sei.m_altCpbParamsPresentFlag      = code;
+  sei_read_flag( pDecodedMessageOutputStream, code, "bp_decoding_unit_hrd_params_present_flag" );       sei.m_bpDecodingUnitHrdParamsPresentFlag = code;
+  if( sei.m_bpDecodingUnitHrdParamsPresentFlag )
   {
-    sei_read_flag( pDecodedMessageOutputStream, code, "irap_cpb_params_present_flag" );                   sei.m_rapCpbParamsPresentFlag = code;
+    sei_read_code( pDecodedMessageOutputStream, 5, code, "du_cpb_removal_delay_increment_length_minus1" );  sei.m_duCpbRemovalDelayIncrementLength = code + 1;
+    sei_read_code( pDecodedMessageOutputStream, 5, code, "dpb_output_delay_du_length_minus1" );             sei.m_dpbOutputDelayDuLength = code + 1;
+    sei_read_flag( pDecodedMessageOutputStream, code, "decoding_unit_cpb_params_in_pic_timing_sei_flag" );  sei.m_decodingUnitCpbParamsInPicTimingSeiFlag = code;
   }
-  if( sei.m_rapCpbParamsPresentFlag )
+  else
   {
-    sei_read_code( pDecodedMessageOutputStream, pHRD->getCpbRemovalDelayLengthMinus1() + 1, code, "cpb_delay_offset" );      sei.m_cpbDelayOffset = code;
-    sei_read_code( pDecodedMessageOutputStream, pHRD->getDpbOutputDelayLengthMinus1()  + 1, code, "dpb_delay_offset" );      sei.m_dpbDelayOffset = code;
+    sei.m_duCpbRemovalDelayIncrementLength = 24; // DU lengths are not signalled in this branch; both fall back to 24
+    sei.m_dpbOutputDelayDuLength = 24;
   }
 
-  //read splicing flag and cpb_removal_delay_delta
   sei_read_flag( pDecodedMessageOutputStream, code, "concatenation_flag");
   sei.m_concatenationFlag = code;
-  sei_read_code( pDecodedMessageOutputStream, ( pHRD->getCpbRemovalDelayLengthMinus1() + 1 ), code, "au_cpb_removal_delay_delta_minus1" );
-  sei.m_auCpbRemovalDelayDelta = code + 1;
+  sei_read_flag ( pDecodedMessageOutputStream, code, "additional_concatenation_info_present_flag");
+  sei.m_additionalConcatenationInfoPresentFlag = code;
+  if (sei.m_additionalConcatenationInfoPresentFlag)
+  {
+    sei_read_code( pDecodedMessageOutputStream, sei.m_initialCpbRemovalDelayLength, code, "max_initial_removal_delay_for_concatenation" );
+    sei.m_maxInitialRemovalDelayForConcatenation = code;
+  }
 
-  for( nalOrVcl = 0; nalOrVcl < 2; nalOrVcl ++ )
+  sei_read_code( pDecodedMessageOutputStream, ( sei.m_cpbRemovalDelayLength ), code, "au_cpb_removal_delay_delta_minus1" );
+  sei.m_auCpbRemovalDelayDelta = code + 1;
+  sei_read_flag( pDecodedMessageOutputStream, code, "cpb_removal_delay_deltas_present_flag" );               sei.m_cpbRemovalDelayDeltasPresentFlag = code;
+  if (sei.m_cpbRemovalDelayDeltasPresentFlag)
+  {
+    sei_read_uvlc( pDecodedMessageOutputStream, code, "num_cpb_removal_delay_deltas_minus1" );               sei.m_numCpbRemovalDelayDeltas = code + 1; // NOTE(review): count comes straight from the bitstream - confirm it cannot exceed the capacity of m_cpbRemovalDelayDelta
+    for( i = 0; i < sei.m_numCpbRemovalDelayDeltas; i ++ )
+    {
+      sei_read_code( pDecodedMessageOutputStream, ( sei.m_cpbRemovalDelayLength ), code, "cpb_removal_delay_delta[i]" );
+      sei.m_cpbRemovalDelayDelta[ i ] = code;
+    }
+  }
+  sei_read_code( pDecodedMessageOutputStream, 3, code, "bp_max_sub_layers_minus1" );     sei.m_bpMaxSubLayers = code + 1;
+  sei_read_uvlc( pDecodedMessageOutputStream, code, "bp_cpb_cnt_minus1" ); sei.m_bpCpbCnt = code + 1; // NOTE(review): unbounded uvlc value later used as the [j] index bound - confirm against the array capacity
+  sei_read_flag(pDecodedMessageOutputStream, code, "sublayer_initial_cpb_removal_delay_present_flag");
+  sei.m_sublayerInitialCpbRemovalDelayPresentFlag = code;
+  for (i = (sei.m_sublayerInitialCpbRemovalDelayPresentFlag ? 0 : sei.m_bpMaxSubLayers - 1); i < sei.m_bpMaxSubLayers; i++) // all sub-layers when the flag is set, otherwise only the highest one
   {
-    if( ( ( nalOrVcl == 0 ) && ( pHRD->getNalHrdParametersPresentFlag() ) ) ||
-        ( ( nalOrVcl == 1 ) && ( pHRD->getVclHrdParametersPresentFlag() ) ) )
+    for( nalOrVcl = 0; nalOrVcl < 2; nalOrVcl ++ ) // 0 = NAL parameters, 1 = VCL parameters
     {
-      for( i = 0; i < ( pHRD->getCpbCntMinus1( 0 ) + 1 ); i ++ )
+      if( ( ( nalOrVcl == 0 ) && ( sei.m_bpNalCpbParamsPresentFlag ) ) ||
+         ( ( nalOrVcl == 1 ) && ( sei.m_bpVclCpbParamsPresentFlag ) ) )
       {
-        sei_read_code( pDecodedMessageOutputStream, ( pHRD->getInitialCpbRemovalDelayLengthMinus1() + 1 ) , code, nalOrVcl?"vcl_initial_cpb_removal_delay":"nal_initial_cpb_removal_delay" );
-        sei.m_initialCpbRemovalDelay[i][nalOrVcl] = code;
-        sei_read_code( pDecodedMessageOutputStream, ( pHRD->getInitialCpbRemovalDelayLengthMinus1() + 1 ) , code, nalOrVcl?"vcl_initial_cpb_removal_offset":"nal_initial_cpb_removal_offset" );
-        sei.m_initialCpbRemovalDelayOffset[i][nalOrVcl] = code;
-        if( pHRD->getSubPicCpbParamsPresentFlag() || sei.m_rapCpbParamsPresentFlag )
+        for( int j = 0; j < ( sei.m_bpCpbCnt ); j ++ )
         {
-          sei_read_code( pDecodedMessageOutputStream, ( pHRD->getInitialCpbRemovalDelayLengthMinus1() + 1 ) , code, nalOrVcl?"vcl_initial_alt_cpb_removal_delay":"nal_initial_alt_cpb_removal_delay" );
-          sei.m_initialAltCpbRemovalDelay[i][nalOrVcl] = code;
-          sei_read_code( pDecodedMessageOutputStream, ( pHRD->getInitialCpbRemovalDelayLengthMinus1() + 1 ) , code, nalOrVcl?"vcl_initial_alt_cpb_removal_offset":"nal_initial_alt_cpb_removal_offset" );
-          sei.m_initialAltCpbRemovalDelayOffset[i][nalOrVcl] = code;
+          sei_read_code( pDecodedMessageOutputStream, sei.m_initialCpbRemovalDelayLength, code, nalOrVcl ? "vcl_initial_cpb_removal_delay[i][j]" : "nal_initial_cpb_removal_delay[i][j]" );
+          sei.m_initialCpbRemovalDelay[i][j][nalOrVcl] = code;
+          sei_read_code( pDecodedMessageOutputStream, sei.m_initialCpbRemovalDelayLength, code, nalOrVcl ? "vcl_initial_cpb_removal_offset[i][j]" : "nal_initial_cpb_removal_offset[i][j]" );
+          sei.m_initialCpbRemovalOffset[i][j][nalOrVcl] = code; // the offset goes in its own array; writing it to m_initialCpbRemovalDelay would overwrite the delay stored just above
         }
       }
     }
   }
+  if (sei.m_altCpbParamsPresentFlag)
+  {
+    sei_read_flag(pDecodedMessageOutputStream, code, "use_alt_cpb_params_flag"); sei.m_useAltCpbParamsFlag = code;
+  }
+
 }
 
-void SEIReader::xParseSEIPictureTiming(SEIPictureTiming& sei, uint32_t payloadSize, const SPS *sps, std::ostream *pDecodedMessageOutputStream)
+void SEIReader::xParseSEIPictureTiming(SEIPictureTiming& sei, uint32_t payloadSize, const uint32_t temporalId, const SEIBufferingPeriod& bp, std::ostream *pDecodedMessageOutputStream) // Parses a picture timing SEI into `sei`; presence flags, field widths and loop bounds come from the buffering period SEI `bp`.
 {
-  int i;
-  uint32_t code;
 
-  const VUI *vui = sps->getVuiParameters();
-  const HRD *hrd = vui->getHrdParameters();
   output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize);
 
-  if( vui->getFrameFieldInfoPresentFlag() )
+  uint32_t symbol;
+  sei_read_code( pDecodedMessageOutputStream, bp.m_cpbRemovalDelayLength, symbol, "cpb_removal_delay_minus1[bp_max_sub_layers_minus1]" );
+  sei.m_auCpbRemovalDelay[bp.m_bpMaxSubLayers - 1] = symbol + 1; // the entry for the highest sub-layer is always read; stored as value + 1
+
+  if( bp.m_altCpbParamsPresentFlag ) 
   {
-    sei_read_code( pDecodedMessageOutputStream, 4, code, "pic_struct" );             sei.m_picStruct            = code;
-    sei_read_code( pDecodedMessageOutputStream, 2, code, "source_scan_type" );       sei.m_sourceScanType       = code;
-    sei_read_flag( pDecodedMessageOutputStream,    code, "duplicate_flag" );         sei.m_duplicateFlag        = (code == 1);
+    sei_read_flag( pDecodedMessageOutputStream, symbol, "cpb_alt_timing_info_present_flag" ); sei.m_cpbAltTimingInfoPresentFlag = symbol;
+    if( sei.m_cpbAltTimingInfoPresentFlag ) 
+    {
+      sei.m_cpbAltInitialCpbRemovalDelayDelta.resize(bp.m_bpCpbCnt); // one delta pair per CPB counted in bp
+      sei.m_cpbAltInitialCpbRemovalOffsetDelta.resize(bp.m_bpCpbCnt);
+      for( int i = 0; i < bp.m_bpCpbCnt; i++ ) 
+      {
+        sei_read_code( pDecodedMessageOutputStream, bp.m_initialCpbRemovalDelayLength, symbol, "cpb_alt_initial_cpb_removal_delay_delta[ i ]" );
+        sei.m_cpbAltInitialCpbRemovalDelayDelta[i]= symbol;
+        sei_read_code( pDecodedMessageOutputStream, bp.m_initialCpbRemovalDelayLength, symbol, "cpb_alt_initial_cpb_removal_offset_delta[ i ]" );
+        sei.m_cpbAltInitialCpbRemovalOffsetDelta[i]= symbol;
+      }
+      sei_read_code( pDecodedMessageOutputStream, bp.m_initialCpbRemovalDelayLength, sei.m_cpbDelayOffset, "cpb_delay_offset" ); // read directly into the member; NOTE(review): width is the initial-CPB-removal-delay length - confirm this is the intended width
+      sei_read_code( pDecodedMessageOutputStream, bp.m_initialCpbRemovalDelayLength, sei.m_dpbDelayOffset, "dpb_delay_offset" ); // NOTE(review): same width as above - confirm
+    }
   }
-
-  if( hrd->getCpbDpbDelaysPresentFlag())
+  else
   {
-    sei_read_code( pDecodedMessageOutputStream, ( hrd->getCpbRemovalDelayLengthMinus1() + 1 ), code, "au_cpb_removal_delay_minus1" );
-    sei.m_auCpbRemovalDelay = code + 1;
-    sei_read_code( pDecodedMessageOutputStream, ( hrd->getDpbOutputDelayLengthMinus1() + 1 ), code, "pic_dpb_output_delay" );
-    sei.m_picDpbOutputDelay = code;
+    sei.m_cpbAltTimingInfoPresentFlag = false; // no alternative CPB parameters in bp: clear the flag and zero both offsets
+    sei.m_cpbDelayOffset = sei.m_dpbDelayOffset = 0;
+  }
 
-    if(hrd->getSubPicCpbParamsPresentFlag())
+  for( int i = temporalId; i < bp.m_bpMaxSubLayers - 1; i ++ ) // sub-layers below the highest one, starting at temporalId
+  {
+    sei_read_flag( pDecodedMessageOutputStream,    symbol, "pt_sub_layer_delays_present_flag[i]" );    sei.m_ptSubLayerDelaysPresentFlag[i] = (symbol == 1);
+    if( sei.m_ptSubLayerDelaysPresentFlag[ i ] )
     {
-      sei_read_code( pDecodedMessageOutputStream, hrd->getDpbOutputDelayDuLengthMinus1()+1, code, "pic_dpb_output_du_delay" );
-      sei.m_picDpbOutputDuDelay = code;
+      if (bp.m_cpbRemovalDelayDeltasPresentFlag)
+      {
+        sei_read_flag(pDecodedMessageOutputStream, symbol, "cpb_removal_delay_delta_enabled_flag[i]");
+        sei.m_cpbRemovalDelayDeltaEnabledFlag[i] = (symbol == 1);
+      }
+      else
+      {
+        sei.m_cpbRemovalDelayDeltaEnabledFlag[i] = false;
+      }
+      if( sei.m_cpbRemovalDelayDeltaEnabledFlag[ i ] )
+      {
+        sei_read_code( pDecodedMessageOutputStream, ceilLog2(bp.m_numCpbRemovalDelayDeltas), symbol, "cpb_removal_delay_delta_idx[i]" ); // index width is ceilLog2 of the number of deltas signalled in bp
+        sei.m_cpbRemovalDelayDeltaIdx[ i ] = symbol;
+      }
+      else
+      {
+        sei_read_code( pDecodedMessageOutputStream, bp.m_cpbRemovalDelayLength, symbol, "cpb_removal_delay_minus1[i]" );
+        sei.m_auCpbRemovalDelay[ i ] = symbol + 1;
+      }
     }
-
-    if( hrd->getSubPicCpbParamsPresentFlag() && hrd->getSubPicCpbParamsInPicTimingSEIFlag() )
+  }
+  sei_read_code( pDecodedMessageOutputStream, bp.m_dpbOutputDelayLength,  symbol, "dpb_output_delay" );
+  sei.m_picDpbOutputDelay = symbol;
+  if( bp.m_bpDecodingUnitHrdParamsPresentFlag )
+  {
+    sei_read_code( pDecodedMessageOutputStream, bp.getDpbOutputDelayDuLength(), symbol, "pic_dpb_output_du_delay" );
+    sei.m_picDpbOutputDuDelay = symbol;
+  }
+  if( bp.m_bpDecodingUnitHrdParamsPresentFlag && bp.m_decodingUnitCpbParamsInPicTimingSeiFlag ) // decoding-unit parameters are carried in this message only when both bp flags are set
+  {
+    sei_read_uvlc( pDecodedMessageOutputStream, symbol, "num_decoding_units_minus1" );
+    sei.m_numDecodingUnitsMinus1 = symbol;
+    sei.m_numNalusInDuMinus1.resize(sei.m_numDecodingUnitsMinus1 + 1 );
+    sei.m_duCpbRemovalDelayMinus1.resize( (sei.m_numDecodingUnitsMinus1 + 1) * bp.m_bpMaxSubLayers ); // flat [decoding unit][sub-layer] table, indexed as i * m_bpMaxSubLayers + j below
+
+    sei_read_flag( pDecodedMessageOutputStream, symbol, "du_common_cpb_removal_delay_flag" );
+    sei.m_duCommonCpbRemovalDelayFlag = symbol;
+    if( sei.m_duCommonCpbRemovalDelayFlag )
     {
-      sei_read_uvlc( pDecodedMessageOutputStream, code, "num_decoding_units_minus1");
-      sei.m_numDecodingUnitsMinus1 = code;
-      sei_read_flag( pDecodedMessageOutputStream, code, "du_common_cpb_removal_delay_flag" );
-      sei.m_duCommonCpbRemovalDelayFlag = code;
-      if( sei.m_duCommonCpbRemovalDelayFlag )
+      for( int i = temporalId; i < bp.m_bpMaxSubLayers - 1; i ++ ) // NOTE(review): the highest sub-layer (m_bpMaxSubLayers - 1) is not visited here - confirm against the SEI writer
       {
-        sei_read_code( pDecodedMessageOutputStream, ( hrd->getDuCpbRemovalDelayLengthMinus1() + 1 ), code, "du_common_cpb_removal_delay_increment_minus1" );
-        sei.m_duCommonCpbRemovalDelayMinus1 = code;
+        if( sei.m_ptSubLayerDelaysPresentFlag[i] )
+        {
+          sei_read_code( pDecodedMessageOutputStream, bp.getDuCpbRemovalDelayIncrementLength(), symbol, "du_common_cpb_removal_delay_increment_minus1[i]" );
+          sei.m_duCommonCpbRemovalDelayMinus1[i] = symbol;
+        }
       }
-      sei.m_numNalusInDuMinus1.resize(sei.m_numDecodingUnitsMinus1 + 1 );
-      sei.m_duCpbRemovalDelayMinus1.resize( sei.m_numDecodingUnitsMinus1 + 1 );
-
-      for( i = 0; i <= sei.m_numDecodingUnitsMinus1; i ++ )
+    }
+    for( int i = 0; i <= sei.m_numDecodingUnitsMinus1; i ++ )
+    {
+      sei_read_uvlc( pDecodedMessageOutputStream, symbol, "num_nalus_in_du_minus1[i]" );
+      sei.m_numNalusInDuMinus1[i] = symbol;
+      if( !sei.m_duCommonCpbRemovalDelayFlag && i < sei.m_numDecodingUnitsMinus1 ) // per-DU increments are read only without a common delay, and never for the last decoding unit
       {
-        sei_read_uvlc( pDecodedMessageOutputStream, code, "num_nalus_in_du_minus1[i]");
-        sei.m_numNalusInDuMinus1[ i ] = code;
-        if( ( !sei.m_duCommonCpbRemovalDelayFlag ) && ( i < sei.m_numDecodingUnitsMinus1 ) )
+        for( int j = temporalId; j < bp.m_bpMaxSubLayers - 1; j ++ )
         {
-          sei_read_code( pDecodedMessageOutputStream, ( hrd->getDuCpbRemovalDelayLengthMinus1() + 1 ), code, "du_cpb_removal_delay_minus1[i]" );
-          sei.m_duCpbRemovalDelayMinus1[ i ] = code;
+          if( sei.m_ptSubLayerDelaysPresentFlag[j] )
+          {
+            sei_read_code( pDecodedMessageOutputStream, bp.getDuCpbRemovalDelayIncrementLength(), symbol, "du_cpb_removal_delay_increment_minus1[i][j]" );
+            sei.m_duCpbRemovalDelayMinus1[i * bp.m_bpMaxSubLayers + j] = symbol;
+          }
         }
       }
     }
   }
 }
 
-void SEIReader::xParseSEIRecoveryPoint(SEIRecoveryPoint& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream)
+void SEIReader::xParseSEIFrameFieldinfo(SEIFrameFieldInfo& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream) // Parses a frame-field information SEI into `sei`; needs no parameter set or buffering period.
 {
-  int  iCode;
-  uint32_t uiCode;
   output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize);
 
-  sei_read_svlc( pDecodedMessageOutputStream, iCode,  "recovery_poc_cnt" );      sei.m_recoveryPocCnt     = iCode;
-  sei_read_flag( pDecodedMessageOutputStream, uiCode, "exact_matching_flag" );   sei.m_exactMatchingFlag  = uiCode;
-  sei_read_flag( pDecodedMessageOutputStream, uiCode, "broken_link_flag" );      sei.m_brokenLinkFlag     = uiCode;
+  uint32_t symbol;
+  sei_read_flag( pDecodedMessageOutputStream, symbol,      "field_pic_flag" );
+  sei.m_fieldPicFlag= symbol;
+  if (sei.m_fieldPicFlag) // field picture: read the field parity and pairing flags
+  {
+    sei_read_flag( pDecodedMessageOutputStream, symbol,    "bottom_field_flag" );
+    sei.m_bottomFieldFlag = symbol;
+    sei_read_flag( pDecodedMessageOutputStream, symbol,    "pairing_indicated_flag" );
+    sei.m_pairingIndicatedFlag = symbol;
+    if (sei.m_pairingIndicatedFlag)
+    {
+      sei_read_flag( pDecodedMessageOutputStream, symbol,  "paired_with_next_field_flag" );
+      sei.m_pairedWithNextFieldFlag = symbol;
+    }
+  }
+  else // frame picture: read the frame display flags and the display period count
+  {
+    sei_read_flag( pDecodedMessageOutputStream, symbol,    "display_fields_from_frame_flag" );
+    sei.m_displayFieldsFromFrameFlag = symbol;
+    if (sei.m_displayFieldsFromFrameFlag)
+    {
+      sei_read_flag( pDecodedMessageOutputStream, symbol,  "top_field_first_flag" ); // trace label now names the element that is actually stored below
+      sei.m_topFieldFirstFlag = symbol;
+    }
+    sei_read_uvlc( pDecodedMessageOutputStream, symbol,    "display_elemental_periods_minus1" );
+    sei.m_displayElementalPeriodsMinus1 = symbol;
+  }
+  sei_read_code( pDecodedMessageOutputStream, 2, symbol,   "source_scan_type" ); // the last two elements are read in both branches
+  sei.m_sourceScanType = symbol;
+  sei_read_flag( pDecodedMessageOutputStream, symbol,      "duplicate_flag" );
+  sei.m_duplicateFlag = symbol;
+}
+
+void SEIReader::xParseSEIDependentRAPIndication( SEIDependentRAPIndication& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream ) // Handles a dependent RAP indication SEI.
+{
+  output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize); // the only statement: no payload fields are read by this function
 }
 
+
 void SEIReader::xParseSEIFramePacking(SEIFramePacking& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream)
 {
   uint32_t val;
@@ -648,540 +776,398 @@ void SEIReader::xParseSEIFramePacking(SEIFramePacking& sei, uint32_t payloadSize
   }
   sei_read_flag( pDecodedMessageOutputStream, val, "upsampled_aspect_ratio_flag" );                  sei.m_upsampledAspectRatio = val;
 }
-
-void SEIReader::xParseSEISegmentedRectFramePacking(SEISegmentedRectFramePacking& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream)
+void SEIReader::xParseSEIMasteringDisplayColourVolume(SEIMasteringDisplayColourVolume& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream)
 {
-  uint32_t val;
+  uint32_t code;
   output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize);
-  sei_read_flag( pDecodedMessageOutputStream, val,       "segmented_rect_frame_packing_arrangement_cancel_flag" );       sei.m_arrangementCancelFlag            = val;
-  if( !sei.m_arrangementCancelFlag )
-  {
-    sei_read_code( pDecodedMessageOutputStream, 2, val, "segmented_rect_content_interpretation_type" );                sei.m_contentInterpretationType = val;
-    sei_read_flag( pDecodedMessageOutputStream, val,     "segmented_rect_frame_packing_arrangement_persistence" );                              sei.m_arrangementPersistenceFlag               = val;
-  }
+  // Payload is fixed-length: three (x, y) display primaries, the white point, then max/min mastering luminance.
+  sei_read_code( pDecodedMessageOutputStream, 16, code, "display_primaries_x[0]" ); sei.values.primaries[0][0] = code;
+  sei_read_code( pDecodedMessageOutputStream, 16, code, "display_primaries_y[0]" ); sei.values.primaries[0][1] = code;
+
+  sei_read_code( pDecodedMessageOutputStream, 16, code, "display_primaries_x[1]" ); sei.values.primaries[1][0] = code;
+  sei_read_code( pDecodedMessageOutputStream, 16, code, "display_primaries_y[1]" ); sei.values.primaries[1][1] = code;
+
+  sei_read_code( pDecodedMessageOutputStream, 16, code, "display_primaries_x[2]" ); sei.values.primaries[2][0] = code;
+  sei_read_code( pDecodedMessageOutputStream, 16, code, "display_primaries_y[2]" ); sei.values.primaries[2][1] = code;
+
+  // White point: index 0 holds x, index 1 holds y, 16 bits each.
+  sei_read_code( pDecodedMessageOutputStream, 16, code, "white_point_x" ); sei.values.whitePoint[0] = code;
+  sei_read_code( pDecodedMessageOutputStream, 16, code, "white_point_y" ); sei.values.whitePoint[1] = code;
+  // Luminance bounds are 32-bit codes; max is read before min.
+  sei_read_code( pDecodedMessageOutputStream, 32, code, "max_display_mastering_luminance" ); sei.values.maxLuminance = code;
+  sei_read_code( pDecodedMessageOutputStream, 32, code, "min_display_mastering_luminance" ); sei.values.minLuminance = code;
 }
 
-void SEIReader::xParseSEIDisplayOrientation(SEIDisplayOrientation& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream)
+#if U0033_ALTERNATIVE_TRANSFER_CHARACTERISTICS_SEI
+void SEIReader::xParseSEIAlternativeTransferCharacteristics(SEIAlternativeTransferCharacteristics& sei, uint32_t payloadSize, std::ostream* pDecodedMessageOutputStream)
+{
+  uint32_t code;
+  output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize);
+  // The message carries a single 8-bit syntax element.
+  sei_read_code(pDecodedMessageOutputStream, 8, code, "preferred_transfer_characteristics"); sei.m_preferredTransferCharacteristics = code;
+}
+#endif
+void SEIReader::xParseSEIUserDataRegistered(SEIUserDataRegistered& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream)
 {
-  uint32_t val;
   output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize);
-  sei_read_flag( pDecodedMessageOutputStream, val,       "display_orientation_cancel_flag" );       sei.cancelFlag            = val;
-  if( !sei.cancelFlag )
+  // Reads the country code (one byte, or two when the first is 255), then copies every remaining payload byte into sei.m_userData.
+  uint32_t code;
+  assert(payloadSize>0); // NOTE(review): assert is compiled out under NDEBUG; with payloadSize == 0 the decrement below would wrap the uint32_t
+  sei_read_code(pDecodedMessageOutputStream, 8, code, "itu_t_t35_country_code"); payloadSize--;
+  if (code == 255)
   {
-    sei_read_flag( pDecodedMessageOutputStream, val,     "hor_flip" );                              sei.horFlip               = val;
-    sei_read_flag( pDecodedMessageOutputStream, val,     "ver_flip" );                              sei.verFlip               = val;
-    sei_read_code( pDecodedMessageOutputStream, 16, val, "anticlockwise_rotation" );                sei.anticlockwiseRotation = val;
-    sei_read_flag( pDecodedMessageOutputStream, val,     "display_orientation_persistence_flag" );  sei.persistenceFlag       = val;
+    assert(payloadSize>0);
+    sei_read_code(pDecodedMessageOutputStream, 8, code, "itu_t_t35_country_code_extension_byte"); payloadSize--;
+    code += 255; // extended codes are stored as 255 + extension byte
+  }
+  sei.m_ituCountryCode = code;
+  sei.m_userData.resize(payloadSize); // payloadSize has been decremented once per country-code byte consumed above
+  for (uint32_t i = 0; i < sei.m_userData.size(); i++)
+  {
+    sei_read_code(NULL, 8, code, "itu_t_t35_payload_byte"); // NULL is passed instead of the output stream for these bytes; only their count is printed below
+    sei.m_userData[i] = code;
+  }
+  if (pDecodedMessageOutputStream)
+  {
+    (*pDecodedMessageOutputStream) << "  itu_t_t35 payload size: " << sei.m_userData.size() << "\n";
   }
 }
 
-void SEIReader::xParseSEITemporalLevel0Index(SEITemporalLevel0Index& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream)
+void SEIReader::xParseSEIFilmGrainCharacteristics(SEIFilmGrainCharacteristics& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream)
 {
-  uint32_t val;
+  uint32_t code;
   output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize);
-  sei_read_code( pDecodedMessageOutputStream, 8, val, "temporal_sub_layer_zero_idx" );  sei.tl0Idx = val;
-  sei_read_code( pDecodedMessageOutputStream, 8, val, "irap_pic_id" );  sei.rapIdx = val;
+  // If the cancel flag is set nothing further is read; otherwise the model description and per-component tables follow.
+  sei_read_flag(pDecodedMessageOutputStream, code, "film_grain_characteristics_cancel_flag");     sei.m_filmGrainCharacteristicsCancelFlag = code != 0;
+  if (!sei.m_filmGrainCharacteristicsCancelFlag)
+  {
+    sei_read_code(pDecodedMessageOutputStream, 2, code, "film_grain_model_id");                   sei.m_filmGrainModelId = code;
+    sei_read_flag(pDecodedMessageOutputStream, code, "separate_colour_description_present_flag"); sei.m_separateColourDescriptionPresentFlag = code != 0;
+    if (sei.m_separateColourDescriptionPresentFlag)
+    {
+      sei_read_code(pDecodedMessageOutputStream, 3, code, "film_grain_bit_depth_luma_minus8");    sei.m_filmGrainBitDepthLumaMinus8 = code;
+      sei_read_code(pDecodedMessageOutputStream, 3, code, "film_grain_bit_depth_chroma_minus8");  sei.m_filmGrainBitDepthChromaMinus8 = code;
+      sei_read_flag(pDecodedMessageOutputStream, code, "film_grain_full_range_flag");             sei.m_filmGrainFullRangeFlag = code != 0;
+      sei_read_code(pDecodedMessageOutputStream, 8, code, "film_grain_colour_primaries");         sei.m_filmGrainColourPrimaries = code;
+      sei_read_code(pDecodedMessageOutputStream, 8, code, "film_grain_transfer_characteristics"); sei.m_filmGrainTransferCharacteristics = code;
+      sei_read_code(pDecodedMessageOutputStream, 8, code, "film_grain_matrix_coeffs");            sei.m_filmGrainMatrixCoeffs = code;
+    }
+    sei_read_code(pDecodedMessageOutputStream, 2, code, "blending_mode_id");                      sei.m_blendingModeId = code;
+    sei_read_code(pDecodedMessageOutputStream, 4, code, "log2_scale_factor");                     sei.m_log2ScaleFactor = code;
+    for (int c = 0; c<3; c++) // first pass: the three presence flags are read back-to-back, before any component table
+    {
+      sei_read_flag(pDecodedMessageOutputStream, code, "comp_model_present_flag[c]");             sei.m_compModel[c].presentFlag = code != 0;
+    }
+    for (int c = 0; c<3; c++) // second pass: read the table of each component whose presence flag was set
+    {
+      SEIFilmGrainCharacteristics::CompModel &cm = sei.m_compModel[c];
+      if (cm.presentFlag)
+      {
+        uint32_t numIntensityIntervals;
+        sei_read_code(pDecodedMessageOutputStream, 8, code, "num_intensity_intervals_minus1[c]"); numIntensityIntervals = code + 1;
+        sei_read_code(pDecodedMessageOutputStream, 3, code, "num_model_values_minus1[c]");        cm.numModelValues = code + 1;
+        cm.intensityValues.resize(numIntensityIntervals); // both counts are coded minus one, hence the +1 above
+        for (uint32_t interval = 0; interval<numIntensityIntervals; interval++)
+        {
+          SEIFilmGrainCharacteristics::CompModelIntensityValues &cmiv = cm.intensityValues[interval];
+          sei_read_code(pDecodedMessageOutputStream, 8, code, "intensity_interval_lower_bound[c][i]"); cmiv.intensityIntervalLowerBound = code;
+          sei_read_code(pDecodedMessageOutputStream, 8, code, "intensity_interval_upper_bound[c][i]"); cmiv.intensityIntervalUpperBound = code;
+          cmiv.compModelValue.resize(cm.numModelValues);
+          for (uint32_t j = 0; j<cm.numModelValues; j++)
+          {
+            sei_read_svlc(pDecodedMessageOutputStream, cmiv.compModelValue[j], "comp_model_value[c][i]"); // signed value read straight into the vector element; NOTE(review): the trace label omits the j index
+          }
+        }
+      }
+    } // for c
+    sei_read_flag(pDecodedMessageOutputStream, code, "film_grain_characteristics_persistence_flag"); sei.m_filmGrainCharacteristicsPersistenceFlag = code != 0;
+  } // cancel flag
 }
 
-void SEIReader::xParseSEIRegionRefreshInfo(SEIGradualDecodingRefreshInfo& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream)
+void SEIReader::xParseSEIContentLightLevelInfo(SEIContentLightLevelInfo& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream)
 {
-  uint32_t val;
+  uint32_t code;
   output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize);
-  sei_read_flag( pDecodedMessageOutputStream, val, "refreshed_region_flag" ); sei.m_gdrForegroundFlag = val ? 1 : 0;
+  // Two unconditional 16-bit fields: the maximum content light level, then the maximum picture-average light level.
+  sei_read_code(pDecodedMessageOutputStream, 16, code, "max_content_light_level");     sei.m_maxContentLightLevel = code;
+  sei_read_code(pDecodedMessageOutputStream, 16, code, "max_pic_average_light_level"); sei.m_maxPicAverageLightLevel = code;
 }
 
-void SEIReader::xParseSEINoDisplay(SEINoDisplay& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream)
+void SEIReader::xParseSEIAmbientViewingEnvironment(SEIAmbientViewingEnvironment& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream)
 {
+  uint32_t code;
   output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize);
-  sei.m_noDisplay = true;
+  // One 32-bit illuminance, then two 16-bit light coordinates that are narrowed with an explicit uint16_t cast.
+  sei_read_code(pDecodedMessageOutputStream, 32, code, "ambient_illuminance"); sei.m_ambientIlluminance = code;
+  sei_read_code(pDecodedMessageOutputStream, 16, code, "ambient_light_x");     sei.m_ambientLightX = (uint16_t)code;
+  sei_read_code(pDecodedMessageOutputStream, 16, code, "ambient_light_y");     sei.m_ambientLightY = (uint16_t)code;
 }
 
-void SEIReader::xParseSEIToneMappingInfo(SEIToneMappingInfo& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream)
+void SEIReader::xParseSEIContentColourVolume(SEIContentColourVolume& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream)
 {
   int i;
   uint32_t val;
   output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize);
-  sei_read_uvlc( pDecodedMessageOutputStream, val, "tone_map_id" );                         sei.m_toneMapId = val;
-  sei_read_flag( pDecodedMessageOutputStream, val, "tone_map_cancel_flag" );                sei.m_toneMapCancelFlag = val;
 
-  if ( !sei.m_toneMapCancelFlag )
+  sei_read_flag(pDecodedMessageOutputStream, val, "ccv_cancel_flag");          sei.m_ccvCancelFlag = val;
+  if (!sei.m_ccvCancelFlag)
   {
-    sei_read_flag( pDecodedMessageOutputStream, val, "tone_map_persistence_flag" );         sei.m_toneMapPersistenceFlag = val;
-    sei_read_code( pDecodedMessageOutputStream, 8, val, "coded_data_bit_depth" );           sei.m_codedDataBitDepth = val;
-    sei_read_code( pDecodedMessageOutputStream, 8, val, "target_bit_depth" );               sei.m_targetBitDepth = val;
-    sei_read_uvlc( pDecodedMessageOutputStream, val, "tone_map_model_id" );                 sei.m_modelId = val;
-    switch(sei.m_modelId)
+    int iVal; // signed scratch for the sei_read_scode calls that read the primaries
+    sei_read_flag(pDecodedMessageOutputStream, val, "ccv_persistence_flag");   sei.m_ccvPersistenceFlag = val;
+    sei_read_flag(pDecodedMessageOutputStream, val, "ccv_primaries_present_flag");   sei.m_ccvPrimariesPresentFlag = val;
+    sei_read_flag(pDecodedMessageOutputStream, val, "ccv_min_luminance_value_present_flag");   sei.m_ccvMinLuminanceValuePresentFlag = val;
+    sei_read_flag(pDecodedMessageOutputStream, val, "ccv_max_luminance_value_present_flag");   sei.m_ccvMaxLuminanceValuePresentFlag = val;
+    sei_read_flag(pDecodedMessageOutputStream, val, "ccv_avg_luminance_value_present_flag");   sei.m_ccvAvgLuminanceValuePresentFlag = val;
+    // Each of the four *_present_flag values read above gates one optional group of fields parsed next.
+    if (sei.m_ccvPrimariesPresentFlag)
     {
-    case 0:
-      {
-        sei_read_code( pDecodedMessageOutputStream, 32, val, "min_value" );                 sei.m_minValue = val;
-        sei_read_code( pDecodedMessageOutputStream, 32, val, "max_value" );                 sei.m_maxValue = val;
-        break;
-      }
-    case 1:
-      {
-        sei_read_code( pDecodedMessageOutputStream, 32, val, "sigmoid_midpoint" );          sei.m_sigmoidMidpoint = val;
-        sei_read_code( pDecodedMessageOutputStream, 32, val, "sigmoid_width" );             sei.m_sigmoidWidth = val;
-        break;
-      }
-    case 2:
-      {
-        uint32_t num = 1u << sei.m_targetBitDepth;
-        sei.m_startOfCodedInterval.resize(num+1);
-        for(i = 0; i < num; i++)
-        {
-          sei_read_code( pDecodedMessageOutputStream, ((( sei.m_codedDataBitDepth + 7 ) >> 3 ) << 3), val, "start_of_coded_interval[i]" );
-          sei.m_startOfCodedInterval[i] = val;
-        }
-        sei.m_startOfCodedInterval[num] = 1u << sei.m_codedDataBitDepth;
-        break;
-      }
-    case 3:
+      for (i = 0; i < MAX_NUM_COMPONENT; i++)
       {
-        sei_read_code( pDecodedMessageOutputStream, 16, val,  "num_pivots" );                       sei.m_numPivots = val;
-        sei.m_codedPivotValue.resize(sei.m_numPivots);
-        sei.m_targetPivotValue.resize(sei.m_numPivots);
-        for(i = 0; i < sei.m_numPivots; i++ )
-        {
-          sei_read_code( pDecodedMessageOutputStream, ((( sei.m_codedDataBitDepth + 7 ) >> 3 ) << 3), val, "coded_pivot_value[i]" );
-          sei.m_codedPivotValue[i] = val;
-          sei_read_code( pDecodedMessageOutputStream, ((( sei.m_targetBitDepth + 7 ) >> 3 ) << 3),    val, "target_pivot_value[i]" );
-          sei.m_targetPivotValue[i] = val;
-        }
-        break;
+        sei_read_scode(pDecodedMessageOutputStream, 32, iVal, "ccv_primaries_x[i]");          sei.m_ccvPrimariesX[i] = iVal;
+        sei_read_scode(pDecodedMessageOutputStream, 32, iVal, "ccv_primaries_y[i]");          sei.m_ccvPrimariesY[i] = iVal;
       }
-    case 4:
-      {
-        sei_read_code( pDecodedMessageOutputStream, 8, val, "camera_iso_speed_idc" );                     sei.m_cameraIsoSpeedIdc = val;
-        if( sei.m_cameraIsoSpeedIdc == 255) //Extended_ISO
-        {
-          sei_read_code( pDecodedMessageOutputStream, 32,   val,   "camera_iso_speed_value" );            sei.m_cameraIsoSpeedValue = val;
-        }
-        sei_read_code( pDecodedMessageOutputStream, 8, val, "exposure_index_idc" );                       sei.m_exposureIndexIdc = val;
-        if( sei.m_exposureIndexIdc == 255) //Extended_ISO
-        {
-          sei_read_code( pDecodedMessageOutputStream, 32,   val,   "exposure_index_value" );              sei.m_exposureIndexValue = val;
-        }
-        sei_read_flag( pDecodedMessageOutputStream, val, "exposure_compensation_value_sign_flag" );       sei.m_exposureCompensationValueSignFlag = val;
-        sei_read_code( pDecodedMessageOutputStream, 16, val, "exposure_compensation_value_numerator" );   sei.m_exposureCompensationValueNumerator = val;
-        sei_read_code( pDecodedMessageOutputStream, 16, val, "exposure_compensation_value_denom_idc" );   sei.m_exposureCompensationValueDenomIdc = val;
-        sei_read_code( pDecodedMessageOutputStream, 32, val, "ref_screen_luminance_white" );              sei.m_refScreenLuminanceWhite = val;
-        sei_read_code( pDecodedMessageOutputStream, 32, val, "extended_range_white_level" );              sei.m_extendedRangeWhiteLevel = val;
-        sei_read_code( pDecodedMessageOutputStream, 16, val, "nominal_black_level_code_value" );          sei.m_nominalBlackLevelLumaCodeValue = val;
-        sei_read_code( pDecodedMessageOutputStream, 16, val, "nominal_white_level_code_value" );          sei.m_nominalWhiteLevelLumaCodeValue= val;
-        sei_read_code( pDecodedMessageOutputStream, 16, val, "extended_white_level_code_value" );         sei.m_extendedWhiteLevelLumaCodeValue = val;
-        break;
-      }
-    default:
-      {
-        THROW("Undefined SEIToneMapModelId");
-        break;
-      }
-    }//switch model id
-  }// if(!sei.m_toneMapCancelFlag)
-}
-
-void SEIReader::xParseSEISOPDescription(SEISOPDescription &sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream)
-{
-  int iCode;
-  uint32_t uiCode;
-  output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize);
-
-  sei_read_uvlc( pDecodedMessageOutputStream, uiCode,           "sop_seq_parameter_set_id"            ); sei.m_sopSeqParameterSetId = uiCode;
-  sei_read_uvlc( pDecodedMessageOutputStream, uiCode,           "num_pics_in_sop_minus1"              ); sei.m_numPicsInSopMinus1 = uiCode;
-  for (uint32_t i = 0; i <= sei.m_numPicsInSopMinus1; i++)
-  {
-    sei_read_code( pDecodedMessageOutputStream, 6, uiCode,                     "sop_vcl_nut[i]" );  sei.m_sopDescVclNaluType[i] = uiCode;
-    sei_read_code( pDecodedMessageOutputStream, 3, sei.m_sopDescTemporalId[i], "sop_temporal_id[i]"   );  sei.m_sopDescTemporalId[i] = uiCode;
-    if (sei.m_sopDescVclNaluType[i] != NAL_UNIT_CODED_SLICE_IDR_W_RADL && sei.m_sopDescVclNaluType[i] != NAL_UNIT_CODED_SLICE_IDR_N_LP)
+    }
+    if (sei.m_ccvMinLuminanceValuePresentFlag)
+    {
+      sei_read_code(pDecodedMessageOutputStream, 32, val, "ccv_min_luminance_value");   sei.m_ccvMinLuminanceValue = val;
+    }
+    if (sei.m_ccvMaxLuminanceValuePresentFlag)
     {
-      sei_read_uvlc( pDecodedMessageOutputStream, sei.m_sopDescStRpsIdx[i],    "sop_short_term_rps_idx[i]"    ); sei.m_sopDescStRpsIdx[i] = uiCode;
+      sei_read_code(pDecodedMessageOutputStream, 32, val, "ccv_max_luminance_value");   sei.m_ccvMaxLuminanceValue = val;
     }
-    if (i > 0)
+    if (sei.m_ccvAvgLuminanceValuePresentFlag)
     {
-      sei_read_svlc( pDecodedMessageOutputStream, iCode,                       "sop_poc_delta[i]"     ); sei.m_sopDescPocDelta[i] = iCode;
+      sei_read_code(pDecodedMessageOutputStream, 32, val, "ccv_avg_luminance_value");   sei.m_ccvAvgLuminanceValue = val;
     }
   }
 }
-
-void SEIReader::xParseSEIScalableNesting(SEIScalableNesting& sei, const NalUnitType nalUnitType, uint32_t payloadSize, const SPS *sps, std::ostream *pDecodedMessageOutputStream)
+void SEIReader::xParseSEIEquirectangularProjection(SEIEquirectangularProjection& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream)
 {
-  uint32_t uiCode;
-  SEIMessages seis;
+  uint32_t val;
   output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize);
 
-  sei_read_flag( pDecodedMessageOutputStream, uiCode,            "bitstream_subset_flag"         ); sei.m_bitStreamSubsetFlag = uiCode;
-  sei_read_flag( pDecodedMessageOutputStream, uiCode,            "nesting_op_flag"               ); sei.m_nestingOpFlag = uiCode;
-  if (sei.m_nestingOpFlag)
-  {
-    sei_read_flag( pDecodedMessageOutputStream, uiCode,            "default_op_flag"               ); sei.m_defaultOpFlag = uiCode;
-    sei_read_uvlc( pDecodedMessageOutputStream, uiCode,            "nesting_num_ops_minus1"        ); sei.m_nestingNumOpsMinus1 = uiCode;
-    for (uint32_t i = sei.m_defaultOpFlag; i <= sei.m_nestingNumOpsMinus1; i++)
-    {
-      sei_read_code( pDecodedMessageOutputStream, 3,        uiCode,  "nesting_max_temporal_id_plus1[i]"   ); sei.m_nestingMaxTemporalIdPlus1[i] = uiCode;
-      sei_read_uvlc( pDecodedMessageOutputStream, uiCode,            "nesting_op_idx[i]"                  ); sei.m_nestingOpIdx[i] = uiCode;
-    }
-  }
-  else
+  sei_read_flag( pDecodedMessageOutputStream, val,       "erp_cancel_flag" );              sei.m_erpCancelFlag = val;
+  if( !sei.m_erpCancelFlag )
   {
-    sei_read_flag( pDecodedMessageOutputStream, uiCode,            "all_layers_flag"               ); sei.m_allLayersFlag       = uiCode;
-    if (!sei.m_allLayersFlag)
+    sei_read_flag( pDecodedMessageOutputStream, val,      "erp_persistence_flag"    );     sei.m_erpPersistenceFlag   = val;
+    sei_read_flag( pDecodedMessageOutputStream, val,      "erp_guard_band_flag"     );     sei.m_erpGuardBandFlag     = val;
+    sei_read_code( pDecodedMessageOutputStream, 2, val,   "erp_reserved_zero_2bits" ); // read but not stored in sei
+    if ( sei.m_erpGuardBandFlag == 1) // guard-band type and widths are parsed only when the flag is set
     {
-      sei_read_code( pDecodedMessageOutputStream, 3,        uiCode,  "nesting_no_op_max_temporal_id_plus1"  ); sei.m_nestingNoOpMaxTemporalIdPlus1 = uiCode;
-      sei_read_uvlc( pDecodedMessageOutputStream, uiCode,            "nesting_num_layers_minus1"            ); sei.m_nestingNumLayersMinus1        = uiCode;
-      for (uint32_t i = 0; i <= sei.m_nestingNumLayersMinus1; i++)
-      {
-        sei_read_code( pDecodedMessageOutputStream, 6,           uiCode,     "nesting_layer_id[i]"      ); sei.m_nestingLayerId[i]   = uiCode;
-      }
+      sei_read_code( pDecodedMessageOutputStream, 3, val,     "erp_guard_band_type"       );   sei.m_erpGuardBandType  = val;
+      sei_read_code( pDecodedMessageOutputStream, 8, val,     "erp_left_guard_band_width" );   sei.m_erpLeftGuardBandWidth = val;
+      sei_read_code( pDecodedMessageOutputStream, 8, val,     "erp_right_guard_band_width");   sei.m_erpRightGuardBandWidth = val;
     }
   }
+}
 
-  // byte alignment
-  while ( m_pcBitstream->getNumBitsRead() % 8 != 0 )
-  {
-    uint32_t code;
-    sei_read_flag( pDecodedMessageOutputStream, code, "nesting_zero_bit" );
-  }
-
-  // read nested SEI messages
-  do
-  {
-    xReadSEImessage(sei.m_nestedSEIs, nalUnitType, sps, pDecodedMessageOutputStream);
-  } while (m_pcBitstream->getNumBitsLeft() > 8);
-
-  if (pDecodedMessageOutputStream)
+void SEIReader::xParseSEISphereRotation(SEISphereRotation& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream)
+{
+  uint32_t val;
+  int  sval; // signed scratch for the sei_read_scode calls (yaw, pitch, roll)
+  output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize);
+  sei_read_flag( pDecodedMessageOutputStream, val,       "sphere_rotation_cancel_flag" );              sei.m_sphereRotationCancelFlag = val;
+  if( !sei.m_sphereRotationCancelFlag ) // the remaining fields are read only when the cancel flag is clear
   {
-    (*pDecodedMessageOutputStream) << "End of scalable nesting SEI message\n";
+    sei_read_flag ( pDecodedMessageOutputStream,      val,   "sphere_rotation_persistence_flag"    );     sei.m_sphereRotationPersistenceFlag = val;
+    sei_read_code ( pDecodedMessageOutputStream, 6,   val,   "sphere_rotation_reserved_zero_6bits" ); // read but not stored in sei
+    sei_read_scode( pDecodedMessageOutputStream, 32, sval,   "sphere_rotation_yaw"                 );     sei.m_sphereRotationYaw = sval;
+    sei_read_scode( pDecodedMessageOutputStream, 32, sval,   "sphere_rotation_pitch"               );     sei.m_sphereRotationPitch = sval;
+    sei_read_scode( pDecodedMessageOutputStream, 32, sval,   "sphere_rotation_roll"                );     sei.m_sphereRotationRoll = sval;
   }
 }
 
-#if HEVC_TILES_WPP
-void SEIReader::xParseSEITempMotionConstraintsTileSets(SEITempMotionConstrainedTileSets& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream)
+void SEIReader::xParseSEIOmniViewport(SEIOmniViewport& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream)
 {
   uint32_t code;
+  int  scode;
   output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize);
-  sei_read_flag( pDecodedMessageOutputStream, code, "mc_all_tiles_exact_sample_value_match_flag");  sei.m_mc_all_tiles_exact_sample_value_match_flag = (code != 0);
-  sei_read_flag( pDecodedMessageOutputStream, code, "each_tile_one_tile_set_flag");                 sei.m_each_tile_one_tile_set_flag                = (code != 0);
-
-  if(!sei.m_each_tile_one_tile_set_flag)
+  sei_read_code( pDecodedMessageOutputStream, 10, code, "omni_viewport_id"          ); sei.m_omniViewportId         = code;
+  sei_read_flag( pDecodedMessageOutputStream,     code, "omni_viewport_cancel_flag" ); sei.m_omniViewportCancelFlag = code;
+  if (!sei.m_omniViewportCancelFlag)
   {
-    sei_read_flag( pDecodedMessageOutputStream, code, "limited_tile_set_display_flag");  sei.m_limited_tile_set_display_flag = (code != 0);
-    sei_read_uvlc( pDecodedMessageOutputStream, code, "num_sets_in_message_minus1");     sei.setNumberOfTileSets(code + 1);
-
-    if(sei.getNumberOfTileSets() != 0)
+    uint32_t numRegions;
+    sei_read_flag( pDecodedMessageOutputStream,    code,       "omni_viewport_persistence_flag" ); sei.m_omniViewportPersistenceFlag = code;    
+    sei_read_code( pDecodedMessageOutputStream, 4, numRegions, "omni_viewport_cnt_minus1"       ); numRegions++;
+    sei.m_omniViewportRegions.resize(numRegions);
+    for(uint32_t region=0; region<numRegions; region++)
     {
-      for(int i = 0; i < sei.getNumberOfTileSets(); i++)
-      {
-        sei_read_uvlc( pDecodedMessageOutputStream, code, "mcts_id");  sei.tileSetData(i).m_mcts_id = code;
-
-        if(sei.m_limited_tile_set_display_flag)
-        {
-          sei_read_flag( pDecodedMessageOutputStream, code, "display_tile_set_flag");  sei.tileSetData(i).m_display_tile_set_flag = (code != 1);
-        }
-
-        sei_read_uvlc( pDecodedMessageOutputStream, code, "num_tile_rects_in_set_minus1");  sei.tileSetData(i).setNumberOfTileRects(code + 1);
-
-        for(int j=0; j<sei.tileSetData(i).getNumberOfTileRects(); j++)
-        {
-          sei_read_uvlc( pDecodedMessageOutputStream, code, "top_left_tile_index");      sei.tileSetData(i).topLeftTileIndex(j)     = code;
-          sei_read_uvlc( pDecodedMessageOutputStream, code, "bottom_right_tile_index");  sei.tileSetData(i).bottomRightTileIndex(j) = code;
-        }
-
-        if(!sei.m_mc_all_tiles_exact_sample_value_match_flag)
-        {
-          sei_read_flag( pDecodedMessageOutputStream, code, "exact_sample_value_match_flag");   sei.tileSetData(i).m_exact_sample_value_match_flag    = (code != 0);
-        }
-        sei_read_flag( pDecodedMessageOutputStream, code, "mcts_tier_level_idc_present_flag");  sei.tileSetData(i).m_mcts_tier_level_idc_present_flag = (code != 0);
-
-        if(sei.tileSetData(i).m_mcts_tier_level_idc_present_flag)
-        {
-          sei_read_flag( pDecodedMessageOutputStream, code,    "mcts_tier_flag"); sei.tileSetData(i).m_mcts_tier_flag = (code != 0);
-          sei_read_code( pDecodedMessageOutputStream, 8, code, "mcts_level_idc"); sei.tileSetData(i).m_mcts_level_idc =  code;
-        }
-      }
-    }
+      SEIOmniViewport::OmniViewport &viewport = sei.m_omniViewportRegions[region]; // per-region record: three signed centre values, then two unsigned ranges
+      sei_read_scode( pDecodedMessageOutputStream, 32, scode, "omni_viewport_azimuth_centre"   );   viewport.azimuthCentre = scode;
+      sei_read_scode( pDecodedMessageOutputStream, 32, scode, "omni_viewport_elevation_centre" );   viewport.elevationCentre = scode;
+      sei_read_scode( pDecodedMessageOutputStream, 32, scode, "omni_viewport_tilt_centre"      );   viewport.tiltCentre = scode; // store the signed value just read, not the stale unsigned `code`
+      sei_read_code( pDecodedMessageOutputStream,  32, code, "omni_viewport_hor_range"         );   viewport.horRange        = code;
+      sei_read_code( pDecodedMessageOutputStream,  32, code, "omni_viewport_ver_range"         );   viewport.verRange        = code;
+    }    
   }
   else
   {
-    sei_read_flag( pDecodedMessageOutputStream, code, "max_mcs_tier_level_idc_present_flag");  sei.m_max_mcs_tier_level_idc_present_flag = code;
-    if(sei.m_max_mcs_tier_level_idc_present_flag)
-    {
-      sei_read_flag( pDecodedMessageOutputStream, code, "max_mcts_tier_flag");  sei.m_max_mcts_tier_flag = code;
-      sei_read_code( pDecodedMessageOutputStream, 8, code, "max_mcts_level_idc"); sei.m_max_mcts_level_idc = code;
-    }
+    sei.m_omniViewportRegions.clear();
+    sei.m_omniViewportPersistenceFlag=false;
   }
 }
-#endif
 
-void SEIReader::xParseSEITimeCode(SEITimeCode& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream)
+void SEIReader::xParseSEIRegionWisePacking(SEIRegionWisePacking& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream)
 {
-  uint32_t code;
   output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize);
-  sei_read_code( pDecodedMessageOutputStream, 2, code, "num_clock_ts"); sei.numClockTs = code;
-  for(int i = 0; i < sei.numClockTs; i++)
+  uint32_t val;
+
+  sei_read_flag( pDecodedMessageOutputStream,           val,      "rwp_cancel_flag" );                      sei.m_rwpCancelFlag = val;
+  if (!sei.m_rwpCancelFlag)
   {
-    SEITimeSet currentTimeSet;
-    sei_read_flag( pDecodedMessageOutputStream, code, "clock_time_stamp_flag[i]"); currentTimeSet.clockTimeStampFlag = code;
-    if(currentTimeSet.clockTimeStampFlag)
+    sei_read_flag( pDecodedMessageOutputStream,           val,    "rwp_persistence_flag" );                 sei.m_rwpPersistenceFlag = val;
+    sei_read_flag( pDecodedMessageOutputStream,           val,    "constituent_picture_matching_flag" );    sei.m_constituentPictureMatchingFlag = val;
+    sei_read_code( pDecodedMessageOutputStream,       5,  val,    "rwp_reserved_zero_5bits" );
+    sei_read_code( pDecodedMessageOutputStream,       8,  val,    "num_packed_regions" );                   sei.m_numPackedRegions = val;
+    sei_read_code( pDecodedMessageOutputStream,       32, val,    "proj_picture_width" );                   sei.m_projPictureWidth = val;
+    sei_read_code( pDecodedMessageOutputStream,       32, val,    "proj_picture_height" );                  sei.m_projPictureHeight = val;
+    sei_read_code( pDecodedMessageOutputStream,       16, val,    "packed_picture_width" );                 sei.m_packedPictureWidth = val;
+    sei_read_code( pDecodedMessageOutputStream,       16, val,    "packed_picture_height" );                sei.m_packedPictureHeight = val;
+
+    sei.m_rwpTransformType.resize(sei.m_numPackedRegions);
+    sei.m_rwpGuardBandFlag.resize(sei.m_numPackedRegions);
+    sei.m_projRegionWidth.resize(sei.m_numPackedRegions);
+    sei.m_projRegionHeight.resize(sei.m_numPackedRegions);
+    sei.m_rwpProjRegionTop.resize(sei.m_numPackedRegions);
+    sei.m_projRegionLeft.resize(sei.m_numPackedRegions);
+    sei.m_packedRegionWidth.resize(sei.m_numPackedRegions);
+    sei.m_packedRegionHeight.resize(sei.m_numPackedRegions);
+    sei.m_packedRegionTop.resize(sei.m_numPackedRegions);
+    sei.m_packedRegionLeft.resize(sei.m_numPackedRegions);
+    sei.m_rwpLeftGuardBandWidth.resize(sei.m_numPackedRegions);
+    sei.m_rwpRightGuardBandWidth.resize(sei.m_numPackedRegions);
+    sei.m_rwpTopGuardBandHeight.resize(sei.m_numPackedRegions);
+    sei.m_rwpBottomGuardBandHeight.resize(sei.m_numPackedRegions);
+    sei.m_rwpGuardBandNotUsedForPredFlag.resize(sei.m_numPackedRegions);
+    sei.m_rwpGuardBandType.resize(4*sei.m_numPackedRegions);
+
+    for( int i=0; i < sei.m_numPackedRegions; i++ )
     {
-      sei_read_flag( pDecodedMessageOutputStream, code, "nuit_field_based_flag"); currentTimeSet.numUnitFieldBasedFlag = code;
-      sei_read_code( pDecodedMessageOutputStream, 5, code, "counting_type"); currentTimeSet.countingType = code;
-      sei_read_flag( pDecodedMessageOutputStream, code, "full_timestamp_flag"); currentTimeSet.fullTimeStampFlag = code;
-      sei_read_flag( pDecodedMessageOutputStream, code, "discontinuity_flag"); currentTimeSet.discontinuityFlag = code;
-      sei_read_flag( pDecodedMessageOutputStream, code, "cnt_dropped_flag"); currentTimeSet.cntDroppedFlag = code;
-      sei_read_code( pDecodedMessageOutputStream, 9, code, "n_frames"); currentTimeSet.numberOfFrames = code;
-      if(currentTimeSet.fullTimeStampFlag)
-      {
-        sei_read_code( pDecodedMessageOutputStream, 6, code, "seconds_value"); currentTimeSet.secondsValue = code;
-        sei_read_code( pDecodedMessageOutputStream, 6, code, "minutes_value"); currentTimeSet.minutesValue = code;
-        sei_read_code( pDecodedMessageOutputStream, 5, code, "hours_value"); currentTimeSet.hoursValue = code;
-      }
-      else
-      {
-        sei_read_flag( pDecodedMessageOutputStream, code, "seconds_flag"); currentTimeSet.secondsFlag = code;
-        if(currentTimeSet.secondsFlag)
-        {
-          sei_read_code( pDecodedMessageOutputStream, 6, code, "seconds_value"); currentTimeSet.secondsValue = code;
-          sei_read_flag( pDecodedMessageOutputStream, code, "minutes_flag"); currentTimeSet.minutesFlag = code;
-          if(currentTimeSet.minutesFlag)
-          {
-            sei_read_code( pDecodedMessageOutputStream, 6, code, "minutes_value"); currentTimeSet.minutesValue = code;
-            sei_read_flag( pDecodedMessageOutputStream, code, "hours_flag"); currentTimeSet.hoursFlag = code;
-            if(currentTimeSet.hoursFlag)
-            {
-              sei_read_code( pDecodedMessageOutputStream, 5, code, "hours_value"); currentTimeSet.hoursValue = code;
-            }
-          }
-        }
-      }
-      sei_read_code( pDecodedMessageOutputStream, 5, code, "time_offset_length"); currentTimeSet.timeOffsetLength = code;
-      if(currentTimeSet.timeOffsetLength > 0)
+      sei_read_code( pDecodedMessageOutputStream,     4,  val,    "rwp_reserved_zero_4bits" );
+      sei_read_code( pDecodedMessageOutputStream,     3,  val,    "rwp_tTransform_type" );                  sei.m_rwpTransformType[i] = val;
+      sei_read_flag( pDecodedMessageOutputStream,         val,    "rwp_guard_band_flag" );                  sei.m_rwpGuardBandFlag[i] = val;
+      sei_read_code( pDecodedMessageOutputStream,     32, val,    "proj_region_width" );                    sei.m_projRegionWidth[i] = val;
+      sei_read_code( pDecodedMessageOutputStream,     32, val,    "proj_region_height" );                   sei.m_projRegionHeight[i] = val;
+      sei_read_code( pDecodedMessageOutputStream,     32, val,    "rwp_proj_regionTop" );                   sei.m_rwpProjRegionTop[i] = val;
+      sei_read_code( pDecodedMessageOutputStream,     32, val,    "proj_region_left" );                     sei.m_projRegionLeft[i] = val;
+      sei_read_code( pDecodedMessageOutputStream,     16, val,    "packed_region_width" );                  sei.m_packedRegionWidth[i] = val;
+      sei_read_code( pDecodedMessageOutputStream,     16, val,    "packed_region_height" );                 sei.m_packedRegionHeight[i] = val;
+      sei_read_code( pDecodedMessageOutputStream,     16, val,    "packed_region_top" );                    sei.m_packedRegionTop[i] = val;
+      sei_read_code( pDecodedMessageOutputStream,     16, val,    "packed_region_left" );                   sei.m_packedRegionLeft[i] = val;
+      if( sei.m_rwpGuardBandFlag[i] )
       {
-        sei_read_code( pDecodedMessageOutputStream, currentTimeSet.timeOffsetLength, code, "time_offset_value");
-        if((code & (1 << (currentTimeSet.timeOffsetLength-1))) == 0)
+        sei_read_code( pDecodedMessageOutputStream,   8,  val,    "rwp_left_guard_band_width" );            sei.m_rwpLeftGuardBandWidth[i] = val;
+        sei_read_code( pDecodedMessageOutputStream,   8,  val,    "rwp_right_guard_band_width" );           sei.m_rwpRightGuardBandWidth[i] = val;
+        sei_read_code( pDecodedMessageOutputStream,   8,  val,    "rwp_top_guard_band_height" );            sei.m_rwpTopGuardBandHeight[i]  = val;
+        sei_read_code( pDecodedMessageOutputStream,   8,  val,    "rwp_bottom_guard_band_height" );         sei. m_rwpBottomGuardBandHeight[i]  = val;
+        sei_read_flag( pDecodedMessageOutputStream,       val,    "rwp_guard_band_not_used_forPred_flag" ); sei.m_rwpGuardBandNotUsedForPredFlag[i] = val;
+        for( int j=0; j < 4; j++ )
         {
-          currentTimeSet.timeOffsetValue = code;
-        }
-        else
-        {
-          code &= (1<< (currentTimeSet.timeOffsetLength-1)) - 1;
-          currentTimeSet.timeOffsetValue = ~code + 1;
+          sei_read_code( pDecodedMessageOutputStream, 3,  val,     "rwp_guard_band_type" ); sei.m_rwpGuardBandType[i*4 + j] = val;
         }
+        sei_read_code( pDecodedMessageOutputStream,   3,  val,    "rwp_guard_band_reserved_zero_3bits" );
       }
     }
-    sei.timeSetArray[i] = currentTimeSet;
   }
 }
 
-void SEIReader::xParseSEIChromaResamplingFilterHint(SEIChromaResamplingFilterHint& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream)
+void SEIReader::xParseSEIGeneralizedCubemapProjection(SEIGeneralizedCubemapProjection& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream) // Reads the gcmp_* syntax elements of a generalized cubemap projection SEI from the bitstream into sei.
 {
-  uint32_t uiCode;
+  uint32_t val; // scratch holder for each syntax element before it is stored in sei
   output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize);
 
-  sei_read_code( pDecodedMessageOutputStream, 8, uiCode, "ver_chroma_filter_idc"); sei.m_verChromaFilterIdc = uiCode;
-  sei_read_code( pDecodedMessageOutputStream, 8, uiCode, "hor_chroma_filter_idc"); sei.m_horChromaFilterIdc = uiCode;
-  sei_read_flag( pDecodedMessageOutputStream, uiCode, "ver_filtering_field_processing_flag"); sei.m_verFilteringFieldProcessingFlag = uiCode;
-  if(sei.m_verChromaFilterIdc == 1 || sei.m_horChromaFilterIdc == 1)
+  sei_read_flag( pDecodedMessageOutputStream,          val,    "gcmp_cancel_flag" );                      sei.m_gcmpCancelFlag = val;
+  if (!sei.m_gcmpCancelFlag) // every remaining field is read only when the cancel flag is 0
   {
-    sei_read_uvlc( pDecodedMessageOutputStream, uiCode, "target_format_idc"); sei.m_targetFormatIdc = uiCode;
-    if(sei.m_verChromaFilterIdc == 1)
+    sei_read_flag( pDecodedMessageOutputStream,        val,    "gcmp_persistence_flag"    );              sei.m_gcmpPersistenceFlag = val;
+    sei_read_code( pDecodedMessageOutputStream,     3, val,    "gcmp_packing_type" );                     sei.m_gcmpPackingType = val;
+    sei_read_code( pDecodedMessageOutputStream,     2, val,    "gcmp_mapping_function_type"     );        sei.m_gcmpMappingFunctionType = val;
+    // Size the per-face arrays before the face loop below indexes into them.
+    int numFace = sei.m_gcmpPackingType == 4 || sei.m_gcmpPackingType == 5 ? 5 : 6; // packing types 4 and 5 carry 5 faces, every other type 6
+    sei.m_gcmpFaceIndex.resize(numFace);
+    sei.m_gcmpFaceRotation.resize(numFace);
+    if (sei.m_gcmpMappingFunctionType == 2) // the function coefficient arrays are used only for mapping function type 2
     {
-      uint32_t numVerticalFilters;
-      sei_read_uvlc( pDecodedMessageOutputStream, numVerticalFilters, "num_vertical_filters"); sei.m_verFilterCoeff.resize(numVerticalFilters);
-      if(numVerticalFilters > 0)
-      {
-        for(int i = 0; i < numVerticalFilters; i++)
-        {
-          uint32_t verTapLengthMinus1;
-          sei_read_uvlc( pDecodedMessageOutputStream, verTapLengthMinus1, "ver_tap_length_minus_1"); sei.m_verFilterCoeff[i].resize(verTapLengthMinus1+1);
-          for(int j = 0; j < (verTapLengthMinus1 + 1); j++)
-          {
-            sei_read_svlc( pDecodedMessageOutputStream, sei.m_verFilterCoeff[i][j], "ver_filter_coeff");
-          }
-        }
-      }
+      sei.m_gcmpFunctionCoeffU.resize(numFace);
+      sei.m_gcmpFunctionUAffectedByVFlag.resize(numFace);
+      sei.m_gcmpFunctionCoeffV.resize(numFace);
+      sei.m_gcmpFunctionVAffectedByUFlag.resize(numFace);
     }
-    if(sei.m_horChromaFilterIdc == 1)
+    // Per face: index and rotation, plus the U/V function parameters for mapping function type 2.
+    for (int i = 0; i < numFace; i++)
     {
-      uint32_t numHorizontalFilters;
-      sei_read_uvlc( pDecodedMessageOutputStream, numHorizontalFilters, "num_horizontal_filters"); sei.m_horFilterCoeff.resize(numHorizontalFilters);
-      if(numHorizontalFilters  > 0)
+      sei_read_code( pDecodedMessageOutputStream,   3, val,    "gcmp_face_index" );                       sei.m_gcmpFaceIndex[i] = val;
+      sei_read_code( pDecodedMessageOutputStream,   2, val,    "gcmp_face_rotation" );                    sei.m_gcmpFaceRotation[i] = val;
+      if (sei.m_gcmpMappingFunctionType == 2)
       {
-        for(int i = 0; i < numHorizontalFilters; i++)
-        {
-          uint32_t horTapLengthMinus1;
-          sei_read_uvlc( pDecodedMessageOutputStream, horTapLengthMinus1, "hor_tap_length_minus_1"); sei.m_horFilterCoeff[i].resize(horTapLengthMinus1+1);
-          for(int j = 0; j < (horTapLengthMinus1 + 1); j++)
-          {
-            sei_read_svlc( pDecodedMessageOutputStream, sei.m_horFilterCoeff[i][j], "hor_filter_coeff");
-          }
-        }
+        sei_read_code( pDecodedMessageOutputStream, 7, val,    "gcmp_function_coeff_u" );                 sei.m_gcmpFunctionCoeffU[i] = val;
+        sei_read_flag( pDecodedMessageOutputStream,    val,    "gcmp_function_u_affected_by_v_flag"    ); sei.m_gcmpFunctionUAffectedByVFlag[i] = val;
+        sei_read_code( pDecodedMessageOutputStream, 7, val,    "gcmp_function_coeff_v" );                 sei.m_gcmpFunctionCoeffV[i] = val;
+        sei_read_flag( pDecodedMessageOutputStream,    val,    "gcmp_function_v_affected_by_u_flag"    ); sei.m_gcmpFunctionVAffectedByUFlag[i] = val;
       }
     }
-  }
-}
-
-void SEIReader::xParseSEIKneeFunctionInfo(SEIKneeFunctionInfo& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream)
-{
-  int i;
-  uint32_t val;
-  output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize);
-
-  sei_read_uvlc( pDecodedMessageOutputStream, val, "knee_function_id" );                   sei.m_kneeId = val;
-  sei_read_flag( pDecodedMessageOutputStream, val, "knee_function_cancel_flag" );          sei.m_kneeCancelFlag = val;
-  if ( !sei.m_kneeCancelFlag )
-  {
-    sei_read_flag( pDecodedMessageOutputStream, val, "knee_function_persistence_flag" );   sei.m_kneePersistenceFlag = val;
-    sei_read_code( pDecodedMessageOutputStream, 32, val, "input_d_range" );                sei.m_kneeInputDrange = val;
-    sei_read_code( pDecodedMessageOutputStream, 32, val, "input_disp_luminance" );         sei.m_kneeInputDispLuminance = val;
-    sei_read_code( pDecodedMessageOutputStream, 32, val, "output_d_range" );               sei.m_kneeOutputDrange = val;
-    sei_read_code( pDecodedMessageOutputStream, 32, val, "output_disp_luminance" );        sei.m_kneeOutputDispLuminance = val;
-    sei_read_uvlc( pDecodedMessageOutputStream, val, "num_knee_points_minus1" );           sei.m_kneeNumKneePointsMinus1 = val;
-    CHECK( sei.m_kneeNumKneePointsMinus1 <= 0, "Invali state" );
-    sei.m_kneeInputKneePoint.resize(sei.m_kneeNumKneePointsMinus1+1);
-    sei.m_kneeOutputKneePoint.resize(sei.m_kneeNumKneePointsMinus1+1);
-    for(i = 0; i <= sei.m_kneeNumKneePointsMinus1; i++ )
+    sei_read_flag( pDecodedMessageOutputStream,        val,    "gcmp_guard_band_flag" );                  sei.m_gcmpGuardBandFlag = val;
+    if (sei.m_gcmpGuardBandFlag) // boundary type and sample count are read only when the guard band flag is set
     {
-      sei_read_code( pDecodedMessageOutputStream, 10, val, "input_knee_point" );           sei.m_kneeInputKneePoint[i] = val;
-      sei_read_code( pDecodedMessageOutputStream, 10, val, "output_knee_point" );          sei.m_kneeOutputKneePoint[i] = val;
+      sei_read_flag( pDecodedMessageOutputStream,      val,    "gcmp_guard_band_boundary_type" );         sei.m_gcmpGuardBandBoundaryType  = val;
+      sei_read_code( pDecodedMessageOutputStream,   4, val,    "gcmp_guard_band_samples_minus1" );        sei.m_gcmpGuardBandSamplesMinus1 = val;
     }
   }
 }
 
-void SEIReader::xParseSEIColourRemappingInfo(SEIColourRemappingInfo& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream)
+void SEIReader::xParseSEISubpictureLevelInfo(SEISubpicureLevelInfo& sei, const SPS *sps, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream) // Reads a subpicture level information SEI into sei; sps supplies the subpicture count and may be nullptr, in which case nothing is parsed.
 {
-  uint32_t  uiVal;
-  int   iVal;
   output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize);
-
-  sei_read_uvlc( pDecodedMessageOutputStream, uiVal, "colour_remap_id" );          sei.m_colourRemapId = uiVal;
-  sei_read_flag( pDecodedMessageOutputStream, uiVal, "colour_remap_cancel_flag" ); sei.m_colourRemapCancelFlag = uiVal;
-  if( !sei.m_colourRemapCancelFlag )
+  if (sps == nullptr) // the fraction arrays below are sized from sps->getNumSubPics(), so the payload cannot be read without an SPS
   {
-    sei_read_flag( pDecodedMessageOutputStream, uiVal, "colour_remap_persistence_flag" );                sei.m_colourRemapPersistenceFlag = uiVal;
-    sei_read_flag( pDecodedMessageOutputStream, uiVal, "colour_remap_video_signal_info_present_flag" );  sei.m_colourRemapVideoSignalInfoPresentFlag = uiVal;
-    if ( sei.m_colourRemapVideoSignalInfoPresentFlag )
-    {
-      sei_read_flag( pDecodedMessageOutputStream, uiVal,    "colour_remap_full_range_flag" );            sei.m_colourRemapFullRangeFlag = uiVal;
-      sei_read_code( pDecodedMessageOutputStream, 8, uiVal, "colour_remap_primaries" );                  sei.m_colourRemapPrimaries = uiVal;
-      sei_read_code( pDecodedMessageOutputStream, 8, uiVal, "colour_remap_transfer_function" );          sei.m_colourRemapTransferFunction = uiVal;
-      sei_read_code( pDecodedMessageOutputStream, 8, uiVal, "colour_remap_matrix_coefficients" );        sei.m_colourRemapMatrixCoefficients = uiVal;
-    }
-    sei_read_code( pDecodedMessageOutputStream, 8, uiVal, "colour_remap_input_bit_depth" );              sei.m_colourRemapInputBitDepth = uiVal;
-    sei_read_code( pDecodedMessageOutputStream, 8, uiVal, "colour_remap_bit_depth" );                    sei.m_colourRemapBitDepth = uiVal;
+    fprintf (stderr, "no SPS available, not parsing Subpicture level information SEI\n"); // newline-terminated so later output is not glued onto this diagnostic
+    return;
+  }
+  uint32_t val; // scratch holder for each syntax element before it is stored in sei
+  sei_read_code( pDecodedMessageOutputStream,   4,  val,    "sli_seq_parameter_set_id" );         sei.m_sliSeqParameterSetId  = val;
+  sei_read_code( pDecodedMessageOutputStream,   3,  val,    "num_ref_levels_minus1" );            sei.m_numRefLevels  = val + 1; // coded as minus1; the stored value is the actual count
+  sei_read_flag( pDecodedMessageOutputStream,       val,    "explicit_fraction_present_flag" );   sei.m_explicitFractionPresentFlag = val;
+  // Size the per-level arrays before the loop below indexes into them.
+  sei.m_refLevelIdc.resize(sei.m_numRefLevels);
+  if (sei.m_explicitFractionPresentFlag)
+  {
+    sei.m_refLevelFraction.resize(sei.m_numRefLevels);
+  }
 
-    for( int c=0 ; c<3 ; c++ )
+  for( int i = 0; i  <  sei.m_numRefLevels; i++ ) // one level indicator per reference level, optionally followed by one fraction per subpicture
+  {
+    sei_read_code( pDecodedMessageOutputStream,   8,  val,    "ref_level_idc[i]" );         sei.m_refLevelIdc[i]  = (Level::Name) val;
+    if( sei.m_explicitFractionPresentFlag )
     {
-      sei_read_code( pDecodedMessageOutputStream, 8, uiVal, "pre_lut_num_val_minus1[c]" ); sei.m_preLutNumValMinus1[c] = (uiVal==0) ? 1 : uiVal;
-      sei.m_preLut[c].resize(sei.m_preLutNumValMinus1[c]+1);
-      if( uiVal> 0 )
-      {
-        for ( int i=0 ; i<=sei.m_preLutNumValMinus1[c] ; i++ )
-        {
-          sei_read_code( pDecodedMessageOutputStream, (( sei.m_colourRemapInputBitDepth   + 7 ) >> 3 ) << 3, uiVal, "pre_lut_coded_value[c][i]" );  sei.m_preLut[c][i].codedValue  = uiVal;
-          sei_read_code( pDecodedMessageOutputStream, (( sei.m_colourRemapBitDepth + 7 ) >> 3 ) << 3, uiVal, "pre_lut_target_value[c][i]" ); sei.m_preLut[c][i].targetValue = uiVal;
-        }
-      }
-      else // pre_lut_num_val_minus1[c] == 0
-      {
-        sei.m_preLut[c][0].codedValue  = 0;
-        sei.m_preLut[c][0].targetValue = 0;
-        sei.m_preLut[c][1].codedValue  = (1 << sei.m_colourRemapInputBitDepth) - 1 ;
-        sei.m_preLut[c][1].targetValue = (1 << sei.m_colourRemapBitDepth) - 1 ;
-      }
-    }
+      int numSubPics = sps->getNumSubPics(); // the subpicture count is taken from the SPS, it is not coded in this SEI
+      sei.m_refLevelFraction[i].resize(numSubPics);
 
-    sei_read_flag( pDecodedMessageOutputStream, uiVal,      "colour_remap_matrix_present_flag" ); sei.m_colourRemapMatrixPresentFlag = uiVal;
-    if( sei.m_colourRemapMatrixPresentFlag )
-    {
-      sei_read_code( pDecodedMessageOutputStream, 4, uiVal, "log2_matrix_denom" ); sei.m_log2MatrixDenom = uiVal;
-      for ( int c=0 ; c<3 ; c++ )
-      {
-        for ( int i=0 ; i<3 ; i++ )
-        {
-          sei_read_svlc( pDecodedMessageOutputStream, iVal, "colour_remap_coeffs[c][i]" ); sei.m_colourRemapCoeffs[c][i] = iVal;
-        }
-      }
-    }
-    else // setting default matrix (I3)
-    {
-      sei.m_log2MatrixDenom = 10;
-      for ( int c=0 ; c<3 ; c++ )
-      {
-        for ( int i=0 ; i<3 ; i++ )
-        {
-          sei.m_colourRemapCoeffs[c][i] = (c==i) << sei.m_log2MatrixDenom;
-        }
-      }
-    }
-    for( int c=0 ; c<3 ; c++ )
-    {
-      sei_read_code( pDecodedMessageOutputStream, 8, uiVal, "post_lut_num_val_minus1[c]" ); sei.m_postLutNumValMinus1[c] = (uiVal==0) ? 1 : uiVal;
-      sei.m_postLut[c].resize(sei.m_postLutNumValMinus1[c]+1);
-      if( uiVal > 0 )
+      for( int j = 0; j  <  numSubPics; j++ )
       {
-        for ( int i=0 ; i<=sei.m_postLutNumValMinus1[c] ; i++ )
-        {
-          sei_read_code( pDecodedMessageOutputStream, (( sei.m_colourRemapBitDepth + 7 ) >> 3 ) << 3, uiVal, "post_lut_coded_value[c][i]" );  sei.m_postLut[c][i].codedValue = uiVal;
-          sei_read_code( pDecodedMessageOutputStream, (( sei.m_colourRemapBitDepth + 7 ) >> 3 ) << 3, uiVal, "post_lut_target_value[c][i]" ); sei.m_postLut[c][i].targetValue = uiVal;
-        }
-      }
-      else
-      {
-        sei.m_postLut[c][0].codedValue  = 0;
-        sei.m_postLut[c][0].targetValue = 0;
-        sei.m_postLut[c][1].targetValue = (1 << sei.m_colourRemapBitDepth) - 1;
-        sei.m_postLut[c][1].codedValue  = (1 << sei.m_colourRemapBitDepth) - 1;
+        sei_read_code( pDecodedMessageOutputStream,   8,  val,    "ref_level_fraction_minus1[i][j]" );  sei.m_refLevelFraction[i][j]= val; // the coded minus1 value is stored as read
       }
     }
   }
 }
 
-void SEIReader::xParseSEIMasteringDisplayColourVolume(SEIMasteringDisplayColourVolume& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream)
-{
-  uint32_t code;
-  output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize);
-
-  sei_read_code( pDecodedMessageOutputStream, 16, code, "display_primaries_x[0]" ); sei.values.primaries[0][0] = code;
-  sei_read_code( pDecodedMessageOutputStream, 16, code, "display_primaries_y[0]" ); sei.values.primaries[0][1] = code;
-
-  sei_read_code( pDecodedMessageOutputStream, 16, code, "display_primaries_x[1]" ); sei.values.primaries[1][0] = code;
-  sei_read_code( pDecodedMessageOutputStream, 16, code, "display_primaries_y[1]" ); sei.values.primaries[1][1] = code;
-
-  sei_read_code( pDecodedMessageOutputStream, 16, code, "display_primaries_x[2]" ); sei.values.primaries[2][0] = code;
-  sei_read_code( pDecodedMessageOutputStream, 16, code, "display_primaries_y[2]" ); sei.values.primaries[2][1] = code;
-
-
-  sei_read_code( pDecodedMessageOutputStream, 16, code, "white_point_x" ); sei.values.whitePoint[0] = code;
-  sei_read_code( pDecodedMessageOutputStream, 16, code, "white_point_y" ); sei.values.whitePoint[1] = code;
-
-  sei_read_code( pDecodedMessageOutputStream, 32, code, "max_display_mastering_luminance" ); sei.values.maxLuminance = code;
-  sei_read_code( pDecodedMessageOutputStream, 32, code, "min_display_mastering_luminance" ); sei.values.minLuminance = code;
-}
-
-#if U0033_ALTERNATIVE_TRANSFER_CHARACTERISTICS_SEI
-void SEIReader::xParseSEIAlternativeTransferCharacteristics(SEIAlternativeTransferCharacteristics& sei, uint32_t payloadSize, std::ostream* pDecodedMessageOutputStream)
+void SEIReader::xParseSEISampleAspectRatioInfo(SEISampleAspectRatioInfo& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream) // Reads the sari_* syntax elements of a sample aspect ratio information SEI from the bitstream into sei.
 {
-  uint32_t code;
   output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize);
+  uint32_t val; // scratch holder for each syntax element before it is stored in sei
 
-  sei_read_code(pDecodedMessageOutputStream, 8, code, "preferred_transfer_characteristics"); sei.m_preferredTransferCharacteristics = code;
+  sei_read_flag( pDecodedMessageOutputStream,           val,    "sari_cancel_flag" );                      sei.m_sariCancelFlag = val;
+  if (!sei.m_sariCancelFlag) // every remaining field is read only when the cancel flag is 0
+  {
+    sei_read_flag( pDecodedMessageOutputStream,         val,    "sari_persistence_flag" );                 sei.m_sariPersistenceFlag = val;
+    sei_read_code( pDecodedMessageOutputStream,     8,  val,    "sari_aspect_ratio_idc" );                 sei.m_sariAspectRatioIdc = val;
+    if (sei.m_sariAspectRatioIdc == 255)
+    { // an idc of 255 is followed by an explicit 16-bit width and height
+      sei_read_code( pDecodedMessageOutputStream,  16,  val,    "sari_sar_width" );                        sei.m_sariSarWidth = val;
+      sei_read_code( pDecodedMessageOutputStream,  16,  val,    "sari_sar_height" );                       sei.m_sariSarHeight = val;
+    }
+  }
 }
-#endif
 
-void SEIReader::xParseSEIGreenMetadataInfo(SEIGreenMetadataInfo& sei, uint32_t payloadSize, std::ostream* pDecodedMessageOutputStream)
-{
-  uint32_t code;
-  output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize);
-
-  sei_read_code(pDecodedMessageOutputStream, 8, code, "green_metadata_type");
-  sei.m_greenMetadataType = code;
 
-  sei_read_code(pDecodedMessageOutputStream, 8, code, "xsd_metric_type");
-  sei.m_xsdMetricType = code;
-
-  sei_read_code(pDecodedMessageOutputStream, 16, code, "xsd_metric_value");
-  sei.m_xsdMetricValue = code;
-}
 
 //! \}
diff --git a/source/Lib/DecoderLib/SEIread.h b/source/Lib/DecoderLib/SEIread.h
index 72892a7e6cd285a9dbb5b1f029fd291da81790a8..50988f70cfb87855d24968517629bfbe2b155f5e 100644
--- a/source/Lib/DecoderLib/SEIread.h
+++ b/source/Lib/DecoderLib/SEIread.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -55,39 +55,36 @@ class SEIReader: public VLCReader
 public:
   SEIReader() {};
   virtual ~SEIReader() {};
-  void parseSEImessage(InputBitstream* bs, SEIMessages& seis, const NalUnitType nalUnitType, const SPS *sps, std::ostream *pDecodedMessageOutputStream);
+  void parseSEImessage(InputBitstream* bs, SEIMessages& seis, const NalUnitType nalUnitType, const uint32_t temporalId, const SPS *sps, HRD &hrd, std::ostream *pDecodedMessageOutputStream);
 
 protected:
-  void xReadSEImessage                        (SEIMessages& seis, const NalUnitType nalUnitType, const SPS *sps, std::ostream *pDecodedMessageOutputStream);
+  void xReadSEImessage                        (SEIMessages& seis, const NalUnitType nalUnitType, const uint32_t temporalId, const SPS *sps, HRD &hrd, std::ostream *pDecodedMessageOutputStream);
   void xParseSEIuserDataUnregistered          (SEIuserDataUnregistered &sei,          uint32_t payloadSize,                     std::ostream *pDecodedMessageOutputStream);
-  void xParseSEIActiveParameterSets           (SEIActiveParameterSets  &sei,          uint32_t payloadSize,                     std::ostream *pDecodedMessageOutputStream);
-  void xParseSEIDecodingUnitInfo              (SEIDecodingUnitInfo& sei,              uint32_t payloadSize, const SPS *sps, std::ostream *pDecodedMessageOutputStream);
+  void xParseSEIDecodingUnitInfo              (SEIDecodingUnitInfo& sei,              uint32_t payloadSize, const SEIBufferingPeriod& bp, const uint32_t temporalId, std::ostream *pDecodedMessageOutputStream);
   void xParseSEIDecodedPictureHash            (SEIDecodedPictureHash& sei,            uint32_t payloadSize,                     std::ostream *pDecodedMessageOutputStream);
-  void xParseSEIBufferingPeriod               (SEIBufferingPeriod& sei,               uint32_t payloadSize, const SPS *sps, std::ostream *pDecodedMessageOutputStream);
-  void xParseSEIPictureTiming                 (SEIPictureTiming& sei,                 uint32_t payloadSize, const SPS *sps, std::ostream *pDecodedMessageOutputStream);
-  void xParseSEIRecoveryPoint                 (SEIRecoveryPoint& sei,                 uint32_t payloadSize,                     std::ostream *pDecodedMessageOutputStream);
+  void xParseSEIBufferingPeriod               (SEIBufferingPeriod& sei,               uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream);
+  void xParseSEIPictureTiming                 (SEIPictureTiming& sei,                 uint32_t payloadSize, const uint32_t temporalId, const SEIBufferingPeriod& bp, std::ostream *pDecodedMessageOutputStream);
+  void xParseSEIFrameFieldinfo                (SEIFrameFieldInfo& sei,                 uint32_t payloadSize,                    std::ostream *pDecodedMessageOutputStream);
+  void xParseSEIDependentRAPIndication        (SEIDependentRAPIndication& sei,        uint32_t payLoadSize,                     std::ostream *pDecodedMessageOutputStream);
   void xParseSEIFramePacking                  (SEIFramePacking& sei,                  uint32_t payloadSize,                     std::ostream *pDecodedMessageOutputStream);
-  void xParseSEISegmentedRectFramePacking     (SEISegmentedRectFramePacking& sei,     uint32_t payloadSize,                     std::ostream *pDecodedMessageOutputStream);
-  void xParseSEIDisplayOrientation            (SEIDisplayOrientation &sei,            uint32_t payloadSize,                     std::ostream *pDecodedMessageOutputStream);
-  void xParseSEITemporalLevel0Index           (SEITemporalLevel0Index &sei,           uint32_t payloadSize,                     std::ostream *pDecodedMessageOutputStream);
-  void xParseSEIRegionRefreshInfo             (SEIGradualDecodingRefreshInfo &sei,    uint32_t payloadSize,                     std::ostream *pDecodedMessageOutputStream);
-  void xParseSEINoDisplay                     (SEINoDisplay &sei,                     uint32_t payloadSize,                     std::ostream *pDecodedMessageOutputStream);
-  void xParseSEIToneMappingInfo               (SEIToneMappingInfo& sei,               uint32_t payloadSize,                     std::ostream *pDecodedMessageOutputStream);
-  void xParseSEISOPDescription                (SEISOPDescription &sei,                uint32_t payloadSize,                     std::ostream *pDecodedMessageOutputStream);
-  void xParseSEIScalableNesting               (SEIScalableNesting& sei, const NalUnitType nalUnitType, uint32_t payloadSize, const SPS *sps, std::ostream *pDecodedMessageOutputStream);
-#if HEVC_TILES_WPP
-  void xParseSEITempMotionConstraintsTileSets (SEITempMotionConstrainedTileSets& sei, uint32_t payloadSize,                     std::ostream *pDecodedMessageOutputStream);
-#endif
-  void xParseSEITimeCode                      (SEITimeCode& sei,                      uint32_t payloadSize,                     std::ostream *pDecodedMessageOutputStream);
-  void xParseSEIChromaResamplingFilterHint    (SEIChromaResamplingFilterHint& sei,    uint32_t payloadSize,                     std::ostream *pDecodedMessageOutputStream);
-  void xParseSEIKneeFunctionInfo              (SEIKneeFunctionInfo& sei,              uint32_t payloadSize,                     std::ostream *pDecodedMessageOutputStream);
   void xParseSEIMasteringDisplayColourVolume  (SEIMasteringDisplayColourVolume& sei,  uint32_t payloadSize,                     std::ostream *pDecodedMessageOutputStream);
-  void xParseSEIColourRemappingInfo           (SEIColourRemappingInfo& sei,           uint32_t payloadSize,                     std::ostream *pDecodedMessageOutputStream);
 #if U0033_ALTERNATIVE_TRANSFER_CHARACTERISTICS_SEI
   void xParseSEIAlternativeTransferCharacteristics(SEIAlternativeTransferCharacteristics& sei,              uint32_t payLoadSize,                     std::ostream *pDecodedMessageOutputStream);
 #endif
-  void xParseSEIGreenMetadataInfo             (SEIGreenMetadataInfo& sei,             uint32_t payLoadSize,                     std::ostream *pDecodedMessageOutputStream);
+  void xParseSEIEquirectangularProjection     (SEIEquirectangularProjection &sei,     uint32_t payloadSize,                     std::ostream *pDecodedMessageOutputStream);
+  void xParseSEISphereRotation                (SEISphereRotation &sei,                uint32_t payloadSize,                     std::ostream *pDecodedMessageOutputStream);
+  void xParseSEIOmniViewport                  (SEIOmniViewport& sei,                  uint32_t payloadSize,                     std::ostream *pDecodedMessageOutputStream);
+  void xParseSEIRegionWisePacking             (SEIRegionWisePacking& sei,             uint32_t payloadSize,                     std::ostream *pDecodedMessageOutputStream);
+  void xParseSEIGeneralizedCubemapProjection  (SEIGeneralizedCubemapProjection &sei,  uint32_t payloadSize,                     std::ostream *pDecodedMessageOutputStream);
+  void xParseSEISubpictureLevelInfo           (SEISubpicureLevelInfo& sei, const SPS *sps, uint32_t payloadSize,            std::ostream *pDecodedMessageOutputStream);
+  void xParseSEISampleAspectRatioInfo         (SEISampleAspectRatioInfo& sei,         uint32_t payloadSize,                     std::ostream *pDecodedMessageOutputStream);
+  void xParseSEIUserDataRegistered            (SEIUserDataRegistered& sei,            uint32_t payloadSize,                     std::ostream *pDecodedMessageOutputStream);
+  void xParseSEIFilmGrainCharacteristics      (SEIFilmGrainCharacteristics& sei,      uint32_t payloadSize,                     std::ostream *pDecodedMessageOutputStream);
+  void xParseSEIContentLightLevelInfo         (SEIContentLightLevelInfo& sei,         uint32_t payloadSize,                     std::ostream *pDecodedMessageOutputStream);
+  void xParseSEIAmbientViewingEnvironment     (SEIAmbientViewingEnvironment& sei,     uint32_t payloadSize,                     std::ostream *pDecodedMessageOutputStream);
+  void xParseSEIContentColourVolume           (SEIContentColourVolume& sei,           uint32_t payloadSize,                     std::ostream *pDecodedMessageOutputStream);
 
+  void sei_read_scode(std::ostream *pOS, uint32_t length, int& code, const char *pSymbolName);
   void sei_read_code(std::ostream *pOS, uint32_t uiLength, uint32_t& ruiCode, const char *pSymbolName);
   void sei_read_uvlc(std::ostream *pOS,                uint32_t& ruiCode, const char *pSymbolName);
   void sei_read_svlc(std::ostream *pOS,                int&  ruiCode, const char *pSymbolName);
diff --git a/source/Lib/DecoderLib/VLCReader.cpp b/source/Lib/DecoderLib/VLCReader.cpp
index 896ca3944445b54420348ac33381dbe80de66f1f..19bca72393d5cc00328ea8c9ff6faf64e2352dc0 100644
--- a/source/Lib/DecoderLib/VLCReader.cpp
+++ b/source/Lib/DecoderLib/VLCReader.cpp
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2019, ITU/ISO/IEC
+* Copyright (c) 2010-2020, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -47,7 +47,6 @@
 #endif
 #include "CommonLib/AdaptiveLoopFilter.h"
 
-
 #if ENABLE_TRACING
 
 void  VLCReader::xReadCodeTr(uint32_t length, uint32_t& rValue, const char *pSymbolName)
@@ -104,6 +103,31 @@ void xTraceFillerData ()
 
 #endif
 
+#if RExt__DECODER_DEBUG_BIT_STATISTICS || ENABLE_TRACING
+void VLCReader::xReadSCode (uint32_t length, int& value, const char *pSymbolName)
+#else
+void VLCReader::xReadSCode (uint32_t length, int& value)
+#endif
+{ // Reads a length-bit two's-complement field from the bitstream and sign-extends it into value; pSymbolName is used only for statistics/tracing.
+  uint32_t val;
+  CHECK( length == 0 || length > 32, "Reading a signed code with a length outside 1..32" ); // CHECK, as in xReadCode: length-1 is a shift count below, so the guard must not vanish under NDEBUG
+  m_pcBitstream->read (length, val);
+  value= length>=32 ? int(val) : ( (-int( val & (uint32_t(1)<<(length-1)))) | int(val) ); // negating the isolated sign bit sets every bit from the sign position upwards
+
+#if RExt__DECODER_DEBUG_BIT_STATISTICS
+  CodingStatistics::IncrementStatisticEP(pSymbolName, length, value);
+#endif
+#if ENABLE_TRACING
+  if (length < 10) // the two trace formats differ only in the padding after i(%d)
+  {
+    DTRACE( g_trace_ctx, D_HEADER, "%-50s i(%d)  : %d\n", pSymbolName, length, value );
+  }
+  else
+  {
+    DTRACE( g_trace_ctx, D_HEADER, "%-50s i(%d) : %d\n", pSymbolName, length, value );
+  }
+#endif
+}
 
 // ====================================================================================================================
 // Protected member functions
@@ -114,7 +138,7 @@ void VLCReader::xReadCode (uint32_t uiLength, uint32_t& ruiCode, const char *pSy
 void VLCReader::xReadCode (uint32_t uiLength, uint32_t& ruiCode)
 #endif
 {
-  CHECK( uiLength == 0, "Reading a code of lenght '0'" );
+  CHECK( uiLength == 0, "Reading a code of length '0'" );
   m_pcBitstream->read (uiLength, ruiCode);
 #if RExt__DECODER_DEBUG_BIT_STATISTICS
   CodingStatistics::IncrementStatisticEP(pSymbolName, uiLength, ruiCode);
@@ -270,106 +294,118 @@ HLSyntaxReader::~HLSyntaxReader()
 // Public member functions
 // ====================================================================================================================
 
-void HLSyntaxReader::parseShortTermRefPicSet( SPS* sps, ReferencePictureSet* rps, int idx )
+void HLSyntaxReader::copyRefPicList(SPS* sps, ReferencePictureList* source_rpl, ReferencePictureList* dest_rp) // copies picture counts and entries from source_rpl into dest_rp, gated by the SPS flags
 {
-  uint32_t code;
-  uint32_t interRPSPred;
-  if (idx > 0)
+  dest_rp->setNumberOfShorttermPictures(source_rpl->getNumberOfShorttermPictures());
+
+  dest_rp->setNumberOfInterLayerPictures( sps->getInterLayerPresentFlag() ? source_rpl->getNumberOfInterLayerPictures() : 0 ); // count comes from the source list (previously dest_rp was assigned its own value)
+
+  if( sps->getLongTermRefsPresent() )
   {
-    READ_FLAG(interRPSPred, "inter_ref_pic_set_prediction_flag");  rps->setInterRPSPrediction(interRPSPred);
+    dest_rp->setNumberOfLongtermPictures( source_rpl->getNumberOfLongtermPictures() ); // count comes from the source list (previously dest_rp was assigned its own value)
   }
   else
+    dest_rp->setNumberOfLongtermPictures(0);
+
+  uint32_t numRefPic = dest_rp->getNumberOfShorttermPictures() + dest_rp->getNumberOfLongtermPictures(); // NOTE(review): inter-layer entries are not included in this bound - confirm that is intended
+
+  for( int ii = 0; ii < numRefPic; ii++ ) // entry-by-entry copy of identifier, long-term flag, inter-layer flag and inter-layer index
+  {
+    dest_rp->setRefPicIdentifier( ii, source_rpl->getRefPicIdentifier( ii ), source_rpl->isRefPicLongterm( ii ), source_rpl->isInterLayerRefPic( ii ), source_rpl->getInterLayerRefPicIdx( ii ) );
+  }
+}
+
+void HLSyntaxReader::parseRefPicList(SPS* sps, ReferencePictureList* rpl)
+{ // Reads one reference picture list description from the bitstream into 'rpl'; 'sps' supplies the flags that decide which syntax elements are present.
+  uint32_t code;
+  READ_UVLC(code, "num_ref_entries[ listIdx ][ rplsIdx ]");
+  uint32_t numRefPic = code; // NOTE(review): used as the loop bound below without a range check in this function - confirm the index is guarded elsewhere
+  uint32_t numStrp = 0; // running count of short-term entries
+  uint32_t numLtrp = 0; // running count of long-term entries
+  uint32_t numIlrp = 0; // running count of inter-layer entries
+
+  if (sps->getLongTermRefsPresent())
   {
-    interRPSPred = false;
-    rps->setInterRPSPrediction(false);
+    READ_FLAG(code, "ltrp_in_slice_header_flag[ listIdx ][ rplsIdx ]");
+    rpl->setLtrpInSliceHeaderFlag(code);
   }
 
-  if (interRPSPred)
+  bool isLongTerm;
+  int prevDelta = MAX_INT; // overwritten by the first short-term entry before it is ever added to
+  int deltaValue = 0;
+  bool firstSTRP = true;
+
+  rpl->setInterLayerPresentFlag( sps->getInterLayerPresentFlag() );
+
+  for (int ii = 0; ii < numRefPic; ii++) // each entry is exactly one of: inter-layer, short-term, long-term
   {
-    uint32_t bit;
-    if(idx == sps->getRPSList()->getNumberOfReferencePictureSets())
+    uint32_t isInterLayerRefPic = 0;
+
+    if( rpl->getInterLayerPresentFlag() )
     {
-      READ_UVLC(code, "delta_idx_minus1" ); // delta index of the Reference Picture Set used for prediction minus 1
+      READ_FLAG( isInterLayerRefPic, "inter_layer_ref_pic_flag[ listIdx ][ rplsIdx ][ i ]" );
+
+      if( isInterLayerRefPic )
+      {
+        READ_UVLC( code, "ilrp_idx[ listIdx ][ rplsIdx ][ i ]" );
+        rpl->setRefPicIdentifier( ii, 0, true, true, code ); // identifier 0, both flags set, 'code' passed as the inter-layer index
+        numIlrp++;
+      }
     }
-    else
+
+    if( !isInterLayerRefPic )
     {
-      code = 0;
+    isLongTerm = false;
+    if (sps->getLongTermRefsPresent())
+    {
+      READ_FLAG(code, "st_ref_pic_flag[ listIdx ][ rplsIdx ][ i ]");
+      isLongTerm = (code == 1) ? false : true; // the flag signals "short-term", so long-term is its inverse
     }
-    rps->setDeltaRIdxMinus1(code); // th we need that for proper transcoding
-    CHECK(code > idx-1, "Code exceeds boundary"); // delta_idx_minus1 shall not be larger than idx-1, otherwise we will predict from a negative row position that does not exist. When idx equals 0 there is no legal value and interRPSPred must be zero. See J0185-r2
-    int rIdx =  idx - 1 - code;
-    CHECK(rIdx > idx-1 || rIdx < 0, "Invalid index"); // Made assert tighter; if rIdx = idx then prediction is done from itself. rIdx must belong to range 0, idx-1, inclusive, see J0185-r2
-    ReferencePictureSet*   rpsRef = sps->getRPSList()->getReferencePictureSet(rIdx);
-    int k = 0, k0 = 0, k1 = 0;
-    READ_CODE(1, bit, "delta_rps_sign"); // delta_RPS_sign
-    READ_UVLC(code, "abs_delta_rps_minus1");  // absolute delta RPS minus 1
-    int deltaRPS = (1 - 2 * bit) * (code + 1); // delta_RPS
-
-    rps->setDeltaRPS( deltaRPS ); // th we need that for proper transcoding
+    else
+      isLongTerm = false;
 
-    for(int j=0 ; j <= rpsRef->getNumberOfPictures(); j++)
+    if (!isLongTerm)
     {
-      READ_CODE(1, bit, "used_by_curr_pic_flag" ); //first bit is "1" if Idc is 1
-      int refIdc = bit;
-      if (refIdc == 0)
+      READ_UVLC(code, "abs_delta_poc_st[ listIdx ][ rplsIdx ][ i ]");
+      if( !sps->getUseWP() && !sps->getUseWPBiPred() ) // without either weighted-prediction mode the coded value is incremented by one
       {
-        READ_CODE(1, bit, "use_delta_flag" ); //second bit is "1" if Idc is 2, "0" otherwise.
-        refIdc = bit<<1; //second bit is "1" if refIdc is 2, "0" if refIdc = 0.
+        code++;
       }
-      if (refIdc == 1 || refIdc == 2)
+      int readValue = code;
+      if (readValue > 0)
+        READ_FLAG(code, "strp_entry_sign_flag[ listIdx ][ rplsIdx ][ i ]");
+      else
+        code = 1; // no sign flag is read for a zero magnitude; treated as positive
+      readValue = (code) ? readValue : 0 - readValue; //true means positive delta POC -- false otherwise
+      if (firstSTRP)
       {
-        int deltaPOC = deltaRPS + ((j < rpsRef->getNumberOfPictures())? rpsRef->getDeltaPOC(j) : 0);
-        rps->setDeltaPOC(k, deltaPOC);
-        rps->setUsed(k, (refIdc == 1));
-
-        if (deltaPOC < 0)
-        {
-          k0++;
-        }
-        else
-        {
-          k1++;
-        }
-        k++;
+        firstSTRP = false;
+        prevDelta = deltaValue = readValue; // the first short-term entry is stored as read
+      }
+      else
+      {
+        deltaValue = prevDelta + readValue; // later short-term entries accumulate onto the previous short-term value
+        prevDelta = deltaValue;
       }
-      rps->setRefIdc(j,refIdc);
+
+      rpl->setRefPicIdentifier( ii, deltaValue, isLongTerm, false, 0 );
+      numStrp++;
     }
-    rps->setNumRefIdc(rpsRef->getNumberOfPictures()+1);
-    rps->setNumberOfPictures(k);
-    rps->setNumberOfNegativePictures(k0);
-    rps->setNumberOfPositivePictures(k1);
-    rps->sortDeltaPOC();
-  }
-  else
-  {
-    READ_UVLC(code, "num_negative_pics");           rps->setNumberOfNegativePictures(code);
-    READ_UVLC(code, "num_positive_pics");           rps->setNumberOfPositivePictures(code);
-    int prev = 0;
-    int poc;
-    for(int j=0 ; j < rps->getNumberOfNegativePictures(); j++)
+    else
     {
-      READ_UVLC(code, "delta_poc_s0_minus1");
-      poc = prev-code-1;
-      prev = poc;
-      rps->setDeltaPOC(j,poc);
-      READ_FLAG(code, "used_by_curr_pic_s0_flag");  rps->setUsed(j,code);
+      if (!rpl->getLtrpInSliceHeaderFlag())
+        READ_CODE(sps->getBitsForPOC(), code, "poc_lsb_lt[listIdx][rplsIdx][j]");
+      rpl->setRefPicIdentifier( ii, code, isLongTerm, false, 0 ); // if nothing was read just above, 'code' still holds the st_ref_pic_flag value (0 on this path)
+      numLtrp++;
     }
-    prev = 0;
-    for(int j=rps->getNumberOfNegativePictures(); j < rps->getNumberOfNegativePictures()+rps->getNumberOfPositivePictures(); j++)
-    {
-      READ_UVLC(code, "delta_poc_s1_minus1");
-      poc = prev+code+1;
-      prev = poc;
-      rps->setDeltaPOC(j,poc);
-      READ_FLAG(code, "used_by_curr_pic_s1_flag");  rps->setUsed(j,code);
     }
-    rps->setNumberOfPictures(rps->getNumberOfNegativePictures()+rps->getNumberOfPositivePictures());
   }
-
-  rps->printDeltaPOC();
+  rpl->setNumberOfShorttermPictures(numStrp); // publish the per-category totals gathered in the loop
+  rpl->setNumberOfLongtermPictures(numLtrp);
+  rpl->setNumberOfInterLayerPictures( numIlrp );
 }
 
-void HLSyntaxReader::parsePPS( PPS* pcPPS )
+void HLSyntaxReader::parsePPS( PPS* pcPPS, ParameterSetManager *parameterSetManager )
 {
 #if ENABLE_TRACING
   xTracePPSHeader ();
@@ -382,19 +418,167 @@ void HLSyntaxReader::parsePPS( PPS* pcPPS )
   CHECK(uiCode > 63, "PPS id exceeds boundary (63)");
   pcPPS->setPPSId (uiCode);
 
-  READ_UVLC( uiCode, "pps_seq_parameter_set_id");
-  CHECK(uiCode > 15, "SPS id exceeds boundary (15)");
+  READ_CODE(4, uiCode, "pps_seq_parameter_set_id");
   pcPPS->setSPSId (uiCode);
 
-#if HEVC_DEPENDENT_SLICES
-  READ_FLAG( uiCode, "dependent_slice_segments_enabled_flag"    );    pcPPS->setDependentSliceSegmentsEnabledFlag   ( uiCode == 1 );
-#endif
+  READ_UVLC( uiCode, "pic_width_in_luma_samples" );          pcPPS->setPicWidthInLumaSamples( uiCode );
+  READ_UVLC( uiCode, "pic_height_in_luma_samples" );         pcPPS->setPicHeightInLumaSamples( uiCode );
+
+  READ_FLAG( uiCode, "conformance_window_flag" );
+  if( uiCode != 0 )
+  {
+    Window &conf = pcPPS->getConformanceWindow();
+    READ_UVLC( uiCode, "conf_win_left_offset" );               conf.setWindowLeftOffset( uiCode );
+    READ_UVLC( uiCode, "conf_win_right_offset" );              conf.setWindowRightOffset( uiCode );
+    READ_UVLC( uiCode, "conf_win_top_offset" );                conf.setWindowTopOffset( uiCode );
+    READ_UVLC( uiCode, "conf_win_bottom_offset" );             conf.setWindowBottomOffset( uiCode );
+  }
+
+  READ_FLAG( uiCode, "scaling_window_flag" );
+  if( uiCode != 0 )
+  {
+    Window &scalingWindow = pcPPS->getScalingWindow();
+    READ_UVLC( uiCode, "scaling_win_left_offset" );               scalingWindow.setWindowLeftOffset( uiCode );
+    READ_UVLC( uiCode, "scaling_win_right_offset" );              scalingWindow.setWindowRightOffset( uiCode );
+    READ_UVLC( uiCode, "scaling_win_top_offset" );                scalingWindow.setWindowTopOffset( uiCode );
+    READ_UVLC( uiCode, "scaling_win_bottom_offset" );             scalingWindow.setWindowBottomOffset( uiCode );
+  }
 
   READ_FLAG( uiCode, "output_flag_present_flag" );                    pcPPS->setOutputFlagPresentFlag( uiCode==1 );
 
-  READ_CODE(3, uiCode, "num_extra_slice_header_bits");                pcPPS->setNumExtraSliceHeaderBits(uiCode);
+  READ_FLAG(uiCode, "pps_subpic_id_signalling_present_flag");              pcPPS->setSubPicIdSignallingPresentFlag( uiCode != 0 );
+  if( pcPPS->getSubPicIdSignallingPresentFlag() )
+  {
+    READ_UVLC( uiCode, "pps_num_subpics_minus1" );                         pcPPS->setNumSubPics( uiCode + 1 );
+    CHECK( uiCode > MAX_NUM_SUB_PICS-1,  "Number of sub-pictures exceeds limit");
+
+    READ_UVLC( uiCode, "pps_subpic_id_len_minus1" );                       pcPPS->setSubPicIdLen( uiCode + 1 );
+    CHECK( uiCode > 15, "Invalid pps_subpic_id_len_minus1 signalled");
+
+    for( int picIdx = 0; picIdx < pcPPS->getNumSubPics( ); picIdx++ )
+    {
+      READ_CODE( pcPPS->getSubPicIdLen( ), uiCode, "pps_subpic_id[i]" );   pcPPS->setSubPicId( picIdx, uiCode );
+    }
+  }
+  else 
+  {
+    for( int picIdx = 0; picIdx < MAX_NUM_SUB_PICS; picIdx++ )
+    {
+      pcPPS->setSubPicId( picIdx, picIdx );
+    }
+  }
+
+
+  READ_FLAG( uiCode, "no_pic_partition_flag" );                       pcPPS->setNoPicPartitionFlag( uiCode == 1 );
+  if(!pcPPS->getNoPicPartitionFlag())
+  {
+    int colIdx, rowIdx;
+    pcPPS->resetTileSliceInfo();
+
+    // CTU size - required to match size in SPS
+    READ_CODE(2, uiCode, "pps_log2_ctu_size_minus5");                 pcPPS->setLog2CtuSize(uiCode + 5);
+    CHECK(uiCode > 2, "pps_log2_ctu_size_minus5 must be less than or equal to 2");
+    
+    // number of explicit tile columns/rows
+    READ_UVLC( uiCode, "num_exp_tile_columns_minus1" );               pcPPS->setNumExpTileColumns( uiCode + 1 );
+    READ_UVLC( uiCode, "num_exp_tile_rows_minus1" );                  pcPPS->setNumExpTileRows( uiCode + 1 );
+    CHECK(pcPPS->getNumExpTileColumns() > MAX_TILE_COLS,              "Number of explicit tile columns exceeds valid range");
+    CHECK(pcPPS->getNumExpTileRows() > MAX_TILE_ROWS,                 "Number of explicit tile rows exceeds valid range");
+    
+    // tile sizes
+    for( colIdx = 0; colIdx < pcPPS->getNumExpTileColumns(); colIdx++ )
+    {
+      READ_UVLC( uiCode, "tile_column_width_minus1[i]" );             pcPPS->addTileColumnWidth( uiCode + 1 );
+    }
+    for( rowIdx = 0; rowIdx < pcPPS->getNumExpTileRows(); rowIdx++ )
+    {
+      READ_UVLC( uiCode, "tile_row_height_minus1[i]" );               pcPPS->addTileRowHeight( uiCode + 1 );
+    }
+    pcPPS->initTiles();
+     
+    // rectangular slice signalling
+    READ_CODE(1, uiCode, "rect_slice_flag");                          pcPPS->setRectSliceFlag( uiCode == 1 );
+    if (pcPPS->getRectSliceFlag()) 
+    {
+      READ_FLAG(uiCode, "single_slice_per_subpic_flag");            pcPPS->setSingleSlicePerSubPicFlag(uiCode == 1);
+    }
+    if (pcPPS->getRectSliceFlag() & !(pcPPS->getSingleSlicePerSubPicFlag()))
+    {
+      int32_t tileIdx = 0;
+
+      READ_UVLC( uiCode, "num_slices_in_pic_minus1" );                pcPPS->setNumSlicesInPic( uiCode + 1 );
+      CHECK(pcPPS->getNumSlicesInPic() > MAX_SLICES,                  "Number of slices in picture exceeds valid range");
+      READ_CODE(1, uiCode, "tile_idx_delta_present_flag");            pcPPS->setTileIdxDeltaPresentFlag( uiCode == 1 );
+      pcPPS->initRectSlices();
+      
+      // read rectangular slice parameters
+      for( int i = 0; i < pcPPS->getNumSlicesInPic()-1; i++ )
+      {
+        pcPPS->setSliceTileIdx( i, tileIdx );
+
+        // complete tiles within a single slice
+        READ_UVLC( uiCode, "slice_width_in_tiles_minus1[i]" );        pcPPS->setSliceWidthInTiles ( i, uiCode + 1 );
+#if JVET_Q0480_RASTER_RECT_SLICES
+        if( pcPPS->getTileIdxDeltaPresentFlag() || ( (tileIdx % pcPPS->getNumTileColumns()) == 0 ) )
+        {
+          READ_UVLC( uiCode, "slice_height_in_tiles_minus1[i]" );     pcPPS->setSliceHeightInTiles( i, uiCode + 1 );
+        }
+        else 
+        {
+          pcPPS->setSliceHeightInTiles( i, pcPPS->getSliceHeightInTiles(i-1) );
+        }
+#else
+        READ_UVLC( uiCode, "slice_height_in_tiles_minus1[i]" );       pcPPS->setSliceHeightInTiles( i, uiCode + 1 );
+#endif
+
+        // multiple slices within a single tile special case
+        if( pcPPS->getSliceWidthInTiles( i ) == 1 && pcPPS->getSliceHeightInTiles( i ) == 1 ) 
+        {
+          READ_UVLC( uiCode, "num_slices_in_tile_minus1[i]" );        pcPPS->setNumSlicesInTile( i, uiCode + 1 );
+          uint32_t numSlicesInTile = pcPPS->getNumSlicesInTile( i );
+          for( int j = 0; j < numSlicesInTile-1; j++ )
+          {
+            READ_UVLC( uiCode, "slice_height_in_ctu_minus1[i]" );     pcPPS->setSliceHeightInCtu( i, uiCode + 1 );
+            i++;
+            pcPPS->setSliceWidthInTiles ( i, 1 );
+            pcPPS->setSliceHeightInTiles( i, 1 );
+            pcPPS->setNumSlicesInTile   ( i, numSlicesInTile );
+            pcPPS->setSliceTileIdx      ( i, tileIdx );
+          }
+        }
+
+        // tile index offset to start of next slice
+        if( i < pcPPS->getNumSlicesInPic()-1 )
+        {
+          if( pcPPS->getTileIdxDeltaPresentFlag() ) 
+          {
+            int32_t  tileIdxDelta;
+            READ_SVLC( tileIdxDelta, "tile_idx_delta[i]" );
+            tileIdx += tileIdxDelta;
+            CHECK( tileIdx < 0 || tileIdx >= pcPPS->getNumTiles(), "Invalid tile_idx_delta.");
+          }
+          else
+          {
+            tileIdx += pcPPS->getSliceWidthInTiles( i );
+            if( tileIdx % pcPPS->getNumTileColumns() == 0)
+            {
+              tileIdx += (pcPPS->getSliceHeightInTiles( i ) - 1) * pcPPS->getNumTileColumns();
+            }
+          }
+        }
+      }
+      pcPPS->setSliceTileIdx(pcPPS->getNumSlicesInPic()-1, tileIdx );
+      
+      // initialize mapping between rectangular slices and CTUs
+      pcPPS->initRectSliceMap();
+    }
 
+    // loop filtering across slice/tile controls
+    READ_CODE(1, uiCode, "loop_filter_across_tiles_enabled_flag");    pcPPS->setLoopFilterAcrossTilesEnabledFlag( uiCode == 1 );
+    READ_CODE(1, uiCode, "loop_filter_across_slices_enabled_flag");   pcPPS->setLoopFilterAcrossSlicesEnabledFlag( uiCode == 1 );
+  }
 
+  READ_FLAG(uiCode, "entropy_coding_sync_enabled_flag");       pcPPS->setEntropyCodingSyncEnabledFlag(uiCode == 1);
   READ_FLAG( uiCode,   "cabac_init_present_flag" );            pcPPS->setCabacInitPresentFlag( uiCode ? true : false );
 
   READ_UVLC(uiCode, "num_ref_idx_l0_default_active_minus1");
@@ -405,21 +589,15 @@ void HLSyntaxReader::parsePPS( PPS* pcPPS )
   CHECK(uiCode > 14, "Invalid code read");
   pcPPS->setNumRefIdxL1DefaultActive(uiCode+1);
 
+  READ_FLAG(uiCode, "rpl1_idx_present_flag");
+  pcPPS->setRpl1IdxPresentFlag(uiCode);
+
+
   READ_SVLC(iCode, "init_qp_minus26" );                            pcPPS->setPicInitQPMinus26(iCode);
-  READ_FLAG( uiCode, "constrained_intra_pred_flag" );              pcPPS->setConstrainedIntraPred( uiCode ? true : false );
-  READ_FLAG( uiCode, "transform_skip_enabled_flag" );
-  pcPPS->setUseTransformSkip ( uiCode ? true : false );
+  READ_UVLC(uiCode, "log2_transform_skip_max_size_minus2");
+  pcPPS->setLog2MaxTransformSkipBlockSize(uiCode + 2);
 
   READ_FLAG( uiCode, "cu_qp_delta_enabled_flag" );            pcPPS->setUseDQP( uiCode ? true : false );
-  if( pcPPS->getUseDQP() )
-  {
-    READ_UVLC( uiCode, "cu_qp_delta_subdiv" );
-    pcPPS->setCuQpDeltaSubdiv( uiCode );
-  }
-  else
-  {
-    pcPPS->setCuQpDeltaSubdiv( 0 );
-  }
   READ_SVLC( iCode, "pps_cb_qp_offset");
   pcPPS->setQpOffset(COMPONENT_Cb, iCode);
   CHECK( pcPPS->getQpOffset(COMPONENT_Cb) < -12, "Invalid Cb QP offset" );
@@ -430,62 +608,67 @@ void HLSyntaxReader::parsePPS( PPS* pcPPS )
   CHECK( pcPPS->getQpOffset(COMPONENT_Cr) < -12, "Invalid Cr QP offset" );
   CHECK( pcPPS->getQpOffset(COMPONENT_Cr) >  12, "Invalid Cr QP offset" );
 
+  READ_FLAG(uiCode, "pps_joint_cbcr_qp_offset_present_flag");
+  pcPPS->setJointCbCrQpOffsetPresentFlag(uiCode ? true : false);
+
+  if (pcPPS->getJointCbCrQpOffsetPresentFlag())
+  {
+    READ_SVLC(iCode, "pps_joint_cbcr_qp_offset_value");
+  }
+  else
+  {
+    iCode = 0;
+  }
+  pcPPS->setQpOffset(JOINT_CbCr, iCode);
+
+  CHECK( pcPPS->getQpOffset(JOINT_CbCr) < -12, "Invalid CbCr QP offset" );
+  CHECK( pcPPS->getQpOffset(JOINT_CbCr) >  12, "Invalid CbCr QP offset" );
+
   CHECK(MAX_NUM_COMPONENT>3, "Invalid maximal number of components");
 
   READ_FLAG( uiCode, "pps_slice_chroma_qp_offsets_present_flag" );
   pcPPS->setSliceChromaQpFlag( uiCode ? true : false );
 
-  READ_FLAG( uiCode, "weighted_pred_flag" );          // Use of Weighting Prediction (P_SLICE)
-  pcPPS->setUseWP( uiCode==1 );
-  READ_FLAG( uiCode, "weighted_bipred_flag" );         // Use of Bi-Directional Weighting Prediction (B_SLICE)
-  pcPPS->setWPBiPred( uiCode==1 );
-
-  READ_FLAG( uiCode, "transquant_bypass_enabled_flag");
-  pcPPS->setTransquantBypassEnabledFlag(uiCode ? true : false);
-#if HEVC_TILES_WPP
-  READ_FLAG( uiCode, "tiles_enabled_flag" );    pcPPS->setTilesEnabledFlag( uiCode == 1 );
-#endif
-#if HEVC_TILES_WPP
-  READ_FLAG( uiCode, "entropy_coding_sync_enabled_flag" );    pcPPS->setEntropyCodingSyncEnabledFlag( uiCode == 1 );
-
-  if( pcPPS->getTilesEnabledFlag() )
+  READ_FLAG( uiCode, "pps_cu_chroma_qp_offset_enabled_flag");
+  if (uiCode == 0)
   {
-    READ_UVLC ( uiCode, "num_tile_columns_minus1" );                pcPPS->setNumTileColumnsMinus1( uiCode );
-    READ_UVLC ( uiCode, "num_tile_rows_minus1" );                   pcPPS->setNumTileRowsMinus1( uiCode );
-    READ_FLAG ( uiCode, "uniform_spacing_flag" );                   pcPPS->setTileUniformSpacingFlag( uiCode == 1 );
-
-    const uint32_t tileColumnsMinus1 = pcPPS->getNumTileColumnsMinus1();
-    const uint32_t tileRowsMinus1    = pcPPS->getNumTileRowsMinus1();
+    pcPPS->clearChromaQpOffsetList();
+  }
+  else
+  {
+    uint32_t tableSizeMinus1 = 0;
+    READ_UVLC(tableSizeMinus1, "chroma_qp_offset_list_len_minus1");
+    CHECK(tableSizeMinus1 >= MAX_QP_OFFSET_LIST_SIZE, "Table size exceeds maximum");
 
-    if ( !pcPPS->getTileUniformSpacingFlag())
+    for (int cuChromaQpOffsetIdx = 0; cuChromaQpOffsetIdx <= (tableSizeMinus1); cuChromaQpOffsetIdx++)
     {
-      if (tileColumnsMinus1 > 0)
+      int cbOffset;
+      int crOffset;
+      int jointCbCrOffset;
+      READ_SVLC(cbOffset, "cb_qp_offset_list[i]");
+      CHECK(cbOffset < -12 || cbOffset > 12, "Invalid chroma QP offset");
+      READ_SVLC(crOffset, "cr_qp_offset_list[i]");
+      CHECK(crOffset < -12 || crOffset > 12, "Invalid chroma QP offset");
+      if (pcPPS->getJointCbCrQpOffsetPresentFlag())
       {
-        std::vector<int> columnWidth(tileColumnsMinus1);
-        for(uint32_t i = 0; i < tileColumnsMinus1; i++)
-        {
-          READ_UVLC( uiCode, "column_width_minus1" );
-          columnWidth[i] = uiCode+1;
-        }
-        pcPPS->setTileColumnWidth(columnWidth);
+        READ_SVLC(jointCbCrOffset, "joint_cbcr_qp_offset_list[i]");
       }
-
-      if (tileRowsMinus1 > 0)
+      else
       {
-        std::vector<int> rowHeight (tileRowsMinus1);
-        for(uint32_t i = 0; i < tileRowsMinus1; i++)
-        {
-          READ_UVLC( uiCode, "row_height_minus1" );
-          rowHeight[i] = uiCode + 1;
-        }
-        pcPPS->setTileRowHeight(rowHeight);
+        jointCbCrOffset = 0;
       }
+      CHECK(jointCbCrOffset < -12 || jointCbCrOffset > 12, "Invalid chroma QP offset");
+      // table uses +1 for index (see comment inside the function)
+      pcPPS->setChromaQpOffsetListEntry(cuChromaQpOffsetIdx + 1, cbOffset, crOffset, jointCbCrOffset);
     }
-    CHECK((tileColumnsMinus1 + tileRowsMinus1) == 0, "Invalid tile configuration");
-    READ_FLAG ( uiCode, "loop_filter_across_tiles_enabled_flag" );     pcPPS->setLoopFilterAcrossTilesEnabledFlag( uiCode ? true : false );
+    CHECK(pcPPS->getChromaQpOffsetListLen() != tableSizeMinus1 + 1, "Invalid chroma QP offset list length");
   }
-#endif
-  READ_FLAG( uiCode, "pps_loop_filter_across_slices_enabled_flag" );   pcPPS->setLoopFilterAcrossSlicesEnabledFlag( uiCode ? true : false );
+
+  READ_FLAG( uiCode, "weighted_pred_flag" );          // Use of Weighting Prediction (P_SLICE)
+  pcPPS->setUseWP( uiCode==1 );
+  READ_FLAG( uiCode, "weighted_bipred_flag" );         // Use of Bi-Directional Weighting Prediction (B_SLICE)
+  pcPPS->setWPBiPred( uiCode==1 );
+
   READ_FLAG( uiCode, "deblocking_filter_control_present_flag" );       pcPPS->setDeblockingFilterControlPresentFlag( uiCode ? true : false );
   if(pcPPS->getDeblockingFilterControlPresentFlag())
   {
@@ -497,21 +680,31 @@ void HLSyntaxReader::parsePPS( PPS* pcPPS )
       READ_SVLC ( iCode, "pps_tc_offset_div2" );                       pcPPS->setDeblockingFilterTcOffsetDiv2( iCode );
     }
   }
-#if HEVC_USE_SCALING_LISTS
-  READ_FLAG( uiCode, "pps_scaling_list_data_present_flag" );           pcPPS->setScalingListPresentFlag( uiCode ? true : false );
-  if(pcPPS->getScalingListPresentFlag ())
+  READ_FLAG( uiCode, "constant_slice_header_params_enabled_flag"); pcPPS->setConstantSliceHeaderParamsEnabledFlag(uiCode);
+  if ( pcPPS->getConstantSliceHeaderParamsEnabledFlag() ) {
+    READ_CODE( 2, uiCode, "pps_dep_quant_enabled_idc");        pcPPS->setPPSDepQuantEnabledIdc(uiCode);
+    READ_CODE( 2, uiCode, "pps_ref_pic_list_sps_idc[0]");      pcPPS->setPPSRefPicListSPSIdc0(uiCode);
+    READ_CODE( 2, uiCode, "pps_ref_pic_list_sps_idc[1]");      pcPPS->setPPSRefPicListSPSIdc1(uiCode);
+    READ_CODE( 2, uiCode, "pps_mvd_l1_zero_idc");              pcPPS->setPPSMvdL1ZeroIdc(uiCode);
+    READ_CODE( 2, uiCode, "pps_collocated_from_l0_idc");       pcPPS->setPPSCollocatedFromL0Idc(uiCode);
+    READ_UVLC( uiCode, "pps_six_minus_max_num_merge_cand_plus1"); pcPPS->setPPSSixMinusMaxNumMergeCandPlus1(uiCode);
+    READ_UVLC( uiCode, "pps_max_num_merge_cand_minus_max_num_triangle_cand_plus1");pcPPS->setPPSMaxNumMergeCandMinusMaxNumTriangleCandPlus1(uiCode);
+  }
+  else
   {
-    parseScalingList( &(pcPPS->getScalingList()) );
+    pcPPS->setPPSDepQuantEnabledIdc(0);
+    pcPPS->setPPSRefPicListSPSIdc0(0);
+    pcPPS->setPPSRefPicListSPSIdc1(0);
+    pcPPS->setPPSMvdL1ZeroIdc(0);
+    pcPPS->setPPSCollocatedFromL0Idc(0);
+    pcPPS->setPPSSixMinusMaxNumMergeCandPlus1(0);
+    pcPPS->setPPSMaxNumMergeCandMinusMaxNumTriangleCandPlus1(0);
   }
-#endif
 
-  READ_FLAG( uiCode, "lists_modification_present_flag");
-  pcPPS->setListsModificationPresentFlag(uiCode);
 
-  READ_UVLC( uiCode, "log2_parallel_merge_level_minus2");
-  pcPPS->setLog2ParallelMergeLevelMinus2 (uiCode);
-
-  READ_FLAG( uiCode, "slice_segment_header_extension_present_flag");
+  READ_FLAG( uiCode, "picture_header_extension_present_flag");
+  pcPPS->setPictureHeaderExtensionPresentFlag(uiCode);
+  READ_FLAG( uiCode, "slice_header_extension_present_flag");
   pcPPS->setSliceHeaderExtensionPresentFlag(uiCode);
 
 
@@ -548,42 +741,9 @@ void HLSyntaxReader::parsePPS( PPS* pcPPS )
           PPSRExt &ppsRangeExtension = pcPPS->getPpsRangeExtension();
           CHECK(bSkipTrailingExtensionBits, "Invalid state");
 
-          if (pcPPS->getUseTransformSkip())
-          {
-            READ_UVLC( uiCode, "log2_max_transform_skip_block_size_minus2");
-            ppsRangeExtension.setLog2MaxTransformSkipBlockSize(uiCode+2);
-          }
-
           READ_FLAG( uiCode, "cross_component_prediction_enabled_flag");
           ppsRangeExtension.setCrossComponentPredictionEnabledFlag(uiCode != 0);
 
-          READ_FLAG( uiCode, "chroma_qp_offset_list_enabled_flag");
-          if (uiCode == 0)
-          {
-            ppsRangeExtension.clearChromaQpOffsetList();
-            ppsRangeExtension.setCuChromaQpOffsetSubdiv(0);
-          }
-          else
-          {
-            READ_UVLC(uiCode, "cu_chroma_qp_offset_subdiv"); ppsRangeExtension.setCuChromaQpOffsetSubdiv(uiCode);
-            uint32_t tableSizeMinus1 = 0;
-            READ_UVLC(tableSizeMinus1, "chroma_qp_offset_list_len_minus1");
-            CHECK(tableSizeMinus1 >= MAX_QP_OFFSET_LIST_SIZE, "Table size exceeds maximum");
-
-            for (int cuChromaQpOffsetIdx = 0; cuChromaQpOffsetIdx <= (tableSizeMinus1); cuChromaQpOffsetIdx++)
-            {
-              int cbOffset;
-              int crOffset;
-              READ_SVLC(cbOffset, "cb_qp_offset_list[i]");
-              CHECK(cbOffset < -12 || cbOffset > 12, "Invalid chroma QP offset");
-              READ_SVLC(crOffset, "cr_qp_offset_list[i]");
-              CHECK(crOffset < -12 || crOffset > 12, "Invalid chroma QP offset");
-              // table uses +1 for index (see comment inside the function)
-              ppsRangeExtension.setChromaQpOffsetListEntry(cuChromaQpOffsetIdx+1, cbOffset, crOffset);
-            }
-            CHECK(ppsRangeExtension.getChromaQpOffsetListLen() != tableSizeMinus1 + 1, "Invalid chroma QP offset list lenght");
-          }
-
           READ_UVLC( uiCode, "log2_sao_offset_scale_luma");
           ppsRangeExtension.setLog2SaoOffsetScale(CHANNEL_TYPE_LUMA, uiCode);
           READ_UVLC( uiCode, "log2_sao_offset_scale_chroma");
@@ -607,7 +767,7 @@ void HLSyntaxReader::parsePPS( PPS* pcPPS )
   xReadRbspTrailingBits();
 }
 
-void HLSyntaxReader::parseAPS(APS* aps)
+void HLSyntaxReader::parseAPS( APS* aps )
 {
 #if ENABLE_TRACING
   xTraceAPSHeader();
@@ -618,37 +778,134 @@ void HLSyntaxReader::parseAPS(APS* aps)
   READ_CODE(5, code, "adaptation_parameter_set_id");
   aps->setAPSId(code);
 
-  AlfSliceParam param = aps->getAlfAPSParam();
-  param.enabledFlag[COMPONENT_Y] = true;
+  READ_CODE(3, code, "aps_params_type");
+  aps->setAPSType( ApsType(code) );
+  if( code == ALF_APS )
+  {
+    parseAlfAps( aps );
+  }
+  else if( code == LMCS_APS )
+  {
+    parseLmcsAps( aps );
+  }
+  else if( code == SCALING_LIST_APS )
+  {
+    parseScalingListAps( aps );
+  }
+  READ_FLAG(code, "aps_extension_flag");
+  if (code)
+  {
+    while (xMoreRbspData())
+    {
+      READ_FLAG(code, "aps_extension_data_flag");
+    }
+  }
+  xReadRbspTrailingBits();
+}
+
+void HLSyntaxReader::parseAlfAps( APS* aps )
+{ // Reads the ALF payload of an APS: luma and/or chroma filter data are parsed into a local AlfParam that is stored back into 'aps' at the end.
+  uint32_t  code;
 
-  int alfChromaIdc = truncatedUnaryEqProb(3);        //alf_chroma_idc
-  param.enabledFlag[COMPONENT_Cb] = alfChromaIdc >> 1;
-  param.enabledFlag[COMPONENT_Cr] = alfChromaIdc & 1;
+  AlfParam param = aps->getAlfAPSParam(); // local copy; written back with setAlfAPSParam() below
+  param.reset();
+  param.enabledFlag[COMPONENT_Y] = param.enabledFlag[COMPONENT_Cb] = param.enabledFlag[COMPONENT_Cr] = true;
+  READ_FLAG(code, "alf_luma_new_filter");
+  param.newFilterFlag[CHANNEL_TYPE_LUMA] = code;
+  READ_FLAG(code, "alf_chroma_new_filter");
+  param.newFilterFlag[CHANNEL_TYPE_CHROMA] = code;
 
-  xReadTruncBinCode(code, MAX_NUM_ALF_CLASSES);  //number_of_filters_minus1
-  param.numLumaFilters = code + 1;
-  if (param.numLumaFilters > 1)
+  CHECK(param.newFilterFlag[CHANNEL_TYPE_LUMA] == 0 && param.newFilterFlag[CHANNEL_TYPE_CHROMA] == 0,
+    "bitstream conformance error, alf_luma_filter_signal_flag and alf_chroma_filter_signal_flag shall not equal to zero at the same time");
+
+  if (param.newFilterFlag[CHANNEL_TYPE_LUMA]) // luma filter set present
   {
-    for (int i = 0; i < MAX_NUM_ALF_CLASSES; i++)
+    READ_FLAG(code, "alf_luma_clip");
+#if JVET_Q0249_ALF_CHROMA_CLIPFLAG
+    param.nonLinearFlag[CHANNEL_TYPE_LUMA] = code ? true : false;
+#else
+    param.nonLinearFlag[CHANNEL_TYPE_LUMA][0] = code ? true : false;
+#endif
+    READ_UVLC(code, "alf_luma_num_filters_signalled_minus1");
+    param.numLumaFilters = code + 1; // NOTE(review): no upper bound is checked here - confirm alfFilter() or the data structure tolerates any value
+    if (param.numLumaFilters > 1)
+    {
+      const int length =  ceilLog2(param.numLumaFilters); // fixed bit width used for every per-class index below
+      for (int i = 0; i < MAX_NUM_ALF_CLASSES; i++)
+      {
+        READ_CODE(length, code, "alf_luma_coeff_delta_idx");
+        param.filterCoeffDeltaIdx[i] = code;
+      }
+    }
+    else
     {
-      xReadTruncBinCode(code, param.numLumaFilters);
-      param.filterCoeffDeltaIdx[i] = code;
+      memset(param.filterCoeffDeltaIdx, 0, sizeof(param.filterCoeffDeltaIdx)); // single filter: nothing is read, every index is 0
     }
+    alfFilter( param, false, 0 ); // second argument false here, true in the chroma branch below
   }
-  else
+  if (param.newFilterFlag[CHANNEL_TYPE_CHROMA]) // chroma filter set present
   {
-    memset(param.filterCoeffDeltaIdx, 0, sizeof(param.filterCoeffDeltaIdx));
+#if JVET_Q0249_ALF_CHROMA_CLIPFLAG
+    READ_FLAG(code, "alf_nonlinear_enable_flag_chroma");
+    param.nonLinearFlag[CHANNEL_TYPE_CHROMA] = code ? true : false;
+#endif
+
+    if( MAX_NUM_ALF_ALTERNATIVES_CHROMA > 1 )
+      READ_UVLC( code, "alf_chroma_num_alts_minus1" );
+    else
+      code = 0;
+
+    param.numAlternativesChroma = code + 1; // NOTE(review): not checked against MAX_NUM_ALF_ALTERNATIVES_CHROMA here - confirm it is validated elsewhere
+
+    for( int altIdx=0; altIdx < param.numAlternativesChroma; ++altIdx ) // one alfFilter() call per chroma alternative
+    {
+#if !JVET_Q0249_ALF_CHROMA_CLIPFLAG
+      READ_FLAG(code, "alf_nonlinear_enable_flag_chroma");
+      param.nonLinearFlag[CHANNEL_TYPE_CHROMA][altIdx] = code ? true : false;
+#endif
+      alfFilter( param, true, altIdx );
+    }
   }
+  aps->setAlfAPSParam(param);
+}
 
-  alfFilter(param, false);
+void HLSyntaxReader::parseLmcsAps( APS* aps )
+{ // Reads the LMCS payload of an APS into the SliceReshapeInfo held by 'aps': bin range, per-bin signed codeword deltas, and the chroma residual scaling offset.
+  uint32_t  code;
 
-  if (alfChromaIdc)
+  SliceReshapeInfo& info = aps->getReshaperAPSInfo();
+  memset(info.reshaperModelBinCWDelta, 0, PIC_CODE_CW_BINS * sizeof(int)); // clear all PIC_CODE_CW_BINS deltas; only bins inside [min,max] are filled below
+  READ_UVLC(code, "lmcs_min_bin_idx");                             CHECK(code >= uint32_t(PIC_CODE_CW_BINS), "lmcs_min_bin_idx out of range");        info.reshaperModelMinBinIdx = code;
+  READ_UVLC(code, "lmcs_delta_max_bin_idx");                       CHECK(code >= uint32_t(PIC_CODE_CW_BINS), "lmcs_delta_max_bin_idx out of range");  info.reshaperModelMaxBinIdx = PIC_CODE_CW_BINS - 1 - code; // checked first: a larger value would wrap the subtraction and index past the table
+  READ_UVLC(code, "lmcs_delta_cw_prec_minus1");                    info.maxNbitsNeededDeltaCW = code + 1;
+  assert(info.maxNbitsNeededDeltaCW > 0);
+  for (uint32_t i = info.reshaperModelMinBinIdx; i <= info.reshaperModelMaxBinIdx; i++) // one magnitude (+ optional sign) per bin in the signalled range
   {
-    alfFilter(param, true);
+    READ_CODE(info.maxNbitsNeededDeltaCW, code, "lmcs_delta_abs_cw[ i ]");
+    int absCW = code;
+    if (absCW > 0)
+    {
+      READ_CODE(1, code, "lmcs_delta_sign_cw_flag[ i ]");
+    }
+    int signCW = code; // when absCW is 0 no sign bit was read and 'code' is still 0
+    info.reshaperModelBinCWDelta[i] = (1 - 2 * signCW) * absCW; // sign bit 1 negates the magnitude
   }
-  aps->setAlfAPSParam(param);
+  READ_CODE(3, code, "lmcs_delta_abs_crs");
+  int absCW = code;
+  if (absCW > 0)
+  {
+    READ_CODE(1, code, "lmcs_delta_sign_crs_flag");
+  }
+  int signCW = code; // same magnitude/sign scheme as the per-bin deltas above
+  info.chrResScalingOffset = (1 - 2 * signCW) * absCW;
 
-  xReadRbspTrailingBits();
+  aps->setReshaperAPSInfo(info); // 'info' was obtained by reference from 'aps' above
+}
+
+void HLSyntaxReader::parseScalingListAps( APS* aps )
+{ // Scaling-list APS payload: hand the ScalingList object owned by 'aps' to parseScalingList().
+  ScalingList* const apsScalingList = &aps->getScalingList();
+  parseScalingList( apsScalingList );
+}
 
 void  HLSyntaxReader::parseVUI(VUI* pcVUI, SPS *pcSPS)
@@ -656,212 +913,136 @@ void  HLSyntaxReader::parseVUI(VUI* pcVUI, SPS *pcSPS)
 #if ENABLE_TRACING
   DTRACE( g_trace_ctx, D_HEADER, "----------- vui_parameters -----------\n");
 #endif
-  uint32_t  uiCode;
 
-  READ_FLAG(     uiCode, "aspect_ratio_info_present_flag");           pcVUI->setAspectRatioInfoPresentFlag(uiCode);
+
+  uint32_t  symbol;
+
+  READ_FLAG( symbol, "aspect_ratio_info_present_flag");           pcVUI->setAspectRatioInfoPresentFlag(symbol);
   if (pcVUI->getAspectRatioInfoPresentFlag())
   {
-    READ_CODE(8, uiCode, "aspect_ratio_idc");                         pcVUI->setAspectRatioIdc(uiCode);
+    READ_FLAG( symbol, "aspect_ratio_constant_flag");           pcVUI->setAspectRatioConstantFlag(symbol);
+    READ_CODE(8, symbol, "aspect_ratio_idc");                         pcVUI->setAspectRatioIdc(symbol);
     if (pcVUI->getAspectRatioIdc() == 255)
     {
-      READ_CODE(16, uiCode, "sar_width");                             pcVUI->setSarWidth(uiCode);
-      READ_CODE(16, uiCode, "sar_height");                            pcVUI->setSarHeight(uiCode);
+      READ_CODE(16, symbol, "sar_width");                             pcVUI->setSarWidth(symbol);
+      READ_CODE(16, symbol, "sar_height");                            pcVUI->setSarHeight(symbol);
     }
   }
 
-  READ_FLAG(     uiCode, "overscan_info_present_flag");               pcVUI->setOverscanInfoPresentFlag(uiCode);
-  if (pcVUI->getOverscanInfoPresentFlag())
+  READ_FLAG(   symbol, "colour_description_present_flag");          pcVUI->setColourDescriptionPresentFlag(symbol);
+  if (pcVUI->getColourDescriptionPresentFlag())
   {
-    READ_FLAG(   uiCode, "overscan_appropriate_flag");                pcVUI->setOverscanAppropriateFlag(uiCode);
+    READ_CODE(8, symbol, "colour_primaries");                       pcVUI->setColourPrimaries(symbol);
+    READ_CODE(8, symbol, "transfer_characteristics");               pcVUI->setTransferCharacteristics(symbol);
+    READ_CODE(8, symbol, "matrix_coeffs");                          pcVUI->setMatrixCoefficients(symbol);
+    READ_FLAG(   symbol, "video_full_range_flag");                    pcVUI->setVideoFullRangeFlag(symbol);
   }
 
-  READ_FLAG(     uiCode, "video_signal_type_present_flag");           pcVUI->setVideoSignalTypePresentFlag(uiCode);
-  if (pcVUI->getVideoSignalTypePresentFlag())
-  {
-    READ_CODE(3, uiCode, "video_format");                             pcVUI->setVideoFormat(uiCode);
-    READ_FLAG(   uiCode, "video_full_range_flag");                    pcVUI->setVideoFullRangeFlag(uiCode);
-    READ_FLAG(   uiCode, "colour_description_present_flag");          pcVUI->setColourDescriptionPresentFlag(uiCode);
-    if (pcVUI->getColourDescriptionPresentFlag())
-    {
-      READ_CODE(8, uiCode, "colour_primaries");                       pcVUI->setColourPrimaries(uiCode);
-      READ_CODE(8, uiCode, "transfer_characteristics");               pcVUI->setTransferCharacteristics(uiCode);
-      READ_CODE(8, uiCode, "matrix_coeffs");                          pcVUI->setMatrixCoefficients(uiCode);
-    }
-  }
+  READ_FLAG(     symbol, "field_seq_flag");                           pcVUI->setFieldSeqFlag(symbol);
 
-  READ_FLAG(     uiCode, "chroma_loc_info_present_flag");             pcVUI->setChromaLocInfoPresentFlag(uiCode);
+  READ_FLAG(     symbol, "chroma_loc_info_present_flag");             pcVUI->setChromaLocInfoPresentFlag(symbol);
   if (pcVUI->getChromaLocInfoPresentFlag())
   {
-    READ_UVLC(   uiCode, "chroma_sample_loc_type_top_field" );        pcVUI->setChromaSampleLocTypeTopField(uiCode);
-    READ_UVLC(   uiCode, "chroma_sample_loc_type_bottom_field" );     pcVUI->setChromaSampleLocTypeBottomField(uiCode);
-  }
-
-  READ_FLAG(     uiCode, "neutral_chroma_indication_flag");           pcVUI->setNeutralChromaIndicationFlag(uiCode);
-
-  READ_FLAG(     uiCode, "field_seq_flag");                           pcVUI->setFieldSeqFlag(uiCode);
-
-  READ_FLAG(uiCode, "frame_field_info_present_flag");                 pcVUI->setFrameFieldInfoPresentFlag(uiCode);
-
-  READ_FLAG(     uiCode, "default_display_window_flag");
-  if (uiCode != 0)
-  {
-    Window &defDisp = pcVUI->getDefaultDisplayWindow();
-    READ_UVLC(   uiCode, "def_disp_win_left_offset" );                defDisp.setWindowLeftOffset  ( uiCode * SPS::getWinUnitX( pcSPS->getChromaFormatIdc()) );
-    READ_UVLC(   uiCode, "def_disp_win_right_offset" );               defDisp.setWindowRightOffset ( uiCode * SPS::getWinUnitX( pcSPS->getChromaFormatIdc()) );
-    READ_UVLC(   uiCode, "def_disp_win_top_offset" );                 defDisp.setWindowTopOffset   ( uiCode * SPS::getWinUnitY( pcSPS->getChromaFormatIdc()) );
-    READ_UVLC(   uiCode, "def_disp_win_bottom_offset" );              defDisp.setWindowBottomOffset( uiCode * SPS::getWinUnitY( pcSPS->getChromaFormatIdc()) );
-  }
-
-  TimingInfo *timingInfo = pcVUI->getTimingInfo();
-  READ_FLAG(       uiCode, "vui_timing_info_present_flag");         timingInfo->setTimingInfoPresentFlag      (uiCode ? true : false);
-  if(timingInfo->getTimingInfoPresentFlag())
-  {
-    READ_CODE( 32, uiCode, "vui_num_units_in_tick");                timingInfo->setNumUnitsInTick             (uiCode);
-    READ_CODE( 32, uiCode, "vui_time_scale");                       timingInfo->setTimeScale                  (uiCode);
-    READ_FLAG(     uiCode, "vui_poc_proportional_to_timing_flag");  timingInfo->setPocProportionalToTimingFlag(uiCode ? true : false);
-    if(timingInfo->getPocProportionalToTimingFlag())
+    if(pcVUI->getFieldSeqFlag())
     {
-      READ_UVLC(   uiCode, "vui_num_ticks_poc_diff_one_minus1");    timingInfo->setNumTicksPocDiffOneMinus1   (uiCode);
+      READ_UVLC(   symbol, "chroma_sample_loc_type_top_field" );        pcVUI->setChromaSampleLocTypeTopField(symbol);
+      READ_UVLC(   symbol, "chroma_sample_loc_type_bottom_field" );     pcVUI->setChromaSampleLocTypeBottomField(symbol);
     }
-
-    READ_FLAG(     uiCode, "vui_hrd_parameters_present_flag");        pcVUI->setHrdParametersPresentFlag(uiCode);
-    if( pcVUI->getHrdParametersPresentFlag() )
+    else
     {
-      parseHrdParameters( pcVUI->getHrdParameters(), 1, pcSPS->getMaxTLayers() - 1 );
+      READ_UVLC(   symbol, "chroma_sample_loc_type" );        pcVUI->setChromaSampleLocType(symbol);
     }
   }
 
-  READ_FLAG(     uiCode, "bitstream_restriction_flag");               pcVUI->setBitstreamRestrictionFlag(uiCode);
-  if (pcVUI->getBitstreamRestrictionFlag())
+  READ_FLAG(     symbol, "overscan_info_present_flag");               pcVUI->setOverscanInfoPresentFlag(symbol);
+  if (pcVUI->getOverscanInfoPresentFlag())
   {
-#if HEVC_TILES_WPP
-    READ_FLAG(   uiCode, "tiles_fixed_structure_flag");               pcVUI->setTilesFixedStructureFlag(uiCode);
-#endif
-    READ_FLAG(   uiCode, "motion_vectors_over_pic_boundaries_flag");  pcVUI->setMotionVectorsOverPicBoundariesFlag(uiCode);
-    READ_FLAG(   uiCode, "restricted_ref_pic_lists_flag");            pcVUI->setRestrictedRefPicListsFlag(uiCode);
-    READ_UVLC(   uiCode, "min_spatial_segmentation_idc");             pcVUI->setMinSpatialSegmentationIdc(uiCode);
-    CHECK(uiCode >= 4096, "Invalid code signalled");
-    READ_UVLC(   uiCode, "max_bytes_per_pic_denom" );                 pcVUI->setMaxBytesPerPicDenom(uiCode);
-    READ_UVLC(   uiCode, "max_bits_per_min_cu_denom" );               pcVUI->setMaxBitsPerMinCuDenom(uiCode);
-    READ_UVLC(   uiCode, "log2_max_mv_length_horizontal" );           pcVUI->setLog2MaxMvLengthHorizontal(uiCode);
-    READ_UVLC(   uiCode, "log2_max_mv_length_vertical" );             pcVUI->setLog2MaxMvLengthVertical(uiCode);
+    READ_FLAG(   symbol, "overscan_appropriate_flag");                pcVUI->setOverscanAppropriateFlag(symbol);
   }
 }
 
-void HLSyntaxReader::parseHrdParameters(HRD *hrd, bool commonInfPresentFlag, uint32_t maxNumSubLayersMinus1)
+void HLSyntaxReader::parseHrdParameters(HRDParameters *hrd, uint32_t firstSubLayer, uint32_t maxNumSubLayersMinus1) // Reads general HRD flags/scales, then per-sub-layer data for firstSubLayer..maxNumSubLayersMinus1; lower sub-layers are back-filled at the end.
 {
-  uint32_t  uiCode;
-  if( commonInfPresentFlag )
+  uint32_t  symbol;
+  READ_FLAG( symbol, "general_nal_hrd_parameters_present_flag" );           hrd->setNalHrdParametersPresentFlag( symbol == 1 ? true : false );
+  READ_FLAG( symbol, "general_vcl_hrd_parameters_present_flag" );           hrd->setVclHrdParametersPresentFlag( symbol == 1 ? true : false );
+  READ_FLAG( symbol, "general_decoding_unit_hrd_params_present_flag" );           hrd->setGeneralDecodingUnitHrdParamsPresentFlag( symbol == 1 ? true : false );
+
+  if( hrd->getGeneralDecodingUnitHrdParamsPresentFlag() ) // tick_divisor_minus2 is only read when decoding-unit HRD params are flagged present
   {
-    READ_FLAG( uiCode, "nal_hrd_parameters_present_flag" );           hrd->setNalHrdParametersPresentFlag( uiCode == 1 ? true : false );
-    READ_FLAG( uiCode, "vcl_hrd_parameters_present_flag" );           hrd->setVclHrdParametersPresentFlag( uiCode == 1 ? true : false );
-    if( hrd->getNalHrdParametersPresentFlag() || hrd->getVclHrdParametersPresentFlag() )
-    {
-      READ_FLAG( uiCode, "sub_pic_hrd_params_present_flag" );         hrd->setSubPicCpbParamsPresentFlag( uiCode == 1 ? true : false );
-      if( hrd->getSubPicCpbParamsPresentFlag() )
-      {
-        READ_CODE( 8, uiCode, "tick_divisor_minus2" );                hrd->setTickDivisorMinus2( uiCode );
-        READ_CODE( 5, uiCode, "du_cpb_removal_delay_increment_length_minus1" ); hrd->setDuCpbRemovalDelayLengthMinus1( uiCode );
-        READ_FLAG( uiCode, "sub_pic_cpb_params_in_pic_timing_sei_flag" ); hrd->setSubPicCpbParamsInPicTimingSEIFlag( uiCode == 1 ? true : false );
-        READ_CODE( 5, uiCode, "dpb_output_delay_du_length_minus1"  ); hrd->setDpbOutputDelayDuLengthMinus1( uiCode );
-      }
-      READ_CODE( 4, uiCode, "bit_rate_scale" );                       hrd->setBitRateScale( uiCode );
-      READ_CODE( 4, uiCode, "cpb_size_scale" );                       hrd->setCpbSizeScale( uiCode );
-      if( hrd->getSubPicCpbParamsPresentFlag() )
-      {
-        READ_CODE( 4, uiCode, "cpb_size_du_scale" );                  hrd->setDuCpbSizeScale( uiCode );
-      }
-      READ_CODE( 5, uiCode, "initial_cpb_removal_delay_length_minus1" ); hrd->setInitialCpbRemovalDelayLengthMinus1( uiCode );
-      READ_CODE( 5, uiCode, "au_cpb_removal_delay_length_minus1" );      hrd->setCpbRemovalDelayLengthMinus1( uiCode );
-      READ_CODE( 5, uiCode, "dpb_output_delay_length_minus1" );       hrd->setDpbOutputDelayLengthMinus1( uiCode );
-    }
+    READ_CODE( 8, symbol, "tick_divisor_minus2" );                        hrd->setTickDivisorMinus2( symbol );
+  }
+  READ_CODE( 4, symbol, "bit_rate_scale" );                       hrd->setBitRateScale( symbol );
+  READ_CODE( 4, symbol, "cpb_size_scale" );                       hrd->setCpbSizeScale( symbol );
+  if( hrd->getGeneralDecodingUnitHrdParamsPresentFlag() )
+  {
+    READ_CODE( 4, symbol, "cpb_size_du_scale" );                  hrd->setCpbSizeDuScale( symbol );
   }
-  int i, j, nalOrVcl;
-  for( i = 0; i <= maxNumSubLayersMinus1; i ++ )
+
+  for( int i = firstSubLayer; i <= maxNumSubLayersMinus1; i ++ ) // sub-layers whose parameters are present in the bitstream
   {
-    READ_FLAG( uiCode, "fixed_pic_rate_general_flag" );                     hrd->setFixedPicRateFlag( i, uiCode == 1 ? true : false  );
+    READ_FLAG( symbol, "fixed_pic_rate_general_flag" );                     hrd->setFixedPicRateFlag( i, symbol == 1 ? true : false  );
     if( !hrd->getFixedPicRateFlag( i ) )
     {
-      READ_FLAG( uiCode, "fixed_pic_rate_within_cvs_flag" );                hrd->setFixedPicRateWithinCvsFlag( i, uiCode == 1 ? true : false  );
+      READ_FLAG( symbol, "fixed_pic_rate_within_cvs_flag" );                hrd->setFixedPicRateWithinCvsFlag( i, symbol == 1 ? true : false  );
     }
     else
     {
       hrd->setFixedPicRateWithinCvsFlag( i, true );
     }
 
-    hrd->setLowDelayHrdFlag( i, 0 ); // Infered to be 0 when not present
-    hrd->setCpbCntMinus1   ( i, 0 ); // Infered to be 0 when not present
+    hrd->setLowDelayHrdFlag( i, false ); // Inferred to be 0 when not present
+    hrd->setCpbCntMinus1   ( i, 0 );     // Inferred to be 0 when not present
 
     if( hrd->getFixedPicRateWithinCvsFlag( i ) )
     {
-      READ_UVLC( uiCode, "elemental_duration_in_tc_minus1" );             hrd->setPicDurationInTcMinus1( i, uiCode );
+      READ_UVLC( symbol, "elemental_duration_in_tc_minus1" );             hrd->setPicDurationInTcMinus1( i, symbol );
     }
     else
     {
-      READ_FLAG( uiCode, "low_delay_hrd_flag" );                      hrd->setLowDelayHrdFlag( i, uiCode == 1 ? true : false  );
+      READ_FLAG( symbol, "low_delay_hrd_flag" );                      hrd->setLowDelayHrdFlag( i, symbol == 1 ? true : false  );
     }
     if (!hrd->getLowDelayHrdFlag( i ))
     {
-      READ_UVLC( uiCode, "cpb_cnt_minus1" );                          hrd->setCpbCntMinus1( i, uiCode );
+      READ_UVLC( symbol, "cpb_cnt_minus1" );                          CHECK( symbol > 31, "cpb_cnt_minus1 out of range" ); hrd->setCpbCntMinus1( i, symbol ); // bound the count before it drives the per-CPB loop below
     }
 
-    for( nalOrVcl = 0; nalOrVcl < 2; nalOrVcl ++ )
+    for( int nalOrVcl = 0; nalOrVcl < 2; nalOrVcl ++ ) // 0 = NAL HRD set, 1 = VCL HRD set (see the presence test below)
     {
       if( ( ( nalOrVcl == 0 ) && ( hrd->getNalHrdParametersPresentFlag() ) ) ||
           ( ( nalOrVcl == 1 ) && ( hrd->getVclHrdParametersPresentFlag() ) ) )
       {
-        for( j = 0; j <= ( hrd->getCpbCntMinus1( i ) ); j ++ )
+        for( int j = 0; j <= ( hrd->getCpbCntMinus1( i ) ); j ++ )
         {
-          READ_UVLC( uiCode, "bit_rate_value_minus1" );             hrd->setBitRateValueMinus1( i, j, nalOrVcl, uiCode );
-          READ_UVLC( uiCode, "cpb_size_value_minus1" );             hrd->setCpbSizeValueMinus1( i, j, nalOrVcl, uiCode );
-          if( hrd->getSubPicCpbParamsPresentFlag() )
-          {
-            READ_UVLC( uiCode, "cpb_size_du_value_minus1" );        hrd->setDuCpbSizeValueMinus1( i, j, nalOrVcl, uiCode );
-            READ_UVLC( uiCode, "bit_rate_du_value_minus1" );        hrd->setDuBitRateValueMinus1( i, j, nalOrVcl, uiCode );
-          }
-          READ_FLAG( uiCode, "cbr_flag" );                          hrd->setCbrFlag( i, j, nalOrVcl, uiCode == 1 ? true : false  );
+          READ_UVLC( symbol, "bit_rate_value_minus1" );             hrd->setBitRateValueMinus1( i, j, nalOrVcl, symbol );
+          READ_UVLC( symbol, "cpb_size_value_minus1" );             hrd->setCpbSizeValueMinus1( i, j, nalOrVcl, symbol );
+          READ_FLAG( symbol, "cbr_flag" );                          hrd->setCbrFlag( i, j, nalOrVcl, symbol == 1 ? true : false  );
         }
       }
     }
   }
-}
-
-void HLSyntaxReader::parseReshaper(SliceReshapeInfo& info, const SPS* pcSPS, const bool isIntra)
-{
-  unsigned  symbol = 0;
-  READ_FLAG(symbol, "tile_group_reshaper_model_present_flag");                 info.setSliceReshapeModelPresentFlag(symbol == 1);
-  if (info.getSliceReshapeModelPresentFlag())
+  for (int i = 0; i < firstSubLayer; i++) // sub-layers below firstSubLayer: nothing is read, values are copied from sub-layer maxNumSubLayersMinus1
   {
-    memset(info.reshaperModelBinCWDelta, 0, PIC_CODE_CW_BINS * sizeof(int));
-    READ_UVLC(symbol, "reshaper_model_min_bin_idx");                             info.reshaperModelMinBinIdx = symbol;
-    READ_UVLC(symbol, "reshaper_model_delta_max_bin_idx");                       info.reshaperModelMaxBinIdx = PIC_CODE_CW_BINS - 1 - symbol;
-    READ_UVLC(symbol, "reshaper_model_bin_delta_abs_cw_prec_minus1");            info.maxNbitsNeededDeltaCW = symbol + 1;
-    assert(info.maxNbitsNeededDeltaCW > 0);
-    for (uint32_t i = info.reshaperModelMinBinIdx; i <= info.reshaperModelMaxBinIdx; i++)
+    for (int nalOrVcl = 0; nalOrVcl < 2; nalOrVcl++)
     {
-      READ_CODE(info.maxNbitsNeededDeltaCW, symbol, "reshaper_model_bin_delta_abs_CW");
-      int absCW = symbol;
-      if (absCW > 0)
+      if( ( ( nalOrVcl == 0 ) && ( hrd->getNalHrdParametersPresentFlag() ) ) ||
+          ( ( nalOrVcl == 1 ) && ( hrd->getVclHrdParametersPresentFlag() ) ) )
       {
-        READ_CODE(1, symbol, "reshaper_model_bin_delta_sign_CW_flag");
+        for (int j = 0; j <= (hrd->getCpbCntMinus1(i)); j++) // NOTE(review): getCpbCntMinus1(i) is never written in this function for i < firstSubLayer - confirm whether the count of sub-layer maxNumSubLayersMinus1 was intended
+        {
+          uint32_t bitRate = hrd->getBitRateValueMinus1(maxNumSubLayersMinus1, j, nalOrVcl);
+          hrd->setBitRateValueMinus1(i, j, nalOrVcl, bitRate);
+          uint32_t cpbSize = hrd->getCpbSizeValueMinus1(maxNumSubLayersMinus1, j, nalOrVcl);
+          hrd->setCpbSizeValueMinus1(i, j, nalOrVcl, cpbSize);
+          bool flag = hrd->getCbrFlag(maxNumSubLayersMinus1, j, nalOrVcl);
+          hrd->setCbrFlag(i, j, nalOrVcl, flag);
+        }
       }
-      int signCW = symbol;
-      info.reshaperModelBinCWDelta[i] = (1 - 2 * signCW) * absCW;
-    }
-  }
-  READ_FLAG(symbol, "tile_group_reshaper_enable_flag");           info.setUseSliceReshaper(symbol == 1);
-  if (info.getUseSliceReshaper())
-  {
-    if (!(pcSPS->getUseDualITree() && isIntra))
-    {
-      READ_FLAG(symbol, "slice_reshaper_ChromaAdj");                info.setSliceReshapeChromaAdj(symbol);
-    }
-    else
-    {
-      info.setSliceReshapeChromaAdj(0);
     }
   }
 }
+
+
 void HLSyntaxReader::parseSPS(SPS* pcSPS)
 {
   uint32_t  uiCode;
@@ -869,49 +1050,8 @@ void HLSyntaxReader::parseSPS(SPS* pcSPS)
 #if ENABLE_TRACING
   xTraceSPSHeader ();
 #endif
-#if HEVC_VPS
-  READ_CODE( 4,  uiCode, "sps_video_parameter_set_id");          pcSPS->setVPSId        ( uiCode );
-#endif
-#if !JVET_M0101_HLS
-  READ_UVLC(     uiCode, "sps_seq_parameter_set_id" );           pcSPS->setSPSId( uiCode );
-  CHECK(uiCode > 15, "Invalid SPS id signalled");
-
-  READ_FLAG(uiCode, "intra_only_constraint_flag");               pcSPS->setIntraOnlyConstraintFlag(uiCode > 0 ? true : false);
-  READ_CODE(4, uiCode, "max_bitdepth_constraint_idc");           pcSPS->setMaxBitDepthConstraintIdc(uiCode);
-  READ_CODE(2, uiCode, "max_chroma_format_constraint_idc");      pcSPS->setMaxChromaFormatConstraintIdc(uiCode);
-  READ_FLAG(uiCode, "frame_only_constraint_flag");               pcSPS->setFrameConstraintFlag(uiCode > 0 ? true : false);
-  READ_FLAG(uiCode, "no_qtbtt_dual_tree_intra_constraint_flag"); pcSPS->setNoQtbttDualTreeIntraConstraintFlag(uiCode > 0 ? true : false);
-  READ_FLAG(uiCode, "no_sao_constraint_flag");                   pcSPS->setNoSaoConstraintFlag(uiCode > 0 ? true : false);
-  READ_FLAG(uiCode, "no_alf_constraint_flag");                   pcSPS->setNoAlfConstraintFlag(uiCode > 0 ? true : false);
-  READ_FLAG(uiCode, "no_pcm_constraint_flag");                   pcSPS->setNoPcmConstraintFlag(uiCode > 0 ? true : false);
-  READ_FLAG(uiCode, "no_ref_wraparound_constraint_flag");        pcSPS->setNoRefWraparoundConstraintFlag(uiCode > 0 ? true : false);
-  READ_FLAG(uiCode, "no_temporal_mvp_constraint_flag");          pcSPS->setNoTemporalMvpConstraintFlag(uiCode > 0 ? true : false);
-  READ_FLAG(uiCode, "no_sbtmvp_constraint_flag");                pcSPS->setNoSbtmvpConstraintFlag(uiCode > 0 ? true : false);
-  READ_FLAG(uiCode, "no_amvr_constraint_flag");                  pcSPS->setNoAmvrConstraintFlag(uiCode > 0 ? true : false);
-  READ_FLAG(uiCode, "no_bdof_constraint_flag");                  pcSPS->setNoBdofConstraintFlag(uiCode > 0 ? true : false);
-  READ_FLAG(uiCode, "no_cclm_constraint_flag");                  pcSPS->setNoCclmConstraintFlag(uiCode > 0 ? true : false);
-  READ_FLAG(uiCode, "no_mts_constraint_flag");                   pcSPS->setNoMtsConstraintFlag(uiCode > 0 ? true : false);
-  READ_FLAG(uiCode, "no_affine_motion_constraint_flag");         pcSPS->setNoAffineMotionConstraintFlag(uiCode > 0 ? true : false);
-  READ_FLAG(uiCode, "no_gbi_constraint_flag");                   pcSPS->setNoGbiConstraintFlag(uiCode > 0 ? true : false);
-  READ_FLAG(uiCode, "no_mh_intra_constraint_flag");              pcSPS->setNoMhIntraConstraintFlag(uiCode > 0 ? true : false);
-  READ_FLAG(uiCode, "no_triangle_constraint_flag");              pcSPS->setNoTriangleConstraintFlag(uiCode > 0 ? true : false);
-  READ_FLAG(uiCode, "no_ladf_constraint_flag");                  pcSPS->setNoLadfConstraintFlag(uiCode > 0 ? true : false);
-  READ_FLAG(uiCode, "no_curr_pic_ref_constraint_flag");          pcSPS->setNoCurrPicRefConstraintFlag(uiCode > 0 ? true : false);
-  READ_FLAG(uiCode, "no_qp_delta_constraint_flag");              pcSPS->setNoQpDeltaConstraintFlag(uiCode > 0 ? true : false);
-  READ_FLAG(uiCode, "no_dep_quant_constraint_flag");             pcSPS->setNoDepQuantConstraintFlag(uiCode > 0 ? true : false);
-  READ_FLAG(uiCode, "no_sign_data_hiding_constraint_flag");      pcSPS->setNoSignDataHidingConstraintFlag(uiCode > 0 ? true : false);
-
-  // KJS: Marakech decision: sub-layers added back
-  READ_CODE( 3,  uiCode, "sps_max_sub_layers_minus1" );          pcSPS->setMaxTLayers   ( uiCode+1 );
-  CHECK(uiCode > 6, "Invalid maximum number of T-layer signalled");
-  READ_FLAG( uiCode, "sps_temporal_id_nesting_flag" );           pcSPS->setTemporalIdNestingFlag ( uiCode > 0 ? true : false );
-  if ( pcSPS->getMaxTLayers() == 1 )
-  {
-    // sps_temporal_id_nesting_flag must be 1 when sps_max_sub_layers_minus1 is 0
-    CHECK( uiCode != 1, "Invalid maximum number of T-layers" );
-  }
-  parsePTL(pcSPS->getPTL(), true, pcSPS->getMaxTLayers() - 1);
-#else
+  READ_CODE( 4,  uiCode, "sps_decoding_parameter_set_id");       pcSPS->setDecodingParameterSetId( uiCode );
+  READ_CODE( 4,  uiCode, "sps_video_parameter_set_id" );      pcSPS->setVPSId( uiCode );
   READ_CODE(3, uiCode, "sps_max_sub_layers_minus1");          pcSPS->setMaxTLayers   (uiCode + 1);
   CHECK(uiCode > 6, "Invalid maximum number of T-layer signalled");
   READ_CODE(5, uiCode, "sps_reserved_zero_5bits");
@@ -919,55 +1059,100 @@ void HLSyntaxReader::parseSPS(SPS* pcSPS)
 
   parseProfileTierLevel(pcSPS->getProfileTierLevel(), pcSPS->getMaxTLayers() - 1);
 
-  READ_UVLC(uiCode, "sps_seq_parameter_set_id");           pcSPS->setSPSId(uiCode);
-#endif
+  READ_FLAG(uiCode, "gdr_enabled_flag");
+  pcSPS->setGDREnabledFlag(uiCode);
 
-  READ_UVLC(     uiCode, "chroma_format_idc" );                  pcSPS->setChromaFormatIdc( ChromaFormat(uiCode) );
-  CHECK(uiCode > 3, "Invalid chroma format signalled");
+  READ_CODE(4, uiCode, "sps_seq_parameter_set_id");              pcSPS->setSPSId(uiCode);
 
-  // KJS: ENABLE_CHROMA_422 does not exist anymore o.O
-  if( pcSPS->getChromaFormatIdc() == CHROMA_422 )
-  {
-    EXIT( "Error:  4:2:2 chroma sampling format not supported with current compiler setting."
-          "\n        Set compiler flag \"ENABLE_CHROMA_422\" equal to 1 for enabling 4:2:2.\n" );
-  }
+  READ_CODE(2, uiCode, "chroma_format_idc");                     pcSPS->setChromaFormatIdc( ChromaFormat(uiCode) );
 
   if( pcSPS->getChromaFormatIdc() == CHROMA_444 )
   {
     READ_FLAG(     uiCode, "separate_colour_plane_flag");        CHECK(uiCode != 0, "Invalid code");
+    pcSPS->setSeparateColourPlaneFlag( uiCode != 0 );
   }
 
-  READ_UVLC (    uiCode, "pic_width_in_luma_samples" );          pcSPS->setPicWidthInLumaSamples ( uiCode    );
-  READ_UVLC (    uiCode, "pic_height_in_luma_samples" );         pcSPS->setPicHeightInLumaSamples( uiCode    );
+  READ_FLAG( uiCode, "ref_pic_resampling_enabled_flag" );        pcSPS->setRprEnabledFlag( uiCode );
+
+  READ_UVLC( uiCode, "pic_width_max_in_luma_samples" );          pcSPS->setMaxPicWidthInLumaSamples( uiCode );
+  READ_UVLC( uiCode, "pic_height_max_in_luma_samples" );         pcSPS->setMaxPicHeightInLumaSamples( uiCode );
+
+  READ_CODE(2, uiCode, "sps_log2_ctu_size_minus5");              pcSPS->setCTUSize(1 << (uiCode + 5));
+  CHECK(uiCode > 2, "sps_log2_ctu_size_minus5 must be less than or equal to 2");
+  unsigned ctbLog2SizeY = uiCode + 5;
+  pcSPS->setMaxCodingDepth(uiCode+3);
+  pcSPS->setLog2DiffMaxMinCodingBlockSize(uiCode+3);
+  pcSPS->setMaxCUWidth(pcSPS->getCTUSize());
+  pcSPS->setMaxCUHeight(pcSPS->getCTUSize());
+  READ_FLAG( uiCode, "subpics_present_flag" );                   pcSPS->setSubPicPresentFlag(uiCode);
 
-  // KJS: not removing yet
-  READ_FLAG(     uiCode, "conformance_window_flag");
-  if (uiCode != 0)
+  if (pcSPS->getSubPicPresentFlag()) 
+  {
+    READ_CODE(8, uiCode, "sps_num_subpics_minus1"); pcSPS->setNumSubPics(uiCode + 1);
+    for (int picIdx = 0; picIdx < pcSPS->getNumSubPics(); picIdx++)
+    {
+      READ_CODE(std::max(1, ceilLog2(((pcSPS->getMaxPicWidthInLumaSamples() + pcSPS->getCTUSize() - 1) >> floorLog2(pcSPS->getCTUSize())))), uiCode, "subpic_ctu_top_left_x[ i ]");
+      pcSPS->setSubPicCtuTopLeftX(picIdx, uiCode);
+      READ_CODE(std::max(1, ceilLog2(((pcSPS->getMaxPicHeightInLumaSamples() + pcSPS->getCTUSize() - 1) >> floorLog2(pcSPS->getCTUSize())))), uiCode, "subpic_ctu_top_left_y[ i ]");
+      pcSPS->setSubPicCtuTopLeftY(picIdx, uiCode);
+      READ_CODE(std::max(1, ceilLog2(((pcSPS->getMaxPicWidthInLumaSamples() + pcSPS->getCTUSize() - 1) >> floorLog2(pcSPS->getCTUSize())))), uiCode, "subpic_width_minus1[ i ]");
+      pcSPS->setSubPicWidth(picIdx, uiCode + 1);
+      READ_CODE(std::max(1, ceilLog2(((pcSPS->getMaxPicHeightInLumaSamples() + pcSPS->getCTUSize() - 1) >> floorLog2(pcSPS->getCTUSize())))), uiCode, "subpic_height_minus1[ i ]");
+      pcSPS->setSubPicHeight(picIdx, uiCode + 1);
+      READ_FLAG(uiCode, "subpic_treated_as_pic_flag[ i ]");
+      pcSPS->setSubPicTreatedAsPicFlag(picIdx, uiCode);
+      READ_FLAG(uiCode, "loop_filter_across_subpic_enabled_flag[ i ]");
+      pcSPS->setLoopFilterAcrossSubpicEnabledFlag(picIdx, uiCode);
+    }
+  }
+  READ_FLAG(uiCode, "sps_subpic_id_present_flag");                           pcSPS->setSubPicIdPresentFlag( uiCode != 0 );
+  if( pcSPS->getSubPicIdPresentFlag() )
+  {
+    READ_FLAG(uiCode, "sps_subpic_id_signalling_present_flag");              pcSPS->setSubPicIdSignallingPresentFlag( uiCode != 0 );
+    if( pcSPS->getSubPicIdSignallingPresentFlag() )
+    {
+      READ_UVLC( uiCode, "sps_subpic_id_len_minus1" );                       pcSPS->setSubPicIdLen( uiCode + 1 );
+      CHECK( uiCode > 15, "Invalid sps_subpic_id_len_minus1 signalled");
+      for( int picIdx = 0; picIdx < pcSPS->getNumSubPics( ); picIdx++ )
+      {
+        READ_CODE( pcSPS->getSubPicIdLen( ), uiCode, "sps_subpic_id[i]" );   pcSPS->setSubPicId( picIdx, uiCode );
+      }
+    }
+  }
+  if( pcSPS->getSubPicIdPresentFlag() == false || pcSPS->getSubPicIdSignallingPresentFlag() == false )
   {
-    Window &conf = pcSPS->getConformanceWindow();
-    READ_UVLC(   uiCode, "conf_win_left_offset" );               conf.setWindowLeftOffset  ( uiCode * SPS::getWinUnitX( pcSPS->getChromaFormatIdc() ) );
-    READ_UVLC(   uiCode, "conf_win_right_offset" );              conf.setWindowRightOffset ( uiCode * SPS::getWinUnitX( pcSPS->getChromaFormatIdc() ) );
-    READ_UVLC(   uiCode, "conf_win_top_offset" );                conf.setWindowTopOffset   ( uiCode * SPS::getWinUnitY( pcSPS->getChromaFormatIdc() ) );
-    READ_UVLC(   uiCode, "conf_win_bottom_offset" );             conf.setWindowBottomOffset( uiCode * SPS::getWinUnitY( pcSPS->getChromaFormatIdc() ) );
+    for( int picIdx = 0; picIdx < pcSPS->getNumSubPics( ); picIdx++ )
+    {
+      pcSPS->setSubPicId( picIdx, picIdx );
+    }
   }
 
-  READ_UVLC(     uiCode, "bit_depth_luma_minus8" );
-  CHECK(uiCode > 8, "Invalid luma bit depth signalled");
+  READ_UVLC(     uiCode, "bit_depth_minus8" );
+  CHECK(uiCode > 8, "Invalid bit depth signalled");
   pcSPS->setBitDepth(CHANNEL_TYPE_LUMA, 8 + uiCode);
-
+  pcSPS->setBitDepth(CHANNEL_TYPE_CHROMA, 8 + uiCode);
   pcSPS->setQpBDOffset(CHANNEL_TYPE_LUMA, (int) (6*uiCode) );
+  pcSPS->setQpBDOffset(CHANNEL_TYPE_CHROMA, (int) (6*uiCode) );
 
-  READ_UVLC( uiCode,    "bit_depth_chroma_minus8" );
-  CHECK(uiCode > 8, "Invalid chroma bit depth signalled");
-  pcSPS->setBitDepth(CHANNEL_TYPE_CHROMA, 8 + uiCode);
-  pcSPS->setQpBDOffset(CHANNEL_TYPE_CHROMA,  (int) (6*uiCode) );
+  READ_UVLC(     uiCode, "min_qp_prime_ts_minus4" );
+  pcSPS->setMinQpPrimeTsMinus4(CHANNEL_TYPE_LUMA, uiCode);
+  CHECK(uiCode > 48, "Invalid min_qp_prime_ts_minus4 signalled");
+  pcSPS->setMinQpPrimeTsMinus4(CHANNEL_TYPE_CHROMA, uiCode);
+  READ_FLAG( uiCode, "sps_weighted_pred_flag" );                    pcSPS->setUseWP( uiCode ? true : false );
+  READ_FLAG( uiCode, "sps_weighted_bipred_flag" );                  pcSPS->setUseWPBiPred( uiCode ? true : false );
 
-  READ_UVLC( uiCode,    "log2_max_pic_order_cnt_lsb_minus4" );   pcSPS->setBitsForPOC( 4 + uiCode );
+  READ_CODE(4, uiCode, "log2_max_pic_order_cnt_lsb_minus4");     pcSPS->setBitsForPOC( 4 + uiCode );
   CHECK(uiCode > 12, "Invalid code");
-
   // KJS: Marakech decision: sub-layers added back
   uint32_t subLayerOrderingInfoPresentFlag;
-  READ_FLAG(subLayerOrderingInfoPresentFlag, "sps_sub_layer_ordering_info_present_flag");
+  if (pcSPS->getMaxTLayers() > 1)
+  {
+    READ_FLAG(subLayerOrderingInfoPresentFlag, "sps_sub_layer_ordering_info_present_flag");
+  }
+  else
+  {
+    subLayerOrderingInfoPresentFlag = 0;
+  }
 
   for(uint32_t i=0; i <= pcSPS->getMaxTLayers()-1; i++)
   {
@@ -990,78 +1175,160 @@ void HLSyntaxReader::parseSPS(SPS* pcSPS)
     }
   }
 
+  READ_FLAG(uiCode, "long_term_ref_pics_flag");          pcSPS->setLongTermRefsPresent(uiCode);
+  READ_FLAG( uiCode, "inter_layer_ref_pics_present_flag" );  pcSPS->setInterLayerPresentFlag( uiCode );
+  READ_FLAG( uiCode, "sps_idr_rpl_present_flag" );       pcSPS->setIDRRefParamListPresent( (bool) uiCode );
+  READ_FLAG(uiCode, "rpl1_copy_from_rpl0_flag");
+  pcSPS->setRPL1CopyFromRPL0Flag(uiCode);
+
+  // Read the candidate reference picture lists for List0
+  READ_UVLC(uiCode, "num_ref_pic_lists_in_sps[0]");
+  uint32_t numberOfRPL = uiCode;
+  pcSPS->createRPLList0(numberOfRPL);
+  RPLList* rplList = pcSPS->getRPLList0();
+  ReferencePictureList* rpl;
+  for (uint32_t ii = 0; ii < numberOfRPL; ii++)
+  {
+    rpl = rplList->getReferencePictureList(ii);
+    parseRefPicList(pcSPS, rpl);
+  }
+
+  // Read the candidate reference picture lists for List1 (or copy them from List0)
+  if (!pcSPS->getRPL1CopyFromRPL0Flag())
+  {
+    READ_UVLC(uiCode, "num_ref_pic_lists_in_sps[1]");
+    numberOfRPL = uiCode;
+    pcSPS->createRPLList1(numberOfRPL);
+    rplList = pcSPS->getRPLList1();
+    for (uint32_t ii = 0; ii < numberOfRPL; ii++)
+    {
+      rpl = rplList->getReferencePictureList(ii);
+      parseRefPicList(pcSPS, rpl);
+    }
+  }
+  else
+  {
+    numberOfRPL = pcSPS->getNumRPL0();
+    pcSPS->createRPLList1(numberOfRPL);
+    RPLList* rplListSource = pcSPS->getRPLList0();
+    RPLList* rplListDest = pcSPS->getRPLList1();
+    for (uint32_t ii = 0; ii < numberOfRPL; ii++)
+      copyRefPicList(pcSPS, rplListSource->getReferencePictureList(ii), rplListDest->getReferencePictureList(ii));
+  }
+
   unsigned  minQT[3] = { 0, 0, 0 };
   unsigned  maxBTD[3] = { 0, 0, 0 };
 
   unsigned  maxBTSize[3] = { 0, 0, 0 };
   unsigned  maxTTSize[3] = { 0, 0, 0 };
-  READ_FLAG(uiCode, "qtbtt_dual_tree_intra_flag");             pcSPS->setUseDualITree(uiCode);
-  READ_UVLC(uiCode, "log2_ctu_size_minus2");                   pcSPS->setCTUSize(1 << (uiCode + 2));
-  pcSPS->setMaxCodingDepth(uiCode);
-  pcSPS->setLog2DiffMaxMinCodingBlockSize(uiCode);
-  pcSPS->setMaxCUWidth(pcSPS->getCTUSize());
-  pcSPS->setMaxCUHeight(pcSPS->getCTUSize());
+  if( pcSPS->getChromaFormatIdc() != CHROMA_400 ) 
+  {
+    READ_FLAG(uiCode, "qtbtt_dual_tree_intra_flag");           pcSPS->setUseDualITree(uiCode);
+  }
+  else
+  {
+    pcSPS->setUseDualITree(0);
+  }
 
   READ_UVLC(uiCode, "log2_min_luma_coding_block_size_minus2");
   int log2MinCUSize = uiCode + 2;
   pcSPS->setLog2MinCodingBlockSize(log2MinCUSize);
+  CHECK(uiCode > ctbLog2SizeY - 2, "Invalid log2_min_luma_coding_block_size_minus2 signalled");
+
+  CHECK(log2MinCUSize > std::min(6, (int)(ctbLog2SizeY)), "log2_min_luma_coding_block_size_minus2 shall be in the range of 0 to min (4, log2_ctu_size - 2)");
+  CHECK( ( pcSPS->getMaxPicWidthInLumaSamples() % ( std::max( 8, int( pcSPS->getMaxCUWidth() >> ( pcSPS->getMaxCodingDepth() - 1 ) ) ) ) ) != 0, "Coded frame width must be a multiple of Max(8, the minimum unit size)" );
+  CHECK( ( pcSPS->getMaxPicHeightInLumaSamples() % ( std::max( 8, int( pcSPS->getMaxCUHeight() >> ( pcSPS->getMaxCodingDepth() - 1 ) ) ) ) ) != 0, "Coded frame height must be a multiple of Max(8, the minimum unit size)" );
+
   READ_FLAG(uiCode, "partition_constraints_override_enabled_flag"); pcSPS->setSplitConsOverrideEnabledFlag(uiCode);
-  READ_UVLC(uiCode, "sps_log2_diff_min_qt_min_cb_intra_tile_group_luma");      minQT[0] = 1 << (uiCode + pcSPS->getLog2MinCodingBlockSize());
-  READ_UVLC(uiCode, "sps_log2_diff_min_qt_min_cb_inter_tile_group");      minQT[1] = 1 << (uiCode + pcSPS->getLog2MinCodingBlockSize());
-  READ_UVLC(uiCode, "sps_max_mtt_hierarchy_depth_inter_tile_group");     maxBTD[1] = uiCode;
-  READ_UVLC(uiCode, "sps_max_mtt_hierarchy_depth_intra_tile_group_luma");     maxBTD[0] = uiCode;
+  READ_UVLC(uiCode, "sps_log2_diff_min_qt_min_cb_intra_slice_luma");
+  unsigned minQtLog2SizeIntraY = uiCode + pcSPS->getLog2MinCodingBlockSize();
+  minQT[0] = 1 << minQtLog2SizeIntraY;
+  READ_UVLC(uiCode, "sps_log2_diff_min_qt_min_cb_inter_slice");
+  unsigned minQtLog2SizeInterY = uiCode + pcSPS->getLog2MinCodingBlockSize();
+  minQT[1] = 1 << minQtLog2SizeInterY;
+  READ_UVLC(uiCode, "sps_max_mtt_hierarchy_depth_inter_slice");     maxBTD[1] = uiCode;
+  CHECK(uiCode > 2*(ctbLog2SizeY - log2MinCUSize), "sps_max_mtt_hierarchy_depth_inter_slice shall be in the range 0 to 2*(ctbLog2SizeY - log2MinCUSize)");
+  READ_UVLC(uiCode, "sps_max_mtt_hierarchy_depth_intra_slice_luma");     maxBTD[0] = uiCode;
+  CHECK(uiCode > 2 * (ctbLog2SizeY - log2MinCUSize), "sps_max_mtt_hierarchy_depth_intra_slice_luma shall be in the range 0 to 2*(ctbLog2SizeY - log2MinCUSize)");
 
   maxTTSize[0] = maxBTSize[0] = minQT[0];
   if (maxBTD[0] != 0)
   {
-    READ_UVLC(uiCode, "sps_log2_diff_max_bt_min_qt_intra_tile_group_luma");     maxBTSize[0] <<= uiCode;
-    READ_UVLC(uiCode, "sps_log2_diff_max_tt_min_qt_intra_tile_group_luma");     maxTTSize[0] <<= uiCode;
+    READ_UVLC(uiCode, "sps_log2_diff_max_bt_min_qt_intra_slice_luma");     maxBTSize[0] <<= uiCode;
+    CHECK(uiCode > ctbLog2SizeY - minQtLog2SizeIntraY, "Invalid code");
+    READ_UVLC(uiCode, "sps_log2_diff_max_tt_min_qt_intra_slice_luma");     maxTTSize[0] <<= uiCode;
+    CHECK(uiCode > ctbLog2SizeY - minQtLog2SizeIntraY, "Invalid code");
   }
   maxTTSize[1] = maxBTSize[1] = minQT[1];
   if (maxBTD[1] != 0)
   {
-    READ_UVLC(uiCode, "sps_log2_diff_max_bt_min_qt_inter_tile_group");     maxBTSize[1] <<= uiCode;
-    READ_UVLC(uiCode, "sps_log2_diff_max_tt_min_qt_inter_tile_group");     maxTTSize[1] <<= uiCode;
+    READ_UVLC(uiCode, "sps_log2_diff_max_bt_min_qt_inter_slice");     maxBTSize[1] <<= uiCode;
+    CHECK(uiCode > ctbLog2SizeY - minQtLog2SizeInterY, "Invalid code");
+    READ_UVLC(uiCode, "sps_log2_diff_max_tt_min_qt_inter_slice");     maxTTSize[1] <<= uiCode;
+    CHECK(uiCode > ctbLog2SizeY - minQtLog2SizeInterY, "Invalid code");
   }
   if (pcSPS->getUseDualITree())
   {
-    READ_UVLC(uiCode, "sps_log2_diff_min_qt_min_cb_intra_tile_group_chroma"); minQT[2] = 1 << (uiCode + pcSPS->getLog2MinCodingBlockSize());
-    READ_UVLC(uiCode, "sps_max_mtt_hierarchy_depth_intra_tile_group_chroma"); maxBTD[2] = uiCode;
+    READ_UVLC(uiCode, "sps_log2_diff_min_qt_min_cb_intra_slice_chroma"); minQT[2] = 1 << (uiCode + pcSPS->getLog2MinCodingBlockSize());
+    READ_UVLC(uiCode, "sps_max_mtt_hierarchy_depth_intra_slice_chroma"); maxBTD[2] = uiCode;
+    CHECK(uiCode > 2 * (ctbLog2SizeY - log2MinCUSize), "sps_max_mtt_hierarchy_depth_intra_slice_chroma shall be in the range 0 to 2*(ctbLog2SizeY - log2MinCUSize)");
     maxTTSize[2] = maxBTSize[2] = minQT[2];
     if (maxBTD[2] != 0)
     {
-      READ_UVLC(uiCode, "sps_log2_diff_max_bt_min_qt_intra_tile_group_chroma");       maxBTSize[2] <<= uiCode;
-      READ_UVLC(uiCode, "sps_log2_diff_max_tt_min_qt_intra_tile_group_chroma");       maxTTSize[2] <<= uiCode;
+      READ_UVLC(uiCode, "sps_log2_diff_max_bt_min_qt_intra_slice_chroma");       maxBTSize[2] <<= uiCode;
+      READ_UVLC(uiCode, "sps_log2_diff_max_tt_min_qt_intra_slice_chroma");       maxTTSize[2] <<= uiCode;
     }
 }
 
   pcSPS->setMinQTSizes(minQT);
-  pcSPS->setMaxBTDepth(maxBTD[1], maxBTD[0], maxBTD[2]);
+  pcSPS->setMaxMTTHierarchyDepth(maxBTD[1], maxBTD[0], maxBTD[2]);
   pcSPS->setMaxBTSize(maxBTSize[1], maxBTSize[0], maxBTSize[2]);
   pcSPS->setMaxTTSize(maxTTSize[1], maxTTSize[0], maxTTSize[2]);
 
-#if !JVET_M0101_HLS
-  if (pcSPS->getPTL()->getGeneralPTL()->getLevelIdc() >= Level::LEVEL5)
+
+  READ_FLAG( uiCode, "sps_max_luma_transform_size_64_flag");        pcSPS->setLog2MaxTbSize( (uiCode ? 1 : 0) + 5 );
+
+  READ_FLAG(uiCode, "sps_joint_cbcr_enabled_flag");                pcSPS->setJointCbCrEnabledFlag(uiCode ? true : false);
+  if (pcSPS->getChromaFormatIdc() != CHROMA_400)
   {
-    CHECK(log2MinCUSize + pcSPS->getLog2DiffMaxMinCodingBlockSize() < 5, "Invalid code");
+    ChromaQpMappingTableParams chromaQpMappingTableParams;
+    READ_FLAG(uiCode, "same_qp_table_for_chroma");        chromaQpMappingTableParams.setSameCQPTableForAllChromaFlag(uiCode);
+    int numQpTables = chromaQpMappingTableParams.getSameCQPTableForAllChromaFlag() ? 1 : (pcSPS->getJointCbCrEnabledFlag() ? 3 : 2);
+    chromaQpMappingTableParams.setNumQpTables(numQpTables);
+    for (int i = 0; i < numQpTables; i++)
+    {
+      int32_t qpTableStart = 0;
+      READ_SVLC(qpTableStart, "qp_table_starts_minus26"); chromaQpMappingTableParams.setQpTableStartMinus26(i, qpTableStart);
+      READ_UVLC(uiCode, "num_points_in_qp_table_minus1"); chromaQpMappingTableParams.setNumPtsInCQPTableMinus1(i,uiCode);
+      std::vector<int> deltaQpInValMinus1(chromaQpMappingTableParams.getNumPtsInCQPTableMinus1(i) + 1);
+      std::vector<int> deltaQpOutVal(chromaQpMappingTableParams.getNumPtsInCQPTableMinus1(i) + 1);
+      for (int j = 0; j <= chromaQpMappingTableParams.getNumPtsInCQPTableMinus1(i); j++)
+      {
+        READ_UVLC(uiCode, "delta_qp_in_val_minus1");  deltaQpInValMinus1[j] = uiCode;
+        READ_UVLC(uiCode, "delta_qp_diff_val");
+        deltaQpOutVal[j] = uiCode ^ deltaQpInValMinus1[j];
+      }
+      chromaQpMappingTableParams.setDeltaQpInValMinus1(i, deltaQpInValMinus1);
+      chromaQpMappingTableParams.setDeltaQpOutVal(i, deltaQpOutVal);
+    }
+    pcSPS->setChromaQpMappingTableFromParams(chromaQpMappingTableParams, pcSPS->getQpBDOffset(CHANNEL_TYPE_CHROMA));
+    pcSPS->derivedChromaQPMappingTables();
   }
-#endif
 
-#if MAX_TB_SIZE_SIGNALLING
-  // KJS: Not in syntax
-  READ_UVLC( uiCode, "log2_max_luma_transform_block_size_minus2" ); pcSPS->setLog2MaxTbSize( uiCode + 2 );
-#endif
+
   READ_FLAG( uiCode, "sps_sao_enabled_flag" );                      pcSPS->setSAOEnabledFlag ( uiCode ? true : false );
   READ_FLAG( uiCode, "sps_alf_enabled_flag" );                      pcSPS->setALFEnabledFlag ( uiCode ? true : false );
 
-  READ_FLAG( uiCode, "sps_pcm_enabled_flag" );                          pcSPS->setPCMEnabledFlag( uiCode ? true : false );
-  if( pcSPS->getPCMEnabledFlag() )
+  READ_FLAG(uiCode, "sps_transform_skip_enabled_flag"); pcSPS->setTransformSkipEnabledFlag(uiCode ? true : false);
+  if (pcSPS->getTransformSkipEnabledFlag())
   {
-    READ_CODE( 4, uiCode, "pcm_sample_bit_depth_luma_minus1" );          pcSPS->setPCMBitDepth    ( CHANNEL_TYPE_LUMA, 1 + uiCode );
-    READ_CODE( 4, uiCode, "pcm_sample_bit_depth_chroma_minus1" );        pcSPS->setPCMBitDepth    ( CHANNEL_TYPE_CHROMA, 1 + uiCode );
-    READ_UVLC( uiCode, "log2_min_pcm_luma_coding_block_size_minus3" );   pcSPS->setPCMLog2MinSize ( uiCode+3 );
-    READ_UVLC( uiCode, "log2_diff_max_min_pcm_luma_coding_block_size" ); pcSPS->setPCMLog2MaxSize ( uiCode+pcSPS->getPCMLog2MinSize() );
-    READ_FLAG( uiCode, "pcm_loop_filter_disable_flag" );                 pcSPS->setPCMFilterDisableFlag ( uiCode ? true : false );
+      READ_FLAG(uiCode, "sps_bdpcm_enabled_flag");
+      if (uiCode && pcSPS->getChromaFormatIdc() == CHROMA_444 )
+      {
+          READ_FLAG(uiCode, "sps_bdpcm_enabled_chroma_flag");
+          uiCode++;
+      }
+      pcSPS->setBDPCMEnabled(uiCode);
   }
 
   READ_FLAG(uiCode, "sps_ref_wraparound_enabled_flag");                  pcSPS->setWrapAroundEnabledFlag( uiCode ? true : false );
@@ -1085,47 +1352,91 @@ void HLSyntaxReader::parseSPS(SPS* pcSPS)
   READ_FLAG( uiCode,  "sps_amvr_enabled_flag" );                     pcSPS->setAMVREnabledFlag ( uiCode != 0 );
 
   READ_FLAG( uiCode, "sps_bdof_enabled_flag" );                      pcSPS->setBDOFEnabledFlag ( uiCode != 0 );
-
-  READ_FLAG( uiCode,  "sps_affine_amvr_enabled_flag" );             pcSPS->setAffineAmvrEnabledFlag ( uiCode != 0 );
-
-  READ_FLAG(uiCode, "sps_dmvr_enable_flag");                        pcSPS->setUseDMVR(uiCode != 0);
-
-  // KJS: sps_cclm_enabled_flag
-  READ_FLAG( uiCode,    "lm_chroma_enabled_flag" );                 pcSPS->setUseLMChroma            ( uiCode != 0 );
-  if ( pcSPS->getUseLMChroma() && pcSPS->getChromaFormatIdc() == CHROMA_420 )
+  if (pcSPS->getBDOFEnabledFlag())
+  {
+    READ_FLAG(uiCode, "sps_bdof_pic_present_flag");                 pcSPS->setBdofControlPresentFlag( uiCode != 0 );
+  }
+  else {
+    pcSPS->setBdofControlPresentFlag( false );
+  }
+  READ_FLAG(uiCode, "sps_smvd_enabled_flag");                       pcSPS->setUseSMVD( uiCode != 0 );
+  READ_FLAG(uiCode, "sps_dmvr_enabled_flag");                        pcSPS->setUseDMVR(uiCode != 0);
+  if (pcSPS->getUseDMVR())
   {
-    READ_FLAG( uiCode,  "sps_cclm_collocated_chroma_flag" );        pcSPS->setCclmCollocatedChromaFlag( uiCode != 0 );
+    READ_FLAG(uiCode, "sps_dmvr_pic_present_flag");                 pcSPS->setDmvrControlPresentFlag( uiCode != 0 );
+  }
+  else {
+    pcSPS->setDmvrControlPresentFlag( false );
+  }
+  READ_FLAG(uiCode, "sps_mmvd_enabled_flag");                        pcSPS->setUseMMVD(uiCode != 0);
+  READ_FLAG(uiCode, "sps_isp_enabled_flag");                        pcSPS->setUseISP( uiCode != 0 );
+  READ_FLAG(uiCode, "sps_mrl_enabled_flag");                        pcSPS->setUseMRL( uiCode != 0 );
+  READ_FLAG(uiCode, "sps_mip_enabled_flag");                        pcSPS->setUseMIP( uiCode != 0 );
+  if( pcSPS->getChromaFormatIdc() != CHROMA_400) 
+  {
+    READ_FLAG( uiCode, "sps_cclm_enabled_flag" );                   pcSPS->setUseLMChroma( uiCode != 0 );
+  }
+  else
+  {
+    pcSPS->setUseLMChroma(0);
+  }
+  if( pcSPS->getChromaFormatIdc() == CHROMA_420 )
+  {
+    READ_FLAG( uiCode, "sps_chroma_horizontal_collocated_flag" );   pcSPS->setHorCollocatedChromaFlag( uiCode != 0 );
+    READ_FLAG( uiCode, "sps_chroma_vertical_collocated_flag" );     pcSPS->setVerCollocatedChromaFlag( uiCode != 0 );
   }
 
-  READ_FLAG( uiCode,    "mts_enabled_flag" );                       pcSPS->setUseMTS                 ( uiCode != 0 );
+  READ_FLAG( uiCode,    "sps_mts_enabled_flag" );                       pcSPS->setUseMTS                 ( uiCode != 0 );
   if ( pcSPS->getUseMTS() )
   {
-    READ_FLAG( uiCode,    "mts_intra_enabled_flag" );               pcSPS->setUseIntraMTS            ( uiCode != 0 );
-    READ_FLAG( uiCode,    "mts_inter_enabled_flag" );               pcSPS->setUseInterMTS            ( uiCode != 0 );
+    READ_FLAG( uiCode,    "sps_explicit_mts_intra_enabled_flag" );               pcSPS->setUseIntraMTS            ( uiCode != 0 );
+    READ_FLAG( uiCode,    "sps_explicit_mts_inter_enabled_flag" );               pcSPS->setUseInterMTS            ( uiCode != 0 );
   }
-  // KJS: sps_affine_enabled_flag
-  READ_FLAG( uiCode,    "affine_flag" );                            pcSPS->setUseAffine              ( uiCode != 0 );
+  READ_FLAG(uiCode, "sps_sbt_enabled_flag");                        pcSPS->setUseSBT                 ( uiCode != 0 );
+  READ_FLAG( uiCode,    "sps_affine_enabled_flag" );                            pcSPS->setUseAffine              ( uiCode != 0 );
   if ( pcSPS->getUseAffine() )
   {
-    READ_FLAG( uiCode,  "affine_type_flag" );                       pcSPS->setUseAffineType          ( uiCode != 0 );
+    READ_FLAG( uiCode,  "sps_affine_type_flag" );                       pcSPS->setUseAffineType          ( uiCode != 0 );
+    READ_FLAG( uiCode, "sps_affine_amvr_enabled_flag" );            pcSPS->setAffineAmvrEnabledFlag  ( uiCode != 0 );
+    READ_FLAG( uiCode, "sps_affine_prof_enabled_flag" );            pcSPS->setUsePROF                ( uiCode != 0 );
+    if (pcSPS->getUsePROF())
+    {
+      READ_FLAG(uiCode, "sps_prof_pic_present_flag");               pcSPS->setProfControlPresentFlag ( uiCode != 0 );
+    }
+    else {
+      pcSPS->setProfControlPresentFlag( false );
+    }
+  }
+  if (pcSPS->getChromaFormatIdc() == CHROMA_444)
+  {
+    READ_FLAG(uiCode, "sps_act_enabled_flag");                                  pcSPS->setUseColorTrans(uiCode != 0);
+  }
+  else
+  {
+    pcSPS->setUseColorTrans(false);
   }
-  READ_FLAG( uiCode,    "gbi_flag" );                               pcSPS->setUseGBi                 ( uiCode != 0 );
-  READ_FLAG(uiCode, "ibc_flag");                                    pcSPS->setIBCFlag(uiCode);
+  if (pcSPS->getChromaFormatIdc() == CHROMA_444)
+  {
+    READ_FLAG( uiCode,  "sps_palette_enabled_flag");                                pcSPS->setPLTMode                ( uiCode != 0 );
+  }
+  else
+  {
+    pcSPS->setPLTMode(false);
+  }
+  READ_FLAG( uiCode,    "sps_bcw_enabled_flag" );                   pcSPS->setUseBcw( uiCode != 0 );
+  READ_FLAG(uiCode, "sps_ibc_enabled_flag");                                    pcSPS->setIBCFlag(uiCode);
   // KJS: sps_ciip_enabled_flag
-  READ_FLAG( uiCode,     "mhintra_flag" );                           pcSPS->setUseMHIntra             ( uiCode != 0 );
-
-  READ_FLAG( uiCode,    "triangle_flag" );                          pcSPS->setUseTriangle            ( uiCode != 0 );
+  READ_FLAG( uiCode,     "sps_ciip_enabled_flag" );                           pcSPS->setUseCiip             ( uiCode != 0 );
 
-  // KJS: not in draft yet
-  READ_FLAG( uiCode,  "sps_fracmmvd_disabled_flag" );               pcSPS->setDisFracMmvdEnabledFlag ( uiCode != 0 );
-  // KJS: not in draft yet
-  READ_FLAG(uiCode, "sbt_enable_flag");                             pcSPS->setUseSBT(uiCode != 0);
-  if( pcSPS->getUseSBT() )
+  if ( pcSPS->getUseMMVD() )
   {
-    READ_FLAG(uiCode, "max_sbt_size_64_flag");                      pcSPS->setMaxSbtSize(uiCode != 0 ? 64 : 32);
+    READ_FLAG( uiCode,  "sps_fpel_mmvd_enabled_flag" );             pcSPS->setFpelMmvdEnabledFlag ( uiCode != 0 );
   }
-  // KJS: not in draft yet
-  READ_FLAG(uiCode, "sps_reshaper_enable_flag");                   pcSPS->setUseReshaper(uiCode == 1);
+
+  READ_FLAG( uiCode,    "triangle_flag" );                          pcSPS->setUseTriangle            ( uiCode != 0 );
+
+  READ_FLAG(uiCode, "sps_lmcs_enable_flag");                   pcSPS->setUseLmcs(uiCode == 1);
+  READ_FLAG( uiCode, "sps_lfnst_enabled_flag" );                    pcSPS->setUseLFNST( uiCode != 0 );
 
 #if LUMA_ADAPTIVE_DEBLOCKING_FILTER_QP_OFFSET
   READ_FLAG( uiCode, "sps_ladf_enabled_flag" );                     pcSPS->setLadfEnabled( uiCode != 0 );
@@ -1144,51 +1455,50 @@ void HLSyntaxReader::parseSPS(SPS* pcSPS)
 #endif
 
   // KJS: reference picture sets to be replaced
-  READ_UVLC( uiCode, "num_short_term_ref_pic_sets" );
-  CHECK(uiCode > 64, "Invalid code");
-  pcSPS->createRPSList(uiCode);
 
-  RPSList* rpsList = pcSPS->getRPSList();
-  ReferencePictureSet* rps;
+  // KJS: not found in draft -> does not exist
 
-  for(uint32_t i=0; i< rpsList->getNumberOfReferencePictureSets(); i++)
-  {
-    rps = rpsList->getReferencePictureSet(i);
-    parseShortTermRefPicSet(pcSPS,rps,i);
-  }
-  READ_FLAG( uiCode, "long_term_ref_pics_present_flag" );          pcSPS->setLongTermRefsPresent(uiCode);
-  if (pcSPS->getLongTermRefsPresent())
+  // KJS: remove scaling lists?
+  READ_FLAG( uiCode, "scaling_list_enabled_flag" );                 pcSPS->setScalingListFlag ( uiCode );
+
+  READ_FLAG( uiCode, "sps_loop_filter_across_virtual_boundaries_disabled_present_flag" ); pcSPS->setLoopFilterAcrossVirtualBoundariesDisabledFlag( uiCode != 0 );
+  if( pcSPS->getLoopFilterAcrossVirtualBoundariesDisabledFlag() )
   {
-    READ_UVLC( uiCode, "num_long_term_ref_pics_sps" );
-    pcSPS->setNumLongTermRefPicSPS(uiCode);
-    for (uint32_t k = 0; k < pcSPS->getNumLongTermRefPicSPS(); k++)
+    READ_CODE( 2, uiCode, "sps_num_ver_virtual_boundaries");        pcSPS->setNumVerVirtualBoundaries( uiCode );
+    for( unsigned i = 0; i < pcSPS->getNumVerVirtualBoundaries(); i++ )
     {
-      READ_CODE( pcSPS->getBitsForPOC(), uiCode, "lt_ref_pic_poc_lsb_sps" );
-      pcSPS->setLtRefPicPocLsbSps(k, uiCode);
-      READ_FLAG( uiCode,  "used_by_curr_pic_lt_sps_flag[i]");
-      pcSPS->setUsedByCurrPicLtSPSFlag(k, uiCode?1:0);
+      READ_CODE(13, uiCode, "sps_virtual_boundaries_pos_x");        pcSPS->setVirtualBoundariesPosX(uiCode << 3, i);
+    }
+    READ_CODE( 2, uiCode, "sps_num_hor_virtual_boundaries");        pcSPS->setNumHorVirtualBoundaries( uiCode );
+    for( unsigned i = 0; i < pcSPS->getNumHorVirtualBoundaries(); i++ )
+    {
+      READ_CODE(13, uiCode, "sps_virtual_boundaries_pos_y");        pcSPS->setVirtualBoundariesPosY(uiCode << 3, i);
     }
   }
+  else
+  {
+    pcSPS->setNumVerVirtualBoundaries( 0 );
+    pcSPS->setNumHorVirtualBoundaries( 0 );
+  }
 
-  // KJS: not found in draft -> does not exist
-#if HEVC_USE_INTRA_SMOOTHING_T32 || HEVC_USE_INTRA_SMOOTHING_T64
-  READ_FLAG( uiCode, "strong_intra_smoothing_enable_flag" );      pcSPS->setUseStrongIntraSmoothing(uiCode);
-#endif
-
-  // KJS: remove scaling lists?
-#if HEVC_USE_SCALING_LISTS
-  READ_FLAG( uiCode, "scaling_list_enabled_flag" );                 pcSPS->setScalingListFlag ( uiCode );
-  if(pcSPS->getScalingListFlag())
+  TimingInfo *timingInfo = pcSPS->getTimingInfo();
+  READ_FLAG(     uiCode, "general_hrd_parameters_present_flag");        pcSPS->setHrdParametersPresentFlag(uiCode);
+  if( pcSPS->getHrdParametersPresentFlag() )
   {
-    READ_FLAG( uiCode, "sps_scaling_list_data_present_flag" );                 pcSPS->setScalingListPresentFlag ( uiCode );
-    if(pcSPS->getScalingListPresentFlag ())
+    READ_CODE( 32, uiCode, "num_units_in_tick");                timingInfo->setNumUnitsInTick             (uiCode);
+    READ_CODE( 32, uiCode, "time_scale");                       timingInfo->setTimeScale                  (uiCode);
+
+    READ_FLAG( uiCode, "sub_layer_cpb_parameters_present_flag");  pcSPS->setSubLayerParametersPresentFlag(uiCode);
+    if (pcSPS->getSubLayerParametersPresentFlag())
+    {
+      parseHrdParameters(pcSPS->getHrdParameters(), 0, pcSPS->getMaxTLayers() - 1);
+    }
+    else
     {
-      parseScalingList( &(pcSPS->getScalingList()) );
+      parseHrdParameters(pcSPS->getHrdParameters(), pcSPS->getMaxTLayers() - 1, pcSPS->getMaxTLayers() - 1);
     }
   }
-#endif
 
-  // KJS: no VUI defined yet
   READ_FLAG( uiCode, "vui_parameters_present_flag" );             pcSPS->setVuiParametersPresentFlag(uiCode);
 
   if (pcSPS->getVuiParametersPresentFlag())
@@ -1258,517 +1568,1207 @@ void HLSyntaxReader::parseSPS(SPS* pcSPS)
   xReadRbspTrailingBits();
 }
 
-#if HEVC_VPS
-void HLSyntaxReader::parseVPS(VPS* pcVPS)
+void HLSyntaxReader::parseDPS(DPS* dps)
 {
 #if ENABLE_TRACING
-  xTraceVPSHeader ();
+  xTraceDPSHeader ();
 #endif
-  uint32_t  uiCode;
+  uint32_t  symbol;
 
-  READ_CODE( 4,  uiCode,  "vps_video_parameter_set_id" );         pcVPS->setVPSId( uiCode );
-  READ_FLAG( uiCode,      "vps_base_layer_internal_flag" );       CHECK(uiCode != 1, "Invalid code");
-  READ_FLAG( uiCode,      "vps_base_layer_available_flag" );      CHECK(uiCode != 1, "Invalid code");
-  READ_CODE( 6,  uiCode,  "vps_max_layers_minus1" );
-  READ_CODE( 3,  uiCode,  "vps_max_sub_layers_minus1" );          pcVPS->setMaxTLayers( uiCode + 1 );    CHECK(uiCode+1 > MAX_TLAYER, "Invalid code");
-  READ_FLAG(     uiCode,  "vps_temporal_id_nesting_flag" );       pcVPS->setTemporalNestingFlag( uiCode ? true:false );
-  CHECK (pcVPS->getMaxTLayers()<=1&&!pcVPS->getTemporalNestingFlag(), "Invalid VPS state");
-  READ_CODE( 16, uiCode,  "vps_reserved_0xffff_16bits" );         CHECK(uiCode != 0xffff, "Invalid value for reserved bits");
-  parsePTL ( pcVPS->getPTL(), true, pcVPS->getMaxTLayers()-1);
-  uint32_t subLayerOrderingInfoPresentFlag;
-  READ_FLAG(subLayerOrderingInfoPresentFlag, "vps_sub_layer_ordering_info_present_flag");
-  for(uint32_t i = 0; i <= pcVPS->getMaxTLayers()-1; i++)
+  READ_CODE( 4,  symbol,  "dps_decoding_parameter_set_id" );
+  CHECK(symbol == 0, "dps_decoding_parameter_set_id equal to zero is reserved and should not be used in a bitstream");
+  dps->setDecodingParameterSetId( symbol );
+
+  READ_CODE( 3,  symbol,  "dps_max_sub_layers_minus1" );          dps->setMaxSubLayersMinus1( symbol );
+  READ_CODE( 5, symbol,       "dps_reserved_zero_5bits" );              CHECK(symbol != 0, "dps_reserved_zero_5bits must be equal to zero");
+  
+  uint32_t numPTLs;
+  READ_CODE( 4, numPTLs,       "dps_num_ptls_minus1" );
+  numPTLs += 1;
+
+  std::vector<ProfileTierLevel> ptls;
+  ptls.resize(numPTLs);
+  for (int i=0; i<numPTLs; i++)
   {
-    READ_UVLC( uiCode,  "vps_max_dec_pic_buffering_minus1[i]" );    pcVPS->setMaxDecPicBuffering( uiCode + 1, i );
-    READ_UVLC( uiCode,  "vps_max_num_reorder_pics[i]" );            pcVPS->setNumReorderPics( uiCode, i );
-    READ_UVLC( uiCode,  "vps_max_latency_increase_plus1[i]" );      pcVPS->setMaxLatencyIncrease( uiCode, i );
+     parseProfileTierLevel(&ptls[i], dps->getMaxSubLayersMinus1());
+  }
+  dps->setProfileTierLevel(ptls);
 
-    if (!subLayerOrderingInfoPresentFlag)
+  READ_FLAG( symbol,      "dps_extension_flag" );
+  if (symbol)
+  {
+    while ( xMoreRbspData() )
     {
-      for (i++; i <= pcVPS->getMaxTLayers()-1; i++)
-      {
-        pcVPS->setMaxDecPicBuffering(pcVPS->getMaxDecPicBuffering(0), i);
-        pcVPS->setNumReorderPics(pcVPS->getNumReorderPics(0), i);
-        pcVPS->setMaxLatencyIncrease(pcVPS->getMaxLatencyIncrease(0), i);
-      }
-      break;
+      READ_FLAG( symbol, "dps_extension_data_flag");
     }
   }
+  xReadRbspTrailingBits();
+}
+
+void HLSyntaxReader::parseVPS(VPS* pcVPS)
+{
+#if ENABLE_TRACING
+  xTraceVPSHeader();
+#endif
+  uint32_t  uiCode;
 
-  CHECK( pcVPS->getNumHrdParameters() >= MAX_VPS_OP_SETS_PLUS1, "Too many HDR parameters" );
-  CHECK( pcVPS->getMaxNuhReservedZeroLayerId() >= MAX_VPS_NUH_RESERVED_ZERO_LAYER_ID_PLUS1, "Reserved zero layer id too big" );
-  READ_CODE( 6, uiCode, "vps_max_layer_id" );                        pcVPS->setMaxNuhReservedZeroLayerId( uiCode );
-  READ_UVLC(    uiCode, "vps_num_layer_sets_minus1" );               pcVPS->setMaxOpSets( uiCode + 1 );
-  for( uint32_t opsIdx = 1; opsIdx <= ( pcVPS->getMaxOpSets() - 1 ); opsIdx ++ )
+  READ_CODE(4, uiCode, "vps_video_parameter_set_id");         
+  CHECK( uiCode == 0, "vps_video_parameter_set_id equal to zero is reserved and shall not be used in a bitstream" );
+  pcVPS->setVPSId(uiCode);
+
+  READ_CODE(6, uiCode, "vps_max_layers_minus1");              pcVPS->setMaxLayers(uiCode + 1);    CHECK(uiCode + 1 > MAX_VPS_LAYERS, "Invalid code");
+  if (pcVPS->getMaxLayers() - 1 == 0)
+  {
+    pcVPS->setEachLayerIsAnOlsFlag(1);
+  }
+  READ_CODE(3, uiCode, "vps_max_sublayers_minus1");           pcVPS->setMaxSubLayers(uiCode + 1); CHECK(uiCode + 1 > MAX_VPS_SUBLAYERS, "Invalid code");
+  if( pcVPS->getMaxLayers() > 1 && pcVPS->getMaxSubLayers() > 1)
+  {
+    READ_FLAG(uiCode, "vps_all_layers_same_num_sublayers_flag"); pcVPS->setAllLayersSameNumSublayersFlag(uiCode);
+  }
+  else
+  {
+    pcVPS->setAllLayersSameNumSublayersFlag(1);
+  }
+  if( pcVPS->getMaxLayers() > 1 )
   {
-    // Operation point set
-    for( uint32_t i = 0; i <= pcVPS->getMaxNuhReservedZeroLayerId(); i ++ )
+    READ_FLAG(uiCode, "vps_all_independent_layers_flag");  pcVPS->setAllIndependentLayersFlag(uiCode);
+    if (pcVPS->getAllIndependentLayersFlag() == 0)
     {
-      READ_FLAG( uiCode, "layer_id_included_flag[opsIdx][i]" );   pcVPS->setLayerIdIncludedFlag( uiCode == 1 ? true : false, opsIdx, i );
+      pcVPS->setEachLayerIsAnOlsFlag(0);
     }
   }
-
-  TimingInfo *timingInfo = pcVPS->getTimingInfo();
-  READ_FLAG(       uiCode, "vps_timing_info_present_flag");         timingInfo->setTimingInfoPresentFlag      (uiCode ? true : false);
-  if(timingInfo->getTimingInfoPresentFlag())
+  for (uint32_t i = 0; i < pcVPS->getMaxLayers(); i++)
   {
-    READ_CODE( 32, uiCode, "vps_num_units_in_tick");                timingInfo->setNumUnitsInTick             (uiCode);
-    READ_CODE( 32, uiCode, "vps_time_scale");                       timingInfo->setTimeScale                  (uiCode);
-    READ_FLAG(     uiCode, "vps_poc_proportional_to_timing_flag");  timingInfo->setPocProportionalToTimingFlag(uiCode ? true : false);
-    if(timingInfo->getPocProportionalToTimingFlag())
+    READ_CODE(6, uiCode, "vps_layer_id");                     pcVPS->setLayerId(i, uiCode);
+    pcVPS->setGeneralLayerIdx(uiCode, i);
+
+    if (i > 0 && !pcVPS->getAllIndependentLayersFlag())
     {
-      READ_UVLC(   uiCode, "vps_num_ticks_poc_diff_one_minus1");    timingInfo->setNumTicksPocDiffOneMinus1   (uiCode);
+      READ_FLAG(uiCode, "vps_independent_layer_flag");     pcVPS->setIndependentLayerFlag(i, uiCode);
+      if (!pcVPS->getIndependentLayerFlag(i))
+      {
+        uint16_t sumUiCode = 0;
+        for (int j = 0, k = 0; j < i; j++)
+        {
+          READ_FLAG(uiCode, "vps_direct_dependency_flag"); pcVPS->setDirectRefLayerFlag(i, j, uiCode);
+          if( uiCode )
+          {
+            pcVPS->setInterLayerRefIdc( i, j, k );
+            pcVPS->setDirectRefLayerIdx( i, k++, j );
+            sumUiCode++;
+          }
+        }
+        CHECK(sumUiCode == 0, "There has to be at least one value of j such that the value of vps_direct_dependency_flag[ i ][ j ] is equal to 1, when vps_independent_layer_flag[ i ] is equal to 0");
+      }
     }
+  }
 
-    READ_UVLC( uiCode, "vps_num_hrd_parameters" );                  pcVPS->setNumHrdParameters( uiCode );
-
-    if( pcVPS->getNumHrdParameters() > 0 )
+  if (pcVPS->getMaxLayers() > 1)
+  {
+    if (pcVPS->getAllIndependentLayersFlag())
     {
-      pcVPS->createHrdParamBuffer();
+      READ_FLAG(uiCode, "vps_each_layer_is_an_ols_flag");  pcVPS->setEachLayerIsAnOlsFlag(uiCode);
+      if (pcVPS->getEachLayerIsAnOlsFlag() == 0)
+      {
+        pcVPS->setOlsModeIdc(2);
+      }
     }
-    for( uint32_t i = 0; i < pcVPS->getNumHrdParameters(); i ++ )
+    if (!pcVPS->getEachLayerIsAnOlsFlag())
     {
-      READ_UVLC( uiCode, "hrd_layer_set_idx[i]" );                  pcVPS->setHrdOpSetIdx( uiCode, i );
-      if( i > 0 )
+      if (!pcVPS->getAllIndependentLayersFlag())
       {
-        READ_FLAG( uiCode, "cprms_present_flag[i]" );               pcVPS->setCprmsPresentFlag( uiCode == 1 ? true : false, i );
+        READ_CODE(2, uiCode, "vps_ols_mode_idc");             pcVPS->setOlsModeIdc(uiCode); CHECK(uiCode > MAX_VPS_OLS_MODE_IDC, "Invalid code");
       }
-      else
+      if (pcVPS->getOlsModeIdc() == 2)
       {
-        pcVPS->setCprmsPresentFlag( true, i );
+        READ_CODE(8, uiCode, "num_output_layer_sets_minus1");   pcVPS->setNumOutputLayerSets(uiCode + 1);
+        for (uint32_t i = 1; i <= pcVPS->getNumOutputLayerSets() - 1; i++)
+        {
+          for (uint32_t j = 0; j < pcVPS->getMaxLayers(); j++)
+          {
+            READ_FLAG(uiCode, "vps_ols_output_layer_flag");        pcVPS->setOlsOutputLayerFlag(i, j, uiCode);
+          }
+        }
       }
-
-      parseHrdParameters(pcVPS->getHrdParameters(i), pcVPS->getCprmsPresentFlag( i ), pcVPS->getMaxTLayers() - 1);
     }
   }
-
-  READ_FLAG( uiCode,  "vps_extension_flag" );
+  READ_FLAG(uiCode, "vps_extension_flag");
   if (uiCode)
   {
-    while ( xMoreRbspData() )
+    while (xMoreRbspData())
     {
-      READ_FLAG( uiCode, "vps_extension_data_flag");
+      READ_FLAG(uiCode, "vps_extension_data_flag");
     }
   }
 
   xReadRbspTrailingBits();
 }
-#endif
 
-void HLSyntaxReader::parseSliceHeader (Slice* pcSlice, ParameterSetManager *parameterSetManager, const int prevTid0POC)
+void HLSyntaxReader::parsePictureHeader( PicHeader* picHeader, ParameterSetManager *parameterSetManager )
 {
-  uint32_t  uiCode;
-  int   iCode;
-
+  uint32_t  uiCode; 
+  int       iCode;
+  PPS*      pps = NULL;
+  SPS*      sps = NULL;
+  
 #if ENABLE_TRACING
-  xTraceSliceHeader();
+  xTracePictureHeader();
 #endif
-  PPS* pps = NULL;
-  SPS* sps = NULL;
 
-  uint32_t firstSliceSegmentInPic;
-  READ_FLAG( firstSliceSegmentInPic, "first_slice_segment_in_pic_flag" );
-  if( pcSlice->getRapPicFlag())
+  READ_FLAG(uiCode, "non_reference_picture_flag");       picHeader->setNonReferencePictureFlag( uiCode != 0 );
+  READ_FLAG(uiCode, "gdr_pic_flag");                     picHeader->setGdrPicFlag( uiCode != 0 );
+  READ_FLAG(uiCode, "no_output_of_prior_pics_flag");     picHeader->setNoOutputOfPriorPicsFlag( uiCode != 0 );
+  if( picHeader->getGdrPicFlag() ) 
   {
-    READ_FLAG( uiCode, "no_output_of_prior_pics_flag" );  //ignored -- updated already
-    pcSlice->setNoOutputPriorPicsFlag(uiCode ? true : false);
+    READ_UVLC(uiCode, "recovery_poc_cnt");               picHeader->setRecoveryPocCnt( uiCode );
   }
-  READ_UVLC (    uiCode, "slice_pic_parameter_set_id" );  pcSlice->setPPSId(uiCode);
-  pps = parameterSetManager->getPPS(uiCode);
-  //!KS: need to add error handling code here, if PPS is not available
-  CHECK(pps==0, "Invalid PPS");
-  sps = parameterSetManager->getSPS(pps->getSPSId());
-  //!KS: need to add error handling code here, if SPS is not available
-  CHECK(sps==0, "Invalid SPS");
-
-  const ChromaFormat chFmt = sps->getChromaFormatIdc();
-  const uint32_t numValidComp=getNumberValidComponents(chFmt);
-  const bool bChroma=(chFmt!=CHROMA_400);
-
-#if HEVC_DEPENDENT_SLICES
-  if( pps->getDependentSliceSegmentsEnabledFlag() && ( !firstSliceSegmentInPic ))
+  else 
   {
-    READ_FLAG( uiCode, "dependent_slice_segment_flag" );       pcSlice->setDependentSliceSegmentFlag(uiCode ? true : false);
+    picHeader->setRecoveryPocCnt( 0 );
   }
-  else
+  
+  // parameter sets
+  READ_UVLC(uiCode, "ph_pic_parameter_set_id");
+  picHeader->setPPSId( uiCode );
+  pps = parameterSetManager->getPPS(picHeader->getPPSId());
+  CHECK(pps==0, "Invalid PPS");  
+  picHeader->setSPSId( pps->getSPSId() );
+  sps = parameterSetManager->getSPS(picHeader->getSPSId());
+  CHECK(sps==0, "Invalid SPS");
+  
+  // initialize tile/slice info for no partitioning case
+  if( pps->getNoPicPartitionFlag() )
   {
-    pcSlice->setDependentSliceSegmentFlag(false);
+    pps->resetTileSliceInfo();
+    pps->setLog2CtuSize( ceilLog2(sps->getCTUSize()) );
+    pps->setNumExpTileColumns(1);
+    pps->setNumExpTileRows(1);
+    pps->addTileColumnWidth( pps->getPicWidthInCtu( ) );
+    pps->addTileRowHeight( pps->getPicHeightInCtu( ) );
+    pps->initTiles();
+    pps->setRectSliceFlag( 1 );
+    pps->setNumSlicesInPic( 1 );
+    pps->initRectSlices( );
+    pps->setTileIdxDeltaPresentFlag( 0 );
+    pps->setSliceTileIdx( 0, 0 );
+    pps->initRectSliceMap( );
   }
-#endif
-  int numCTUs = ((sps->getPicWidthInLumaSamples()+sps->getMaxCUWidth()-1)/sps->getMaxCUWidth())*((sps->getPicHeightInLumaSamples()+sps->getMaxCUHeight()-1)/sps->getMaxCUHeight());
-  uint32_t sliceSegmentAddress = 0;
-  int bitsSliceSegmentAddress = 0;
-  while(numCTUs>(1<<bitsSliceSegmentAddress))
+  else 
   {
-    bitsSliceSegmentAddress++;
+    CHECK(pps->getCtuSize() != sps->getCTUSize(), "PPS CTU size does not match CTU size in SPS");
   }
 
-  if(!firstSliceSegmentInPic)
+  // sub-picture IDs
+  if( sps->getSubPicIdPresentFlag() ) 
   {
-    READ_CODE( bitsSliceSegmentAddress, sliceSegmentAddress, "slice_segment_address" );
+    if( sps->getSubPicIdSignallingPresentFlag() ) 
+    {
+      for( int picIdx = 0; picIdx < sps->getNumSubPics( ); picIdx++ )
+      {
+        picHeader->setSubPicId( picIdx, sps->getSubPicId( picIdx ) );
+      }
+    }
+    else 
+    {
+      READ_FLAG(uiCode, "ph_subpic_id_signalling_present_flag");                 picHeader->setSubPicIdSignallingPresentFlag( uiCode != 0 );
+      if( picHeader->getSubPicIdSignallingPresentFlag() )
+      {
+        READ_UVLC( uiCode, "ph_subpic_id_len_minus1" );                          picHeader->setSubPicIdLen( uiCode + 1 );
+        CHECK( uiCode > 15, "Invalid ph_subpic_id_len_minus1 signalled");
+        for( int picIdx = 0; picIdx < sps->getNumSubPics( ); picIdx++ )
+        {
+          READ_CODE( picHeader->getSubPicIdLen( ), uiCode, "ph_subpic_id[i]" );   picHeader->setSubPicId( picIdx, uiCode );
+        }
+      }
+      else 
+      {
+        for( int picIdx = 0; picIdx < sps->getNumSubPics( ); picIdx++ )
+        {
+          picHeader->setSubPicId( picIdx, pps->getSubPicId( picIdx ) );
+        }
+      }
+    }
   }
-  //set uiCode to equal slice start address (or dependent slice start address)
-#if HEVC_DEPENDENT_SLICES
-  pcSlice->setSliceSegmentCurStartCtuTsAddr( sliceSegmentAddress );// this is actually a Raster-Scan (RS) address, but we do not have the RS->TS conversion table defined yet.
-  pcSlice->setSliceSegmentCurEndCtuTsAddr(numCTUs);                // Set end as the last CTU of the picture.
-
-  if (!pcSlice->getDependentSliceSegmentFlag())
+  else 
   {
-#endif
-    pcSlice->setSliceCurStartCtuTsAddr(sliceSegmentAddress); // this is actually a Raster-Scan (RS) address, but we do not have the RS->TS conversion table defined yet.
-    pcSlice->setSliceCurEndCtuTsAddr(numCTUs);
-#if HEVC_DEPENDENT_SLICES
+    for( int picIdx = 0; picIdx < sps->getNumSubPics( ); picIdx++ )
+    {
+      picHeader->setSubPicId( picIdx, picIdx );
+    }
   }
 
-  if(!pcSlice->getDependentSliceSegmentFlag())
+  // virtual boundaries
+  if( !sps->getLoopFilterAcrossVirtualBoundariesDisabledFlag() )
   {
-#endif
-    for (int i = 0; i < pps->getNumExtraSliceHeaderBits(); i++)
+    READ_FLAG( uiCode, "ph_loop_filter_across_virtual_boundaries_disabled_present_flag" ); picHeader->setLoopFilterAcrossVirtualBoundariesDisabledFlag( uiCode != 0 );
+    if( picHeader->getLoopFilterAcrossVirtualBoundariesDisabledFlag() )
     {
-      READ_FLAG(uiCode, "slice_reserved_flag[]"); // ignored
+      READ_CODE( 2, uiCode, "ph_num_ver_virtual_boundaries");        picHeader->setNumVerVirtualBoundaries( uiCode );
+      for( unsigned i = 0; i < picHeader->getNumVerVirtualBoundaries(); i++ )
+      {
+        READ_CODE(13, uiCode, "ph_virtual_boundaries_pos_x");        picHeader->setVirtualBoundariesPosX(uiCode << 3, i);
+      }
+      READ_CODE( 2, uiCode, "ph_num_hor_virtual_boundaries");        picHeader->setNumHorVirtualBoundaries( uiCode );
+      for( unsigned i = 0; i < picHeader->getNumHorVirtualBoundaries(); i++ )
+      {
+        READ_CODE(13, uiCode, "ph_virtual_boundaries_pos_y");        picHeader->setVirtualBoundariesPosY(uiCode << 3, i);
+      }
     }
-
-    READ_UVLC (    uiCode, "slice_type" );            pcSlice->setSliceType((SliceType)uiCode);
-    if( pps->getOutputFlagPresentFlag() )
+    else
     {
-      READ_FLAG( uiCode, "pic_output_flag" );    pcSlice->setPicOutputFlag( uiCode ? true : false );
+      picHeader->setNumVerVirtualBoundaries( 0 );
+      picHeader->setNumHorVirtualBoundaries( 0 );
     }
-    else
+  }
+  else
+  {
+    picHeader->setLoopFilterAcrossVirtualBoundariesDisabledFlag( sps->getLoopFilterAcrossVirtualBoundariesDisabledFlag() );
+    picHeader->setNumVerVirtualBoundaries( sps->getNumVerVirtualBoundaries() );
+    picHeader->setNumHorVirtualBoundaries( sps->getNumHorVirtualBoundaries() );
+    for( unsigned i = 0; i < 3; i++ ) 
     {
-      pcSlice->setPicOutputFlag( true );
+      picHeader->setVirtualBoundariesPosX( sps->getVirtualBoundariesPosX(i), i );
+      picHeader->setVirtualBoundariesPosY( sps->getVirtualBoundariesPosY(i), i );
     }
+  }
+  
+  // 4:4:4 colour plane ID
+  if( sps->getSeparateColourPlaneFlag() )
+  {
+    READ_CODE( 2, uiCode, "colour_plane_id" ); picHeader->setColourPlaneId( uiCode );
+    CHECK(uiCode > 2, "colour_plane_id exceeds valid range");
+  }
+  else 
+  {
+    picHeader->setColourPlaneId( 0 );
+  }
 
-    // if (separate_colour_plane_flag == 1)
-    //   read colour_plane_id
-    //   (separate_colour_plane_flag == 1) is not supported in this version of the standard.
+  // picture output flag
+  if( pps->getOutputFlagPresentFlag() )
+  {
+    READ_FLAG( uiCode, "pic_output_flag" ); picHeader->setPicOutputFlag( uiCode != 0 );
+  }
+  else 
+  {
+    picHeader->setPicOutputFlag( true );
+  }
 
-    if( pcSlice->getIdrPicFlag() )
-    {
-      READ_CODE(sps->getBitsForPOC(), uiCode, "slice_pic_order_cnt_lsb");
-      pcSlice->setPOC(uiCode);
-      ReferencePictureSet* rps = pcSlice->getLocalRPS();
-      (*rps)=ReferencePictureSet();
-      pcSlice->setRPS(rps);
-    }
-    else
-    {
-      READ_CODE(sps->getBitsForPOC(), uiCode, "slice_pic_order_cnt_lsb");
-      int iPOClsb = uiCode;
-      int iPrevPOC = prevTid0POC;
-      int iMaxPOClsb = 1<< sps->getBitsForPOC();
-      int iPrevPOClsb = iPrevPOC & (iMaxPOClsb - 1);
-      int iPrevPOCmsb = iPrevPOC-iPrevPOClsb;
-      int iPOCmsb;
-      if( ( iPOClsb  <  iPrevPOClsb ) && ( ( iPrevPOClsb - iPOClsb )  >=  ( iMaxPOClsb / 2 ) ) )
+  // reference picture lists
+  READ_FLAG( uiCode, "pic_rpl_present_flag" ); picHeader->setPicRplPresentFlag( uiCode != 0 );
+  if( picHeader->getPicRplPresentFlag() )
+  {
+    // List0 and List1
+    for(int listIdx = 0; listIdx < 2; listIdx++) 
+    {                 
+      // copy L1 index from L0 index
+      if (listIdx == 1 && !pps->getRpl1IdxPresentFlag())
       {
-        iPOCmsb = iPrevPOCmsb + iMaxPOClsb;
+        picHeader->setRPL1idx(picHeader->getRPL0idx());
+        uiCode = (picHeader->getRPL0idx() != -1);
       }
-      else if( (iPOClsb  >  iPrevPOClsb )  && ( (iPOClsb - iPrevPOClsb )  >  ( iMaxPOClsb / 2 ) ) )
+      // RPL in picture header or SPS
+      else if (sps->getNumRPL( listIdx ) == 0)
       {
-        iPOCmsb = iPrevPOCmsb - iMaxPOClsb;
+        uiCode = 0;
       }
-      else
+      else if (!pps->getPPSRefPicListSPSIdc( listIdx ))
       {
-        iPOCmsb = iPrevPOCmsb;
+        READ_FLAG(uiCode, "pic_rpl_sps_flag[i]");
       }
-#if !JVET_M0101_HLS
-      if ( pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_W_LP
-           || pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_W_RADL
-           || pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_N_LP )
+      else
       {
-        // For BLA picture types, POCmsb is set to 0.
-        iPOCmsb = 0;
+        uiCode = pps->getPPSRefPicListSPSIdc( listIdx ) - 1;
       }
-#endif
-      pcSlice->setPOC              (iPOCmsb+iPOClsb);
-
-      ReferencePictureSet* rps;
-      rps = pcSlice->getLocalRPS();
-      (*rps)=ReferencePictureSet();
 
-      pcSlice->setRPS(rps);
-      READ_FLAG( uiCode, "short_term_ref_pic_set_sps_flag" );
-      if(uiCode == 0) // use short-term reference picture set explicitly signalled in slice header
+      // explicit RPL in picture header
+      if (!uiCode)
       {
-        parseShortTermRefPicSet(sps,rps, sps->getRPSList()->getNumberOfReferencePictureSets());
+        ReferencePictureList* rpl = picHeader->getLocalRPL( listIdx );
+        (*rpl) = ReferencePictureList();
+        parseRefPicList(sps, rpl);
+        picHeader->setRPLIdx(listIdx, -1);
+        picHeader->setRPL(listIdx, rpl);
       }
-      else // use reference to short-term reference picture set in PPS
-      {
-        int numBits = 0;
-        while ((1 << numBits) < sps->getRPSList()->getNumberOfReferencePictureSets())
+      // use list from SPS
+      else 
+      { 
+        if (listIdx == 1 && !pps->getRpl1IdxPresentFlag())
         {
-          numBits++;
+          picHeader->setRPL( listIdx, sps->getRPLList( listIdx )->getReferencePictureList(picHeader->getRPLIdx( listIdx )));
         }
-        if (numBits > 0)
+        else if (sps->getNumRPL( listIdx ) > 1)
         {
-          READ_CODE( numBits, uiCode, "short_term_ref_pic_set_idx");
+          int numBits = ceilLog2(sps->getNumRPL( listIdx ));
+          READ_CODE(numBits, uiCode, "pic_rpl_idx[i]");
+          picHeader->setRPLIdx( listIdx, uiCode );
+          picHeader->setRPL( listIdx, sps->getRPLList( listIdx )->getReferencePictureList(uiCode));
         }
         else
         {
-          uiCode = 0;
-
+          picHeader->setRPLIdx( listIdx, 0 );
+          picHeader->setRPL( listIdx, sps->getRPLList( listIdx )->getReferencePictureList(0));
         }
-        *rps = *(sps->getRPSList()->getReferencePictureSet(uiCode));
       }
-      if(sps->getLongTermRefsPresent())
+
+      // POC MSB cycle signalling for LTRP
+      for (int i = 0; i < picHeader->getRPL( listIdx )->getNumberOfLongtermPictures() + picHeader->getRPL( listIdx )->getNumberOfShorttermPictures(); i++)
+      {
+        picHeader->getLocalRPL( listIdx )->setDeltaPocMSBPresentFlag(i, false);
+        picHeader->getLocalRPL( listIdx )->setDeltaPocMSBCycleLT(i, 0);
+      }
+      if (picHeader->getRPL( listIdx )->getNumberOfLongtermPictures())
       {
-        int offset = rps->getNumberOfNegativePictures()+rps->getNumberOfPositivePictures();
-        uint32_t numOfLtrp = 0;
-        uint32_t numLtrpInSPS = 0;
-        if (sps->getNumLongTermRefPicSPS() > 0)
+        for (int i = 0; i < picHeader->getRPL( listIdx )->getNumberOfLongtermPictures() + picHeader->getRPL( listIdx )->getNumberOfShorttermPictures(); i++)
         {
-          READ_UVLC( uiCode, "num_long_term_sps");
-          numLtrpInSPS = uiCode;
-          numOfLtrp += numLtrpInSPS;
-          rps->setNumberOfLongtermPictures(numOfLtrp);
-        }
-        int bitsForLtrpInSPS = 0;
-        while (sps->getNumLongTermRefPicSPS() > (1 << bitsForLtrpInSPS))
-        {
-          bitsForLtrpInSPS++;
-        }
-        READ_UVLC( uiCode, "num_long_term_pics");             rps->setNumberOfLongtermPictures(uiCode);
-        numOfLtrp += uiCode;
-        rps->setNumberOfLongtermPictures(numOfLtrp);
-        int maxPicOrderCntLSB = 1 << sps->getBitsForPOC();
-        int prevDeltaMSB = 0, deltaPocMSBCycleLT = 0;
-        for(int j=offset+rps->getNumberOfLongtermPictures()-1, k = 0; k < numOfLtrp; j--, k++)
-        {
-          int pocLsbLt;
-          if (k < numLtrpInSPS)
+          if (picHeader->getRPL( listIdx )->isRefPicLongterm(i))
           {
-            uiCode = 0;
-            if (bitsForLtrpInSPS > 0)
+            if (picHeader->getRPL( listIdx )->getLtrpInSliceHeaderFlag())
             {
-              READ_CODE(bitsForLtrpInSPS, uiCode, "lt_idx_sps[i]");
+              READ_CODE(sps->getBitsForPOC(), uiCode, "pic_poc_lsb_lt[i][j]");
+              picHeader->getLocalRPL( listIdx )->setRefPicIdentifier( i, uiCode, true, false, 0 );
             }
-            bool usedByCurrFromSPS=sps->getUsedByCurrPicLtSPSFlag(uiCode);
-
-            pocLsbLt = sps->getLtRefPicPocLsbSps(uiCode);
-            rps->setUsed(j,usedByCurrFromSPS);
-          }
-          else
-          {
-            READ_CODE(sps->getBitsForPOC(), uiCode, "poc_lsb_lt"); pocLsbLt= uiCode;
-            READ_FLAG( uiCode, "used_by_curr_pic_lt_flag");     rps->setUsed(j,uiCode);
-          }
-          READ_FLAG(uiCode,"delta_poc_msb_present_flag");
-          bool mSBPresentFlag = uiCode ? true : false;
-          if(mSBPresentFlag)
-          {
-            READ_UVLC( uiCode, "delta_poc_msb_cycle_lt[i]" );
-            bool deltaFlag = false;
-            //            First LTRP                               || First LTRP from SH
-            if( (j == offset+rps->getNumberOfLongtermPictures()-1) || (j == offset+(numOfLtrp-numLtrpInSPS)-1) )
+            READ_FLAG(uiCode, "pic_delta_poc_msb_present_flag[i][j]");
+            picHeader->getLocalRPL( listIdx )->setDeltaPocMSBPresentFlag(i, uiCode ? true : false);
+            if (uiCode)
             {
-              deltaFlag = true;
+              READ_UVLC(uiCode, "pic_delta_poc_msb_cycle_lt[i][j]");
+              picHeader->getLocalRPL( listIdx )->setDeltaPocMSBCycleLT(i, uiCode);
             }
-            if(deltaFlag)
-            {
-              deltaPocMSBCycleLT = uiCode;
-            }
-            else
-            {
-              deltaPocMSBCycleLT = uiCode + prevDeltaMSB;
-            }
-
-            int pocLTCurr = pcSlice->getPOC() - deltaPocMSBCycleLT * maxPicOrderCntLSB
-              - iPOClsb + pocLsbLt;
-            rps->setPOC     (j, pocLTCurr);
-            rps->setDeltaPOC(j, - pcSlice->getPOC() + pocLTCurr);
-            rps->setCheckLTMSBPresent(j,true);
           }
-          else
-          {
-            rps->setPOC     (j, pocLsbLt);
-            rps->setDeltaPOC(j, - pcSlice->getPOC() + pocLsbLt);
-            rps->setCheckLTMSBPresent(j,false);
+        }
+      }
+    }
+  }
 
-            // reset deltaPocMSBCycleLT for first LTRP from slice header if MSB not present
-            if( j == offset+(numOfLtrp-numLtrpInSPS)-1 )
-            {
-              deltaPocMSBCycleLT = 0;
-            }
-          }
-          prevDeltaMSB = deltaPocMSBCycleLT;
+  // partitioning constraint overrides
+  if (sps->getSplitConsOverrideEnabledFlag())
+  {
+    READ_FLAG(uiCode, "partition_constraints_override_flag");  picHeader->setSplitConsOverrideFlag( uiCode != 0 );
+    if (picHeader->getSplitConsOverrideFlag())
+    {
+      unsigned  minQT[3]     = { 0, 0, 0 };
+      unsigned  maxBTD[3]    = { 0, 0, 0 };
+      unsigned  maxBTSize[3] = { 0, 0, 0 };
+      unsigned  maxTTSize[3] = { 0, 0, 0 };
+      unsigned  ctbLog2SizeY = floorLog2(sps->getCTUSize());
+
+      READ_UVLC(uiCode, "pic_log2_diff_min_qt_min_cb_intra_slice_luma");
+      unsigned minQtLog2SizeIntraY = uiCode + sps->getLog2MinCodingBlockSize();
+      minQT[0] = 1 << minQtLog2SizeIntraY;
+      READ_UVLC(uiCode, "pic_log2_diff_min_qt_min_cb_inter_slice");
+      unsigned minQtLog2SizeInterY = uiCode + sps->getLog2MinCodingBlockSize();
+      minQT[1] = 1 << minQtLog2SizeInterY;
+      READ_UVLC(uiCode, "pic_max_mtt_hierarchy_depth_inter_slice");              maxBTD[1] = uiCode;
+      READ_UVLC(uiCode, "pic_max_mtt_hierarchy_depth_intra_slice_luma");         maxBTD[0] = uiCode;
+
+      maxTTSize[0] = maxBTSize[0] = minQT[0];                                    // BT/TT maxima start at the min QT size and are scaled up below
+      if (maxBTD[0] != 0)
+      {
+        READ_UVLC(uiCode, "pic_log2_diff_max_bt_min_qt_intra_slice_luma");       // range-check before shifting: an oversized shift count is undefined
+        CHECK(uiCode > ctbLog2SizeY - minQtLog2SizeIntraY, "Invalid code");      maxBTSize[0] <<= uiCode;
+        READ_UVLC(uiCode, "pic_log2_diff_max_tt_min_qt_intra_slice_luma");
+        CHECK(uiCode > ctbLog2SizeY - minQtLog2SizeIntraY, "Invalid code");      maxTTSize[0] <<= uiCode;
+      }
+      maxTTSize[1] = maxBTSize[1] = minQT[1];
+      if (maxBTD[1] != 0)
+      {
+        READ_UVLC(uiCode, "pic_log2_diff_max_bt_min_qt_inter_slice");            // same ordering as the intra case: validate, then shift
+        CHECK(uiCode > ctbLog2SizeY - minQtLog2SizeInterY, "Invalid code");      maxBTSize[1] <<= uiCode;
+        READ_UVLC(uiCode, "pic_log2_diff_max_tt_min_qt_inter_slice");
+        CHECK(uiCode > ctbLog2SizeY - minQtLog2SizeInterY, "Invalid code");      maxTTSize[1] <<= uiCode;
+      }
+      if (sps->getUseDualITree())
+      {
+        READ_UVLC(uiCode, "pic_log2_diff_min_qt_min_cb_intra_slice_chroma");     minQT[2] = 1 << (uiCode + sps->getLog2MinCodingBlockSize());
+        READ_UVLC(uiCode, "pic_max_mtt_hierarchy_depth_intra_slice_chroma");     maxBTD[2] = uiCode;
+        maxTTSize[2] = maxBTSize[2] = minQT[2];
+        if (maxBTD[2] != 0)
+        {
+          READ_UVLC(uiCode, "pic_log2_diff_max_bt_min_qt_intra_slice_chroma");   maxBTSize[2] <<= uiCode;
+          READ_UVLC(uiCode, "pic_log2_diff_max_tt_min_qt_intra_slice_chroma");   maxTTSize[2] <<= uiCode;
         }
-        offset += rps->getNumberOfLongtermPictures();
-        rps->setNumberOfPictures(offset);
       }
-#if !JVET_M0101_HLS
-      if ( pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_W_LP
-           || pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_W_RADL
-           || pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_N_LP )
+
+      picHeader->setMinQTSizes(minQT);
+      picHeader->setMaxMTTHierarchyDepths(maxBTD);
+      picHeader->setMaxBTSizes(maxBTSize);
+      picHeader->setMaxTTSizes(maxTTSize);
+    }
+  }
+  else
+  {
+    picHeader->setSplitConsOverrideFlag(0);
+  }
+
+  // inherit constraint values from SPS
+  if (!sps->getSplitConsOverrideEnabledFlag() || !picHeader->getSplitConsOverrideFlag()) 
+  {
+    picHeader->setMinQTSizes(sps->getMinQTSizes());
+    picHeader->setMaxMTTHierarchyDepths(sps->getMaxMTTHierarchyDepths());
+    picHeader->setMaxBTSizes(sps->getMaxBTSizes());
+    picHeader->setMaxTTSizes(sps->getMaxTTSizes());
+  }
+
+  // delta quantization and chroma QP offset subdivision
+  if (pps->getUseDQP())
+  {
+    READ_UVLC( uiCode, "pic_cu_qp_delta_subdiv_intra_slice" );   picHeader->setCuQpDeltaSubdivIntra( uiCode );
+    READ_UVLC( uiCode, "pic_cu_qp_delta_subdiv_inter_slice" );   picHeader->setCuQpDeltaSubdivInter( uiCode );
+  }
+  else 
+  {
+    picHeader->setCuQpDeltaSubdivIntra( 0 );
+    picHeader->setCuQpDeltaSubdivInter( 0 );
+  }
+  if (pps->getCuChromaQpOffsetEnabledFlag())
+  {
+    READ_UVLC( uiCode, "pic_cu_chroma_qp_offset_subdiv_intra_slice" );   picHeader->setCuChromaQpOffsetSubdivIntra( uiCode );
+    READ_UVLC( uiCode, "pic_cu_chroma_qp_offset_subdiv_inter_slice" );   picHeader->setCuChromaQpOffsetSubdivInter( uiCode );
+  }
+  else 
+  {
+    picHeader->setCuChromaQpOffsetSubdivIntra( 0 );
+    picHeader->setCuChromaQpOffsetSubdivInter( 0 );
+  }
+  
+  // temporal motion vector prediction
+  if (sps->getSPSTemporalMVPEnabledFlag())
+  {
+    READ_FLAG( uiCode, "pic_temporal_mvp_enabled_flag" );
+    picHeader->setEnableTMVPFlag( uiCode != 0 );
+  }
+  else
+  {
+    picHeader->setEnableTMVPFlag(false);
+  }
+
+  // mvd L1 zero flag
+  if (!pps->getPPSMvdL1ZeroIdc())
+  {
+    READ_FLAG(uiCode, "pic_mvd_l1_zero_flag");
+  }
+  else
+  {
+    uiCode = pps->getPPSMvdL1ZeroIdc() - 1;
+  }
+  picHeader->setMvdL1ZeroFlag( uiCode != 0 );
+     
+  // merge candidate list size
+  if (!pps->getPPSSixMinusMaxNumMergeCandPlus1())
+  {
+    READ_UVLC(uiCode, "pic_six_minus_max_num_merge_cand");
+  }
+  else
+  {
+    uiCode = pps->getPPSSixMinusMaxNumMergeCandPlus1() - 1;
+  }
+  CHECK(MRG_MAX_NUM_CANDS <= uiCode, "Incorrrect max number of merge candidates!");
+  picHeader->setMaxNumMergeCand(MRG_MAX_NUM_CANDS - uiCode);
+
+  // subblock merge candidate list size
+  if ( sps->getUseAffine() )
+  {
+    READ_UVLC(uiCode, "pic_five_minus_max_num_subblock_merge_cand");
+    CHECK(AFFINE_MRG_MAX_NUM_CANDS < uiCode, "Incorrrect max number of affine merge candidates!");
+    picHeader->setMaxNumAffineMergeCand( AFFINE_MRG_MAX_NUM_CANDS - uiCode );
+  }
+  else
+  {
+    picHeader->setMaxNumAffineMergeCand( sps->getSBTMVPEnabledFlag() && picHeader->getEnableTMVPFlag() );
+  }
+
+  // full-pel MMVD flag
+  if (sps->getFpelMmvdEnabledFlag())
+  {
+    READ_FLAG( uiCode, "pic_fpel_mmvd_enabled_flag" );
+    picHeader->setDisFracMMVD( uiCode != 0 );
+  }
+  else
+  {
+    picHeader->setDisFracMMVD(false);
+  }
+  
+  // picture level BDOF disable flags
+  if (sps->getBdofControlPresentFlag())
+  {
+    READ_FLAG(uiCode, "pic_disable_bdof_flag");  picHeader->setDisBdofFlag(uiCode != 0);
+  }
+  else
+  {
+    picHeader->setDisBdofFlag(0);
+  }
+
+  // picture level DMVR disable flags
+  if (sps->getDmvrControlPresentFlag())
+  {
+    READ_FLAG(uiCode, "pic_disable_dmvr_flag");  picHeader->setDisDmvrFlag(uiCode != 0);
+  }
+  else
+  {
+    picHeader->setDisDmvrFlag(0);
+  }
+
+  // picture level PROF disable flags
+  if (sps->getProfControlPresentFlag())
+  {
+    READ_FLAG(uiCode, "pic_disable_prof_flag");  picHeader->setDisProfFlag(uiCode != 0);
+  }
+  else
+  {
+    picHeader->setDisProfFlag(0);
+  }
+
+  // triangle merge candidate list size
+  if (sps->getUseTriangle() && picHeader->getMaxNumMergeCand() >= 2)
+  {
+    if (!pps->getPPSMaxNumMergeCandMinusMaxNumTriangleCandPlus1())
+    {
+      READ_UVLC(uiCode, "pic_max_num_merge_cand_minus_max_num_triangle_cand");
+    }
+    else
+    {
+      uiCode = pps->getPPSMaxNumMergeCandMinusMaxNumTriangleCandPlus1() - 1;
+    }
+    CHECK(picHeader->getMaxNumMergeCand() < uiCode, "Incorrrect max number of triangle candidates!");
+    picHeader->setMaxNumTriangleCand((uint32_t)(picHeader->getMaxNumMergeCand() - uiCode));
+  }
+  else
+  {
+    picHeader->setMaxNumTriangleCand(0);
+  }
+
+  // ibc merge candidate list size
+  if (sps->getIBCFlag())
+  {
+    READ_UVLC(uiCode, "pic_six_minus_max_num_ibc_merge_cand");
+    CHECK(IBC_MRG_MAX_NUM_CANDS <= uiCode, "Incorrrect max number of IBC merge candidates!");
+    picHeader->setMaxNumIBCMergeCand(IBC_MRG_MAX_NUM_CANDS - uiCode);
+  }
+  else 
+   {
+    picHeader->setMaxNumIBCMergeCand(0);
+  }
+
+  // joint Cb/Cr sign flag
+  if (sps->getJointCbCrEnabledFlag())
+  {
+    READ_FLAG( uiCode, "pic_joint_cbcr_sign_flag" ); 
+    picHeader->setJointCbCrSignFlag(uiCode != 0);
+  }
+  else
+  {
+    picHeader->setJointCbCrSignFlag(false);
+  }
+
+  // sao enable flags
+  if(sps->getSAOEnabledFlag())
+  {
+    READ_FLAG(uiCode, "pic_sao_enabled_present_flag");  
+    picHeader->setSaoEnabledPresentFlag(uiCode != 0);
+
+    if (picHeader->getSaoEnabledPresentFlag())
+    {    
+      READ_FLAG(uiCode, "slice_sao_luma_flag");  
+      picHeader->setSaoEnabledFlag(CHANNEL_TYPE_LUMA, uiCode != 0);
+
+      if (sps->getChromaFormatIdc() != CHROMA_400)
       {
-        // In the case of BLA picture types, rps data is read from slice header but ignored
-        rps = pcSlice->getLocalRPS();
-        (*rps)=ReferencePictureSet();
-        pcSlice->setRPS(rps);
+        READ_FLAG(uiCode, "slice_sao_chroma_flag");  
+        picHeader->setSaoEnabledFlag(CHANNEL_TYPE_CHROMA, uiCode != 0);
       }
-#endif
-      if (sps->getSPSTemporalMVPEnabledFlag())
+    }
+    else 
+    {
+      picHeader->setSaoEnabledFlag(CHANNEL_TYPE_LUMA,   true);
+      picHeader->setSaoEnabledFlag(CHANNEL_TYPE_CHROMA, sps->getChromaFormatIdc() != CHROMA_400);
+    }
+  }
+  else 
+  {
+    picHeader->setSaoEnabledFlag(CHANNEL_TYPE_LUMA,   false);
+    picHeader->setSaoEnabledFlag(CHANNEL_TYPE_CHROMA, false);
+  }
+  
+  // alf enable flags and aps IDs
+  if( sps->getALFEnabledFlag() )
+  {
+    READ_FLAG(uiCode, "pic_alf_enabled_present_flag");  
+    picHeader->setAlfEnabledPresentFlag(uiCode != 0);
+
+    if (picHeader->getAlfEnabledPresentFlag()) 
+    {
+      READ_FLAG(uiCode, "pic_alf_enabled_flag");
+      picHeader->setAlfEnabledFlag(COMPONENT_Y, uiCode);
+
+      int alfChromaIdc = 0;
+      if (uiCode)
       {
-        READ_FLAG( uiCode, "slice_temporal_mvp_enabled_flag" );
-        pcSlice->setEnableTMVPFlag( uiCode == 1 ? true : false );
+        READ_CODE(3, uiCode, "pic_num_alf_aps_ids_luma");
+        int numAps = uiCode;
+        picHeader->setNumAlfAps(numAps);
+
+        std::vector<int> apsId(numAps, -1);
+        for (int i = 0; i < numAps; i++)
+        {
+          READ_CODE(3, uiCode, "pic_alf_aps_id_luma");
+          apsId[i] = uiCode;
+        }
+        picHeader->setAlfAPSs(apsId);
+
+        if (sps->getChromaFormatIdc() != CHROMA_400)
+        {
+          READ_CODE(2, uiCode, "pic_alf_chroma_idc");
+          alfChromaIdc = uiCode;
+        }
+        else
+        {
+          alfChromaIdc = 0;
+        }
+        if (alfChromaIdc)
+        {
+          READ_CODE(3, uiCode, "pic_alf_aps_id_chroma");
+          picHeader->setAlfApsIdChroma(uiCode);
+        }
       }
       else
       {
-        pcSlice->setEnableTMVPFlag(false);
+        picHeader->setNumAlfAps(0);
       }
+      picHeader->setAlfEnabledFlag(COMPONENT_Cb, alfChromaIdc & 1);
+      picHeader->setAlfEnabledFlag(COMPONENT_Cr, alfChromaIdc >> 1);
     }
-    if(sps->getSAOEnabledFlag())
+    else 
     {
-      READ_FLAG(uiCode, "slice_sao_luma_flag");  pcSlice->setSaoEnabledFlag(CHANNEL_TYPE_LUMA, (bool)uiCode);
+      picHeader->setAlfEnabledFlag(COMPONENT_Y,  true);
+      picHeader->setAlfEnabledFlag(COMPONENT_Cb, true);
+      picHeader->setAlfEnabledFlag(COMPONENT_Cr, true);
+    }
+  }
+  else 
+  {
+    picHeader->setAlfEnabledFlag(COMPONENT_Y,  false);
+    picHeader->setAlfEnabledFlag(COMPONENT_Cb, false);
+    picHeader->setAlfEnabledFlag(COMPONENT_Cr, false);
+  }
 
-      if (bChroma)
+  // dependent quantization
+  if (!pps->getPPSDepQuantEnabledIdc())
+  {
+    READ_FLAG(uiCode, "pic_dep_quant_enabled_flag");
+  }
+  else
+  {
+    uiCode = pps->getPPSDepQuantEnabledIdc() - 1;
+  }
+  picHeader->setDepQuantEnabledFlag( uiCode != 0 );
+
+  // sign data hiding
+  if( !picHeader->getDepQuantEnabledFlag() )
+  {
+    READ_FLAG( uiCode, "pic_sign_data_hiding_enabled_flag" );
+    picHeader->setSignDataHidingEnabledFlag( uiCode != 0 );
+  }
+  else
+  {
+    picHeader->setSignDataHidingEnabledFlag(false);
+  }
+
+  // deblocking filter controls
+  if (pps->getDeblockingFilterControlPresentFlag())
+  {
+    if(pps->getDeblockingFilterOverrideEnabledFlag())
+    {
+      READ_FLAG ( uiCode, "pic_deblocking_filter_override_present_flag" );
+      picHeader->setDeblockingFilterOverridePresentFlag(uiCode != 0);
+    
+      if( picHeader->getDeblockingFilterOverridePresentFlag() ) 
       {
-        READ_FLAG(uiCode, "slice_sao_chroma_flag");  pcSlice->setSaoEnabledFlag(CHANNEL_TYPE_CHROMA, (bool)uiCode);
+        READ_FLAG ( uiCode, "pic_deblocking_filter_override_flag" );
+        picHeader->setDeblockingFilterOverrideFlag(uiCode != 0);
       }
+      else
+      {
+        picHeader->setDeblockingFilterOverrideFlag(false);
+      }
+    }
+    else
+    {
+      picHeader->setDeblockingFilterOverridePresentFlag(false);
+      picHeader->setDeblockingFilterOverrideFlag(false);
     }
 
-    if( sps->getALFEnabledFlag() )
+    if(picHeader->getDeblockingFilterOverrideFlag())
     {
-      READ_FLAG(uiCode, "tile_group_alf_enabled_flag");
-      if (uiCode)
+      READ_FLAG ( uiCode, "pic_deblocking_filter_disabled_flag" );
+      picHeader->setDeblockingFilterDisable(uiCode != 0);
+      if(!picHeader->getDeblockingFilterDisable())
       {
-        READ_CODE(5, uiCode, "tile_group_aps_id");
-        pcSlice->setAPSId(uiCode);
-        pcSlice->setAPS(parameterSetManager->getAPS(uiCode));
-        pcSlice->setTileGroupAlfEnabledFlag(true);
+        READ_SVLC( iCode, "pic_beta_offset_div2" );
+        picHeader->setDeblockingFilterBetaOffsetDiv2(iCode);
+        CHECK(  picHeader->getDeblockingFilterBetaOffsetDiv2() < -6 ||   // reject values outside [-6, 6]; '&&' could never fire
+                picHeader->getDeblockingFilterBetaOffsetDiv2() >  6, "Invalid deblocking filter configuration");
+
+        READ_SVLC( iCode, "pic_tc_offset_div2" );
+        picHeader->setDeblockingFilterTcOffsetDiv2(iCode);
+        CHECK  (picHeader->getDeblockingFilterTcOffsetDiv2() < -6 ||     // reject values outside [-6, 6]; '&&' could never fire
+                picHeader->getDeblockingFilterTcOffsetDiv2() >  6, "Invalid deblocking filter configuration");
+      }
+    }
+    else
+    {
+      picHeader->setDeblockingFilterDisable       ( pps->getPPSDeblockingFilterDisabledFlag() );
+      picHeader->setDeblockingFilterBetaOffsetDiv2( pps->getDeblockingFilterBetaOffsetDiv2() );
+      picHeader->setDeblockingFilterTcOffsetDiv2  ( pps->getDeblockingFilterTcOffsetDiv2() );
+    }
+  }
+  else
+  {
+    picHeader->setDeblockingFilterDisable       ( false );
+    picHeader->setDeblockingFilterBetaOffsetDiv2( 0 );
+    picHeader->setDeblockingFilterTcOffsetDiv2  ( 0 );
+  }
+
+  // luma mapping / chroma scaling controls
+  if (sps->getUseLmcs())
+  {
+    READ_FLAG(uiCode, "pic_lmcs_enabled_flag");
+    picHeader->setLmcsEnabledFlag(uiCode != 0);
+
+    if (picHeader->getLmcsEnabledFlag())
+    {
+      READ_CODE(2, uiCode, "pic_lmcs_aps_id");
+      picHeader->setLmcsAPSId(uiCode);
+
+      if (sps->getChromaFormatIdc() != CHROMA_400)
+      {
+        READ_FLAG(uiCode, "pic_chroma_residual_scale_flag");
+        picHeader->setLmcsChromaResidualScaleFlag(uiCode != 0);
       }
       else
       {
-        pcSlice->setTileGroupAlfEnabledFlag(false);
-        pcSlice->setAPSId(-1);
-        pcSlice->setAPS(nullptr);
+        picHeader->setLmcsChromaResidualScaleFlag(false);
       }
     }
+  }
+  else
+  {
+    picHeader->setLmcsEnabledFlag(false);
+    picHeader->setLmcsChromaResidualScaleFlag(false);
+  }
 
-    if (pcSlice->getIdrPicFlag())
+  // quantization scaling lists
+  if( sps->getScalingListFlag() )
+  {
+    READ_FLAG( uiCode, "pic_scaling_list_present_flag" );
+    picHeader->setScalingListPresentFlag( uiCode );
+    if( picHeader->getScalingListPresentFlag() )
     {
-      pcSlice->setEnableTMVPFlag(false);
+      READ_CODE( 3, uiCode, "pic_scaling_list_aps_id" );
+      picHeader->setScalingListAPSId( uiCode );
     }
-    if (!pcSlice->isIntra())
+  }
+  else 
+  {
+    picHeader->setScalingListPresentFlag( false );
+  }
+
+  // picture header extension
+  if(pps->getPictureHeaderExtensionPresentFlag())
+  {
+    READ_UVLC(uiCode,"pic_segment_header_extension_length");
+    for(int i=0; i<uiCode; i++)
     {
+      uint32_t ignore_;
+      READ_CODE(8,ignore_,"pic_segment_header_extension_data_byte");
+    }
+  }
 
-      READ_FLAG( uiCode, "num_ref_idx_active_override_flag");
-      if (uiCode)
+  xReadRbspTrailingBits();
+}
+
+void HLSyntaxReader::parseSliceHeader (Slice* pcSlice, PicHeader* picHeader, ParameterSetManager *parameterSetManager, const int prevTid0POC)
+{
+  uint32_t  uiCode;
+  int   iCode;
+
+#if ENABLE_TRACING
+  xTraceSliceHeader();
+#endif
+  PPS* pps = NULL;
+  SPS* sps = NULL;
+
+  CHECK(picHeader==0, "Invalid Picture Header");
+  CHECK(picHeader->isValid()==false, "Invalid Picture Header");
+  pps = parameterSetManager->getPPS( picHeader->getPPSId() );
+  //!KS: need to add error handling code here, if PPS is not available
+  CHECK(pps==0, "Invalid PPS");
+  sps = parameterSetManager->getSPS(pps->getSPSId());
+  //!KS: need to add error handling code here, if SPS is not available
+  CHECK(sps==0, "Invalid SPS");
+
+  const ChromaFormat chFmt = sps->getChromaFormatIdc();
+  const uint32_t numValidComp=getNumberValidComponents(chFmt);
+  const bool bChroma=(chFmt!=CHROMA_400);
+
+  // picture order count
+  READ_CODE(sps->getBitsForPOC(), uiCode, "slice_pic_order_cnt_lsb");
+  if (pcSlice->getIdrPicFlag())
+  {
+    pcSlice->setPOC(uiCode);
+  }
+  else
+  {
+    int iPOClsb = uiCode;
+    int iPrevPOC = prevTid0POC;
+    int iMaxPOClsb = 1 << sps->getBitsForPOC();
+    int iPrevPOClsb = iPrevPOC & (iMaxPOClsb - 1);
+    int iPrevPOCmsb = iPrevPOC - iPrevPOClsb;
+    int iPOCmsb;
+    if ((iPOClsb  <  iPrevPOClsb) && ((iPrevPOClsb - iPOClsb) >= (iMaxPOClsb / 2)))
+    {
+      iPOCmsb = iPrevPOCmsb + iMaxPOClsb;
+    }
+    else if ((iPOClsb  >  iPrevPOClsb) && ((iPOClsb - iPrevPOClsb)  >  (iMaxPOClsb / 2)))
+    {
+      iPOCmsb = iPrevPOCmsb - iMaxPOClsb;
+    }
+    else
+    {
+      iPOCmsb = iPrevPOCmsb;
+    }
+    pcSlice->setPOC(iPOCmsb + iPOClsb);
+  }
+
+  if (sps->getSubPicPresentFlag())
+  {
+    uint32_t bitsSubPicId;
+    if (sps->getSubPicIdSignallingPresentFlag())
+    {
+      bitsSubPicId = sps->getSubPicIdLen();
+    }
+    else if (picHeader->getSubPicIdSignallingPresentFlag())
+    {
+      bitsSubPicId = picHeader->getSubPicIdLen();
+    }
+    else if (pps->getSubPicIdSignallingPresentFlag())
+    {
+      bitsSubPicId = pps->getSubPicIdLen();
+    }
+    else
+    {
+      bitsSubPicId = ceilLog2(sps->getNumSubPics());
+    }
+    READ_CODE(bitsSubPicId, uiCode, "slice_subpic_id");    pcSlice->setSliceSubPicId(uiCode);
+  }
+
+  // raster scan slices
+  if(pps->getRectSliceFlag() == 0) 
+  {
+    uint32_t sliceAddr, numTilesInSlice;
+
+    // slice address is the raster scan tile index of first tile in slice
+    if( pps->getNumTiles() > 1 ) 
+    {      
+      int bitsSliceAddress = ceilLog2(pps->getNumTiles());
+      READ_CODE(bitsSliceAddress, uiCode, "slice_address");  sliceAddr = uiCode;
+      READ_UVLC(uiCode, "num_tiles_in_slice_minus1");        numTilesInSlice = uiCode + 1;      
+    }
+    else {
+      sliceAddr = 0;
+      numTilesInSlice = 1;
+    }
+    CHECK(sliceAddr >= pps->getNumTiles(), "Invalid slice address");
+    pcSlice->initSliceMap();
+    pcSlice->setSliceID(sliceAddr);
+    
+    for( uint32_t tileIdx = sliceAddr; tileIdx < sliceAddr + numTilesInSlice; tileIdx++ )
+    {
+      uint32_t tileX = tileIdx % pps->getNumTileColumns();
+      uint32_t tileY = tileIdx / pps->getNumTileColumns();
+      CHECK(tileY >= pps->getNumTileRows(), "Number of tiles in slice exceeds the remaining number of tiles in picture");
+
+      pcSlice->addCtusToSlice(pps->getTileColumnBd(tileX), pps->getTileColumnBd(tileX + 1),
+                              pps->getTileRowBd(tileY), pps->getTileRowBd(tileY + 1), pps->getPicWidthInCtu());
+   }
+  }
+  // rectangular slices
+  else 
+  {
+    uint32_t sliceAddr;
+
+    // slice address is the index of the slice within the current sub-picture
+    if( pps->getNumSlicesInPic() > 1 ) 
+    {
+      int bitsSliceAddress = ceilLog2(pps->getNumSlicesInPic());  // change to NumSlicesInSubPic when available
+      READ_CODE(bitsSliceAddress, uiCode, "slice_address");  sliceAddr = uiCode;
+      CHECK(sliceAddr >= pps->getNumSlicesInPic(), "Invalid slice address");
+    }
+    else {
+      sliceAddr = 0;
+    }
+    pcSlice->setSliceMap( pps->getSliceMap(sliceAddr) );
+    pcSlice->setSliceID(sliceAddr);
+  }
+
+
+    READ_UVLC (    uiCode, "slice_type" );            pcSlice->setSliceType((SliceType)uiCode);
+
+    // inherit values from picture header
+    //   set default values in case slice overrides are disabled
+    pcSlice->inheritFromPicHeader( picHeader, pps, sps );
+
+
+    if( picHeader->getPicRplPresentFlag() )
+    {
+      pcSlice->setRPL0(picHeader->getRPL0());
+      pcSlice->setRPL1(picHeader->getRPL1());
+      *pcSlice->getLocalRPL0() = *picHeader->getLocalRPL0();
+      *pcSlice->getLocalRPL1() = *picHeader->getLocalRPL1();
+    }
+    else if( pcSlice->getIdrPicFlag() && !(sps->getIDRRefParamListPresent()) )
+    {
+      ReferencePictureList* rpl0 = pcSlice->getLocalRPL0();
+      (*rpl0) = ReferencePictureList();
+      pcSlice->setRPL0(rpl0);
+      ReferencePictureList* rpl1 = pcSlice->getLocalRPL1();
+      (*rpl1) = ReferencePictureList();
+      pcSlice->setRPL1(rpl1);
+    }
+    else
+    {
+      //Read L0 related syntax elements
+      if (sps->getNumRPL0() > 0)
       {
-        READ_UVLC (uiCode, "num_ref_idx_l0_active_minus1" );  pcSlice->setNumRefIdx( REF_PIC_LIST_0, uiCode + 1 );
-        if (pcSlice->isInterB())
+        if (!pps->getPPSRefPicListSPSIdc0())
         {
-          READ_UVLC (uiCode, "num_ref_idx_l1_active_minus1" );  pcSlice->setNumRefIdx( REF_PIC_LIST_1, uiCode + 1 );
+          READ_FLAG(uiCode, "ref_pic_list_sps_flag[0]");
         }
         else
         {
-          pcSlice->setNumRefIdx(REF_PIC_LIST_1, 0);
+          uiCode = pps->getPPSRefPicListSPSIdc0() - 1;
         }
       }
       else
       {
-        pcSlice->setNumRefIdx(REF_PIC_LIST_0, pps->getNumRefIdxL0DefaultActive());
-        if (pcSlice->isInterB())
+        uiCode = 0;
+      }
+
+      if (!uiCode) //explicitly carried in this SH
+      {
+        ReferencePictureList* rpl0 = pcSlice->getLocalRPL0();
+        (*rpl0) = ReferencePictureList();
+        parseRefPicList(sps, rpl0);
+        pcSlice->setRPL0idx(-1);
+        pcSlice->setRPL0(rpl0);
+      }
+      else    //Refer to list in SPS
+      {
+        if (sps->getNumRPL0() > 1)
         {
-          pcSlice->setNumRefIdx(REF_PIC_LIST_1, pps->getNumRefIdxL1DefaultActive());
+          int numBits = ceilLog2(sps->getNumRPL0());
+          READ_CODE(numBits, uiCode, "ref_pic_list_idx[0]");
+          pcSlice->setRPL0idx(uiCode);
+          pcSlice->setRPL0(sps->getRPLList0()->getReferencePictureList(uiCode));
         }
         else
         {
-          pcSlice->setNumRefIdx(REF_PIC_LIST_1,0);
+          pcSlice->setRPL0idx(0);
+          pcSlice->setRPL0(sps->getRPLList0()->getReferencePictureList(0));
         }
       }
-    }
-    // }
-    RefPicListModification* refPicListModification = pcSlice->getRefPicListModification();
-    if(!pcSlice->isIntra())
-    {
-      if( !pps->getListsModificationPresentFlag() || pcSlice->getNumRpsCurrTempList() <= 1 )
+      //Deal POC Msb cycle signalling for LTRP
+      for (int i = 0; i < pcSlice->getRPL0()->getNumberOfLongtermPictures() + pcSlice->getRPL0()->getNumberOfShorttermPictures(); i++)
       {
-        refPicListModification->setRefPicListModificationFlagL0( 0 );
+        pcSlice->getLocalRPL0()->setDeltaPocMSBPresentFlag(i, false);
+        pcSlice->getLocalRPL0()->setDeltaPocMSBCycleLT(i, 0);
       }
-      else
+      if (pcSlice->getRPL0()->getNumberOfLongtermPictures())
       {
-        READ_FLAG( uiCode, "ref_pic_list_modification_flag_l0" ); refPicListModification->setRefPicListModificationFlagL0( uiCode ? 1 : 0 );
+        for (int i = 0; i < pcSlice->getRPL0()->getNumberOfLongtermPictures() + pcSlice->getRPL0()->getNumberOfShorttermPictures(); i++)
+        {
+          if (pcSlice->getRPL0()->isRefPicLongterm(i))
+          {
+            if (pcSlice->getRPL0()->getLtrpInSliceHeaderFlag())
+            {
+              READ_CODE(sps->getBitsForPOC(), uiCode, "slice_poc_lsb_lt[i][j]");
+              pcSlice->getLocalRPL0()->setRefPicIdentifier( i, uiCode, true, false, 0 );
+            }
+            READ_FLAG(uiCode, "delta_poc_msb_present_flag[i][j]");
+            pcSlice->getLocalRPL0()->setDeltaPocMSBPresentFlag(i, uiCode ? true : false);
+            if (uiCode)
+            {
+              READ_UVLC(uiCode, "slice_delta_poc_msb_cycle_lt[i][j]");
+              pcSlice->getLocalRPL0()->setDeltaPocMSBCycleLT(i, uiCode);
+            }
+          }
+        }
       }
 
-      if(refPicListModification->getRefPicListModificationFlagL0())
+      //Read L1 related syntax elements
+      if (!pps->getRpl1IdxPresentFlag())
       {
-        uiCode = 0;
-        int i = 0;
-        int numRpsCurrTempList0 = pcSlice->getNumRpsCurrTempList();
-        if ( numRpsCurrTempList0 > 1 )
+        pcSlice->setRPL1idx(pcSlice->getRPL0idx());
+        if (pcSlice->getRPL1idx() != -1)
+          pcSlice->setRPL1(sps->getRPLList1()->getReferencePictureList(pcSlice->getRPL0idx()));
+      }
+      else
+      {
+        if (sps->getNumRPL1() > 0)
         {
-          int length = 1;
-          numRpsCurrTempList0 --;
-          while ( numRpsCurrTempList0 >>= 1)
+          if (!pps->getPPSRefPicListSPSIdc1())
           {
-            length ++;
+            READ_FLAG(uiCode, "ref_pic_list_sps_flag[1]");
           }
-          for (i = 0; i < pcSlice->getNumRefIdx(REF_PIC_LIST_0); i ++)
+          else
           {
-            READ_CODE( length, uiCode, "list_entry_l0" );
-            refPicListModification->setRefPicSetIdxL0(i, uiCode );
+            uiCode = pps->getPPSRefPicListSPSIdc1() - 1;
           }
         }
         else
         {
-          for (i = 0; i < pcSlice->getNumRefIdx(REF_PIC_LIST_0); i ++)
+          uiCode = 0;
+        }
+        if (uiCode == 1)
+        {
+          if (sps->getNumRPL1() > 1)
           {
-            refPicListModification->setRefPicSetIdxL0(i, 0 );
+            int numBits = ceilLog2(sps->getNumRPL1());
+            READ_CODE(numBits, uiCode, "ref_pic_list_idx[1]");
+            pcSlice->setRPL1idx(uiCode);
+            pcSlice->setRPL1(sps->getRPLList1()->getReferencePictureList(uiCode));
           }
+          else
+          {
+            pcSlice->setRPL1idx(0);
+            pcSlice->setRPL1(sps->getRPLList1()->getReferencePictureList(0));
+          }
+        }
+        else
+        {
+          pcSlice->setRPL1idx(-1);
         }
       }
-    }
-    else
-    {
-      refPicListModification->setRefPicListModificationFlagL0(0);
-    }
-    if(pcSlice->isInterB())
-    {
-      if( !pps->getListsModificationPresentFlag() || pcSlice->getNumRpsCurrTempList() <= 1 )
+      if (pcSlice->getRPL1idx() == -1) //explicitly carried in this SH
       {
-        refPicListModification->setRefPicListModificationFlagL1( 0 );
+        ReferencePictureList* rpl1 = pcSlice->getLocalRPL1();
+        (*rpl1) = ReferencePictureList();
+        parseRefPicList(sps, rpl1);
+        pcSlice->setRPL1idx(-1);
+        pcSlice->setRPL1(rpl1);
       }
-      else
+
+      //Deal POC Msb cycle signalling for LTRP
+      for (int i = 0; i < pcSlice->getRPL1()->getNumberOfLongtermPictures() + pcSlice->getRPL1()->getNumberOfShorttermPictures(); i++)
       {
-        READ_FLAG( uiCode, "ref_pic_list_modification_flag_l1" ); refPicListModification->setRefPicListModificationFlagL1( uiCode ? 1 : 0 );
+        pcSlice->getLocalRPL1()->setDeltaPocMSBPresentFlag(i, false);
+        pcSlice->getLocalRPL1()->setDeltaPocMSBCycleLT(i, 0);
       }
-      if(refPicListModification->getRefPicListModificationFlagL1())
+      if (pcSlice->getRPL1()->getNumberOfLongtermPictures())
       {
-        uiCode = 0;
-        int i = 0;
-        int numRpsCurrTempList1 = pcSlice->getNumRpsCurrTempList();
-        if ( numRpsCurrTempList1 > 1 )
+        for (int i = 0; i < pcSlice->getRPL1()->getNumberOfLongtermPictures() + pcSlice->getRPL1()->getNumberOfShorttermPictures(); i++)
         {
-          int length = 1;
-          numRpsCurrTempList1 --;
-          while ( numRpsCurrTempList1 >>= 1)
+          if (pcSlice->getRPL1()->isRefPicLongterm(i))
           {
-            length ++;
+            if (pcSlice->getRPL1()->getLtrpInSliceHeaderFlag())
+            {
+              READ_CODE(sps->getBitsForPOC(), uiCode, "slice_poc_lsb_lt[i][j]");
+              pcSlice->getLocalRPL1()->setRefPicIdentifier( i, uiCode, true, false, 0 );
+            }
+            READ_FLAG(uiCode, "delta_poc_msb_present_flag[i][j]");
+            pcSlice->getLocalRPL1()->setDeltaPocMSBPresentFlag(i, uiCode ? true : false);
+            if (uiCode)
+            {
+              READ_UVLC(uiCode, "slice_delta_poc_msb_cycle_lt[i][j]");
+              pcSlice->getLocalRPL1()->setDeltaPocMSBCycleLT(i, uiCode);
+            }
+          }
+        }
+      }
+
+    }
+    if( !picHeader->getPicRplPresentFlag() && pcSlice->getIdrPicFlag() && !(sps->getIDRRefParamListPresent()))
+    {
+      pcSlice->setNumRefIdx(REF_PIC_LIST_0, 0);
+      pcSlice->setNumRefIdx(REF_PIC_LIST_1, 0);
+    }
+    else
+    {
+      if ((!pcSlice->isIntra() && pcSlice->getRPL0()->getNumRefEntries() > 1) ||
+          (pcSlice->isInterB() && pcSlice->getRPL1()->getNumRefEntries() > 1) )
+      {
+        READ_FLAG( uiCode, "num_ref_idx_active_override_flag");
+        if (uiCode)
+        {
+          if(pcSlice->getRPL0()->getNumRefEntries() > 1)
+          {
+            READ_UVLC (uiCode, "num_ref_idx_l0_active_minus1" );
+          }
+          else
+          {
+            uiCode = 0;
           }
-          for (i = 0; i < pcSlice->getNumRefIdx(REF_PIC_LIST_1); i ++)
+          pcSlice->setNumRefIdx( REF_PIC_LIST_0, uiCode + 1 );
+          if (pcSlice->isInterB())
           {
-            READ_CODE( length, uiCode, "list_entry_l1" );
-            refPicListModification->setRefPicSetIdxL1(i, uiCode );
+            if(pcSlice->getRPL1()->getNumRefEntries() > 1)
+            {
+              READ_UVLC (uiCode, "num_ref_idx_l1_active_minus1" );
+            }
+            else
+            {
+              uiCode = 0;
+            }
+            pcSlice->setNumRefIdx(REF_PIC_LIST_1, uiCode + 1);
+          }
+          else
+          {
+            pcSlice->setNumRefIdx(REF_PIC_LIST_1, 0);
+          }
+        }
+        else
+        {
+          if(pcSlice->getRPL0()->getNumRefEntries() >= pps->getNumRefIdxL0DefaultActive())
+          {
+            pcSlice->setNumRefIdx(REF_PIC_LIST_0, pps->getNumRefIdxL0DefaultActive());
+          }
+          else
+          {
+            pcSlice->setNumRefIdx(REF_PIC_LIST_0, pcSlice->getRPL0()->getNumRefEntries());
+          }
+
+          if (pcSlice->isInterB())
+          {
+            if(pcSlice->getRPL1()->getNumRefEntries() >= pps->getNumRefIdxL1DefaultActive())
+            {
+              pcSlice->setNumRefIdx(REF_PIC_LIST_1, pps->getNumRefIdxL1DefaultActive());
+            }
+            else
+            {
+              pcSlice->setNumRefIdx(REF_PIC_LIST_1, pcSlice->getRPL1()->getNumRefEntries());
+            }
           }
-        }
-        else
-        {
-          for (i = 0; i < pcSlice->getNumRefIdx(REF_PIC_LIST_1); i ++)
+          else
           {
-            refPicListModification->setRefPicSetIdxL1(i, 0 );
+            pcSlice->setNumRefIdx(REF_PIC_LIST_1, 0);
           }
         }
       }
+      else
+      {
+        pcSlice->setNumRefIdx( REF_PIC_LIST_0, pcSlice->isIntra() ? 0 : 1 );
+        pcSlice->setNumRefIdx( REF_PIC_LIST_1, pcSlice->isInterB() ? 1 : 0 );
+      }
     }
-    else
-    {
-      refPicListModification->setRefPicListModificationFlagL1(0);
-    }
-    if (pcSlice->isInterB())
+
+    if (pcSlice->isInterP() || pcSlice->isInterB())
     {
-      READ_FLAG( uiCode, "mvd_l1_zero_flag" );       pcSlice->setMvdL1ZeroFlag( (uiCode ? true : false) );
+      CHECK(pcSlice->getNumRefIdx(REF_PIC_LIST_0) == 0, "Number of active entries in RPL0 of P or B picture shall be greater than 0");
+      if (pcSlice->isInterB())
+        CHECK(pcSlice->getNumRefIdx(REF_PIC_LIST_1) == 0, "Number of active entries in RPL1 of B picture shall be greater than 0");
     }
 
+
     pcSlice->setCabacInitFlag( false ); // default
     if(pps->getCabacInitPresentFlag() && !pcSlice->isIntra())
     {
@@ -1777,11 +2777,18 @@ void HLSyntaxReader::parseSliceHeader (Slice* pcSlice, ParameterSetManager *para
       pcSlice->setEncCABACTableIdx( pcSlice->getSliceType() == B_SLICE ? ( uiCode ? P_SLICE : B_SLICE ) : ( uiCode ? B_SLICE : P_SLICE ) );
     }
 
-    if ( pcSlice->getEnableTMVPFlag() )
+    if ( pcSlice->getPicHeader()->getEnableTMVPFlag() )
     {
       if ( pcSlice->getSliceType() == B_SLICE )
       {
-        READ_FLAG( uiCode, "collocated_from_l0_flag" );
+        if (!pps->getPPSCollocatedFromL0Idc())
+        {
+          READ_FLAG(uiCode, "collocated_from_l0_flag");
+        }
+        else
+        {
+          uiCode = pps->getPPSCollocatedFromL0Idc() - 1;
+        }
         pcSlice->setColFromL0Flag(uiCode);
       }
       else
@@ -1806,87 +2813,22 @@ void HLSyntaxReader::parseSliceHeader (Slice* pcSlice, ParameterSetManager *para
       parsePredWeightTable(pcSlice, sps);
       pcSlice->initWpScaling(sps);
     }
-    READ_FLAG( uiCode, "dep_quant_enabled_flag" );
-    pcSlice->setDepQuantEnabledFlag( uiCode != 0 );
-#if HEVC_USE_SIGN_HIDING
-    if( !pcSlice->getDepQuantEnabledFlag() )
-    {
-      READ_FLAG( uiCode, "sign_data_hiding_enabled_flag" );
-      pcSlice->setSignDataHidingEnabledFlag( uiCode != 0 );
-    }
     else
     {
-      pcSlice->setSignDataHidingEnabledFlag( 0 );
-    }
-#endif
-    if (
-      sps->getSplitConsOverrideEnabledFlag()
-      )
-    {
-      READ_FLAG(uiCode, "partition_constrainst_override_flag");        pcSlice->setSplitConsOverrideFlag(uiCode ? true : false);
-      if (pcSlice->getSplitConsOverrideFlag())
+      WPScalingParam *wp;
+      for ( int iNumRef=0 ; iNumRef<((pcSlice->getSliceType() == B_SLICE )?2:1); iNumRef++ )
       {
-        READ_UVLC(uiCode, "log2_diff_min_qt_min_cb");                 pcSlice->setMinQTSize(1 << (uiCode + sps->getLog2MinCodingBlockSize()));
-        READ_UVLC(uiCode, "max_mtt_hierarchy_depth");                 pcSlice->setMaxBTDepth(uiCode);
-        if (pcSlice->getMaxBTDepth() != 0)
+        RefPicList  eRefPicList = ( iNumRef ? REF_PIC_LIST_1 : REF_PIC_LIST_0 );
+        for ( int iRefIdx=0 ; iRefIdx<pcSlice->getNumRefIdx(eRefPicList) ; iRefIdx++ )
         {
-          READ_UVLC(uiCode, "log2_diff_max_bt_min_qt");             pcSlice->setMaxBTSize(pcSlice->getMinQTSize() << uiCode);
-          READ_UVLC(uiCode, "log2_diff_max_tt_min_qt");             pcSlice->setMaxTTSize(pcSlice->getMinQTSize() << uiCode);
-        }
-        else
-        {
-          pcSlice->setMaxBTSize(pcSlice->getMinQTSize());
-          pcSlice->setMaxTTSize(pcSlice->getMinQTSize());
-        }
-        if (
-          pcSlice->isIntra() && sps->getUseDualITree()
-          )
-        {
-          READ_UVLC(uiCode, "log2_diff_min_qt_min_cb_chroma");                 pcSlice->setMinQTSizeIChroma(1 << (uiCode + sps->getLog2MinCodingBlockSize()));
-          READ_UVLC(uiCode, "max_mtt_hierarchy_depth_chroma");                            pcSlice->setMaxBTDepthIChroma(uiCode);
-          if (pcSlice->getMaxBTDepthIChroma() != 0)
-          {
-            READ_UVLC(uiCode, "log2_diff_max_bt_min_qt_chroma");             pcSlice->setMaxBTSizeIChroma(pcSlice->getMinQTSizeIChroma() << uiCode);
-            READ_UVLC(uiCode, "log2_diff_max_tt_min_qt_chroma");             pcSlice->setMaxTTSizeIChroma(pcSlice->getMinQTSizeIChroma() << uiCode);
-          }
-          else
-          {
-            pcSlice->setMaxBTSizeIChroma(pcSlice->getMinQTSizeIChroma());
-            pcSlice->setMaxTTSizeIChroma(pcSlice->getMinQTSizeIChroma());
-          }
+          pcSlice->getWpScaling(eRefPicList, iRefIdx, wp);
+          wp[0].bPresentFlag = false;
+          wp[1].bPresentFlag = false;
+          wp[2].bPresentFlag = false;
         }
       }
     }
 
-    if (!pcSlice->isIntra() || sps->getIBCFlag())
-    {
-      READ_UVLC(uiCode, "six_minus_max_num_merge_cand");
-      pcSlice->setMaxNumMergeCand(MRG_MAX_NUM_CANDS - uiCode);
-    }
-
-    if (!pcSlice->isIntra())
-    {
-
-      if ( sps->getSBTMVPEnabledFlag() && !sps->getUseAffine() ) // ATMVP only
-      {
-        pcSlice->setMaxNumAffineMergeCand( 1 );
-      }
-      else if ( !sps->getSBTMVPEnabledFlag() && !sps->getUseAffine() ) // both off
-      {
-        pcSlice->setMaxNumAffineMergeCand( 0 );
-      }
-      else
-      if ( sps->getUseAffine() )
-      {
-        READ_UVLC( uiCode, "five_minus_max_num_affine_merge_cand" );
-        pcSlice->setMaxNumAffineMergeCand( AFFINE_MRG_MAX_NUM_CANDS - uiCode );
-      }
-      if ( sps->getDisFracMmvdEnabledFlag() )
-      {
-        READ_FLAG( uiCode, "tile_group_fracmmvd_disabled_flag" );
-        pcSlice->setDisFracMMVD( uiCode ? true : false );
-      }
-    }
 
     READ_SVLC( iCode, "slice_qp_delta" );
     pcSlice->setSliceQp (26 + pps->getPicInitQPMinus26() + iCode);
@@ -1915,10 +2857,19 @@ void HLSyntaxReader::parseSliceHeader (Slice* pcSlice, ParameterSetManager *para
         CHECK( pcSlice->getSliceChromaQpDelta(COMPONENT_Cr) >  12, "Invalid chroma QP offset" );
         CHECK( (pps->getQpOffset(COMPONENT_Cr) + pcSlice->getSliceChromaQpDelta(COMPONENT_Cr)) < -12, "Invalid chroma QP offset" );
         CHECK( (pps->getQpOffset(COMPONENT_Cr) + pcSlice->getSliceChromaQpDelta(COMPONENT_Cr)) >  12, "Invalid chroma QP offset" );
+        if (sps->getJointCbCrEnabledFlag())
+        {
+          READ_SVLC(iCode, "slice_joint_cbcr_qp_offset" );
+          pcSlice->setSliceChromaQpDelta(JOINT_CbCr, iCode);
+          CHECK( pcSlice->getSliceChromaQpDelta(JOINT_CbCr) < -12, "Invalid chroma QP offset");
+          CHECK( pcSlice->getSliceChromaQpDelta(JOINT_CbCr) >  12, "Invalid chroma QP offset");
+          CHECK( (pps->getQpOffset(JOINT_CbCr) + pcSlice->getSliceChromaQpDelta(JOINT_CbCr)) < -12, "Invalid chroma QP offset");
+          CHECK( (pps->getQpOffset(JOINT_CbCr) + pcSlice->getSliceChromaQpDelta(JOINT_CbCr)) >  12, "Invalid chroma QP offset");
+        }
       }
     }
 
-    if (pps->getPpsRangeExtension().getChromaQpOffsetListEnabledFlag())
+    if (pps->getCuChromaQpOffsetEnabledFlag())
     {
       READ_FLAG(uiCode, "cu_chroma_qp_offset_enabled_flag"); pcSlice->setUseChromaQpAdj(uiCode != 0);
     }
@@ -1927,11 +2878,67 @@ void HLSyntaxReader::parseSliceHeader (Slice* pcSlice, ParameterSetManager *para
       pcSlice->setUseChromaQpAdj(false);
     }
 
+    if( sps->getSAOEnabledFlag() && !picHeader->getSaoEnabledPresentFlag() )
+    {
+      READ_FLAG(uiCode, "slice_sao_luma_flag");  pcSlice->setSaoEnabledFlag(CHANNEL_TYPE_LUMA, (bool)uiCode);
+
+      if (bChroma)
+      {
+        READ_FLAG(uiCode, "slice_sao_chroma_flag");  pcSlice->setSaoEnabledFlag(CHANNEL_TYPE_CHROMA, (bool)uiCode);
+      }
+    }
+
+    if( sps->getALFEnabledFlag() && !picHeader->getAlfEnabledPresentFlag() )
+    {
+      READ_FLAG(uiCode, "slice_alf_enabled_flag");
+      pcSlice->setTileGroupAlfEnabledFlag(COMPONENT_Y, uiCode);
+      int alfChromaIdc = 0;
+      if (uiCode)
+      {
+        READ_CODE(3, uiCode, "slice_num_alf_aps_ids_luma");
+        int numAps = uiCode;
+        pcSlice->setTileGroupNumAps(numAps);
+        std::vector<int> apsId(numAps, -1);
+        for (int i = 0; i < numAps; i++)
+        {
+          READ_CODE(3, uiCode, "slice_alf_aps_id_luma");
+          apsId[i] = uiCode;
+          APS* APStoCheckLuma = parameterSetManager->getAPS(apsId[i], ALF_APS);
+          CHECK(APStoCheckLuma->getAlfAPSParam().newFilterFlag[CHANNEL_TYPE_LUMA] != 1, "bitstream conformance error, alf_luma_filter_signal_flag shall be equal to 1");
+        }
+
+
+        pcSlice->setAlfAPSs(apsId);
+        if (bChroma)
+        {
+          READ_CODE(2, uiCode, "slice_alf_chroma_idc");   alfChromaIdc = uiCode;
+        }
+        else
+        {
+          alfChromaIdc = 0;
+        }
+        if (alfChromaIdc)
+        {
+          READ_CODE(3, uiCode, "slice_alf_aps_id_chroma");
+          pcSlice->setTileGroupApsIdChroma(uiCode);
+          APS* APStoCheckChroma = parameterSetManager->getAPS(uiCode, ALF_APS);
+          CHECK(APStoCheckChroma->getAlfAPSParam().newFilterFlag[CHANNEL_TYPE_CHROMA] != 1, "bitstream conformance error, alf_chroma_filter_signal_flag shall be equal to 1");
+        }
+      }
+      else
+      {
+        pcSlice->setTileGroupNumAps(0);
+      }
+      pcSlice->setTileGroupAlfEnabledFlag(COMPONENT_Cb, alfChromaIdc & 1);
+      pcSlice->setTileGroupAlfEnabledFlag(COMPONENT_Cr, alfChromaIdc >> 1);
+    }
+
+
     if (pps->getDeblockingFilterControlPresentFlag())
     {
-      if(pps->getDeblockingFilterOverrideEnabledFlag())
+      if( pps->getDeblockingFilterOverrideEnabledFlag() && !picHeader->getDeblockingFilterOverridePresentFlag() )
       {
-        READ_FLAG ( uiCode, "deblocking_filter_override_flag" );        pcSlice->setDeblockingFilterOverrideFlag(uiCode ? true : false);
+        READ_FLAG ( uiCode, "slice_deblocking_filter_override_flag" );        pcSlice->setDeblockingFilterOverrideFlag(uiCode ? true : false);
       }
       else
       {
@@ -1952,9 +2959,9 @@ void HLSyntaxReader::parseSliceHeader (Slice* pcSlice, ParameterSetManager *para
       }
       else
       {
-        pcSlice->setDeblockingFilterDisable       ( pps->getPPSDeblockingFilterDisabledFlag() );
-        pcSlice->setDeblockingFilterBetaOffsetDiv2( pps->getDeblockingFilterBetaOffsetDiv2() );
-        pcSlice->setDeblockingFilterTcOffsetDiv2  ( pps->getDeblockingFilterTcOffsetDiv2() );
+        pcSlice->setDeblockingFilterDisable       ( picHeader->getDeblockingFilterDisable() );
+        pcSlice->setDeblockingFilterBetaOffsetDiv2( picHeader->getDeblockingFilterBetaOffsetDiv2() );
+        pcSlice->setDeblockingFilterTcOffsetDiv2  ( picHeader->getDeblockingFilterTcOffsetDiv2() );
       }
     }
     else
@@ -1964,28 +2971,9 @@ void HLSyntaxReader::parseSliceHeader (Slice* pcSlice, ParameterSetManager *para
       pcSlice->setDeblockingFilterTcOffsetDiv2  ( 0 );
     }
 
-    bool isSAOEnabled = sps->getSAOEnabledFlag() && (pcSlice->getSaoEnabledFlag(CHANNEL_TYPE_LUMA) || (bChroma && pcSlice->getSaoEnabledFlag(CHANNEL_TYPE_CHROMA)));
-    bool isDBFEnabled = (!pcSlice->getDeblockingFilterDisable());
-
-    if(pps->getLoopFilterAcrossSlicesEnabledFlag() && ( isSAOEnabled || isDBFEnabled ))
-    {
-      READ_FLAG( uiCode, "slice_loop_filter_across_slices_enabled_flag");
-    }
-    else
-    {
-      uiCode = pps->getLoopFilterAcrossSlicesEnabledFlag()?1:0;
-    }
-    pcSlice->setLFCrossSliceBoundaryFlag( (uiCode==1)?true:false);
 
-    if (sps->getUseReshaper())
-    {
-      parseReshaper(pcSlice->getReshapeInfo(), sps, pcSlice->isIntra());
-    }
-#if HEVC_DEPENDENT_SLICES
-  }
-#endif
 
-  if( firstSliceSegmentInPic )
+  if( pcSlice->getFirstCtuRsAddrInSlice() == 0 )
   {
     pcSlice->setDefaultClpRng( *sps );
 
@@ -2001,26 +2989,20 @@ void HLSyntaxReader::parseSliceHeader (Slice* pcSlice, ParameterSetManager *para
     }
   }
 
-
-#if HEVC_TILES_WPP
   std::vector<uint32_t> entryPointOffset;
-  if( pps->getTilesEnabledFlag() || pps->getEntropyCodingSyncEnabledFlag() )
+
+  pcSlice->setNumEntryPoints( pps );
+  if( pcSlice->getNumEntryPoints() > 0 )
   {
-    uint32_t numEntryPointOffsets;
     uint32_t offsetLenMinus1;
-    READ_UVLC( numEntryPointOffsets, "num_entry_point_offsets" );
-    if( numEntryPointOffsets > 0 )
+    READ_UVLC( offsetLenMinus1, "offset_len_minus1" );
+    entryPointOffset.resize( pcSlice->getNumEntryPoints() );
+    for( uint32_t idx = 0; idx < pcSlice->getNumEntryPoints(); idx++ )
     {
-      READ_UVLC( offsetLenMinus1, "offset_len_minus1" );
-      entryPointOffset.resize( numEntryPointOffsets );
-      for( uint32_t idx = 0; idx < numEntryPointOffsets; idx++ )
-      {
-        READ_CODE( offsetLenMinus1 + 1, uiCode, "entry_point_offset_minus1" );
-        entryPointOffset[idx] = uiCode + 1;
-      }
+      READ_CODE( offsetLenMinus1 + 1, uiCode, "entry_point_offset_minus1" );
+      entryPointOffset[idx] = uiCode + 1;
     }
   }
-#endif
 
 #if RExt__DECODER_DEBUG_BIT_STATISTICS
   CodingStatistics::IncrementStatisticEP(STATS__BYTE_ALIGNMENT_BITS,m_pcBitstream->readByteAlignment(),0);
@@ -2030,8 +3012,7 @@ void HLSyntaxReader::parseSliceHeader (Slice* pcSlice, ParameterSetManager *para
 
   pcSlice->clearSubstreamSizes();
 
-#if HEVC_TILES_WPP
-  if( pps->getTilesEnabledFlag() || pps->getEntropyCodingSyncEnabledFlag() )
+  if( pcSlice->getNumEntryPoints() > 0 )
   {
     int endOfSliceHeaderLocation = m_pcBitstream->getByteLocation();
 
@@ -2065,11 +3046,54 @@ void HLSyntaxReader::parseSliceHeader (Slice* pcSlice, ParameterSetManager *para
       pcSlice->addSubstreamSize(entryPointOffset [ idx ] );
     }
   }
-#endif
   return;
 }
 
-#if JVET_M0101_HLS
+void HLSyntaxReader::parseSliceHeaderToPoc (Slice* pcSlice, PicHeader* picHeader, ParameterSetManager *parameterSetManager, const int prevTid0POC) // Reads only "slice_pic_order_cnt_lsb" and stores the resulting POC in pcSlice; no later syntax element is read here.
+{
+  uint32_t  uiCode; // filled by the READ_CODE call below
+  PPS* pps = NULL;
+  SPS* sps = NULL;
+
+  CHECK(picHeader==0, "Invalid Picture Header"); // NOTE(review): CHECK presumably fails hard when its condition is true -- confirm against the macro definition
+  CHECK(picHeader->isValid()==false, "Invalid Picture Header");
+  pps = parameterSetManager->getPPS( picHeader->getPPSId() ); // PPS is looked up via the id stored in the picture header
+  //!KS: need to add error handling code here, if PPS is not available (currently only guarded by the CHECK below)
+  CHECK(pps==0, "Invalid PPS");
+  sps = parameterSetManager->getSPS(pps->getSPSId()); // SPS is looked up via the id stored in that PPS
+  //!KS: need to add error handling code here, if SPS is not available (currently only guarded by the CHECK below)
+  CHECK(sps==0, "Invalid SPS");
+  
+  // picture order count: the LSB field is read with a width of sps->getBitsForPOC() bits
+  READ_CODE(sps->getBitsForPOC(), uiCode, "slice_pic_order_cnt_lsb");
+  if (pcSlice->getIdrPicFlag()) // IDR: the POC is the LSB value exactly as read, no MSB part is added
+  {
+    pcSlice->setPOC(uiCode);
+  }
+  else
+  {
+    int iPOClsb = uiCode;
+    int iPrevPOC = prevTid0POC; // NOTE(review): presumably the POC of the previous temporal-id-0 picture -- confirm with the caller
+    int iMaxPOClsb = 1 << sps->getBitsForPOC(); // number of distinct LSB values, i.e. one wrap period
+    int iPrevPOClsb = iPrevPOC & (iMaxPOClsb - 1); // low getBitsForPOC() bits of the previous POC
+    int iPrevPOCmsb = iPrevPOC - iPrevPOClsb; // previous POC with those low bits removed
+    int iPOCmsb;
+    if ((iPOClsb  <  iPrevPOClsb) && ((iPrevPOClsb - iPOClsb) >= (iMaxPOClsb / 2))) // LSB fell by at least half a period: treat as wrap-around and step the MSB up one period
+    {
+      iPOCmsb = iPrevPOCmsb + iMaxPOClsb;
+    }
+    else if ((iPOClsb  >  iPrevPOClsb) && ((iPOClsb - iPrevPOClsb)  >  (iMaxPOClsb / 2))) // LSB rose by more than half a period: step the MSB down one period
+    {
+      iPOCmsb = iPrevPOCmsb - iMaxPOClsb;
+    }
+    else // otherwise the MSB of the previous POC is kept unchanged
+    {
+      iPOCmsb = iPrevPOCmsb;
+    }
+    pcSlice->setPOC(iPOCmsb + iPOClsb); // final POC = derived MSB part + LSB value read above
+  }
+}
+
 void HLSyntaxReader::parseConstraintInfo(ConstraintInfo *cinfo)
 {
   uint32_t symbol;
@@ -2081,27 +3105,42 @@ void HLSyntaxReader::parseConstraintInfo(ConstraintInfo *cinfo)
 
   READ_CODE(4, symbol,  "max_bitdepth_constraint_idc"              ); cinfo->setMaxBitDepthConstraintIdc(symbol);
   READ_CODE(2, symbol,  "max_chroma_format_constraint_idc"         ); cinfo->setMaxChromaFormatConstraintIdc((ChromaFormat)symbol);
-  
+
   READ_FLAG(symbol,  "no_qtbtt_dual_tree_intra_constraint_flag" ); cinfo->setNoQtbttDualTreeIntraConstraintFlag(symbol > 0 ? true : false);
+  READ_FLAG(symbol, "no_partition_constraints_override_constraint_flag"); cinfo->setNoPartitionConstraintsOverrideConstraintFlag(symbol > 0 ? true : false);
   READ_FLAG(symbol,  "no_sao_constraint_flag");                    cinfo->setNoSaoConstraintFlag(symbol > 0 ? true : false);
   READ_FLAG(symbol,  "no_alf_constraint_flag");                    cinfo->setNoAlfConstraintFlag(symbol > 0 ? true : false);
-  READ_FLAG(symbol,  "no_pcm_constraint_flag");                    cinfo->setNoPcmConstraintFlag(symbol > 0 ? true : false);
+  READ_FLAG(symbol,  "no_joint_cbcr_constraint_flag");             cinfo->setNoJointCbCrConstraintFlag(symbol > 0 ? true : false);
+
   READ_FLAG(symbol,  "no_ref_wraparound_constraint_flag");         cinfo->setNoRefWraparoundConstraintFlag(symbol > 0 ? true : false);
   READ_FLAG(symbol,  "no_temporal_mvp_constraint_flag");           cinfo->setNoTemporalMvpConstraintFlag(symbol > 0 ? true : false);
   READ_FLAG(symbol,  "no_sbtmvp_constraint_flag");                 cinfo->setNoSbtmvpConstraintFlag(symbol > 0 ? true : false);
   READ_FLAG(symbol,  "no_amvr_constraint_flag");                   cinfo->setNoAmvrConstraintFlag(symbol > 0 ? true : false);
   READ_FLAG(symbol,  "no_bdof_constraint_flag");                   cinfo->setNoBdofConstraintFlag(symbol > 0 ? true : false);
+  READ_FLAG(symbol, "no_dmvr_constraint_flag");                    cinfo->setNoDmvrConstraintFlag(symbol > 0 ? true : false);
   READ_FLAG(symbol, "no_cclm_constraint_flag");                    cinfo->setNoCclmConstraintFlag(symbol > 0 ? true : false);
   READ_FLAG(symbol, "no_mts_constraint_flag");                     cinfo->setNoMtsConstraintFlag(symbol > 0 ? true : false);
+  READ_FLAG(symbol, "no_sbt_constraint_flag");                     cinfo->setNoSbtConstraintFlag(symbol > 0 ? true : false);
   READ_FLAG(symbol, "no_affine_motion_constraint_flag");           cinfo->setNoAffineMotionConstraintFlag(symbol > 0 ? true : false);
-  READ_FLAG(symbol, "no_gbi_constraint_flag");                     cinfo->setNoGbiConstraintFlag(symbol > 0 ? true : false);
-  READ_FLAG(symbol, "no_mh_intra_constraint_flag");                cinfo->setNoMhIntraConstraintFlag(symbol > 0 ? true : false);
+  READ_FLAG(symbol, "no_bcw_constraint_flag");                     cinfo->setNoBcwConstraintFlag(symbol > 0 ? true : false);
+  READ_FLAG(symbol, "no_ibc_constraint_flag");                     cinfo->setNoIbcConstraintFlag(symbol > 0 ? true : false);
+  READ_FLAG(symbol, "no_ciip_constraint_flag");                    cinfo->setNoCiipConstraintFlag(symbol > 0 ? true : false);
+  READ_FLAG(symbol, "no_fpel_mmvd_constraint_flag");               cinfo->setNoFPelMmvdConstraintFlag(symbol > 0 ? true : false);
   READ_FLAG(symbol, "no_triangle_constraint_flag");                cinfo->setNoTriangleConstraintFlag(symbol > 0 ? true : false);
   READ_FLAG(symbol, "no_ladf_constraint_flag");                    cinfo->setNoLadfConstraintFlag(symbol > 0 ? true : false);
-  READ_FLAG(symbol, "no_curr_pic_ref_constraint_flag");            cinfo->setNoCurrPicRefConstraintFlag(symbol > 0 ? true : false);
+  READ_FLAG(symbol, "no_transform_skip_constraint_flag");          cinfo->setNoTransformSkipConstraintFlag(symbol > 0 ? true : false);
+  READ_FLAG(symbol, "no_bdpcm_constraint_flag");                   cinfo->setNoBDPCMConstraintFlag(symbol > 0 ? true : false);
   READ_FLAG(symbol, "no_qp_delta_constraint_flag");                cinfo->setNoQpDeltaConstraintFlag(symbol > 0 ? true : false);
   READ_FLAG(symbol, "no_dep_quant_constraint_flag");               cinfo->setNoDepQuantConstraintFlag(symbol > 0 ? true : false);
   READ_FLAG(symbol, "no_sign_data_hiding_constraint_flag");        cinfo->setNoSignDataHidingConstraintFlag(symbol > 0 ? true : false);
+  READ_FLAG(symbol, "no_trail_constraint_flag");                   cinfo->setNoTrailConstraintFlag(symbol > 0 ? true : false);
+  READ_FLAG(symbol, "no_stsa_constraint_flag");                    cinfo->setNoStsaConstraintFlag(symbol > 0 ? true : false);
+  READ_FLAG(symbol, "no_rasl_constraint_flag");                    cinfo->setNoRaslConstraintFlag(symbol > 0 ? true : false);
+  READ_FLAG(symbol, "no_radl_constraint_flag");                    cinfo->setNoRadlConstraintFlag(symbol > 0 ? true : false);
+  READ_FLAG(symbol, "no_idr_constraint_flag");                     cinfo->setNoIdrConstraintFlag(symbol > 0 ? true : false);
+  READ_FLAG(symbol, "no_cra_constraint_flag");                     cinfo->setNoCraConstraintFlag(symbol > 0 ? true : false);
+  READ_FLAG(symbol, "no_gdr_constraint_flag");                     cinfo->setNoGdrConstraintFlag(symbol > 0 ? true : false);
+  READ_FLAG(symbol, "no_aps_constraint_flag");                     cinfo->setNoApsConstraintFlag(symbol > 0 ? true : false);
 }
 
 
@@ -2113,7 +3152,16 @@ void HLSyntaxReader::parseProfileTierLevel(ProfileTierLevel *ptl, int maxNumSubL
 
   parseConstraintInfo( ptl->getConstraintInfo() );
 
-  READ_CODE(8 , symbol,   "general_level_idc"                ); ptl->setLevelIdc    (Level::Name(symbol));
+  READ_CODE( 8, symbol, "general_level_idc" ); ptl->setLevelIdc( Level::Name( symbol ) );
+
+  READ_CODE(8, symbol, "num_sub_profiles");
+  uint8_t numSubProfiles = symbol;
+  ptl->setNumSubProfile( numSubProfiles );
+  for (int i = 0; i < numSubProfiles; i++)
+  {
+    READ_CODE(32, symbol, "general_sub_profile_idc[i]"); ptl->setSubProfileIdc(i, symbol);
+  }
+
 
   for (int i = 0; i < maxNumSubLayersMinus1; i++)
   {
@@ -2132,118 +3180,17 @@ void HLSyntaxReader::parseProfileTierLevel(ProfileTierLevel *ptl, int maxNumSubL
       READ_CODE(8 , symbol,   "sub_layer_level_idc"                ); ptl->setSubLayerLevelIdc    (i, Level::Name(symbol));
     }
   }
-}
-
-
-#else
-void HLSyntaxReader::parsePTL( PTL *rpcPTL, bool profilePresentFlag, int maxNumSubLayersMinus1 )
-{
-  uint32_t uiCode;
-  if(profilePresentFlag)
-  {
-    parseProfileTier(rpcPTL->getGeneralPTL(), false);
-  }
-  READ_CODE( 8, uiCode, "general_level_idc" );    rpcPTL->getGeneralPTL()->setLevelIdc(Level::Name(uiCode));
-
-  for (int i = 0; i < maxNumSubLayersMinus1; i++)
-  {
-    READ_FLAG( uiCode, "sub_layer_profile_present_flag[i]" ); rpcPTL->setSubLayerProfilePresentFlag(i, uiCode);
-    READ_FLAG( uiCode, "sub_layer_level_present_flag[i]"   ); rpcPTL->setSubLayerLevelPresentFlag  (i, uiCode);
-  }
-
-  if (maxNumSubLayersMinus1 > 0)
-  {
-    for (int i = maxNumSubLayersMinus1; i < 8; i++)
-    {
-      READ_CODE(2, uiCode, "reserved_zero_2bits");
-      CHECK(uiCode != 0, "Invalid code");
-    }
-  }
-
-  for(int i = 0; i < maxNumSubLayersMinus1; i++)
+  ptl->setSubLayerLevelIdc(maxNumSubLayersMinus1, ptl->getLevelIdc());
+  for( int i = maxNumSubLayersMinus1 - 1; i >= 0; i-- )
   {
-    if( rpcPTL->getSubLayerProfilePresentFlag(i) )
-    {
-      parseProfileTier(rpcPTL->getSubLayerPTL(i), true);
-    }
-    if(rpcPTL->getSubLayerLevelPresentFlag(i))
+    if( !ptl->getSubLayerLevelPresentFlag( i ) )
     {
-      READ_CODE( 8, uiCode, "sub_layer_level_idc[i]" );   rpcPTL->getSubLayerPTL(i)->setLevelIdc(Level::Name(uiCode));
+      ptl->setSubLayerLevelIdc( i, ptl->getSubLayerLevelIdc( i + 1 ) );
     }
   }
 }
 
-#if ENABLE_TRACING|| RExt__DECODER_DEBUG_BIT_STATISTICS
-void HLSyntaxReader::parseProfileTier(ProfileTierLevel *ptl, const bool bIsSubLayer)
-#define PTL_TRACE_TEXT(txt) bIsSubLayer?("sub_layer_" txt) : ("general_" txt)
-#else
-void HLSyntaxReader::parseProfileTier(ProfileTierLevel *ptl, const bool /*bIsSubLayer*/)
-#define PTL_TRACE_TEXT(txt) txt
-#endif
-{
-  uint32_t uiCode;
-  READ_CODE(2 , uiCode,   PTL_TRACE_TEXT("profile_space"                   )); ptl->setProfileSpace(uiCode);
-  READ_FLAG(    uiCode,   PTL_TRACE_TEXT("tier_flag"                       )); ptl->setTierFlag    (uiCode ? Level::HIGH : Level::MAIN);
-  READ_CODE(5 , uiCode,   PTL_TRACE_TEXT("profile_idc"                     )); ptl->setProfileIdc  (Profile::Name(uiCode));
-  for(int j = 0; j < 32; j++)
-  {
-    READ_FLAG(  uiCode,   PTL_TRACE_TEXT("profile_compatibility_flag[][j]" )); ptl->setProfileCompatibilityFlag(j, uiCode ? 1 : 0);
-  }
-  READ_FLAG(uiCode,       PTL_TRACE_TEXT("progressive_source_flag"         )); ptl->setProgressiveSourceFlag(uiCode ? true : false);
-
-  READ_FLAG(uiCode,       PTL_TRACE_TEXT("interlaced_source_flag"          )); ptl->setInterlacedSourceFlag(uiCode ? true : false);
-
-  READ_FLAG(uiCode,       PTL_TRACE_TEXT("non_packed_constraint_flag"      )); ptl->setNonPackedConstraintFlag(uiCode ? true : false);
-
-  READ_FLAG(uiCode,       PTL_TRACE_TEXT("frame_only_constraint_flag"      )); ptl->setFrameOnlyConstraintFlag(uiCode ? true : false);
-
-  if (ptl->getProfileIdc() == Profile::MAINREXT           || ptl->getProfileCompatibilityFlag(Profile::MAINREXT) ||
-      ptl->getProfileIdc() == Profile::HIGHTHROUGHPUTREXT || ptl->getProfileCompatibilityFlag(Profile::HIGHTHROUGHPUTREXT))
-  {
-    uint32_t maxBitDepth=16;
-    READ_FLAG(    uiCode, PTL_TRACE_TEXT("max_12bit_constraint_flag"       )); if (uiCode) maxBitDepth=12;
-    READ_FLAG(    uiCode, PTL_TRACE_TEXT("max_10bit_constraint_flag"       )); if (uiCode) maxBitDepth=10;
-    READ_FLAG(    uiCode, PTL_TRACE_TEXT("max_8bit_constraint_flag"        )); if (uiCode) maxBitDepth=8;
-    ptl->setBitDepthConstraint(maxBitDepth);
-    ChromaFormat chromaFmtConstraint=CHROMA_444;
-    READ_FLAG(    uiCode, PTL_TRACE_TEXT("max_422chroma_constraint_flag"   )); if (uiCode) chromaFmtConstraint=CHROMA_422;
-    READ_FLAG(    uiCode, PTL_TRACE_TEXT("max_420chroma_constraint_flag"   )); if (uiCode) chromaFmtConstraint=CHROMA_420;
-    READ_FLAG(    uiCode, PTL_TRACE_TEXT("max_monochrome_constraint_flag"  )); if (uiCode) chromaFmtConstraint=CHROMA_400;
-    ptl->setChromaFormatConstraint(chromaFmtConstraint);
-    READ_FLAG(    uiCode, PTL_TRACE_TEXT("intra_constraint_flag"           )); ptl->setIntraConstraintFlag(uiCode != 0);
-    READ_FLAG(    uiCode, PTL_TRACE_TEXT("one_picture_only_constraint_flag")); ptl->setOnePictureOnlyConstraintFlag(uiCode != 0);
-    READ_FLAG(    uiCode, PTL_TRACE_TEXT("lower_bit_rate_constraint_flag"  )); ptl->setLowerBitRateConstraintFlag(uiCode != 0);
-    READ_CODE(16, uiCode, PTL_TRACE_TEXT("reserved_zero_34bits[0..15]"     ));
-    READ_CODE(16, uiCode, PTL_TRACE_TEXT("reserved_zero_34bits[16..31]"    ));
-    READ_CODE(2,  uiCode, PTL_TRACE_TEXT("reserved_zero_34bits[32..33]"    ));
-  }
-  else
-  {
-    ptl->setBitDepthConstraint( ( ptl->getProfileIdc() == Profile::MAIN10 || ptl->getProfileIdc() == Profile::NEXT ) ? 10 : 8 );
-    ptl->setChromaFormatConstraint(CHROMA_420);
-    ptl->setIntraConstraintFlag(false);
-    ptl->setLowerBitRateConstraintFlag(true);
-    READ_CODE(16, uiCode, PTL_TRACE_TEXT("reserved_zero_43bits[0..15]"     ));
-    READ_CODE(16, uiCode, PTL_TRACE_TEXT("reserved_zero_43bits[16..31]"    ));
-    READ_CODE(11, uiCode, PTL_TRACE_TEXT("reserved_zero_43bits[32..42]"    ));
-  }
 
-  if ((ptl->getProfileIdc() >= Profile::MAIN && ptl->getProfileIdc() <= Profile::HIGHTHROUGHPUTREXT) ||
-       ptl->getProfileCompatibilityFlag(Profile::MAIN) ||
-       ptl->getProfileCompatibilityFlag(Profile::MAIN10) ||
-       ptl->getProfileCompatibilityFlag(Profile::MAINSTILLPICTURE) ||
-       ptl->getProfileCompatibilityFlag(Profile::MAINREXT) ||
-       ptl->getProfileCompatibilityFlag(Profile::HIGHTHROUGHPUTREXT) )
-  {
-    READ_FLAG(    uiCode, PTL_TRACE_TEXT("inbld_flag"                      )); CHECK(uiCode != 0, "Invalid code");
-  }
-  else
-  {
-    READ_FLAG(    uiCode, PTL_TRACE_TEXT("reserved_zero_bit"               ));
-  }
-#undef PTL_TRACE_TEXT
-}
-#endif
 
 void HLSyntaxReader::parseTerminatingBit( uint32_t& ruiBit )
 {
@@ -2406,57 +3353,49 @@ void HLSyntaxReader::parsePredWeightTable( Slice* pcSlice, const SPS *sps )
   CHECK(uiTotalSignalledWeightFlags>24, "Too many weight flag signalled");
 }
 
-#if HEVC_USE_SCALING_LISTS
 /** decode quantization matrix
 * \param scalingList quantization matrix information
 */
 void HLSyntaxReader::parseScalingList(ScalingList* scalingList)
 {
-  uint32_t  code, sizeId, listId;
-  bool scalingListPredModeFlag;
-  //for each size
-  for(sizeId = SCALING_LIST_FIRST_CODED; sizeId <= SCALING_LIST_LAST_CODED; sizeId++)
+  uint32_t  code;
+  bool scalingListCopyModeFlag;
+  READ_FLAG(code, "scaling_matrix_for_lfnst_disabled_flag"); scalingList->setDisableScalingMatrixForLfnstBlks(code ? true : false);
+  for (int scalingListId = 0; scalingListId < 28; scalingListId++)
   {
-    for(listId = 0; listId <  SCALING_LIST_NUM; listId++)
-    {
-      if ((sizeId==SCALING_LIST_32x32) && (listId%(SCALING_LIST_NUM/NUMBER_OF_PREDICTION_MODES) != 0))
-      {
-        int *src = scalingList->getScalingListAddress(sizeId, listId);
-        const int size = std::min(MAX_MATRIX_COEF_NUM,(int)g_scalingListSize[sizeId]);
-        const int *srcNextSmallerSize = scalingList->getScalingListAddress(sizeId-1, listId);
-        for(int i=0; i<size; i++)
-        {
-          src[i] = srcNextSmallerSize[i];
-        }
-        scalingList->setScalingListDC(sizeId,listId,(sizeId > SCALING_LIST_8x8) ? scalingList->getScalingListDC(sizeId-1, listId) : src[0]);
-      }
-      else
-      {
-        READ_FLAG( code, "scaling_list_pred_mode_flag");
-        scalingListPredModeFlag = (code) ? true : false;
-        scalingList->setScalingListPredModeFlag(sizeId, listId, scalingListPredModeFlag);
-        if(!scalingListPredModeFlag) //Copy Mode
-        {
-          READ_UVLC( code, "scaling_list_pred_matrix_id_delta");
-
-          if (sizeId==SCALING_LIST_32x32)
-          {
-            code*=(SCALING_LIST_NUM/NUMBER_OF_PREDICTION_MODES); // Adjust the decoded code for this size, to cope with the missing 32x32 chroma entries.
-          }
+    READ_FLAG(code, "scaling_list_copy_mode_flag");
+    scalingListCopyModeFlag = (code) ? true : false;
+    scalingList->setScalingListCopyModeFlag(scalingListId, scalingListCopyModeFlag);
 
-          scalingList->setRefMatrixId (sizeId,listId,(uint32_t)((int)(listId)-(code)));
-          if( sizeId > SCALING_LIST_8x8 )
-          {
-            scalingList->setScalingListDC(sizeId,listId,((listId == scalingList->getRefMatrixId (sizeId,listId))? 16 :scalingList->getScalingListDC(sizeId, scalingList->getRefMatrixId (sizeId,listId))));
-          }
-          scalingList->processRefMatrix( sizeId, listId, scalingList->getRefMatrixId (sizeId,listId));
+    scalingList->setScalingListPreditorModeFlag(scalingListId, false);
+    if (!scalingListCopyModeFlag)
+    {
+      READ_FLAG(code, "scaling_list_predictor_mode_flag");
+      scalingList->setScalingListPreditorModeFlag(scalingListId, code);
+    }
 
-        }
-        else //DPCM Mode
-        {
-          decodeScalingList(scalingList, sizeId, listId);
-        }
+    if ((scalingListCopyModeFlag || scalingList->getScalingListPreditorModeFlag(scalingListId)) && scalingListId!= SCALING_LIST_1D_START_2x2 && scalingListId!= SCALING_LIST_1D_START_4x4 && scalingListId!= SCALING_LIST_1D_START_8x8) //Copy Mode
+    {
+      READ_UVLC(code, "scaling_list_pred_matrix_id_delta");
+      scalingList->setRefMatrixId(scalingListId, (uint32_t)((int)(scalingListId)-(code)));
+    }    
+    else if (scalingListCopyModeFlag || scalingList->getScalingListPreditorModeFlag(scalingListId))
+    {
+      scalingList->setRefMatrixId(scalingListId, (uint32_t)((int)(scalingListId)));
+    }
+    if (scalingListCopyModeFlag)//copy
+    {
+      if (scalingListId >= SCALING_LIST_1D_START_16x16)
+      {
+        scalingList->setScalingListDC(scalingListId,
+          ((scalingListId == scalingList->getRefMatrixId(scalingListId)) ? 16
+            : (scalingList->getRefMatrixId(scalingListId) < SCALING_LIST_1D_START_16x16) ? scalingList->getScalingListAddress(scalingList->getRefMatrixId(scalingListId))[0] : scalingList->getScalingListDC(scalingList->getRefMatrixId(scalingListId))));
       }
+      scalingList->processRefMatrix(scalingListId, scalingList->getRefMatrixId(scalingListId));
+    }
+    else
+    {
+      decodeScalingList(scalingList, scalingListId, scalingList->getScalingListPreditorModeFlag(scalingListId));
     }
   }
 
@@ -2468,30 +3407,47 @@ void HLSyntaxReader::parseScalingList(ScalingList* scalingList)
 * \param sizeId size index
 * \param listId list index
 */
-void HLSyntaxReader::decodeScalingList(ScalingList *scalingList, uint32_t sizeId, uint32_t listId)
+void HLSyntaxReader::decodeScalingList(ScalingList *scalingList, uint32_t scalingListId, bool isPredictor)
 {
-  int i,coefNum = std::min(MAX_MATRIX_COEF_NUM,(int)g_scalingListSize[sizeId]);
+  int matrixSize = (scalingListId < SCALING_LIST_1D_START_4x4) ? 2 : (scalingListId < SCALING_LIST_1D_START_8x8) ? 4 : 8;
+  int i, coefNum = matrixSize * matrixSize;
   int data;
   int scalingListDcCoefMinus8 = 0;
-  int nextCoef = SCALING_LIST_START_VALUE;
-  uint32_t* scan = g_scanOrder[SCAN_UNGROUPED][SCAN_DIAG][gp_sizeIdxInfo->idxFrom( 1 << ( sizeId == SCALING_LIST_FIRST_CODED ? 2 : 3 ) )][gp_sizeIdxInfo->idxFrom( 1 << ( sizeId == SCALING_LIST_FIRST_CODED ? 2 : 3 ) )];
-  int *dst = scalingList->getScalingListAddress(sizeId, listId);
+  int nextCoef = (isPredictor) ? 0 : SCALING_LIST_START_VALUE;
+  ScanElement *scan = g_scanOrder[SCAN_UNGROUPED][SCAN_DIAG][gp_sizeIdxInfo->idxFrom(matrixSize)][gp_sizeIdxInfo->idxFrom(matrixSize)];
+  int *dst = scalingList->getScalingListAddress(scalingListId);
+
+  int PredListId = scalingList->getRefMatrixId(scalingListId);
+  CHECK(isPredictor && PredListId > scalingListId, "Scaling List error predictor!");
+  const int *srcPred = (isPredictor) ? ((scalingListId == PredListId) ? scalingList->getScalingListDefaultAddress(scalingListId) : scalingList->getScalingListAddress(PredListId)) : NULL;
+  if(isPredictor && scalingListId == PredListId)
+    scalingList->setScalingListDC(PredListId, SCALING_LIST_DC);
+  int predCoef = 0;
 
-  if( sizeId > SCALING_LIST_8x8 )
+  if (scalingListId >= SCALING_LIST_1D_START_16x16)
   {
-    READ_SVLC( scalingListDcCoefMinus8, "scaling_list_dc_coef_minus8");
-    scalingList->setScalingListDC(sizeId,listId,scalingListDcCoefMinus8 + 8);
-    nextCoef = scalingList->getScalingListDC(sizeId,listId);
+    READ_SVLC(scalingListDcCoefMinus8, "scaling_list_dc_coef_minus8");
+    nextCoef += scalingListDcCoefMinus8;
+    if (isPredictor)
+    {
+      predCoef = (PredListId >= SCALING_LIST_1D_START_16x16) ? scalingList->getScalingListDC(PredListId) : srcPred[0];
+    }
+    scalingList->setScalingListDC(scalingListId, (nextCoef + predCoef + 256) & 255);
   }
 
   for(i = 0; i < coefNum; i++)
   {
+    if (scalingListId >= SCALING_LIST_1D_START_64x64 && scan[i].x >= 4 && scan[i].y >= 4)
+    {
+      dst[scan[i].idx] = 0;
+      continue;
+    }
     READ_SVLC( data, "scaling_list_delta_coef");
-    nextCoef = (nextCoef + data + 256 ) % 256;
-    dst[scan[i]] = nextCoef;
+    nextCoef += data;  
+    predCoef = (isPredictor) ? srcPred[scan[i].idx] : 0;
+    dst[scan[i].idx] = (nextCoef + predCoef + 256) & 255;
   }
 }
-#endif
 
 bool HLSyntaxReader::xMoreRbspData()
 {
@@ -2522,183 +3478,91 @@ bool HLSyntaxReader::xMoreRbspData()
   return (cnt>0);
 }
 
-
-int HLSyntaxReader::alfGolombDecode( const int k )
+int HLSyntaxReader::alfGolombDecode( const int k, const bool signed_val )
 {
-  uint32_t uiSymbol;
-  int q = -1;
-  int nr = 0;
-  int m = (int)pow( 2.0, k );
-  int a;
-
-  uiSymbol = 1;
-  while( uiSymbol )
+  int numLeadingBits = -1;
+  uint32_t b = 0;
+  for (; !b; numLeadingBits++)
   {
 #if RExt__DECODER_DEBUG_BIT_STATISTICS
-    xReadFlag( uiSymbol, "" );
+    xReadFlag( b, "" );
 #else
-    xReadFlag( uiSymbol );
+    READ_FLAG( b, "alf_coeff_abs_prefix");
 #endif
-    q++;
   }
 
-  for( a = 0; a < k; ++a )          // read out the sequential log2(M) bits
+  int symbol = ( ( 1 << numLeadingBits ) - 1 ) << k;
+  if ( numLeadingBits + k > 0)
   {
-#if RExt__DECODER_DEBUG_BIT_STATISTICS
-    xReadFlag( uiSymbol, "" );
-#else
-    xReadFlag( uiSymbol );
-#endif
-    if( uiSymbol )
-    {
-      nr += 1 << a;
-    }
+    uint32_t bins;
+    READ_CODE( numLeadingBits + k, bins, "alf_coeff_abs_suffix" );
+    symbol += bins;
   }
-  nr += q * m;                    // add the bits and the multiple of M
-  if( nr != 0 )
+
+  if ( signed_val && symbol != 0 )
   {
 #if RExt__DECODER_DEBUG_BIT_STATISTICS
-    xReadFlag( uiSymbol, "" );
+    xReadFlag( b, "" );
 #else
-    xReadFlag( uiSymbol );
+    READ_FLAG( b, "alf_coeff_sign" );
 #endif
-    nr = ( uiSymbol ) ? nr : -nr;
+    symbol = ( b ) ? -symbol : symbol;
   }
-  return nr;
+  return symbol;
 }
 
-void HLSyntaxReader::alfFilter( AlfSliceParam& alfSliceParam, const bool isChroma )
+void HLSyntaxReader::alfFilter( AlfParam& alfParam, const bool isChroma, const int altIdx )
 {
   uint32_t code;
-  if( !isChroma )
-  {
-    READ_FLAG( code, "alf_luma_coeff_delta_flag" );
-    alfSliceParam.alfLumaCoeffDeltaFlag = code;
-
-    if( !alfSliceParam.alfLumaCoeffDeltaFlag )
-    {
-      std::memset( alfSliceParam.alfLumaCoeffFlag, true, sizeof( alfSliceParam.alfLumaCoeffFlag ) );
-
-      if( alfSliceParam.numLumaFilters > 1 )
-      {
-        READ_FLAG( code, "alf_luma_coeff_delta_prediction_flag" );
-        alfSliceParam.alfLumaCoeffDeltaPredictionFlag = code;
-      }
-      else
-      {
-        alfSliceParam.alfLumaCoeffDeltaPredictionFlag = 0;
-      }
-    }
-    else
-    {
-      alfSliceParam.alfLumaCoeffDeltaPredictionFlag = 0;
-    }
-  }
 
   // derive maxGolombIdx
   AlfFilterShape alfShape( isChroma ? 5 : 7 );
-  const int maxGolombIdx = AdaptiveLoopFilter::getMaxGolombIdx( alfShape.filterType );
-  READ_UVLC( code, isChroma ? "alf_chroma_min_eg_order_minus1" : "alf_luma_min_eg_order_minus1" );
-
-  int kMin = code + 1;
-  static int kMinTab[MAX_NUM_ALF_COEFF];
-  const int numFilters = isChroma ? 1 : alfSliceParam.numLumaFilters;
-  short* coeff = isChroma ? alfSliceParam.chromaCoeff : alfSliceParam.lumaCoeff;
-
-  for( int idx = 0; idx < maxGolombIdx; idx++ )
-  {
-    READ_FLAG( code, isChroma ? "alf_chroma_eg_order_increase_flag"  : "alf_luma_eg_order_increase_flag" );
-    CHECK( code > 1, "Wrong golomb_order_increase_flag" );
-    kMinTab[idx] = kMin + code;
-    kMin = kMinTab[idx];
-  }
+  const int numFilters = isChroma ? 1 : alfParam.numLumaFilters;
+  short* coeff = isChroma ? alfParam.chromaCoeff[altIdx] : alfParam.lumaCoeff;
+  short* clipp = isChroma ? alfParam.chromaClipp[altIdx] : alfParam.lumaClipp;
 
-  if( !isChroma )
-  {
-    if( alfSliceParam.alfLumaCoeffDeltaFlag )
-    {
-      for( int ind = 0; ind < alfSliceParam.numLumaFilters; ++ind )
-      {
-        READ_FLAG( code, "alf_luma_coeff_flag[i]" );
-        alfSliceParam.alfLumaCoeffFlag[ind] = code;
-      }
-    }
-  }
 
   // Filter coefficients
   for( int ind = 0; ind < numFilters; ++ind )
   {
-    if( !isChroma && !alfSliceParam.alfLumaCoeffFlag[ind] && alfSliceParam.alfLumaCoeffDeltaFlag )
-    {
-      memset( coeff + ind * MAX_NUM_ALF_LUMA_COEFF, 0, sizeof( *coeff ) * alfShape.numCoeff );
-      continue;
-    }
 
     for( int i = 0; i < alfShape.numCoeff - 1; i++ )
     {
-      coeff[ind * MAX_NUM_ALF_LUMA_COEFF + i] = alfGolombDecode( kMinTab[alfShape.golombIdx[i]] );
+      coeff[ind * MAX_NUM_ALF_LUMA_COEFF + i] = alfGolombDecode( 3 );
+      CHECK( isChroma &&
+             ( coeff[ind * MAX_NUM_ALF_LUMA_COEFF + i] > 127 || coeff[ind * MAX_NUM_ALF_LUMA_COEFF + i] < -127 )
+             , "AlfCoeffC shall be in the range of -127 to 127, inclusive" );
     }
   }
-}
 
-int HLSyntaxReader::truncatedUnaryEqProb( const int maxSymbol )
-{
-  for( int k = 0; k < maxSymbol; k++ )
-  {
-    uint32_t symbol;
-#if RExt__DECODER_DEBUG_BIT_STATISTICS
-    xReadFlag( symbol, "" );
+  // Clipping values coding
+#if JVET_Q0249_ALF_CHROMA_CLIPFLAG
+  if ( alfParam.nonLinearFlag[isChroma] )
 #else
-    xReadFlag( symbol );
+  if ( alfParam.nonLinearFlag[isChroma][altIdx] )
 #endif
+  {
 
-    if( !symbol )
+    // Filter coefficients
+    for( int ind = 0; ind < numFilters; ++ind )
     {
-      return k;
-    }
-  }
-  return maxSymbol;
-}
 
-void HLSyntaxReader::xReadTruncBinCode( uint32_t& ruiSymbol, const int uiMaxSymbol )
-{
-  int uiThresh;
-  if( uiMaxSymbol > 256 )
-  {
-    int uiThreshVal = 1 << 8;
-    uiThresh = 8;
-    while( uiThreshVal <= uiMaxSymbol )
-    {
-      uiThresh++;
-      uiThreshVal <<= 1;
+      for( int i = 0; i < alfShape.numCoeff - 1; i++ )
+      {
+        READ_CODE(2, code, "alf_clipping_index");
+        clipp[ind * MAX_NUM_ALF_LUMA_COEFF + i] = code;
+      }
     }
-    uiThresh--;
   }
   else
   {
-    uiThresh = g_tbMax[uiMaxSymbol];
-  }
-
-  int uiVal = 1 << uiThresh;
-  int b = uiMaxSymbol - uiVal;
-#if RExt__DECODER_DEBUG_BIT_STATISTICS
-  xReadCode( uiThresh, ruiSymbol, "" );
-#else
-  xReadCode( uiThresh, ruiSymbol );
-#endif
-  if( ruiSymbol >= uiVal - b )
-  {
-    uint32_t uiSymbol;
-#if RExt__DECODER_DEBUG_BIT_STATISTICS
-    xReadFlag( uiSymbol, "" );
-#else
-    xReadFlag( uiSymbol );
-#endif
-    ruiSymbol <<= 1;
-    ruiSymbol += uiSymbol;
-    ruiSymbol -= ( uiVal - b );
+    for( int ind = 0; ind < numFilters; ++ind )
+    {
+      std::fill_n( clipp + ind * MAX_NUM_ALF_LUMA_COEFF, alfShape.numCoeff, 0 );
+    }
   }
 }
 
+
 //! \}
 
diff --git a/source/Lib/DecoderLib/VLCReader.h b/source/Lib/DecoderLib/VLCReader.h
index 01117b9f861d9c1892180848f638c93bf79b6178..69e3f479ca770b34ac1efdb3ebb54d0bb22d7ad3 100644
--- a/source/Lib/DecoderLib/VLCReader.h
+++ b/source/Lib/DecoderLib/VLCReader.h
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2019, ITU/ISO/IEC
+* Copyright (c) 2010-2020, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -46,6 +46,7 @@
 
 #if ENABLE_TRACING
 
+#define READ_SCODE(length, code, name)    xReadSCode  ( length, code, name )
 #define READ_CODE(length, code, name)     xReadCodeTr ( length, code, name )
 #define READ_UVLC(        code, name)     xReadUvlcTr (         code, name )
 #define READ_SVLC(        code, name)     xReadSvlcTr (         code, name )
@@ -55,6 +56,7 @@
 
 #if RExt__DECODER_DEBUG_BIT_STATISTICS
 
+#define READ_SCODE(length, code, name)    xReadSCode( length, code, name )
 #define READ_CODE(length, code, name)     xReadCode ( length, code, name )
 #define READ_UVLC(        code, name)     xReadUvlc (         code, name )
 #define READ_SVLC(        code, name)     xReadSvlc (         code, name )
@@ -62,6 +64,7 @@
 
 #else
 
+#define READ_SCODE(length, code, name)    xReadSCode ( length, code )
 #define READ_CODE(length, code, name)     xReadCode ( length, code )
 #define READ_UVLC(        code, name)     xReadUvlc (         code )
 #define READ_SVLC(        code, name)     xReadSvlc (         code )
@@ -103,15 +106,19 @@ protected:
   void  xReadSvlcTr  (                int& rValue, const char *pSymbolName );
   void  xReadFlagTr  (               uint32_t& rValue, const char *pSymbolName );
 #endif
+#if RExt__DECODER_DEBUG_BIT_STATISTICS || ENABLE_TRACING
+  void  xReadSCode   ( uint32_t  length, int& val, const char *pSymbolName );
+#else
+  void  xReadSCode   ( uint32_t  length, int& val );
+#endif
+
 public:
   void  setBitstream ( InputBitstream* p )   { m_pcBitstream = p; }
   InputBitstream* getBitstream() { return m_pcBitstream; }
 
 protected:
   void xReadRbspTrailingBits();
-#if JVET_M0101_HLS
   bool isByteAligned() { return (m_pcBitstream->getNumBitsUntilByteAligned() == 0 ); }
-#endif
 };
 
 
@@ -143,41 +150,37 @@ public:
   virtual ~HLSyntaxReader();
 
 protected:
-  void  parseShortTermRefPicSet            (SPS* pcSPS, ReferencePictureSet* pcRPS, int idx);
+  void  copyRefPicList(SPS* pcSPS, ReferencePictureList* source_rpl, ReferencePictureList* dest_rpl);
+  void  parseRefPicList(SPS* pcSPS, ReferencePictureList* rpl);
 
 public:
   void  setBitstream        ( InputBitstream* p )   { m_pcBitstream = p; }
-#if HEVC_VPS
   void  parseVPS            ( VPS* pcVPS );
-#endif
+  void  parseDPS            ( DPS* dps );
   void  parseSPS            ( SPS* pcSPS );
-  void  parsePPS            ( PPS* pcPPS );
-  void  parseAPS            ( APS* pcAPS);
+  void  parsePPS            ( PPS* pcPPS, ParameterSetManager *parameterSetManager );
+  void  parseAPS            ( APS* pcAPS );
+  void  parseAlfAps         ( APS* pcAPS );
+  void  parseLmcsAps        ( APS* pcAPS );
+  void  parseScalingListAps ( APS* pcAPS );
   void  parseVUI            ( VUI* pcVUI, SPS* pcSPS );
-#if !JVET_M0101_HLS
-  void  parsePTL            ( PTL *rpcPTL, bool profilePresentFlag, int maxNumSubLayersMinus1 );
-  void  parseProfileTier    ( ProfileTierLevel *ptl, const bool bIsSubLayer );
-#else
   void  parseConstraintInfo   (ConstraintInfo *cinfo);
   void  parseProfileTierLevel ( ProfileTierLevel *ptl, int maxNumSubLayersMinus1);
-#endif
-  void  parseHrdParameters  ( HRD *hrd, bool cprms_present_flag, uint32_t tempLevelHigh );
-  void  parseSliceHeader    ( Slice* pcSlice, ParameterSetManager *parameterSetManager, const int prevTid0POC );
+  void  parseHrdParameters  ( HRDParameters *hrd, uint32_t firstSubLayer, uint32_t tempLevelHigh );
+  void  parsePictureHeader  ( PicHeader* picHeader, ParameterSetManager *parameterSetManager );
+  void  parseSliceHeader    ( Slice* pcSlice, PicHeader* picHeader, ParameterSetManager *parameterSetManager, const int prevTid0POC );
+  void  parseSliceHeaderToPoc ( Slice* pcSlice, PicHeader* picHeader, ParameterSetManager *parameterSetManager, const int prevTid0POC );
   void  parseTerminatingBit ( uint32_t& ruiBit );
   void  parseRemainingBytes ( bool noTrailingBytesExpected );
 
   void  parsePredWeightTable( Slice* pcSlice, const SPS *sps );
-#if HEVC_USE_SCALING_LISTS
   void  parseScalingList    ( ScalingList* scalingList );
-  void  decodeScalingList   ( ScalingList *scalingList, uint32_t sizeId, uint32_t listId);
-#endif
+  void  decodeScalingList   ( ScalingList *scalingList, uint32_t scalingListId, bool isPredictor);
   void parseReshaper        ( SliceReshapeInfo& sliceReshaperInfo, const SPS* pcSPS, const bool isIntra );
-  void alfFilter( AlfSliceParam& alfSliceParam, const bool isChroma );
+  void alfFilter( AlfParam& alfParam, const bool isChroma, const int altIdx );
 
 private:
-  int truncatedUnaryEqProb( const int maxSymbol );
-  void xReadTruncBinCode( uint32_t& ruiSymbol, const int uiMaxSymbol );
-  int  alfGolombDecode( const int k );
+  int  alfGolombDecode( const int k, const bool signed_val=true );
 
 protected:
   bool  xMoreRbspData();
diff --git a/source/Lib/EncoderLib/AQp.cpp b/source/Lib/EncoderLib/AQp.cpp
index bb6effc838cb7f97af26a933aff6bed0f7d5be7e..a157e1c854416e142fb11b39f1b5c27472732f29 100644
--- a/source/Lib/EncoderLib/AQp.cpp
+++ b/source/Lib/EncoderLib/AQp.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/EncoderLib/AQp.h b/source/Lib/EncoderLib/AQp.h
index b64d66a7dfe7b40bea3d2b63d87f5fafd8275e02..4e62ed1eee3034cd005c2e036041e12e59cebfdd 100644
--- a/source/Lib/EncoderLib/AQp.h
+++ b/source/Lib/EncoderLib/AQp.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/EncoderLib/Analyze.h b/source/Lib/EncoderLib/Analyze.h
index 086c834bf6c9bcd3a4306d526c5e40c2a5c904fe..c4faaad1c5eb3aed1bfdd79e9ab131332cc11bb5 100644
--- a/source/Lib/EncoderLib/Analyze.h
+++ b/source/Lib/EncoderLib/Analyze.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -69,15 +69,21 @@ private:
   uint32_t      m_uiNumPic;
   double    m_dFrmRate; //--CFG_KDY
   double    m_MSEyuvframe[MAX_NUM_COMPONENT]; // sum of MSEs
+  double    m_upscaledPSNR[MAX_NUM_COMPONENT];
 #if EXTENSION_360_VIDEO
   TExt360EncAnalyze m_ext360;
 #endif
+#if JVET_O0756_CALCULATE_HDRMETRICS
+  double    m_logDeltaESum[hdrtoolslib::NB_REF_WHITE];
+  double    m_psnrLSum[hdrtoolslib::NB_REF_WHITE];
+#endif
 
 public:
   virtual ~Analyze()  {}
   Analyze() { clear(); }
 
   void  addResult( double psnr[MAX_NUM_COMPONENT], double bits, const double MSEyuvframe[MAX_NUM_COMPONENT]
+    , const double upscaledPSNR[MAX_NUM_COMPONENT]
     , bool isEncodeLtRef
   )
   {
@@ -88,6 +94,7 @@ public:
     {
       m_dPSNRSum[i] += psnr[i];
       m_MSEyuvframe[i] += MSEyuvframe[i];
+      m_upscaledPSNR[i] += upscaledPSNR[i];
     }
 
     m_uiNumPic++;
@@ -96,12 +103,26 @@ public:
   double  getWPSNR      (const ComponentID compID) const { return m_dPSNRSum[compID] / (double)m_uiNumPic; }
 #endif
   double  getPsnr(ComponentID compID) const { return  m_dPSNRSum[compID];  }
+#if JVET_O0756_CALCULATE_HDRMETRICS
+  double getDeltaE()                  const { return m_logDeltaESum[0];  } // running sum (not an average) stored at index 0; the other entries of m_logDeltaESum are not exposed by this accessor
+  double getPsnrL()                   const { return m_psnrLSum[0];  } // running sum (not an average) stored at index 0; the summary printing code divides it by getNumPic()
+#endif
   double  getBits()                   const { return  m_dAddBits;   }
   void    setBits(double numBits)     { m_dAddBits = numBits; }
   uint32_t    getNumPic()                 const { return  m_uiNumPic;   }
 #if EXTENSION_360_VIDEO
   TExt360EncAnalyze& getExt360Info() { return m_ext360; }
 #endif
+#if JVET_O0756_CALCULATE_HDRMETRICS
+  void addHDRMetricsResult(double deltaE[hdrtoolslib::NB_REF_WHITE], double psnrL[hdrtoolslib::NB_REF_WHITE])
+  { // Adds one picture's metrics to the running sums, entry by entry, for every index below hdrtoolslib::NB_REF_WHITE.
+    for (int entry = 0; entry < hdrtoolslib::NB_REF_WHITE; ++entry)
+    {
+      m_psnrLSum[entry]     += psnrL[entry];
+      m_logDeltaESum[entry] += deltaE[entry];
+    }
+  }
+#endif
 
   void    setFrmRate  (double dFrameRate) { m_dFrmRate = dFrameRate; } //--CFG_KDY
   void    clear()
@@ -111,10 +132,18 @@ public:
     {
       m_dPSNRSum[i] = 0;
       m_MSEyuvframe[i] = 0;
+      m_upscaledPSNR[i] = 0;
     }
     m_uiNumPic = 0;
 #if EXTENSION_360_VIDEO
     m_ext360.clear();
+#endif
+#if JVET_O0756_CALCULATE_HDRMETRICS
+    for (int i=0; i<hdrtoolslib::NB_REF_WHITE; i++)
+    {
+      m_logDeltaESum[i] = 0.0;
+      m_psnrLSum[i] = 0.0;
+    }
 #endif
   }
 
@@ -159,9 +188,17 @@ public:
   }
 
 #if ENABLE_QPA || WCG_WPSNR
-  void    printOut ( char cDelim, const ChromaFormat chFmt, const bool printMSEBasedSNR, const bool printSequenceMSE, const bool printHexPsnr, const BitDepths &bitDepths, const bool useWPSNR = false )
+  void    printOut( char cDelim, const ChromaFormat chFmt, const bool printMSEBasedSNR, const bool printSequenceMSE, const bool printHexPsnr, const bool printRprPSNR, const BitDepths &bitDepths, const bool useWPSNR = false
+#if JVET_O0756_CALCULATE_HDRMETRICS
+      , const bool printHdrMetrics = false
+#endif
+  )
 #else
-  void    printOut ( char cDelim, const ChromaFormat chFmt, const bool printMSEBasedSNR, const bool printSequenceMSE, const bool printHexPsnr, const BitDepths &bitDepths )
+  void    printOut ( char cDelim, const ChromaFormat chFmt, const bool printMSEBasedSNR, const bool printSequenceMSE, const bool printHexPsnr, const BitDepths &bitDepths
+#if JVET_O0756_CALCULATE_HDRMETRICS
+      , const bool printHdrMetrics = false
+#endif
+  )
 #endif
   {
 #if !WCG_WPSNR
@@ -405,10 +442,16 @@ public:
           {
 #if ENABLE_QPA || WCG_WPSNR
             if (useWPSNR) {
-              msg( e_msg_level, "\tTotal Frames |   "   "Bitrate     "  "Y-WPSNR   "  "U-WPSNR   "  "V-WPSNR   "  "YUV-WPSNR" );
+              msg( e_msg_level, "\tTotal Frames |   "   "Bitrate     "  "Y-WPSNR   "  "U-WPSNR   "  "V-WPSNR   "  "YUV-WPSNR   " );
             } else
 #endif
-            msg( e_msg_level, "\tTotal Frames |   "   "Bitrate     "  "Y-PSNR    "  "U-PSNR    "  "V-PSNR    "  "YUV-PSNR " );
+            msg( e_msg_level, "\tTotal Frames |   "   "Bitrate     "  "Y-PSNR    "  "U-PSNR    "  "V-PSNR    "  "YUV-PSNR   " );
+#if JVET_O0756_CALCULATE_HDRMETRICS
+            if (printHdrMetrics)
+            {
+              msg(e_msg_level, "DeltaE   "  "PSNRL      ");
+            }
+#endif
 #if EXTENSION_360_VIDEO
             m_ext360.printHeader(e_msg_level);
 #endif
@@ -417,7 +460,12 @@ public:
             {
               msg(e_msg_level, "xY-PSNR           "  "xU-PSNR           "  "xV-PSNR           ");
             }
-
+#if JVET_O0756_CALCULATE_HDRMETRICS
+            if (printHdrMetrics && printHexPsnr)
+            {
+              msg(e_msg_level, "xDeltaE           "  "xPSNRL           ");
+            }
+#endif
             if (printSequenceMSE)
             {
               msg( e_msg_level, " Y-MSE     "  "U-MSE     "  "V-MSE    "  "YUV-MSE \n" );
@@ -442,8 +490,14 @@ public:
 #if ENABLE_QPA
                    useWPSNR ? getWPSNR(COMPONENT_Cr) :
 #endif
-                   getPsnr(COMPONENT_Cr) / (double)getNumPic(),
-                   PSNRyuv );
+              getPsnr(COMPONENT_Cr) / (double)getNumPic(),
+              PSNRyuv );
+#if JVET_O0756_CALCULATE_HDRMETRICS
+            if (printHdrMetrics)
+            {
+              msg( e_msg_level, "  %8.4lf  " "%8.4lf  ", getDeltaE()/(double)getNumPic(), getPsnrL()/(double)getNumPic());
+            }
+#endif
 
 #if EXTENSION_360_VIDEO
             m_ext360.printPSNRs(getNumPic(), e_msg_level);
@@ -463,7 +517,33 @@ public:
               }
               msg(e_msg_level, "   %16" PRIx64 "  %16" PRIx64 "  %16" PRIx64 , xPsnr[COMPONENT_Y], xPsnr[COMPONENT_Cb], xPsnr[COMPONENT_Cr]);
             }
+#if JVET_O0756_CALCULATE_HDRMETRICS
+            if (printHexPsnr && printHdrMetrics)
+            {
+              double dDeltaE[MAX_NUM_COMPONENT];
+              uint64_t xDeltaE[MAX_NUM_COMPONENT];
+              for (int i = 0; i < 1; i++)
+              {
+                dDeltaE[i] = getDeltaE() / (double)getNumPic();
 
+                copy(reinterpret_cast<uint8_t *>(&dDeltaE[i]),
+                     reinterpret_cast<uint8_t *>(&dDeltaE[i]) + sizeof(dDeltaE[i]),
+                     reinterpret_cast<uint8_t *>(&xDeltaE[i]));
+              }
+
+              double dPsnrL[MAX_NUM_COMPONENT];
+              uint64_t xPsnrL[MAX_NUM_COMPONENT];
+              for (int i = 0; i < 1; i++)
+              {
+                dPsnrL[i] = getPsnrL() / (double)getNumPic();
+
+                copy(reinterpret_cast<uint8_t *>(&dPsnrL[i]),
+                     reinterpret_cast<uint8_t *>(&dPsnrL[i]) + sizeof(dPsnrL[i]),
+                     reinterpret_cast<uint8_t *>(&xPsnrL[i]));
+              }
+              msg(e_msg_level, "   %16" PRIx64 "  %16" PRIx64 , xDeltaE[0], xPsnrL[0]);
+            }
+#endif
             if (printSequenceMSE)
             {
               msg( e_msg_level, "  %8.4lf  "   "%8.4lf  "    "%8.4lf  "   "%8.4lf\n",
@@ -476,6 +556,34 @@ public:
             {
               msg( e_msg_level, "\n");
             }
+            if( printRprPSNR )
+            {
+              double psnr[MAX_NUM_COMPONENT];
+              for( uint32_t componentIndex = 0; componentIndex < MAX_NUM_COMPONENT; componentIndex++ )
+              {
+                const ComponentID compID = ComponentID( componentIndex );
+
+                if( getNumPic() == 0 )
+                {
+                  psnr[compID] = 0.0;
+                }
+                else
+                {
+                  const uint32_t maxval = 255 << ( bitDepths.recon[toChannelType( compID )] - 8 );
+                  psnr[compID] = ( m_MSEyuvframe[compID] == 0 ) ? 999.99 : 10.0 * log10( ( maxval * maxval ) / ( m_MSEyuvframe[compID] / (double)getNumPic() ) );
+                }
+              }
+
+              msg( e_msg_level, "\nPSNR1 Y-PSNR     "  "U-PSNR     "  "V-PSNR\n" );
+              msg( e_msg_level, "     %8.4lf  "     " %8.4lf  "     " %8.4lf\n",
+                psnr[COMPONENT_Y], psnr[COMPONENT_Cb], psnr[COMPONENT_Cr] );
+
+              msg( e_msg_level, "PSNR2 Y-PSNR     "  "U-PSNR     "  "V-PSNR\n" );
+              msg( e_msg_level, "     %8.4lf  "     " %8.4lf  "     " %8.4lf\n",
+                m_upscaledPSNR[COMPONENT_Y] / (double)getNumPic(),
+                m_upscaledPSNR[COMPONENT_Cb] / (double)getNumPic(),
+                m_upscaledPSNR[COMPONENT_Cr] / (double)getNumPic());
+            }
           }
         }
         break;
diff --git a/source/Lib/EncoderLib/AnnexBwrite.h b/source/Lib/EncoderLib/AnnexBwrite.h
index b2686d47389ab30da4d2fdf6b522ac3ef2a336a5..37f967684409f3fc59c8b18e3dfeaa99c860eaab 100644
--- a/source/Lib/EncoderLib/AnnexBwrite.h
+++ b/source/Lib/EncoderLib/AnnexBwrite.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -59,11 +59,7 @@ static std::vector<uint32_t> writeAnnexB(std::ostream& out, const AccessUnit& au
     uint32_t size = 0; /* size of annexB unit in bytes */
 
     static const uint8_t start_code_prefix[] = {0,0,0,1};
-#if HEVC_VPS
-    if (it == au.begin() || nalu.m_nalUnitType == NAL_UNIT_VPS || nalu.m_nalUnitType == NAL_UNIT_SPS || nalu.m_nalUnitType == NAL_UNIT_PPS)
-#else
-    if (it == au.begin() || nalu.m_nalUnitType == NAL_UNIT_SPS || nalu.m_nalUnitType == NAL_UNIT_PPS)
-#endif
+    if (it == au.begin() || nalu.m_nalUnitType == NAL_UNIT_DPS || nalu.m_nalUnitType == NAL_UNIT_SPS || nalu.m_nalUnitType == NAL_UNIT_VPS || nalu.m_nalUnitType == NAL_UNIT_PPS)
     {
       /* From AVC, When any of the following conditions are fulfilled, the
        * zero_byte syntax element shall be present:
diff --git a/source/Lib/EncoderLib/BinEncoder.cpp b/source/Lib/EncoderLib/BinEncoder.cpp
index ebad19d3e1a517e3a4cb087e59b9f29abc117329..2d94f7765b9e05b77433864d532126729e987c18 100644
--- a/source/Lib/EncoderLib/BinEncoder.cpp
+++ b/source/Lib/EncoderLib/BinEncoder.cpp
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2019, ITU/ISO/IEC
+* Copyright (c) 2010-2020, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -205,56 +205,41 @@ void BinEncoderBase::encodeBinsEP( unsigned bins, unsigned numBins )
   }
 }
 
-void BinEncoderBase::encodeRemAbsEP( unsigned bins, unsigned goRicePar, bool useLimitedPrefixLength, int maxLog2TrDynamicRange )
+void BinEncoderBase::encodeRemAbsEP(unsigned bins, unsigned goRicePar, unsigned cutoff, int maxLog2TrDynamicRange) // emits 'bins' through encodeBinsEP: short form below (cutoff << goRicePar), escape form otherwise
 {
-  const unsigned threshold = COEF_REMAIN_BIN_REDUCTION << goRicePar;
-  useLimitedPrefixLength = true;
-  if( bins < threshold )
+  const unsigned threshold = cutoff << goRicePar; // smallest value that takes the escape (else) branch
+  if (bins < threshold)
   {
-    const unsigned bitMask  = ( 1 << goRicePar ) - 1;
-    const unsigned length   = ( bins >> goRicePar ) + 1;
-    encodeBinsEP( ( 1 << length ) - 2,  length );
-    encodeBinsEP( bins & bitMask,       goRicePar);
+    const unsigned bitMask = (1 << goRicePar) - 1; // selects the low goRicePar bits
+    const unsigned length = (bins >> goRicePar) + 1; // quotient plus one terminating bit
+    encodeBinsEP((1 << length) - 2, length); // value is (length - 1) one-bits above a single zero bit
+    encodeBinsEP(bins & bitMask, goRicePar); // remainder: low goRicePar bits of bins
   }
-  else if (useLimitedPrefixLength)
+  else // bins >= threshold: prefix of one-bits whose length is capped at maxPrefixLength
   {
-    const unsigned  maxPrefixLength = 32 - COEF_REMAIN_BIN_REDUCTION - maxLog2TrDynamicRange;
-    unsigned        prefixLength    = 0;
-    unsigned        codeValue       = ( bins >> goRicePar ) - COEF_REMAIN_BIN_REDUCTION;
+    const unsigned  maxPrefixLength = 32 - cutoff - maxLog2TrDynamicRange; // upper bound applied to prefixLength below
+    unsigned        prefixLength = 0;
+    unsigned        codeValue = (bins >> goRicePar) - cutoff; // quotient in excess of cutoff (non-negative because bins >= threshold)
     unsigned        suffixLength;
-    if( codeValue >=  ( ( 1 << maxPrefixLength ) - 1 ) )
+    if (codeValue >= ((1 << maxPrefixLength) - 1)) // capped case: suffix takes exactly maxLog2TrDynamicRange bits
     {
       prefixLength = maxPrefixLength;
       suffixLength = maxLog2TrDynamicRange;
     }
     else
     {
-      while( codeValue > ( ( 2 << prefixLength ) - 2 ) )
+      while (codeValue > ((2 << prefixLength) - 2)) // grow the prefix until codeValue <= (2 << prefixLength) - 2
       {
         prefixLength++;
       }
       suffixLength = prefixLength + goRicePar + 1; //+1 for the separator bit
     }
-    const unsigned totalPrefixLength  = prefixLength + COEF_REMAIN_BIN_REDUCTION;
-    const unsigned bitMask            = ( 1 << goRicePar ) - 1;
-    const unsigned prefix             = ( 1 << totalPrefixLength ) - 1;
-    const unsigned suffix             = ( ( codeValue - ( (1 << prefixLength ) - 1 ) ) << goRicePar ) | ( bins & bitMask );
-    encodeBinsEP( prefix, totalPrefixLength ); //prefix
-    encodeBinsEP( suffix, suffixLength      ); //separator, suffix, and rParam bits
-  }
-  else
-  {
-    unsigned length = goRicePar;
-    unsigned delta  = 1 << length;
-    bins           -= threshold;
-    while (bins >= delta )
-    {
-      bins -= delta;
-      delta = 1 << (++length);
-    }
-    unsigned numBin = COEF_REMAIN_BIN_REDUCTION + length + 1 - goRicePar;
-    encodeBinsEP( ( 1 << numBin ) - 2, numBin );
-    encodeBinsEP( bins,                length );
+    const unsigned totalPrefixLength = prefixLength + cutoff; // the cutoff bins are part of the emitted prefix
+    const unsigned bitMask = (1 << goRicePar) - 1; // selects the low goRicePar bits
+    const unsigned prefix = (1 << totalPrefixLength) - 1; // totalPrefixLength one-bits
+    const unsigned suffix = ((codeValue - ((1 << prefixLength) - 1)) << goRicePar) | (bins & bitMask); // offset within the prefix class, followed by the low goRicePar bits of bins
+    encodeBinsEP(prefix, totalPrefixLength); //prefix
+    encodeBinsEP(suffix, suffixLength); //separator, suffix, and rParam bits
   }
 }
 
@@ -285,22 +270,12 @@ void BinEncoderBase::encodeBinTrm( unsigned bin )
   }
 }
 
-void BinEncoderBase::encodeBinsPCM( unsigned bins, unsigned numBins )
-{
-  m_Bitstream->write( bins, numBins );
-}
 
 void BinEncoderBase::align()
 {
   m_Range = 256;
 }
 
-void BinEncoderBase::pcmAlignBits()
-{
-  finish();
-  m_Bitstream->write( 1, 1 );
-  m_Bitstream->writeAlignZero(); // pcm align zero
-}
 
 void BinEncoderBase::encodeAlignedBinsEP( unsigned bins, unsigned numBins )
 {
@@ -437,46 +412,33 @@ BitEstimatorBase::BitEstimatorBase( const BinProbModel* dummy )
   m_EstFracBits = 0;
 }
 
-void BitEstimatorBase::encodeRemAbsEP( unsigned bins, unsigned goRicePar, bool useLimitedPrefixLength, int maxLog2TrDynamicRange )
+void BitEstimatorBase::encodeRemAbsEP(unsigned bins, unsigned goRicePar, unsigned cutoff, int maxLog2TrDynamicRange) // only accumulates a cost in m_EstFracBits; no bins are emitted here
 {
-  const unsigned threshold = COEF_REMAIN_BIN_REDUCTION << goRicePar;
-  useLimitedPrefixLength = true;
-  if( bins < threshold )
+  const unsigned threshold = cutoff << goRicePar; // smallest value that takes the escape (else) branch
+  if (bins < threshold)
   {
-    m_EstFracBits += BinProbModelBase::estFracBitsEP( ( bins >> goRicePar ) + 1 + goRicePar );
+    m_EstFracBits += BinProbModelBase::estFracBitsEP((bins >> goRicePar) + 1 + goRicePar); // same bin count as BinEncoderBase::encodeRemAbsEP passes to encodeBinsEP in this branch (length + goRicePar)
   }
-  else if (useLimitedPrefixLength)
+  else // bins >= threshold: prefix length is derived exactly as in BinEncoderBase::encodeRemAbsEP
   {
-    const unsigned  maxPrefixLength = 32 - COEF_REMAIN_BIN_REDUCTION - maxLog2TrDynamicRange;
-    unsigned        prefixLength    = 0;
-    unsigned        codeValue       = ( bins >> goRicePar ) - COEF_REMAIN_BIN_REDUCTION;
+    const unsigned  maxPrefixLength = 32 - cutoff - maxLog2TrDynamicRange; // upper bound applied to prefixLength below
+    unsigned        prefixLength = 0;
+    unsigned        codeValue = (bins >> goRicePar) - cutoff; // quotient in excess of cutoff (non-negative because bins >= threshold)
     unsigned        suffixLength;
-    if( codeValue >=  ( ( 1 << maxPrefixLength ) - 1 ) )
+    if (codeValue >= ((1 << maxPrefixLength) - 1)) // capped case: suffix takes exactly maxLog2TrDynamicRange bits
     {
       prefixLength = maxPrefixLength;
       suffixLength = maxLog2TrDynamicRange;
     }
     else
     {
-      while( codeValue > ( ( 2 << prefixLength ) - 2 ) )
+      while (codeValue > ((2 << prefixLength) - 2)) // grow the prefix until codeValue <= (2 << prefixLength) - 2
       {
         prefixLength++;
       }
       suffixLength = prefixLength + goRicePar + 1; //+1 for the separator bit
     }
-    m_EstFracBits += BinProbModelBase::estFracBitsEP( COEF_REMAIN_BIN_REDUCTION + prefixLength + suffixLength );
-  }
-  else
-  {
-    unsigned length = goRicePar;
-    unsigned delta  = 1 << length;
-    bins           -= threshold;
-    while (bins >= delta )
-    {
-      bins -= delta;
-      delta = 1 << (++length);
-    }
-    m_EstFracBits += BinProbModelBase::estFracBitsEP(COEF_REMAIN_BIN_REDUCTION + 1 + (length << 1) - goRicePar);
+    m_EstFracBits += BinProbModelBase::estFracBitsEP(cutoff + prefixLength + suffixLength); // (cutoff + prefixLength) prefix bins plus suffixLength suffix bins
   }
 }
 
@@ -488,13 +450,6 @@ void BitEstimatorBase::align()
   m_EstFracBits &= mask;
 }
 
-void BitEstimatorBase::pcmAlignBits()
-{
-  uint64_t  numCurrBits = ( m_EstFracBits >> SCALE_BITS );
-  uint64_t  filledBytes = ( numCurrBits + 8 ) >> 3; // including aligned_one_bit and aligned_zero_bits
-  unsigned  bitsToAdd   = unsigned( ( filledBytes << 3 ) - numCurrBits );
-  m_EstFracBits        += BinProbModelBase::estFracBitsEP( bitsToAdd );
-}
 
 
 
diff --git a/source/Lib/EncoderLib/BinEncoder.h b/source/Lib/EncoderLib/BinEncoder.h
index 83c108c05e9a2ee58e1bf47ddd7181a573fe9ade..67500723eaac4809945a80156075f2846b8ffbb1 100644
--- a/source/Lib/EncoderLib/BinEncoder.h
+++ b/source/Lib/EncoderLib/BinEncoder.h
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2019, ITU/ISO/IEC
+* Copyright (c) 2010-2020, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -118,12 +118,10 @@ public:
   virtual void      encodeBinsEP      ( unsigned bins,  unsigned numBins  ) = 0;
   virtual void      encodeRemAbsEP    ( unsigned bins,
                                         unsigned goRicePar,
-                                        bool     useLimitedPrefixLength,
+                                        unsigned cutoff,
                                         int      maxLog2TrDynamicRange    ) = 0;
   virtual void      encodeBinTrm      ( unsigned bin                      ) = 0;
-  virtual void      encodeBinsPCM     ( unsigned bins,  unsigned numBins  ) = 0;
   virtual void      align             ()                                    = 0;
-  virtual void      pcmAlignBits      ()                                    = 0;
 public:
   virtual uint32_t  getNumBins        ()                                    = 0;
   virtual bool      isEncoding        ()                                    = 0;
@@ -183,12 +181,10 @@ public:
   void      encodeBinsEP        ( unsigned bins,  unsigned numBins  );
   void      encodeRemAbsEP      ( unsigned bins,
                                   unsigned goRicePar,
-                                  bool     useLimitedPrefixLength,
+                                  unsigned cutoff,
                                   int      maxLog2TrDynamicRange    );
   void      encodeBinTrm        ( unsigned bin                      );
-  void      encodeBinsPCM       ( unsigned bins,  unsigned numBins  );
   void      align               ();
-  void      pcmAlignBits        ();
   unsigned  getNumWrittenBits   () { return ( m_Bitstream->getNumberOfWrittenBits() + 8 * m_numBufferedBytes + 23 - m_bitsLeft ); }
 public:
   uint32_t  getNumBins          ()                          { return BinCounter::getAll(); }
@@ -251,11 +247,9 @@ public:
   void      encodeBinsEP        ( unsigned bins,  unsigned numBins  ) { m_EstFracBits += BinProbModelBase::estFracBitsEP ( numBins ); }
   void      encodeRemAbsEP      ( unsigned bins,
                                   unsigned goRicePar,
-                                  bool     useLimitedPrefixLength,
+                                  unsigned cutoff,
                                   int      maxLog2TrDynamicRange    );
-  void      encodeBinsPCM       ( unsigned bins,  unsigned numBins  ) { m_EstFracBits += BinProbModelBase::estFracBitsEP ( numBins ); }
   void      align               ();
-  void      pcmAlignBits        ();
 public:
   uint32_t  getNumBins          ()                                      { THROW("Not supported"); return 0; }
   bool      isEncoding          ()                                      { return false; }
diff --git a/source/Lib/EncoderLib/CABACWriter.cpp b/source/Lib/EncoderLib/CABACWriter.cpp
index cc7faea2c396aa80f40a856cd35b7d5673afb8ad..51afcfbfca8ab039ac1b9c005cba1b62b2969b1e 100644
--- a/source/Lib/EncoderLib/CABACWriter.cpp
+++ b/source/Lib/EncoderLib/CABACWriter.cpp
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2019, ITU/ISO/IEC
+* Copyright (c) 2010-2020, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -152,51 +152,62 @@ void CABACWriter::end_of_slice()
 //================================================================================
 //  clause 7.3.8.2
 //--------------------------------------------------------------------------------
-//    bool  coding_tree_unit( cs, area, qp, ctuRsAddr, skipSao )
+//    bool  coding_tree_unit( cs, area, qp, ctuRsAddr, skipSao, skipAlf )
 //================================================================================
 
-void CABACWriter::coding_tree_unit( CodingStructure& cs, const UnitArea& area, int (&qps)[2], unsigned ctuRsAddr, bool skipSao /* = false */ )
+void CABACWriter::coding_tree_unit( CodingStructure& cs, const UnitArea& area, int (&qps)[2], unsigned ctuRsAddr, bool skipSao /* = false */, bool skipAlf /* = false */ ) // qps[CH_L] / qps[CH_C] are read as starting QPs and overwritten with the final ones
 {
   CUCtx cuCtx( qps[CH_L] );
-  Partitioner *partitioner = PartitionerFactory::get( *cs.slice );
+  QTBTPartitioner partitioner; // automatic storage: released on every exit path, no delete required
 
-  partitioner->initCtu( area, CH_L, *cs.slice );
+  partitioner.initCtu(area, CH_L, *cs.slice);
 
   if( !skipSao )
   {
     sao( *cs.slice, ctuRsAddr );
   }
 
-  for( int compIdx = 0; compIdx < MAX_NUM_COMPONENT; compIdx++ )
+  if (!skipAlf) // skipAlf bypasses all per-CTU ALF calls in this block
   {
-    codeAlfCtuEnableFlag( cs, ctuRsAddr, compIdx );
+    for (int compIdx = 0; compIdx < MAX_NUM_COMPONENT; compIdx++)
+    {
+      codeAlfCtuEnableFlag(cs, ctuRsAddr, compIdx, nullptr);
+      if (isLuma(ComponentID(compIdx)))
+      {
+        codeAlfCtuFilterIndex(cs, ctuRsAddr, cs.slice->getTileGroupAlfEnabledFlag(COMPONENT_Y));
+      }
+      if (isChroma(ComponentID(compIdx)))
+      {
+        uint8_t* ctbAlfFlag = cs.slice->getTileGroupAlfEnabledFlag(ComponentID(compIdx)) ? cs.slice->getPic()->getAlfCtuEnableFlag( compIdx ) : nullptr; // stays null unless getTileGroupAlfEnabledFlag() is set for this component
+        if( ctbAlfFlag && ctbAlfFlag[ctuRsAddr] ) // alternative is coded only when this CTU's entry is non-zero
+        {
+          codeAlfCtuAlternative( cs, ctuRsAddr, compIdx );
+        }
+      }
+    }
   }
 
   if ( CS::isDualITree(cs) && cs.pcv->chrFormat != CHROMA_400 && cs.pcv->maxCUWidth > 64 )
   {
     CUCtx chromaCuCtx(qps[CH_C]);
-    Partitioner *chromaPartitioner = PartitionerFactory::get(*cs.slice);
-    chromaPartitioner->initCtu(area, CH_C, *cs.slice);
-    coding_tree(cs, *partitioner, cuCtx, chromaPartitioner, &chromaCuCtx);
+    QTBTPartitioner chromaPartitioner; // second partitioner, initialised for CH_C and handed to coding_tree together with the luma one
+    chromaPartitioner.initCtu(area, CH_C, *cs.slice);
+    coding_tree(cs, partitioner, cuCtx, &chromaPartitioner, &chromaCuCtx);
     qps[CH_L] = cuCtx.qp;
     qps[CH_C] = chromaCuCtx.qp;
-
-    delete chromaPartitioner;
   }
   else
   {
-    coding_tree( cs, *partitioner, cuCtx );
+    coding_tree(cs, partitioner, cuCtx);
     qps[CH_L] = cuCtx.qp;
     if( CS::isDualITree( cs ) && cs.pcv->chrFormat != CHROMA_400 )
     {
       CUCtx cuCtxChroma( qps[CH_C] );
-      partitioner->initCtu( area, CH_C, *cs.slice );
-      coding_tree( cs, *partitioner, cuCtxChroma );
+      partitioner.initCtu(area, CH_C, *cs.slice); // the same partitioner is re-initialised for a second pass over CH_C
+      coding_tree(cs, partitioner, cuCtxChroma);
       qps[CH_C] = cuCtxChroma.qp;
     }
   }
-
-  delete partitioner;
 }
 
 
@@ -235,14 +246,9 @@ void CABACWriter::sao( const Slice& slice, unsigned ctuRsAddr )
   int                 rx                      = ctuRsAddr - ry * frame_width_in_ctus;
   const Position      pos                     ( rx * cs.pcv->maxCUWidth, ry * cs.pcv->maxCUHeight );
   const unsigned      curSliceIdx             = slice.getIndependentSliceIdx();
-#if HEVC_TILES_WPP
-  const unsigned      curTileIdx              = cs.picture->tileMap->getTileIdxMap( pos );
-  bool                leftMergeAvail          = cs.getCURestricted( pos.offset( -(int)pcv.maxCUWidth, 0  ), curSliceIdx, curTileIdx, CH_L ) ? true : false;
-  bool                aboveMergeAvail         = cs.getCURestricted( pos.offset( 0, -(int)pcv.maxCUHeight ), curSliceIdx, curTileIdx, CH_L ) ? true : false;
-#else
-  bool                leftMergeAvail          = cs.getCURestricted( pos.offset( -(int)pcv.maxCUWidth, 0  ), curSliceIdx, CH_L ) ? true : false;
-  bool                aboveMergeAvail         = cs.getCURestricted( pos.offset( 0, -(int)pcv.maxCUHeight ), curSliceIdx, CH_L ) ? true : false;
-#endif
+  const unsigned      curTileIdx              = cs.pps->getTileIdx( pos );
+  bool                leftMergeAvail          = cs.getCURestricted( pos.offset( -(int)pcv.maxCUWidth, 0  ), pos, curSliceIdx, curTileIdx, CH_L ) ? true : false;
+  bool                aboveMergeAvail         = cs.getCURestricted( pos.offset( 0, -(int)pcv.maxCUHeight ), pos, curSliceIdx, curTileIdx, CH_L ) ? true : false;
   sao_block_pars( sao_ctu_pars, sps.getBitDepths(), sliceEnabled, leftMergeAvail, aboveMergeAvail, false );
 }
 
@@ -374,7 +380,8 @@ void CABACWriter::coding_tree(const CodingStructure& cs, Partitioner& partitione
   const CodingUnit &cu        = *cs.getCU( currArea.blocks[partitioner.chType], partitioner.chType );
 
   // Reset delta QP coding flag and ChromaQPAdjustemt coding flag
-  if( pps.getUseDQP() && partitioner.currQgEnable() )
+  //Note: do not reset qg at chroma CU
+  if( pps.getUseDQP() && partitioner.currQgEnable() && !isChroma( partitioner.chType ) )
   {
     cuCtx.qgStart    = true;
     cuCtx.isDQPCoded          = false;
@@ -449,6 +456,17 @@ void CABACWriter::coding_tree(const CodingStructure& cs, Partitioner& partitione
       }
       else
       {
+        const ModeType modeTypeParent = partitioner.modeType;
+        const ModeType modeTypeChild = CU::getModeTypeAtDepth( cu, partitioner.currDepth );
+        mode_constraint( splitMode, cs, partitioner, modeTypeChild );
+        partitioner.modeType = modeTypeChild;
+
+        bool chromaNotSplit = modeTypeParent == MODE_TYPE_ALL && modeTypeChild == MODE_TYPE_INTRA ? true : false;
+        CHECK( chromaNotSplit && partitioner.chType != CHANNEL_TYPE_LUMA, "chType must be luma" );
+        if( partitioner.treeType == TREE_D )
+        {
+          partitioner.treeType = chromaNotSplit ? TREE_L : TREE_D;
+        }
       partitioner.splitCurrArea( splitMode, cs );
 
       do
@@ -460,6 +478,22 @@ void CABACWriter::coding_tree(const CodingStructure& cs, Partitioner& partitione
       } while( partitioner.nextPart( cs ) );
 
       partitioner.exitCurrSplit();
+      if( chromaNotSplit )
+      {
+        CHECK( partitioner.chType != CHANNEL_TYPE_LUMA, "must be luma status" );
+        partitioner.chType = CHANNEL_TYPE_CHROMA;
+        partitioner.treeType = TREE_C;
+
+        if( cs.picture->blocks[partitioner.chType].contains( partitioner.currArea().blocks[partitioner.chType].pos() ) )
+        {
+          coding_tree( cs, partitioner, cuCtx );
+        }
+
+        //recover
+        partitioner.chType = CHANNEL_TYPE_LUMA;
+        partitioner.treeType = TREE_D;
+      }
+      partitioner.modeType = modeTypeParent;
       }
       return;
   }
@@ -470,15 +504,45 @@ void CABACWriter::coding_tree(const CodingStructure& cs, Partitioner& partitione
     cuCtx.qgStart = false;
     cuCtx.qp = CU::predictQP( cu, cuCtx.qp );
   }
+  CHECK( cu.treeType != partitioner.treeType, "treeType mismatch" );
 
 
   // coding unit
   coding_unit( cu, partitioner, cuCtx );
 
+  if( cu.chType == CHANNEL_TYPE_CHROMA )
+  {
+    DTRACE_COND( (isEncoding()), g_trace_ctx, D_QP, "[chroma CU]x=%d, y=%d, w=%d, h=%d, qp=%d\n", cu.Cb().x, cu.Cb().y, cu.Cb().width, cu.Cb().height, cu.qp );
+  }
+  else
+  {
   DTRACE_COND( ( isEncoding() ), g_trace_ctx, D_QP, "x=%d, y=%d, w=%d, h=%d, qp=%d\n", cu.Y().x, cu.Y().y, cu.Y().width, cu.Y().height, cu.qp );
+  }
   DTRACE_BLOCK_REC_COND( ( !isEncoding() ), cs.picture->getRecoBuf( cu ), cu, cu.predMode );
 }
 
+void CABACWriter::mode_constraint( const PartSplit split, const CodingStructure& cs, Partitioner& partitioner, const ModeType modeType ) // encodes the mode_cons_flag bin only when signalModeCons() reports LDT_MODE_TYPE_SIGNAL; otherwise just validates modeType
+{
+  CHECK( split == CU_DONT_SPLIT, "splitMode shall not be no split" );
+  const int val = cs.signalModeCons( split, partitioner, partitioner.modeType ); // evaluated against the parent's mode type still held by the partitioner
+  if( val == LDT_MODE_TYPE_SIGNAL )
+  {
+    CHECK( modeType == MODE_TYPE_ALL, "shall not be no constraint case" );
+    const bool flag = modeType == MODE_TYPE_INTRA; // bin value: 1 for MODE_TYPE_INTRA, 0 for the remaining child mode type
+    const int ctxIdx = DeriveCtx::CtxModeConsFlag( cs, partitioner );
+    m_BinEncoder.encodeBin( flag, Ctx::ModeConsFlag( ctxIdx ) );
+    DTRACE( g_trace_ctx, D_SYNTAX, "mode_cons_flag() flag=%d\n", flag );
+  }
+  else if( val == LDT_MODE_TYPE_INFER )
+  {
+    CHECK( modeType != MODE_TYPE_INTRA, "inferred mode type shall be intra" ); // nothing is written; CHECK (unlike assert) stays active when NDEBUG is defined
+  }
+  else
+  {
+    CHECK( modeType != partitioner.modeType, "mode type shall be inherited from the parent" ); // nothing is written; child must keep the parent's mode type
+  }
+}
+
 void CABACWriter::split_cu_mode( const PartSplit split, const CodingStructure& cs, Partitioner& partitioner )
 {
   bool canNo, canQt, canBh, canBv, canTh, canTv;
@@ -544,12 +608,9 @@ void CABACWriter::split_cu_mode( const PartSplit split, const CodingStructure& c
 //  clause 7.3.8.5
 //--------------------------------------------------------------------------------
 //    void  coding_unit               ( cu, partitioner, cuCtx )
-//    void  cu_transquant_bypass_flag ( cu )
 //    void  cu_skip_flag              ( cu )
 //    void  pred_mode                 ( cu )
 //    void  part_mode                 ( cu )
-//    void  pcm_flag                  ( cu )
-//    void  pcm_samples               ( tu )
 //    void  cu_pred_data              ( pus )
 //    void  cu_lic_flag               ( cu )
 //    void  intra_luma_pred_modes     ( pus )
@@ -561,13 +622,8 @@ void CABACWriter::split_cu_mode( const PartSplit split, const CodingStructure& c
 
 void CABACWriter::coding_unit( const CodingUnit& cu, Partitioner& partitioner, CUCtx& cuCtx )
 {
+  DTRACE( g_trace_ctx, D_SYNTAX, "coding_unit() treeType=%d modeType=%d\n", cu.treeType, cu.modeType );
   CodingStructure& cs = *cu.cs;
-  cs.chType = partitioner.chType;
-  // transquant bypass flag
-  if( cs.pps->getTransquantBypassEnabledFlag() )
-  {
-    cu_transquant_bypass_flag( cu );
-  }
 
   // skip flag
   if ((!cs.slice->isIntra() || cs.slice->getSPS()->getIBCFlag()) && cu.Y().valid())
@@ -580,43 +636,44 @@ void CABACWriter::coding_unit( const CodingUnit& cu, Partitioner& partitioner, C
   if( cu.skip )
   {
     CHECK( !cu.firstPU->mergeFlag, "Merge flag has to be on!" );
+    CHECK(cu.colorTransform, "ACT should not be enabled for skip mode");
     PredictionUnit&   pu = *cu.firstPU;
     prediction_unit ( pu );
     end_of_ctu      ( cu, cuCtx );
     return;
   }
 
-#if !FIX_PCM
-  // pcm samples
-  if( CU::isIntra(cu) )
-  {
-    pcm_data( cu, partitioner );
-    if( cu.ipcm )
-    {
-      end_of_ctu( cu, cuCtx );
-      return;
-    }
-  }
-#endif
 
   // prediction mode and partitioning data
   pred_mode ( cu );
-
-#if FIX_PCM
-  // pcm samples
-  if( CU::isIntra(cu) )
+  if (CU::isIntra(cu))
+  {
+    adaptive_color_transform(cu);
+  }
+  if (CU::isPLT(cu))
   {
-    pcm_data( cu, partitioner );
-    if( cu.ipcm )
+    CHECK(cu.colorTransform, "ACT should not be enabled for PLT mode");
+    if (cu.isSepTree())
     {
-      end_of_ctu( cu, cuCtx );
-      return;
+      if (isLuma(partitioner.chType))
+      {
+        cu_palette_info(cu, COMPONENT_Y, 1, cuCtx);
+      }
+      if (cu.chromaFormat != CHROMA_400 && (partitioner.chType == CHANNEL_TYPE_CHROMA))
+      {
+        cu_palette_info(cu, COMPONENT_Cb, 2, cuCtx);
+      }
     }
+    else
+    {
+      cu_palette_info(cu, COMPONENT_Y, 3, cuCtx);
+    }
+    end_of_ctu(cu, cuCtx);
+    return;
   }
-#endif
-  extend_ref_line(cu);
-
-  isp_mode( cu );
+  bdpcm_mode( cu, ComponentID( partitioner.chType ) );
+  if (!CS::isDualITree(cs) && isLuma(partitioner.chType))
+      bdpcm_mode(cu, ComponentID(CHANNEL_TYPE_CHROMA));
 
   // prediction data ( intra prediction modes / reference indexes + motion vectors )
   cu_pred_data( cu );
@@ -629,97 +686,132 @@ void CABACWriter::coding_unit( const CodingUnit& cu, Partitioner& partitioner, C
 }
 
 
-void CABACWriter::cu_transquant_bypass_flag( const CodingUnit& cu )
-{
-  m_BinEncoder.encodeBin( (cu.transQuantBypass), Ctx::TransquantBypassFlag() );
-}
-
-
 void CABACWriter::cu_skip_flag( const CodingUnit& cu )
 {
   unsigned ctxId = DeriveCtx::CtxSkipFlag( cu );
 
-  if (cu.slice->isIntra() && cu.cs->slice->getSPS()->getIBCFlag())
+  if ((cu.slice->isIntra() || cu.isConsIntra()) && cu.cs->slice->getSPS()->getIBCFlag())
   {
+    if (cu.lwidth() < 128 && cu.lheight() < 128) // disable IBC mode larger than 64x64
+    {
     m_BinEncoder.encodeBin((cu.skip), Ctx::SkipFlag(ctxId));
     DTRACE(g_trace_ctx, D_SYNTAX, "cu_skip_flag() ctx=%d skip=%d\n", ctxId, cu.skip ? 1 : 0);
+    }
+    return;
+  }
+  if ( !cu.cs->slice->getSPS()->getIBCFlag() && cu.lwidth() == 4 && cu.lheight() == 4 )
+  {
+    return;
+  }
+  if( !cu.cs->slice->getSPS()->getIBCFlag() && cu.isConsIntra() )
+  {
     return;
   }
-
   m_BinEncoder.encodeBin( ( cu.skip ), Ctx::SkipFlag( ctxId ) );
 
   DTRACE( g_trace_ctx, D_SYNTAX, "cu_skip_flag() ctx=%d skip=%d\n", ctxId, cu.skip ? 1 : 0 );
   if (cu.skip && cu.cs->slice->getSPS()->getIBCFlag())
   {
+    if (cu.lwidth() < 128 && cu.lheight() < 128 && !cu.isConsInter()) // disable IBC mode larger than 64x64 and disable IBC when only allowing inter mode
+    {
+      if ( cu.lwidth() == 4 && cu.lheight() == 4 )
+      {
+        return;
+      }
     unsigned ctxidx = DeriveCtx::CtxIBCFlag(cu);
     m_BinEncoder.encodeBin(CU::isIBC(cu) ? 1 : 0, Ctx::IBCFlag(ctxidx));
     DTRACE(g_trace_ctx, D_SYNTAX, "ibc() ctx=%d cu.predMode=%d\n", ctxidx, cu.predMode);
-#if !JVET_MMVD_OFF_MACRO
-    if (CU::isInter(cu))
-    {
-      m_BinEncoder.encodeBin(cu.mmvdSkip, Ctx::MmvdFlag(0));
-      DTRACE(g_trace_ctx, D_SYNTAX, "mmvd_cu_skip_flag() ctx=%d mmvd_skip=%d\n", 0, cu.mmvdSkip ? 1 : 0);
     }
-#endif
-  }
-#if !JVET_MMVD_OFF_MACRO
-  if (cu.skip && !cu.cs->slice->getSPS()->getIBCFlag())
-  {
-    m_BinEncoder.encodeBin(cu.mmvdSkip, Ctx::MmvdFlag(0));
-    DTRACE(g_trace_ctx, D_SYNTAX, "mmvd_cu_skip_flag() ctx=%d mmvd_skip=%d\n", 0, cu.mmvdSkip ? 1 : 0);
   }
-#endif
 }
 
 
 void CABACWriter::pred_mode( const CodingUnit& cu )
 {
-  if (cu.cs->slice->getSPS()->getIBCFlag())
+  if (cu.cs->slice->getSPS()->getIBCFlag() && cu.chType != CHANNEL_TYPE_CHROMA)
   {
-    if (cu.cs->slice->isIntra())
+    if( cu.isConsInter() )
+    {
+      assert( CU::isInter( cu ) );
+      return;
+    }
+
+    if ( cu.cs->slice->isIntra() || ( cu.lwidth() == 4 && cu.lheight() == 4 ) || cu.isConsIntra() )
     {
+      if (cu.lwidth() < 128 && cu.lheight() < 128) // disable IBC mode larger than 64x64
+      {
       unsigned ctxidx = DeriveCtx::CtxIBCFlag(cu);
       m_BinEncoder.encodeBin(CU::isIBC(cu), Ctx::IBCFlag(ctxidx));
+      }
+      if (!CU::isIBC(cu) && cu.cs->slice->getSPS()->getPLTMode() && cu.lwidth() <= 64 && cu.lheight() <= 64)
+      {
+        m_BinEncoder.encodeBin(CU::isPLT(cu), Ctx::PLTFlag(0));
+      }
     }
     else
     {
-      m_BinEncoder.encodeBin((CU::isIntra(cu)), Ctx::PredMode(DeriveCtx::CtxPredModeFlag(cu)));
-      if (!CU::isIntra(cu))
+      if( cu.isConsInter() )
+      {
+        return;
+      }
+      m_BinEncoder.encodeBin((CU::isIntra(cu) || CU::isPLT(cu)), Ctx::PredMode(DeriveCtx::CtxPredModeFlag(cu)));
+      if (CU::isIntra(cu) || CU::isPLT(cu))
       {
+        if (cu.cs->slice->getSPS()->getPLTMode() && cu.lwidth() <= 64 && cu.lheight() <= 64)
+          m_BinEncoder.encodeBin(CU::isPLT(cu), Ctx::PLTFlag(0));
+      }
+      else
+      {
+        if (cu.lwidth() < 128 && cu.lheight() < 128) // disable IBC mode larger than 64x64
+        {
         unsigned ctxidx = DeriveCtx::CtxIBCFlag(cu);
         m_BinEncoder.encodeBin(CU::isIBC(cu), Ctx::IBCFlag(ctxidx));
+        }
       }
     }
   }
   else
   {
-    if (cu.cs->slice->isIntra())
+    if( cu.isConsInter() )
+    {
+      assert( CU::isInter( cu ) );
+      return;
+    }
+
+    if ( cu.cs->slice->isIntra() || ( cu.lwidth() == 4 && cu.lheight() == 4 ) || cu.isConsIntra() )
     {
+      if (cu.cs->slice->getSPS()->getPLTMode() && cu.lwidth() <= 64 && cu.lheight() <= 64)
+        m_BinEncoder.encodeBin((CU::isPLT(cu)), Ctx::PLTFlag(0));
       return;
     }
-    m_BinEncoder.encodeBin((CU::isIntra(cu)), Ctx::PredMode(DeriveCtx::CtxPredModeFlag(cu)));
+    m_BinEncoder.encodeBin((CU::isIntra(cu) || CU::isPLT(cu)), Ctx::PredMode(DeriveCtx::CtxPredModeFlag(cu)));
+    if ((CU::isIntra(cu) || CU::isPLT(cu)) && cu.cs->slice->getSPS()->getPLTMode() && cu.lwidth() <= 64 && cu.lheight() <= 64)
+    {
+      m_BinEncoder.encodeBin((CU::isPLT(cu)), Ctx::PLTFlag(0));
+    }
   }
 }
-
-void CABACWriter::pcm_data( const CodingUnit& cu, Partitioner& partitioner  )
+void CABACWriter::bdpcm_mode( const CodingUnit& cu, const ComponentID compID )
 {
-  pcm_flag( cu, partitioner );
-  if( cu.ipcm )
+  if( cu.cs->sps->getBDPCMEnabled() == 0 ) return;
+  if( !CU::bdpcmAllowed( cu, compID ) ) return;
+
+  int bdpcmMode = isLuma(compID) ? cu.bdpcmMode : cu.bdpcmModeChroma;
+
+  m_BinEncoder.encodeBin(bdpcmMode > 0 ? 1 : 0, Ctx::BDPCMMode(0));
+
+  if (bdpcmMode)
   {
-    m_BinEncoder.pcmAlignBits();
-    pcm_samples( *cu.firstTU );
+    m_BinEncoder.encodeBin(bdpcmMode > 1 ? 1 : 0, Ctx::BDPCMMode(1));
   }
-}
-
-void CABACWriter::pcm_flag( const CodingUnit& cu, Partitioner& partitioner )
-{
-  const SPS& sps = *cu.cs->sps;
-  if( !sps.getPCMEnabledFlag() || partitioner.currArea().lwidth() > (1 << sps.getPCMLog2MaxSize()) || partitioner.currArea().lwidth() < (1 << sps.getPCMLog2MinSize())
-      || partitioner.currArea().lheight() > (1 << sps.getPCMLog2MaxSize()) || partitioner.currArea().lheight() < (1 << sps.getPCMLog2MinSize()) )
+  if (isLuma(compID))
   {
-    return;
+    DTRACE(g_trace_ctx, D_SYNTAX, "bdpcm_mode(%d) x=%d, y=%d, w=%d, h=%d, bdpcm=%d\n", CHANNEL_TYPE_LUMA, cu.lumaPos().x, cu.lumaPos().y, cu.lwidth(), cu.lheight(), cu.bdpcmMode);
+  }
+  else
+  {
+    DTRACE(g_trace_ctx, D_SYNTAX, "bdpcm_mode(%d) x=%d, y=%d, w=%d, h=%d, bdpcm=%d\n", CHANNEL_TYPE_CHROMA, cu.chromaPos().x, cu.chromaPos().y, cu.chromaSize().width, cu.chromaSize().height, cu.bdpcmModeChroma);
   }
-  m_BinEncoder.encodeBinTrm( cu.ipcm );
 }
 
 
@@ -743,48 +835,44 @@ void CABACWriter::cu_pred_data( const CodingUnit& cu )
   imv_mode   ( cu );
   affine_amvr_mode( cu );
 
-  cu_gbi_flag( cu );
+  cu_bcw_flag( cu );
 
 }
 
-void CABACWriter::cu_gbi_flag(const CodingUnit& cu)
+void CABACWriter::cu_bcw_flag(const CodingUnit& cu)
 {
-  if(!CU::isGBiIdxCoded(cu))
+  if(!CU::isBcwIdxCoded(cu))
   {
     return;
   }
 
-  CHECK(!(GBI_NUM > 1 && (GBI_NUM == 2 || (GBI_NUM & 0x01) == 1)), " !( GBI_NUM > 1 && ( GBI_NUM == 2 || ( GBI_NUM & 0x01 ) == 1 ) ) ");
-  const uint8_t gbiCodingIdx = (uint8_t)g_GbiCodingOrder[CU::getValidGbiIdx(cu)];
-
-  const int32_t numGBi = (cu.slice->getCheckLDC()) ? 5 : 3;
-
-  m_BinEncoder.encodeBin((gbiCodingIdx == 0 ? 1 : 0), Ctx::GBiIdx(0));
+  CHECK(!(BCW_NUM > 1 && (BCW_NUM == 2 || (BCW_NUM & 0x01) == 1)), " !( BCW_NUM > 1 && ( BCW_NUM == 2 || ( BCW_NUM & 0x01 ) == 1 ) ) ");
+  const uint8_t bcwCodingIdx = (uint8_t)g_BcwCodingOrder[CU::getValidBcwIdx(cu)];
 
-  if(numGBi > 2 && gbiCodingIdx != 0)
+  const int32_t numBcw = (cu.slice->getCheckLDC()) ? 5 : 3;
+  m_BinEncoder.encodeBin((bcwCodingIdx == 0 ? 0 : 1), Ctx::BcwIdx(0));
+  if(numBcw > 2 && bcwCodingIdx != 0)
   {
-    const uint32_t prefixNumBits = numGBi - 2;
+    const uint32_t prefixNumBits = numBcw - 2;
     const uint32_t step = 1;
 
-    int ctxIdGBi = 4;
     uint8_t idx = 1;
     for(int ui = 0; ui < prefixNumBits; ++ui)
     {
-      if (gbiCodingIdx == idx)
+      if (bcwCodingIdx == idx)
       {
-        m_BinEncoder.encodeBin(1, Ctx::GBiIdx(ctxIdGBi));
+        m_BinEncoder.encodeBinEP(0);
         break;
       }
       else
       {
-        m_BinEncoder.encodeBin(0, Ctx::GBiIdx(ctxIdGBi));
-        ctxIdGBi += step;
+        m_BinEncoder.encodeBinEP(1);
         idx += step;
       }
     }
   }
 
-  DTRACE(g_trace_ctx, D_SYNTAX, "cu_gbi_flag() gbi_idx=%d\n", cu.GBiIdx ? 1 : 0);
+  DTRACE(g_trace_ctx, D_SYNTAX, "cu_bcw_flag() bcw_idx=%d\n", cu.BcwIdx ? 1 : 0);
 }
 
 void CABACWriter::xWriteTruncBinCode(uint32_t symbol, uint32_t maxSymbol)
@@ -827,12 +915,13 @@ void CABACWriter::xWriteTruncBinCode(uint32_t symbol, uint32_t maxSymbol)
 
 void CABACWriter::extend_ref_line(const PredictionUnit& pu)
 {
-#if !ENABLE_JVET_L0283_MRL
-  return;
-#endif
 
   const CodingUnit& cu = *pu.cu;
-  if (!cu.Y().valid() || cu.predMode != MODE_INTRA || !isLuma(cu.chType))
+  if( !cu.Y().valid() || cu.predMode != MODE_INTRA || !isLuma( cu.chType ) || cu.bdpcmMode )
+  {
+    return;
+  }
+  if( !cu.cs->sps->getUseMRL() )
   {
     return;
   }
@@ -848,21 +937,18 @@ void CABACWriter::extend_ref_line(const PredictionUnit& pu)
     if (MRL_NUM_REF_LINES > 2 && multiRefIdx != MULTI_REF_LINE_IDX[0])
     {
       m_BinEncoder.encodeBin(multiRefIdx != MULTI_REF_LINE_IDX[1], Ctx::MultiRefLineIdx(1));
-      if (MRL_NUM_REF_LINES > 3 && multiRefIdx != MULTI_REF_LINE_IDX[1])
-      {
-        m_BinEncoder.encodeBin(multiRefIdx != MULTI_REF_LINE_IDX[2], Ctx::MultiRefLineIdx(2));
-      }
     }
   }
 }
 
 void CABACWriter::extend_ref_line(const CodingUnit& cu)
 {
-#if !ENABLE_JVET_L0283_MRL
-  return;
-#endif
 
-  if (!cu.Y().valid() || cu.predMode != MODE_INTRA || !isLuma(cu.chType) || cu.ipcm)
+  if ( !cu.Y().valid() || cu.predMode != MODE_INTRA || !isLuma(cu.chType) || cu.bdpcmMode )
+  {
+    return;
+  }
+  if( !cu.cs->sps->getUseMRL() )
   {
     return;
   }
@@ -884,10 +970,6 @@ void CABACWriter::extend_ref_line(const CodingUnit& cu)
       if (MRL_NUM_REF_LINES > 2 && multiRefIdx != MULTI_REF_LINE_IDX[0])
       {
         m_BinEncoder.encodeBin(multiRefIdx != MULTI_REF_LINE_IDX[1], Ctx::MultiRefLineIdx(1));
-        if (MRL_NUM_REF_LINES > 3 && multiRefIdx != MULTI_REF_LINE_IDX[1])
-        {
-          m_BinEncoder.encodeBin(multiRefIdx != MULTI_REF_LINE_IDX[2], Ctx::MultiRefLineIdx(2));
-        }
       }
 
     }
@@ -902,6 +984,22 @@ void CABACWriter::intra_luma_pred_modes( const CodingUnit& cu )
     return;
   }
 
+  if( cu.bdpcmMode )
+  {
+    cu.firstPU->intraDir[0] = cu.bdpcmMode == 2? VER_IDX : HOR_IDX;
+    return;
+  }
+
+  mip_flag(cu);
+  if (cu.mipFlag)
+  {
+    mip_pred_modes(cu);
+    return;
+  }
+  extend_ref_line( cu );
+
+  isp_mode( cu );
+
   const int numMPMs   = NUM_MOST_PROBABLE_MODES;
   const int numBlocks = CU::getNumPUs( cu );
   unsigned  mpm_preds   [4][numMPMs];
@@ -929,7 +1027,7 @@ void CABACWriter::intra_luma_pred_modes( const CodingUnit& cu )
         break;
       }
     }
-    if( pu->multiRefIdx || ( cu.ispMode && isLuma( cu.chType ) ) )
+    if ( pu->multiRefIdx )
     {
       CHECK(mpm_idx >= numMPMs, "use of non-MPM");
     }
@@ -950,7 +1048,9 @@ void CABACWriter::intra_luma_pred_modes( const CodingUnit& cu )
     if( mpm_idx < numMPMs )
     {
       {
-        m_BinEncoder.encodeBinEP( mpm_idx > 0 );
+        unsigned ctx = (pu->cu->ispMode == NOT_INTRA_SUBPARTITIONS ? 1 : 0);
+        if (pu->multiRefIdx == 0)
+          m_BinEncoder.encodeBin(mpm_idx > 0, Ctx::IntraLumaPlanarFlag(ctx));
         if( mpm_idx )
         {
           m_BinEncoder.encodeBinEP( mpm_idx > 1 );
@@ -999,6 +1099,16 @@ void CABACWriter::intra_luma_pred_modes( const CodingUnit& cu )
 void CABACWriter::intra_luma_pred_mode( const PredictionUnit& pu )
 {
 
+  if( pu.cu->bdpcmMode ) return;
+  mip_flag(*pu.cu);
+  if (pu.cu->mipFlag)
+  {
+    mip_pred_mode(pu);
+    return;
+  }
+  extend_ref_line( pu );
+  isp_mode( *pu.cu );
+
   // prev_intra_luma_pred_flag
   const int numMPMs  = NUM_MOST_PROBABLE_MODES;
   unsigned  mpm_pred[numMPMs];
@@ -1016,7 +1126,7 @@ void CABACWriter::intra_luma_pred_mode( const PredictionUnit& pu )
       break;
     }
   }
-  if( pu.multiRefIdx || ( pu.cu->ispMode && isLuma( pu.cu->chType ) ) )
+  if ( pu.multiRefIdx )
   {
     CHECK(mpm_idx >= numMPMs, "use of non-MPM");
   }
@@ -1029,7 +1139,9 @@ void CABACWriter::intra_luma_pred_mode( const PredictionUnit& pu )
   if( mpm_idx < numMPMs )
   {
     {
-      m_BinEncoder.encodeBinEP( mpm_idx > 0 );
+      unsigned ctx = (pu.cu->ispMode == NOT_INTRA_SUBPARTITIONS ? 1 : 0);
+      if (pu.multiRefIdx == 0)
+        m_BinEncoder.encodeBin( mpm_idx > 0, Ctx::IntraLumaPlanarFlag(ctx) );
       if( mpm_idx )
       {
         m_BinEncoder.encodeBinEP( mpm_idx > 1 );
@@ -1067,7 +1179,7 @@ void CABACWriter::intra_luma_pred_mode( const PredictionUnit& pu )
 
 void CABACWriter::intra_chroma_pred_modes( const CodingUnit& cu )
 {
-  if( cu.chromaFormat == CHROMA_400 || ( CS::isDualITree( *cu.cs ) && cu.chType == CHANNEL_TYPE_LUMA ) )
+  if( cu.chromaFormat == CHROMA_400 || ( cu.isSepTree() && cu.chType == CHANNEL_TYPE_LUMA ) )
   {
     return;
   }
@@ -1076,70 +1188,83 @@ void CABACWriter::intra_chroma_pred_modes( const CodingUnit& cu )
 
   intra_chroma_pred_mode( *pu );
 }
-
-void CABACWriter::intra_chroma_lmc_mode( const PredictionUnit& pu )
+void CABACWriter::intra_chroma_lmc_mode(const PredictionUnit& pu)
 {
   const unsigned intraDir = pu.intraDir[1];
-    int lmModeList[10];
-    int maxSymbol = PU::getLMSymbolList( pu, lmModeList );
-    int symbol    = -1;
-    for ( int k = 0; k < LM_SYMBOL_NUM; k++ )
+  int lmModeList[10];
+  PU::getLMSymbolList(pu, lmModeList);
+  int symbol = -1;
+  for (int k = 0; k < LM_SYMBOL_NUM; k++)
+  {
+    if (lmModeList[k] == intraDir)
     {
-      if ( lmModeList[k] == intraDir || ( lmModeList[k] == -1 && intraDir < LM_CHROMA_IDX ) )
-      {
-        symbol = k;
-        break;
-      }
+      symbol = k;
+      break;
     }
-    CHECK( symbol < 0, "invalid symbol found" );
+  }
+  CHECK(symbol < 0, "invalid symbol found");
+
+  m_BinEncoder.encodeBin(symbol == 0 ? 0 : 1, Ctx::CclmModeIdx(0));
 
-    unary_max_symbol(symbol, Ctx::IntraChromaPredMode(1), Ctx::IntraChromaPredMode(2), maxSymbol - 1);
+  if (symbol > 0)
+  {
+    CHECK(symbol > 2, "invalid symbol for MMLM");
+    unsigned int symbol_minus_1 = symbol - 1;
+    m_BinEncoder.encodeBinEP(symbol_minus_1);
+  }
 }
 
 
-void CABACWriter::intra_chroma_pred_mode( const PredictionUnit& pu )
+void CABACWriter::intra_chroma_pred_mode(const PredictionUnit& pu)
 {
-  const unsigned intraDir = pu.intraDir[1];
-  const bool     isDerivedMode = intraDir == DM_CHROMA_IDX;
-
-  m_BinEncoder.encodeBin(isDerivedMode ? 0 : 1, Ctx::IntraChromaPredMode(0));
+  if (pu.cu->bdpcmModeChroma)
+  {
+      return;
+  }
 
-  if (isDerivedMode)
+  const unsigned intraDir = pu.intraDir[1];
+  if (pu.cu->colorTransform)
   {
+    CHECK(pu.intraDir[CHANNEL_TYPE_CHROMA] != DM_CHROMA_IDX, "chroma should use DM for adaptive color transform");
     return;
   }
-
-  // LM chroma mode
-  if( pu.cs->sps->getUseLMChroma() )
+  if (pu.cs->sps->getUseLMChroma() && pu.cu->checkCCLMAllowed())
   {
-    intra_chroma_lmc_mode( pu );
-    if ( PU::isLMCMode( intraDir ) )
+    m_BinEncoder.encodeBin(PU::isLMCMode(intraDir) ? 1 : 0, Ctx::CclmModeFlag(0));
+    if (PU::isLMCMode(intraDir))
     {
+      intra_chroma_lmc_mode(pu);
       return;
     }
   }
 
+  const bool     isDerivedMode = intraDir == DM_CHROMA_IDX;
+  m_BinEncoder.encodeBin(isDerivedMode ? 0 : 1, Ctx::IntraChromaPredMode(0));
+  if (isDerivedMode)
+  {
+    return;
+  }
+
   // chroma candidate index
-  unsigned chromaCandModes[ NUM_CHROMA_MODE ];
-  PU::getIntraChromaCandModes( pu, chromaCandModes );
+  unsigned chromaCandModes[NUM_CHROMA_MODE];
+  PU::getIntraChromaCandModes(pu, chromaCandModes);
 
   int candId = 0;
-  for ( ; candId < NUM_CHROMA_MODE; candId++ )
+  for (; candId < NUM_CHROMA_MODE; candId++)
   {
-    if( intraDir == chromaCandModes[ candId ] )
+    if (intraDir == chromaCandModes[candId])
     {
       break;
     }
   }
 
-  CHECK( candId >= NUM_CHROMA_MODE, "Chroma prediction mode index out of bounds" );
-  CHECK( chromaCandModes[ candId ] == DM_CHROMA_IDX, "The intra dir cannot be DM_CHROMA for this path" );
+  CHECK(candId >= NUM_CHROMA_MODE, "Chroma prediction mode index out of bounds");
+  CHECK(chromaCandModes[candId] == DM_CHROMA_IDX, "The intra dir cannot be DM_CHROMA for this path");
   {
-    m_BinEncoder.encodeBinsEP( candId, 2 );
+    m_BinEncoder.encodeBinsEP(candId, 2);
   }
 }
 
-
 void CABACWriter::cu_residual( const CodingUnit& cu, Partitioner& partitioner, CUCtx& cuCtx )
 {
   if (!CU::isIntra(cu))
@@ -1156,21 +1281,33 @@ void CABACWriter::cu_residual( const CodingUnit& cu, Partitioner& partitioner, C
 
     if( !cu.rootCbf )
     {
+      CHECK(cu.colorTransform, "ACT should not be enabled for root_cbf = 0");
       return;
     }
   }
 
+  if (CU::isInter(cu) || CU::isIBC(cu))
+  {
+    adaptive_color_transform(cu);
+  }
+
+  cuCtx.violatesLfnstConstrained[CHANNEL_TYPE_LUMA]   = false;
+  cuCtx.violatesLfnstConstrained[CHANNEL_TYPE_CHROMA] = false;
+  cuCtx.lfnstLastScanPos                              = false;
+  cuCtx.violatesMtsCoeffConstraint                    = false;
 
-  ChromaCbfs chromaCbfs;
   if( cu.ispMode && isLuma( partitioner.chType ) )
   {
     TUIntraSubPartitioner subTuPartitioner( partitioner );
-    transform_tree( *cu.cs, subTuPartitioner, cuCtx, chromaCbfs, CU::getISPType( cu, getFirstComponentOfChannel( partitioner.chType ) ), 0 );
+    transform_tree( *cu.cs, subTuPartitioner, cuCtx,             CU::getISPType( cu, getFirstComponentOfChannel( partitioner.chType)  ), 0 );
   }
   else
   {
-    transform_tree( *cu.cs, partitioner, cuCtx, chromaCbfs );
+    transform_tree( *cu.cs, partitioner, cuCtx );
   }
+
+  residual_lfnst_mode( cu, cuCtx );
+  mts_idx            ( cu, &cuCtx );
 }
 
 void CABACWriter::rqt_root_cbf( const CodingUnit& cu )
@@ -1180,6 +1317,25 @@ void CABACWriter::rqt_root_cbf( const CodingUnit& cu )
   DTRACE( g_trace_ctx, D_SYNTAX, "rqt_root_cbf() ctx=0 root_cbf=%d pos=(%d,%d)\n", cu.rootCbf ? 1 : 0, cu.lumaPos().x, cu.lumaPos().y );
 }
 
+void CABACWriter::adaptive_color_transform(const CodingUnit& cu) // Writes cu.colorTransform as one context-coded bin (Ctx::ACTFlag) when the checks below allow it.
+{
+  if (!cu.slice->getSPS()->getUseColorTrans()) // nothing is written when the SPS reports getUseColorTrans() == false
+  {
+    return;
+  }
+
+  if (cu.isSepTree()) // separate-tree CUs write no flag; colorTransform is required to be 0 already
+  {
+    CHECK(cu.colorTransform, "adaptive color transform should be disabled when dualtree and localtree are enabled");
+    return;
+  }
+
+  if (CU::isInter(cu) || CU::isIBC(cu) || CU::isIntra(cu)) // only inter, IBC and intra CUs signal the flag
+  {
+    m_BinEncoder.encodeBin(cu.colorTransform, Ctx::ACTFlag());
+  }
+}
+
 void CABACWriter::sbt_mode( const CodingUnit& cu )
 {
   uint8_t sbtAllowed = cu.checkAllowedSbt();
@@ -1239,112 +1395,368 @@ void CABACWriter::sbt_mode( const CodingUnit& cu )
 
 void CABACWriter::end_of_ctu( const CodingUnit& cu, CUCtx& cuCtx )
 {
-  const Slice*  slice             = cu.cs->slice;
-#if HEVC_TILES_WPP
-  const TileMap& tileMap          = *cu.cs->picture->tileMap;
-  const int     currentCTUTsAddr  = tileMap.getCtuRsToTsAddrMap( CU::getCtuAddr( cu ) );
-#else
-  const int     currentCTUTsAddr  = CU::getCtuAddr( cu );
-#endif
   const bool    isLastSubCUOfCtu  = CU::isLastSubCUOfCtu( cu );
 
   if ( isLastSubCUOfCtu
-    && ( !CS::isDualITree( *cu.cs ) || cu.chromaFormat == CHROMA_400 || isChroma( cu.chType ) )
+    && ( !cu.isSepTree() || cu.chromaFormat == CHROMA_400 || isChroma( cu.chType ) )
       )
   {
     cuCtx.isDQPCoded = ( cu.cs->pps->getUseDQP() && !cuCtx.isDQPCoded );
 
-    // The 1-terminating bit is added to all streams, so don't add it here when it's 1.
-    // i.e. when the slice segment CurEnd CTU address is the current CTU address+1.
-#if HEVC_DEPENDENT_SLICES
-    if( slice->getSliceSegmentCurEndCtuTsAddr() != currentCTUTsAddr + 1 )
-#else
-    if(slice->getSliceCurEndCtuTsAddr() != currentCTUTsAddr + 1)
-#endif
-    {
-      m_BinEncoder.encodeBinTrm( 0 );
-    }
   }
 }
 
-
-
-
-
-//================================================================================
-//  clause 7.3.8.6
-//--------------------------------------------------------------------------------
-//    void  prediction_unit ( pu );
-//    void  merge_flag      ( pu );
-//    void  merge_idx       ( pu );
-//    void  inter_pred_idc  ( pu );
-//    void  ref_idx         ( pu, refList );
-//    void  mvp_flag        ( pu, refList );
-//================================================================================
-
-void CABACWriter::prediction_unit( const PredictionUnit& pu )
+void CABACWriter::cu_palette_info(const CodingUnit& cu, ComponentID compBegin, uint32_t numComp, CUCtx& cuCtx)
 {
-#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM
-  CHECK( pu.cacheUsed, "Processing a PU that should be in cache!" );
-  CHECK( pu.cu->cacheUsed, "Processing a CU that should be in cache!" );
+  const SPS&       sps = *(cu.cs->sps);
+  TransformUnit&   tu = *cu.firstTU;
+  uint32_t indexMaxSize = cu.useEscape[compBegin] ? (cu.curPLTSize[compBegin] + 1) : cu.curPLTSize[compBegin];
 
-#endif
-  if( pu.cu->skip )
+  if (cu.lastPLTSize[compBegin])
   {
-    CHECK( !pu.mergeFlag, "merge_flag must be true for skipped CUs" );
+    xEncodePLTPredIndicator(cu, MAXPLTSIZE, compBegin);
   }
-  else
+
+  uint32_t reusedPLTnum = 0;
+  for (int idx = 0; idx < cu.lastPLTSize[compBegin]; idx++)
   {
-    merge_flag( pu );
+    if (cu.reuseflag[compBegin][idx])
+      reusedPLTnum++;
   }
-  if( pu.mergeFlag )
+
+  if (reusedPLTnum < MAXPLTSIZE)
   {
-    if (CU::isIBC(*pu.cu))
+    exp_golomb_eqprob(cu.curPLTSize[compBegin] - reusedPLTnum, 0);
+  }
+
+  for (int comp = compBegin; comp < (compBegin + numComp); comp++)
+  {
+    for (int idx = cu.reusePLTSize[compBegin]; idx < cu.curPLTSize[compBegin]; idx++)
     {
-      merge_idx(pu);
-      return;
+      ComponentID compID = (ComponentID)comp;
+      const int  channelBitDepth = sps.getBitDepth(toChannelType(compID));
+      m_BinEncoder.encodeBinsEP(cu.curPLT[comp][idx], channelBitDepth);
     }
-    subblock_merge_flag( *pu.cu );
-    MHIntra_flag( pu );
-    if ( pu.mhIntraFlag )
+  }
+  uint32_t signalEscape = (cu.useEscape[compBegin]) ? 1 : 0;
+  if (cu.curPLTSize[compBegin] > 0)
+  {
+    m_BinEncoder.encodeBinEP(signalEscape);
+  }
+  //encode index map
+  uint32_t   height = cu.block(compBegin).height;
+  uint32_t   width = cu.block(compBegin).width;
+
+  m_scanOrder = g_scanOrder[SCAN_UNGROUPED][(cu.useRotation[compBegin]) ? SCAN_TRAV_VER : SCAN_TRAV_HOR][gp_sizeIdxInfo->idxFrom(width)][gp_sizeIdxInfo->idxFrom(height)];
+  uint32_t total = height * width;
+  if (indexMaxSize > 1)
+    codeScanRotationModeFlag(cu, compBegin);
+  else
+    assert(!cu.useRotation[compBegin]);
+
+  if (cu.useEscape[compBegin] && cu.cs->pps->getUseDQP() && !cuCtx.isDQPCoded)
+  {
+    if (!cu.isSepTree() || isLuma(tu.chType))
     {
-      MHIntra_luma_pred_modes( *pu.cu );
+      cu_qp_delta(cu, cuCtx.qp, cu.qp);
+      cuCtx.qp = cu.qp;
+      cuCtx.isDQPCoded = true;
     }
-    triangle_mode( *pu.cu );
-    if (pu.mmvdMergeFlag)
+  }
+  if (cu.useEscape[compBegin] && cu.cs->slice->getUseChromaQpAdj() && !cuCtx.isChromaQpAdjCoded)
+  {
+    if (!CS::isDualITree(*tu.cs) || isChroma(tu.chType))
     {
-      mmvd_merge_idx(pu);
+      cu_chroma_qp_offset(cu);
+      cuCtx.isChromaQpAdjCoded = true;
     }
-    else
-    merge_idx    ( pu );
   }
-  else if (CU::isIBC(*pu.cu))
+
+  uint32_t prevRunPos = 0;
+  unsigned prevRunType = 0;
+  for (int subSetId = 0; subSetId <= (total - 1) >> LOG2_PALETTE_CG_SIZE; subSetId++)
   {
-    ref_idx(pu, REF_PIC_LIST_0);
-    mvd_coding(pu.mvd[REF_PIC_LIST_0], pu.cu->imv);
-    mvp_flag(pu, REF_PIC_LIST_0);
+    cuPaletteSubblockInfo(cu, compBegin, numComp, subSetId, prevRunPos, prevRunType);
   }
-  else
+}
+void CABACWriter::cuPaletteSubblockInfo(const CodingUnit& cu, ComponentID compBegin, uint32_t numComp, int subSetId, uint32_t& prevRunPos, unsigned& prevRunType) // Writes subset subSetId (up to 1 << LOG2_PALETTE_CG_SIZE scan positions) of the palette map in three passes: copy flags / run types, index values, escape values. prevRunPos and prevRunType are in/out and are updated whenever a new run starts.
+{
+  const SPS&      sps = *(cu.cs->sps);
+  TransformUnit&  tu  = *cu.firstTU;
+  PLTtypeBuf      runType = tu.getrunType(compBegin);
+  PelBuf          curPLTIdx = tu.getcurPLTIdx(compBegin);
+  uint32_t        indexMaxSize = cu.useEscape[compBegin] ? (cu.curPLTSize[compBegin] + 1) : cu.curPLTSize[compBegin]; // using escape adds one extra index value
+  uint32_t        totalPel = cu.block(compBegin).height*cu.block(compBegin).width;
+
+  int minSubPos = subSetId << LOG2_PALETTE_CG_SIZE;
+  int maxSubPos = minSubPos + (1 << LOG2_PALETTE_CG_SIZE);
+  maxSubPos = (maxSubPos > totalPel) ? totalPel : maxSubPos; // if last position is out of the current CU size
+
+  unsigned runCopyFlag[(1 << LOG2_PALETTE_CG_SIZE)]; // copy flag per position of this subset; MAX_INT means none was recorded
+  for (int i = 0; i < (1 << LOG2_PALETTE_CG_SIZE); i++)
+    runCopyFlag[i] = MAX_INT;
+
+  if (minSubPos == 0) // scan position 0 gets no coded copy flag (see curPos > 0 below); record it as 0
+    runCopyFlag[0] = 0;
+
+// PLT runCopy flag and runType - context coded
+  int curPos = minSubPos;
+  for (; curPos < maxSubPos && indexMaxSize > 1; curPos++) // pass skipped entirely when only one index value is possible
   {
-    int8_t affineMvdShift = pu.cu->imv ? ( pu.cu->imv == 1 ? -1 : 1 ) : 0;
-    inter_pred_idc( pu );
-    affine_flag   ( *pu.cu );
-    smvd_mode( pu );
-    if( pu.interDir != 2 /* PRED_L1 */ )
+    uint32_t posy = m_scanOrder[curPos].y;
+    uint32_t posx = m_scanOrder[curPos].x;
+    uint32_t posyprev = (curPos == 0) ? 0 : m_scanOrder[curPos - 1].y;
+    uint32_t posxprev = (curPos == 0) ? 0 : m_scanOrder[curPos - 1].x;
+    // encode runCopyFlag: set when the run type matches the previous scan position and, for PLT_RUN_INDEX, the palette index matches too
+    bool identityFlag = !((runType.at(posx, posy) != runType.at(posxprev, posyprev))
+      || ((runType.at(posx, posy) == PLT_RUN_INDEX) && (curPLTIdx.at(posx, posy) != curPLTIdx.at(posxprev, posyprev))));
+
+    const CtxSet&   ctxSet = (prevRunType == PLT_RUN_INDEX)? Ctx::IdxRunModel: Ctx::CopyRunModel; // context set chosen by the type of the most recently started run
+    if ( curPos > 0 ) // scan position 0 has no predecessor, so no copy flag is written for it
     {
-      ref_idx     ( pu, REF_PIC_LIST_0 );
-      if ( pu.cu->affine )
+      int dist = curPos - prevRunPos - 1; // number of positions between the start of the current run and curPos
+      const unsigned  ctxId = DeriveCtx::CtxPltCopyFlag(prevRunType, dist);
+      runCopyFlag[curPos - minSubPos] = identityFlag;
+      m_BinEncoder.encodeBin( identityFlag, ctxSet( ctxId ) );
+      DTRACE(g_trace_ctx, D_SYNTAX, "plt_copy_flag() bin=%d ctx=%d\n", identityFlag, ctxId);
+    }
+    // encode run_type (only where a new run starts)
+    if ( !identityFlag || curPos == 0 )
+    {
+      prevRunPos  = curPos;
+      prevRunType = runType.at(posx, posy);
+      if (((posy == 0) && !cu.useRotation[compBegin]) || ((posx == 0) && cu.useRotation[compBegin])) // row 0 (or column 0 with rotation): no bin is written, type is asserted to be PLT_RUN_INDEX
       {
-        mvd_coding( pu.mvdAffi[REF_PIC_LIST_0][0], affineMvdShift );
-        mvd_coding( pu.mvdAffi[REF_PIC_LIST_0][1], affineMvdShift );
-        if ( pu.cu->affineType == AFFINEMODEL_6PARAM )
+        assert(runType.at(posx, posy) == PLT_RUN_INDEX);
+      }
+      else if (curPos != 0 && runType.at(posxprev, posyprev) == PLT_RUN_COPY) // directly after a PLT_RUN_COPY position: no bin either, type asserted PLT_RUN_INDEX
+      {
+        assert(runType.at(posx, posy) == PLT_RUN_INDEX);
+      }
+      else
+      {
+        m_BinEncoder.encodeBin(runType.at(posx, posy), Ctx::RunTypeFlag());
+      }
+      DTRACE(g_trace_ctx, D_SYNTAX, "plt_type_flag() bin=%d sp=%d\n", runType.at(posx, posy), curPos);
+    }
+  }
+
+// PLT index values - bypass coded
+  if (indexMaxSize > 1)
+  {
+    curPos = minSubPos;
+    for (; curPos < maxSubPos; curPos++)
+    {
+      uint32_t posy = m_scanOrder[curPos].y;
+      uint32_t posx = m_scanOrder[curPos].x;
+      if ( runCopyFlag[curPos - minSubPos] == 0 && runType.at(posx, posy) == PLT_RUN_INDEX) // only positions that start a run (flag 0) of type PLT_RUN_INDEX carry an index
+      {
+        writePLTIndex(cu, curPos, curPLTIdx, runType, indexMaxSize, compBegin);
+        DTRACE(g_trace_ctx, D_SYNTAX, "plt_idx_idc() value=%d sp=%d\n", curPLTIdx.at(posx, posy), curPos);
+      }
+    }
+  }
+
+// Quantized escape colors - bypass coded
+  uint32_t scaleX = getComponentScaleX(COMPONENT_Cb, sps.getChromaFormatIdc()); // used below as the horizontal shift from luma to chroma coordinates
+  uint32_t scaleY = getComponentScaleY(COMPONENT_Cb, sps.getChromaFormatIdc()); // used below as the vertical shift from luma to chroma coordinates
+  for (int comp = compBegin; comp < (compBegin + numComp); comp++)
+  {
+    ComponentID compID = (ComponentID)comp;
+    for (curPos = minSubPos; curPos < maxSubPos; curPos++)
+    {
+      uint32_t posy = m_scanOrder[curPos].y;
+      uint32_t posx = m_scanOrder[curPos].x;
+      if (curPLTIdx.at(posx, posy) == cu.curPLTSize[compBegin]) // an index equal to the palette size denotes an escape sample
+      {
+          PLTescapeBuf    escapeValue = tu.getescapeValue((ComponentID)comp);
+          if (compID == COMPONENT_Y || compBegin != COMPONENT_Y) // luma, or any component when the block does not start at luma: one value per scan position
+          {
+            exp_golomb_eqprob((unsigned)escapeValue.at(posx, posy), 3);
+            DTRACE(g_trace_ctx, D_SYNTAX, "plt_escape_val() value=%d etype=%d sp=%d\n", escapeValue.at(posx, posy), comp, curPos);
+          }
+          if (compBegin == COMPONENT_Y && compID != COMPONENT_Y && posy % (1 << scaleY) == 0 && posx % (1 << scaleX) == 0) // chroma coded together with luma: only at positions aligned to the chroma grid
+          {
+            uint32_t posxC = posx >> scaleX;
+            uint32_t posyC = posy >> scaleY;
+            exp_golomb_eqprob((unsigned)escapeValue.at(posxC, posyC), 3);
+            DTRACE(g_trace_ctx, D_SYNTAX, "plt_escape_val() value=%d etype=%d sp=%d\n", escapeValue.at(posxC, posyC), comp, curPos); // trace the value that was actually coded (chroma coordinates)
+          }
+      }
+    }
+  }
+}
+void CABACWriter::codeScanRotationModeFlag(const CodingUnit& cu, ComponentID compBegin) // Writes cu.useRotation[compBegin] as a single context-coded bin.
+{
+  m_BinEncoder.encodeBin((cu.useRotation[compBegin]), Ctx::RotationFlag()); // same flag cu_palette_info uses to pick SCAN_TRAV_VER over SCAN_TRAV_HOR
+}
+void CABACWriter::xEncodePLTPredIndicator(const CodingUnit& cu, uint32_t maxPLTSize, ComponentID compBegin) // Writes which of the cu.lastPLTSize[compBegin] previous palette entries are reused, as run lengths coded with exp_golomb_eqprob(value, 0).
+{
+  int lastPredIdx = -1; // index of the last reused entry; stays -1 when none is reused
+  uint32_t run = 0; // non-reused entries seen since the previous reused one
+  uint32_t numPLTPredicted = 0; // total number of reused entries
+  for (uint32_t idx = 0; idx < cu.lastPLTSize[compBegin]; idx++)
+  {
+    if (cu.reuseflag[compBegin][idx])
+    {
+      numPLTPredicted++;
+      lastPredIdx = idx;
+    }
+  }
+
+  int idx = 0;
+  while (idx <= lastPredIdx) // entries after the last reused one are never visited
+  {
+    if (cu.reuseflag[compBegin][idx])
+    {
+      exp_golomb_eqprob(run ? run + 1 : run, 0); // a run of 0 is sent as 0, a run of n > 0 as n + 1, so the value 1 is never produced here
+      run = 0;
+    }
+    else
+    {
+      run++;
+    }
+    idx++;
+  }
+  if ((numPLTPredicted < maxPLTSize && lastPredIdx + 1 < cu.lastPLTSize[compBegin]) || !numPLTPredicted) // written when nothing is reused, or when fewer than maxPLTSize are reused and entries remain after the last reused one
+  {
+    exp_golomb_eqprob(1, 0); // the value 1, which no run can produce, closes the list
+  }
+}
+Pel CABACWriter::writePLTIndex(const CodingUnit& cu, uint32_t idx, PelBuf& paletteIdx, PLTtypeBuf& paletteRunType, int maxSymbol, ComponentID compBegin) // Writes the palette index at scan position idx via xWriteTruncBinCode after removing the one value ruled out by a neighbouring position; returns the adjusted level that was coded.
+{
+  uint32_t posy = m_scanOrder[idx].y;
+  uint32_t posx = m_scanOrder[idx].x;
+  Pel curLevel = (paletteIdx.at(posx, posy) == cu.curPLTSize[compBegin]) ? (maxSymbol - 1) : paletteIdx.at(posx, posy); // an index equal to the palette size is mapped to the last symbol, maxSymbol - 1
+  if (idx) // R0348: remove index redundancy
+  {
+    uint32_t prevposy = m_scanOrder[idx - 1].y;
+    uint32_t prevposx = m_scanOrder[idx - 1].x;
+    if (paletteRunType.at(prevposx, prevposy) == PLT_RUN_INDEX) // reference level is the index at the previous scan position
+    {
+      Pel leftLevel = paletteIdx.at(prevposx, prevposy); // left index
+      if (leftLevel == cu.curPLTSize[compBegin]) // escape mode
+      {
+        leftLevel = maxSymbol - 1;
+      }
+      assert(leftLevel != curLevel);
+      if (curLevel > leftLevel) // levels above the excluded value shift down by one
+      {
+        curLevel--;
+      }
+    }
+    else // previous position is not PLT_RUN_INDEX: reference level comes from (posx - 1, posy) with rotation, else from (posx, posy - 1)
+    {
+      Pel aboveLevel;
+      if (cu.useRotation[compBegin])
+      {
+        assert(prevposx > 0);
+        aboveLevel = paletteIdx.at(posx - 1, posy);
+        if (paletteIdx.at(posx - 1, posy) == cu.curPLTSize[compBegin]) // escape mode
+        {
+          aboveLevel = maxSymbol - 1;
+        }
+      }
+      else
+      {
+        assert(prevposy > 0);
+        aboveLevel = paletteIdx.at(posx, posy - 1);
+        if (paletteIdx.at(posx, posy - 1) == cu.curPLTSize[compBegin]) // escape mode
+        {
+          aboveLevel = maxSymbol - 1;
+        }
+      }
+      assert(curLevel != aboveLevel);
+      if (curLevel > aboveLevel) // levels above the excluded value shift down by one
+      {
+        curLevel--;
+      }
+    }
+    maxSymbol--; // one value was excluded, so the alphabet is one smaller
+  }
+  assert(maxSymbol > 0);
+  assert(curLevel >= 0);
+  assert(maxSymbol > curLevel);
+  if (maxSymbol > 1) // with a single possible value nothing is written
+  {
+    xWriteTruncBinCode(curLevel, maxSymbol);
+  }
+  return curLevel;
+}
+
+
+//================================================================================
+//  clause 7.3.8.6
+//--------------------------------------------------------------------------------
+//    void  prediction_unit ( pu );
+//    void  merge_flag      ( pu );
+//    void  merge_idx       ( pu );
+//    void  inter_pred_idc  ( pu );
+//    void  ref_idx         ( pu, refList );
+//    void  mvp_flag        ( pu, refList );
+//================================================================================
+
+void CABACWriter::prediction_unit( const PredictionUnit& pu )
+{
+  CHECK( pu.cu->treeType == TREE_C, "cannot be chroma CU" );
+#if ENABLE_SPLIT_PARALLELISM
+  CHECK( pu.cacheUsed, "Processing a PU that should be in cache!" );
+  CHECK( pu.cu->cacheUsed, "Processing a CU that should be in cache!" );
+
+#endif
+  if( pu.cu->skip )
+  {
+    CHECK( !pu.mergeFlag, "merge_flag must be true for skipped CUs" );
+  }
+  else
+  {
+    merge_flag( pu );
+  }
+  if( pu.mergeFlag )
+  {
+    merge_data(pu);
+  }
+  else if (CU::isIBC(*pu.cu))
+  {
+    ref_idx(pu, REF_PIC_LIST_0);
+    Mv mvd = pu.mvd[REF_PIC_LIST_0];
+    mvd.changeIbcPrecInternal2Amvr(pu.cu->imv);
+    mvd_coding(mvd, 0); // already changed to signaling precision
+    if ( pu.cu->slice->getPicHeader()->getMaxNumIBCMergeCand() == 1 )
+    {
+      CHECK( pu.mvpIdx[REF_PIC_LIST_0], "mvpIdx for IBC mode should be 0" );
+    }
+    else
+    mvp_flag(pu, REF_PIC_LIST_0);
+  }
+  else
+  {
+    inter_pred_idc( pu );
+    affine_flag   ( *pu.cu );
+    smvd_mode( pu );
+    if( pu.interDir != 2 /* PRED_L1 */ )
+    {
+      ref_idx     ( pu, REF_PIC_LIST_0 );
+      if ( pu.cu->affine )
+      {
+        Mv mvd = pu.mvdAffi[REF_PIC_LIST_0][0];
+        mvd.changeAffinePrecInternal2Amvr(pu.cu->imv);
+        mvd_coding(mvd, 0); // already changed to signaling precision
+        mvd = pu.mvdAffi[REF_PIC_LIST_0][1];
+        mvd.changeAffinePrecInternal2Amvr(pu.cu->imv);
+        mvd_coding(mvd, 0); // already changed to signaling precision
+        if ( pu.cu->affineType == AFFINEMODEL_6PARAM )
         {
-          mvd_coding( pu.mvdAffi[REF_PIC_LIST_0][2], affineMvdShift );
+          mvd = pu.mvdAffi[REF_PIC_LIST_0][2];
+          mvd.changeAffinePrecInternal2Amvr(pu.cu->imv);
+          mvd_coding(mvd, 0); // already changed to signaling precision
         }
       }
       else
       {
-        mvd_coding( pu.mvd[REF_PIC_LIST_0], pu.cu->imv );
+        Mv mvd = pu.mvd[REF_PIC_LIST_0];
+        mvd.changeTransPrecInternal2Amvr(pu.cu->imv);
+        mvd_coding(mvd, 0); // already changed to signaling precision
       }
       mvp_flag    ( pu, REF_PIC_LIST_0 );
     }
@@ -1353,20 +1765,28 @@ void CABACWriter::prediction_unit( const PredictionUnit& pu )
       if ( pu.cu->smvdMode != 1 )
       {
       ref_idx     ( pu, REF_PIC_LIST_1 );
-      if( !pu.cs->slice->getMvdL1ZeroFlag() || pu.interDir != 3 /* PRED_BI */ )
+      if( !pu.cs->picHeader->getMvdL1ZeroFlag() || pu.interDir != 3 /* PRED_BI */ )
       {
         if ( pu.cu->affine )
         {
-          mvd_coding( pu.mvdAffi[REF_PIC_LIST_1][0], affineMvdShift );
-          mvd_coding( pu.mvdAffi[REF_PIC_LIST_1][1], affineMvdShift );
+          Mv mvd = pu.mvdAffi[REF_PIC_LIST_1][0];
+          mvd.changeAffinePrecInternal2Amvr(pu.cu->imv);
+          mvd_coding(mvd, 0); // already changed to signaling precision
+          mvd = pu.mvdAffi[REF_PIC_LIST_1][1];
+          mvd.changeAffinePrecInternal2Amvr(pu.cu->imv);
+          mvd_coding(mvd, 0); // already changed to signaling precision
           if ( pu.cu->affineType == AFFINEMODEL_6PARAM )
           {
-            mvd_coding( pu.mvdAffi[REF_PIC_LIST_1][2], affineMvdShift );
+            mvd = pu.mvdAffi[REF_PIC_LIST_1][2];
+            mvd.changeAffinePrecInternal2Amvr(pu.cu->imv);
+            mvd_coding(mvd, 0); // already changed to signaling precision
           }
         }
         else
         {
-          mvd_coding( pu.mvd[REF_PIC_LIST_1], pu.cu->imv );
+          Mv mvd = pu.mvd[REF_PIC_LIST_1];
+          mvd.changeTransPrecInternal2Amvr(pu.cu->imv);
+          mvd_coding(mvd, 0); // already changed to signaling precision
         }
       }
       }
@@ -1394,15 +1814,11 @@ void CABACWriter::smvd_mode( const PredictionUnit& pu )
 
 void CABACWriter::subblock_merge_flag( const CodingUnit& cu )
 {
-  if ( cu.firstPU->mergeFlag && (cu.firstPU->mmvdMergeFlag || cu.mmvdSkip) )
-  {
-    return;
-  }
 
-  if ( !cu.cs->slice->isIntra() && (cu.cs->sps->getUseAffine() || cu.cs->sps->getSBTMVPEnabledFlag()) && cu.lumaSize().width >= 8 && cu.lumaSize().height >= 8 )
+  if ( !cu.cs->slice->isIntra() && (cu.slice->getPicHeader()->getMaxNumAffineMergeCand() > 0) && cu.lumaSize().width >= 8 && cu.lumaSize().height >= 8 ) // written only in non-intra slices, when the picture header allows at least one affine merge candidate and the luma block is at least 8x8
   {
     unsigned ctxId = DeriveCtx::CtxAffineFlag( cu );
-    m_BinEncoder.encodeBin( cu.affine, Ctx::AffineFlag( ctxId ) );
+    m_BinEncoder.encodeBin( cu.affine, Ctx::SubblockMergeFlag( ctxId ) ); // cu.affine is the bin value; the context index comes from DeriveCtx::CtxAffineFlag
     DTRACE( g_trace_ctx, D_SYNTAX, "subblock_merge_flag() subblock_merge_flag=%d ctx=%d pos=(%d,%d)\n", cu.affine ? 1 : 0, ctxId, cu.Y().x, cu.Y().y );
   }
 }
@@ -1430,17 +1846,52 @@ void CABACWriter::merge_flag( const PredictionUnit& pu )
 
   DTRACE( g_trace_ctx, D_SYNTAX, "merge_flag() merge=%d pos=(%d,%d) size=%dx%d\n", pu.mergeFlag ? 1 : 0, pu.lumaPos().x, pu.lumaPos().y, pu.lumaSize().width, pu.lumaSize().height );
 
-  if (pu.mergeFlag && CU::isIBC(*pu.cu))
+}
+
+void CABACWriter::merge_data(const PredictionUnit& pu)
+{ // Writes the merge syntax of a PU: only a merge index for IBC and affine CUs, otherwise the regular-merge / MMVD / CIIP flags followed by the matching index.
+  if (CU::isIBC(*pu.cu)) // IBC CUs carry nothing but a merge index
+  {
+    merge_idx(pu);
+    return;
+  }
+  subblock_merge_flag(*pu.cu); // writes cu->affine as the subblock merge flag when its own conditions hold
+  if (pu.cu->affine) // subblock merge: only the merge index follows
   {
+    merge_idx(pu);
     return;
   }
-#if !JVET_MMVD_OFF_MACRO
-  if (pu.mergeFlag)
+  const bool triangleAvailable = pu.cu->cs->slice->getSPS()->getUseTriangle() && pu.cu->cs->slice->isInterB() && pu.cu->cs->picHeader->getMaxNumTriangleCand() > 1;
+  const bool ciipAvailable = pu.cs->sps->getUseCiip() && !pu.cu->skip && pu.cu->lwidth() < MAX_CU_SIZE && pu.cu->lheight() < MAX_CU_SIZE;
+  if (pu.cu->lwidth() * pu.cu->lheight() >= 64
+    && (triangleAvailable || ciipAvailable)) // regular merge flag is written only for blocks of at least 64 luma samples with triangle or CIIP available
   {
-    m_BinEncoder.encodeBin(pu.mmvdMergeFlag, Ctx::MmvdFlag(0));
-    DTRACE(g_trace_ctx, D_SYNTAX, "mmvd_merge_flag() mmvd_merge=%d pos=(%d,%d) size=%dx%d\n", pu.mmvdMergeFlag ? 1 : 0, pu.lumaPos().x, pu.lumaPos().y, pu.lumaSize().width, pu.lumaSize().height);
+    m_BinEncoder.encodeBin(pu.regularMergeFlag, Ctx::RegularMergeFlag(pu.cu->skip ? 0 : 1)); // context 0 for skipped CUs, 1 otherwise
+  }
+  if (pu.regularMergeFlag)
+  {
+    if (pu.cs->sps->getUseMMVD()) // the MMVD flag is written only when the SPS enables MMVD
+    {
+      m_BinEncoder.encodeBin(pu.mmvdMergeFlag, Ctx::MmvdFlag(0));
+      DTRACE(g_trace_ctx, D_SYNTAX, "mmvd_merge_flag() mmvd_merge=%d pos=(%d,%d) size=%dx%d\n", pu.mmvdMergeFlag ? 1 : 0, pu.lumaPos().x, pu.lumaPos().y, pu.lumaSize().width, pu.lumaSize().height);
+    }
+    if (pu.mmvdMergeFlag || pu.cu->mmvdSkip) // MMVD (merge or skip variant) uses its own index syntax
+    {
+      mmvd_merge_idx(pu);
+    }
+    else
+    {
+      merge_idx(pu);
+    }
+  }
+  else
+  {
+    if (triangleAvailable && ciipAvailable) // the CIIP flag is written only when both triangle and CIIP are possible
+    {
+      Ciip_flag(pu);
+    }
+    merge_idx(pu);
   }
-#endif
 }
 
 void CABACWriter::imv_mode( const CodingUnit& cu )
@@ -1462,15 +1913,22 @@ void CABACWriter::imv_mode( const CodingUnit& cu )
     return;
   }
 
-  unsigned ctxId = DeriveCtx::CtxIMVFlag( cu );
   if (CU::isIBC(cu) == false)
-    m_BinEncoder.encodeBin( ( cu.imv > 0 ), Ctx::ImvFlag( ctxId ) );
-  DTRACE( g_trace_ctx, D_SYNTAX, "imv_mode() value=%d ctx=%d\n", (cu.imv > 0), ctxId );
+    m_BinEncoder.encodeBin( (cu.imv > 0), Ctx::ImvFlag( 0 ) );
+  DTRACE( g_trace_ctx, D_SYNTAX, "imv_mode() value=%d ctx=%d\n", (cu.imv > 0), 0 );
 
   if( sps->getAMVREnabledFlag() && cu.imv > 0 )
   {
-    m_BinEncoder.encodeBin( ( cu.imv > 1 ), Ctx::ImvFlag( 3 ) );
-    DTRACE( g_trace_ctx, D_SYNTAX, "imv_mode() value=%d ctx=%d\n", ( cu.imv > 1 ), 3 );
+    if (!CU::isIBC(cu))
+    {
+      m_BinEncoder.encodeBin(cu.imv < IMV_HPEL, Ctx::ImvFlag(4));
+      DTRACE(g_trace_ctx, D_SYNTAX, "imv_mode() value=%d ctx=%d\n", cu.imv < 3, 4);
+    }
+    if (cu.imv < IMV_HPEL)
+    {
+    m_BinEncoder.encodeBin( (cu.imv > 1), Ctx::ImvFlag( 1 ) );
+    DTRACE( g_trace_ctx, D_SYNTAX, "imv_mode() value=%d ctx=%d\n", (cu.imv > 1), 1 );
+    }
   }
 
   DTRACE( g_trace_ctx, D_SYNTAX, "imv_mode() IMVFlag=%d\n", cu.imv );
@@ -1490,13 +1948,13 @@ void CABACWriter::affine_amvr_mode( const CodingUnit& cu )
     return;
   }
 
-  m_BinEncoder.encodeBin( ( cu.imv > 0 ), Ctx::ImvFlag( 4 ) );
-  DTRACE( g_trace_ctx, D_SYNTAX, "affine_amvr_mode() value=%d ctx=%d\n", ( cu.imv > 0 ), 4 );
+  m_BinEncoder.encodeBin( (cu.imv > 0), Ctx::ImvFlag( 2 ) );
+  DTRACE( g_trace_ctx, D_SYNTAX, "affine_amvr_mode() value=%d ctx=%d\n", (cu.imv > 0), 2 );
 
   if( cu.imv > 0 )
   {
-    m_BinEncoder.encodeBin( ( cu.imv > 1 ), Ctx::ImvFlag( 5 ) );
-    DTRACE( g_trace_ctx, D_SYNTAX, "affine_amvr_mode() value=%d ctx=%d\n", ( cu.imv > 1 ), 5 );
+    m_BinEncoder.encodeBin( (cu.imv > 1), Ctx::ImvFlag( 3 ) );
+    DTRACE( g_trace_ctx, D_SYNTAX, "affine_amvr_mode() value=%d ctx=%d\n", (cu.imv > 1), 3 );
   }
   DTRACE( g_trace_ctx, D_SYNTAX, "affine_amvr_mode() IMVFlag=%d\n", cu.imv );
 }
@@ -1506,7 +1964,7 @@ void CABACWriter::merge_idx( const PredictionUnit& pu )
 
   if ( pu.cu->affine )
   {
-    int numCandminus1 = int( pu.cs->slice->getMaxNumAffineMergeCand() ) - 1;
+    int numCandminus1 = int( pu.cs->picHeader->getMaxNumAffineMergeCand() ) - 1;
     if ( numCandminus1 > 0 )
     {
       if ( pu.mergeIdx == 0 )
@@ -1543,6 +2001,11 @@ void CABACWriter::merge_idx( const PredictionUnit& pu )
       candIdx1 -= candIdx1 < candIdx0 ? 0 : 1;
       auto encodeOneIdx = [this](uint8_t mrgIdx, int numCandminus1)
       {
+        if (numCandminus1 == 0)
+        {
+          CHECK(mrgIdx, "Incorrect index!");
+          return;
+        }
         if(mrgIdx == 0)
         {
           this->m_BinEncoder.encodeBin( 0, Ctx::MergeIdx() );
@@ -1562,11 +2025,19 @@ void CABACWriter::merge_idx( const PredictionUnit& pu )
         }
       };
       m_BinEncoder.encodeBinEP(splitDir);
-      encodeOneIdx(candIdx0, TRIANGLE_MAX_NUM_UNI_CANDS - 1);
-      encodeOneIdx(candIdx1, TRIANGLE_MAX_NUM_UNI_CANDS - 2);
+      const int maxNumTriangleCand = pu.cs->picHeader->getMaxNumTriangleCand();
+      CHECK(maxNumTriangleCand < 2, "Incorrect max number of triangle candidates");
+      CHECK(candIdx0 >= maxNumTriangleCand, "Incorrect candIdx0");
+      CHECK(candIdx1 >= maxNumTriangleCand, "Incorrect candIdx1");
+      encodeOneIdx(candIdx0, maxNumTriangleCand - 1);
+      encodeOneIdx(candIdx1, maxNumTriangleCand - 2);
       return;
     }
-  int numCandminus1 = int( pu.cs->slice->getMaxNumMergeCand() ) - 1;
+    int numCandminus1;
+    if (pu.cu->predMode == MODE_IBC)
+      numCandminus1 = int(pu.cs->picHeader->getMaxNumIBCMergeCand()) - 1;
+    else
+      numCandminus1 = int(pu.cs->picHeader->getMaxNumMergeCand()) - 1;
   if( numCandminus1 > 0 )
   {
     if( pu.mergeIdx == 0 )
@@ -1599,25 +2070,11 @@ void CABACWriter::mmvd_merge_idx(const PredictionUnit& pu)
   var1 = (mvpIdx - (var0 * MMVD_MAX_REFINE_NUM)) / 4;
   var2 = mvpIdx - (var0 * MMVD_MAX_REFINE_NUM) - var1 * 4;
 
-  int numCandminus1_base = MMVD_BASE_MV_NUM - 1;
-  if (numCandminus1_base > 0)
+  if (pu.cs->picHeader->getMaxNumMergeCand() > 1)
   {
-    if (var0 == 0)
-    {
-      m_BinEncoder.encodeBin(0, Ctx::MmvdMergeIdx());
-    }
-    else
-    {
-      m_BinEncoder.encodeBin(1, Ctx::MmvdMergeIdx());
-      for (unsigned idx = 1; idx < numCandminus1_base; idx++)
-      {
-        m_BinEncoder.encodeBinEP(var0 == idx ? 0 : 1);
-        if (var0 == idx)
-        {
-          break;
-        }
-      }
-    }
+    static_assert(MMVD_BASE_MV_NUM == 2, "");
+    assert(var0 < 2);
+    m_BinEncoder.encodeBin(var0, Ctx::MmvdMergeIdx());
   }
   DTRACE(g_trace_ctx, D_SYNTAX, "base_mvp_idx() base_mvp_idx=%d\n", var0);
 
@@ -1668,8 +2125,8 @@ void CABACWriter::inter_pred_idc( const PredictionUnit& pu )
       m_BinEncoder.encodeBin( 0, Ctx::InterDir(ctxId) );
     }
   }
-  m_BinEncoder.encodeBin( ( pu.interDir == 2 ), Ctx::InterDir( 4 ) );
-  DTRACE( g_trace_ctx, D_SYNTAX, "inter_pred_idc() ctx=4 value=%d pos=(%d,%d)\n", pu.interDir, pu.lumaPos().x, pu.lumaPos().y );
+  m_BinEncoder.encodeBin( ( pu.interDir == 2 ), Ctx::InterDir( 5 ) );
+  DTRACE( g_trace_ctx, D_SYNTAX, "inter_pred_idc() ctx=5 value=%d pos=(%d,%d)\n", pu.interDir, pu.lumaPos().x, pu.lumaPos().y );
 }
 
 
@@ -1728,152 +2185,23 @@ void CABACWriter::mvp_flag( const PredictionUnit& pu, RefPicList eRefList )
   DTRACE( g_trace_ctx, D_SYNTAX, "mvpIdx(refList:%d)=%d\n", eRefList, pu.mvpIdx[eRefList] );
 }
 
-void CABACWriter::MHIntra_flag(const PredictionUnit& pu)
+void CABACWriter::Ciip_flag(const PredictionUnit& pu) // writes pu.ciipFlag as one context-coded bin, unless the SPS disables CIIP or the CU is skipped
 {
-  if (!pu.cs->sps->getUseMHIntra())
+  if (!pu.cs->sps->getUseCiip()) // CIIP disabled in the SPS: nothing is written
   {
-    CHECK(pu.mhIntraFlag == true, "invalid MHIntra SPS");
+    CHECK(pu.ciipFlag == true, "invalid Ciip SPS"); // the flag must be off when it is not written
     return;
   }
   if (pu.cu->skip)
   {
-    CHECK(pu.mhIntraFlag == true, "invalid MHIntra and skip");
-    return;
-  }
-  if (pu.mmvdMergeFlag)
-  {
-    CHECK(pu.mhIntraFlag == true, "invalid MHIntra and mmvd");
-    return;
-  }
-  if (pu.cu->affine)
-  {
-    CHECK(pu.mhIntraFlag == true, "invalid MHIntra and affine");
-    return;
-  }
-  if (pu.cu->lwidth() * pu.cu->lheight() < 64 || pu.cu->lwidth() >= MAX_CU_SIZE || pu.cu->lheight() >= MAX_CU_SIZE)
-  {
-    CHECK(pu.mhIntraFlag == true, "invalid MHIntra and blk");
-    return;
-  }
-  m_BinEncoder.encodeBin(pu.mhIntraFlag, Ctx::MHIntraFlag());
-  DTRACE(g_trace_ctx, D_SYNTAX, "MHIntra_flag() MHIntra=%d pos=(%d,%d) size=%dx%d\n", pu.mhIntraFlag ? 1 : 0, pu.lumaPos().x, pu.lumaPos().y, pu.lumaSize().width, pu.lumaSize().height);
-}
-
-void CABACWriter::MHIntra_luma_pred_modes(const CodingUnit& cu)
-{
-  if (!cu.Y().valid())
-  {
-    return;
-  }
-
-  const int numMPMs = 3;
-  int      numBlocks = CU::getNumPUs(cu);
-  unsigned mpm_idxs[4];
-  unsigned pred_modes[4];
-
-  const PredictionUnit* pu = cu.firstPU;
-
-  unsigned mpm_pred[numMPMs];
-  for (int k = 0; k < numBlocks; k++)
-  {
-    unsigned&  mpm_idx = mpm_idxs[k];
-    unsigned&  pred_mode = pred_modes[k];
-
-    PU::getMHIntraMPMs(*pu, mpm_pred);
-
-    pred_mode = pu->intraDir[0];
-
-    mpm_idx = numMPMs;
-
-    for (int idx = 0; idx < numMPMs; idx++)
-    {
-      if (pred_mode == mpm_pred[idx])
-      {
-        mpm_idx = idx;
-        break;
-      }
-    }
-    if (PU::getNarrowShape(pu->lwidth(), pu->lheight()) == 0)
-    {
-      m_BinEncoder.encodeBin(mpm_idx < numMPMs, Ctx::MHIntraPredMode());
-    }
-    pu = pu->next;
-  }
-
-  pu = cu.firstPU;
-
-  // mpm_idx / rem_intra_luma_pred_mode
-  for (int k = 0; k < numBlocks; k++)
-  {
-    const unsigned& mpm_idx = mpm_idxs[k];
-    if (mpm_idx < numMPMs)
-    {
-      m_BinEncoder.encodeBinEP(mpm_idx > 0);
-      if (mpm_idx)
-      {
-        m_BinEncoder.encodeBinEP(mpm_idx > 1);
-      }
-    }
-    DTRACE(g_trace_ctx, D_SYNTAX, "intra_luma_pred_modes() idx=%d pos=(%d,%d) mode=%d\n", k, pu->lumaPos().x, pu->lumaPos().y, pu->intraDir[0]);
-    pu = pu->next;
-  }
-}
-
-void CABACWriter::triangle_mode( const CodingUnit& cu )
-{
-  if( !cu.cs->slice->getSPS()->getUseTriangle() || !cu.cs->slice->isInterB() || cu.lwidth() * cu.lheight() < TRIANGLE_MIN_SIZE || cu.affine )
-  {
-    return;
-  }
-
-  if ( cu.firstPU->mmvdMergeFlag || cu.mmvdSkip )
-  {
-    return;
-  }
-
-  if ( cu.firstPU->mhIntraFlag )
-  {
+    CHECK(pu.ciipFlag == true, "invalid Ciip and skip"); // skipped CUs never carry the flag
     return;
   }
-
-  unsigned flag_idx     = DeriveCtx::CtxTriangleFlag( cu );
-
-  m_BinEncoder.encodeBin( cu.triangle, Ctx::TriangleFlag(flag_idx) );
-
-  DTRACE( g_trace_ctx, D_SYNTAX, "triangle_mode() triangle_mode=%d pos=(%d,%d) size: %dx%d\n", cu.triangle, cu.Y().x, cu.Y().y, cu.lumaSize().width, cu.lumaSize().height );
+  m_BinEncoder.encodeBin(pu.ciipFlag, Ctx::CiipFlag()); // single bin coded with the CiipFlag context
+  DTRACE(g_trace_ctx, D_SYNTAX, "Ciip_flag() Ciip=%d pos=(%d,%d) size=%dx%d\n", pu.ciipFlag ? 1 : 0, pu.lumaPos().x, pu.lumaPos().y, pu.lumaSize().width, pu.lumaSize().height);
 }
 
-//================================================================================
-//  clause 7.3.8.7
-//--------------------------------------------------------------------------------
-//    void  pcm_samples( tu )
-//================================================================================
 
-void CABACWriter::pcm_samples( const TransformUnit& tu )
-{
-  CHECK( !tu.cu->ipcm, "pcm mode expected" );
-
-  const SPS&        sps       = *tu.cu->cs->sps;
-
-  const CodingStructure *cs = tu.cs;
-  const ChannelType chType = tu.chType;
-
-  ComponentID compStr = (CS::isDualITree(*cs) && !isLuma(chType)) ? COMPONENT_Cb: COMPONENT_Y;
-  ComponentID compEnd = (CS::isDualITree(*cs) && isLuma(chType)) ? COMPONENT_Y : COMPONENT_Cr;
-  for( ComponentID compID = compStr; compID <= compEnd; compID = ComponentID(compID+1) )
-  {
-    const CPelBuf   samples     = tu.getPcmbuf( compID );
-    const unsigned  sampleBits  = sps.getPCMBitDepth( toChannelType(compID) );
-    for( unsigned y = 0; y < samples.height; y++ )
-    {
-      for( unsigned x = 0; x < samples.width; x++ )
-      {
-        m_BinEncoder.encodeBinsPCM( samples.at(x, y), sampleBits );
-      }
-    }
-  }
-  m_BinEncoder.restart();
-}
 
 
 
@@ -1884,19 +2212,14 @@ void CABACWriter::pcm_samples( const TransformUnit& tu )
 //    bool  split_transform_flag( split, depth )
 //    bool  cbf_comp            ( cbf, area, depth )
 //================================================================================
-
-void CABACWriter::transform_tree( const CodingStructure& cs, Partitioner& partitioner, CUCtx& cuCtx, ChromaCbfs& chromaCbfs, const PartSplit ispType, const int subTuIdx )
+void CABACWriter::transform_tree( const CodingStructure& cs, Partitioner& partitioner, CUCtx& cuCtx,                         const PartSplit ispType, const int subTuIdx )
 {
-  ChromaCbfs chromaCbfsLastDepth;
-  chromaCbfsLastDepth.Cb              = chromaCbfs.Cb;
-  chromaCbfsLastDepth.Cr              = chromaCbfs.Cr;
-  const UnitArea&       area          = partitioner.currArea();
-        int             subTuCounter  = subTuIdx;
-  const TransformUnit&  tu            = *cs.getTU( area.blocks[partitioner.chType].pos(), partitioner.chType, subTuIdx );
-  const CodingUnit&     cu            = *tu.cu;
-  const unsigned        trDepth       = partitioner.currTrDepth;
-  const bool            split         = ( tu.depth > trDepth );
-  const bool            chromaCbfISP  = area.blocks[COMPONENT_Cb].valid() && cu.ispMode && !split;
+  const UnitArea&       area = partitioner.currArea();
+  int             subTuCounter = subTuIdx;
+  const TransformUnit&  tu = *cs.getTU(area.blocks[partitioner.chType].pos(), partitioner.chType, subTuIdx);
+  const CodingUnit&     cu = *tu.cu;
+  const unsigned        trDepth = partitioner.currTrDepth;
+  const bool            split = (tu.depth > trDepth);
 
   // split_transform_flag
   if( partitioner.canSplit( TU_MAX_TR_SPLIT, cs ) )
@@ -1911,46 +2234,8 @@ void CABACWriter::transform_tree( const CodingStructure& cs, Partitioner& partit
   CHECK( split && !cu.ispMode, "transform split not allowed with QTBT" );
 
 
-  // cbf_cb & cbf_cr
-  if( area.chromaFormat != CHROMA_400 && area.blocks[COMPONENT_Cb].valid() && ( !CS::isDualITree( cs ) || partitioner.chType == CHANNEL_TYPE_CHROMA ) && ( !cu.ispMode || chromaCbfISP ) )
-  {
-    {
-      unsigned cbfDepth = chromaCbfISP ? trDepth - 1 : trDepth;
-      if( trDepth == 0 || chromaCbfs.Cb || chromaCbfISP )
-      {
-        chromaCbfs.Cb = TU::getCbfAtDepth( tu, COMPONENT_Cb, trDepth );
-        if( !( cu.sbtInfo && trDepth == 1 ) )
-        cbf_comp( cs, chromaCbfs.Cb, area.blocks[COMPONENT_Cb], cbfDepth );
-      }
-      else
-      {
-        CHECK( TU::getCbfAtDepth( tu, COMPONENT_Cb, cbfDepth ) != chromaCbfs.Cb, "incorrect Cb cbf" );
-      }
-
-      if( trDepth == 0 || chromaCbfs.Cr || chromaCbfISP )
-      {
-        chromaCbfs.Cr = TU::getCbfAtDepth( tu, COMPONENT_Cr, trDepth );
-        if( !( cu.sbtInfo && trDepth == 1 ) )
-        cbf_comp( cs, chromaCbfs.Cr, area.blocks[COMPONENT_Cr], cbfDepth, chromaCbfs.Cb );
-      }
-      else
-      {
-        CHECK( TU::getCbfAtDepth( tu, COMPONENT_Cr, cbfDepth ) != chromaCbfs.Cr, "incorrect Cr cbf" );
-      }
-    }
-  }
-  else if( CS::isDualITree( cs ) )
-  {
-    chromaCbfs = ChromaCbfs( false );
-  }
-
   if( split )
   {
-    if( area.chromaFormat != CHROMA_400 )
-    {
-      chromaCbfs.Cb        = TU::getCbfAtDepth( tu, COMPONENT_Cb,  trDepth );
-      chromaCbfs.Cr        = TU::getCbfAtDepth( tu, COMPONENT_Cr,  trDepth );
-    }
 
     if( partitioner.canSplit( TU_MAX_TR_SPLIT, cs ) )
     {
@@ -1974,8 +2259,7 @@ void CABACWriter::transform_tree( const CodingStructure& cs, Partitioner& partit
 
     do
     {
-      ChromaCbfs subChromaCbfs = chromaCbfs;
-      transform_tree( cs, partitioner, cuCtx, subChromaCbfs, ispType, subTuCounter );
+      transform_tree( cs, partitioner, cuCtx,                ispType, subTuCounter );
       subTuCounter += subTuCounter != -1 ? 1 : 0;
     } while( partitioner.nextPart( cs ) );
 
@@ -1985,65 +2269,30 @@ void CABACWriter::transform_tree( const CodingStructure& cs, Partitioner& partit
   {
     DTRACE( g_trace_ctx, D_SYNTAX, "transform_unit() pos=(%d,%d) size=%dx%d depth=%d trDepth=%d\n", tu.blocks[tu.chType].x, tu.blocks[tu.chType].y, tu.blocks[tu.chType].width, tu.blocks[tu.chType].height, cu.depth, partitioner.currTrDepth );
 
-    if( !isChroma( partitioner.chType ) )
-    {
-      if( !CU::isIntra( cu ) && trDepth == 0 && !chromaCbfs.sigChroma( area.chromaFormat ) )
-      {
-        CHECK( !TU::getCbfAtDepth( tu, COMPONENT_Y, trDepth ), "Luma cbf must be true for inter units with no chroma coeffs" );
-      }
-      else if( cu.sbtInfo && tu.noResidual )
-      {
-        CHECK( TU::getCbfAtDepth( tu, COMPONENT_Y, trDepth ), "Luma cbf must be false for inter sbt no-residual tu" );
-      }
-      else if( cu.sbtInfo && !chromaCbfsLastDepth.sigChroma( area.chromaFormat ) )
-      {
-        assert( !tu.noResidual );
-        CHECK( !TU::getCbfAtDepth( tu, COMPONENT_Y, trDepth ), "Luma cbf must be true for inter sbt residual tu" );
-      }
-      else
-      {
-        bool previousCbf       = false;
-        bool rootCbfSoFar      = false;
-        bool lastCbfIsInferred = false;
-        if( cu.ispMode )
-        {
-          uint32_t nTus = cu.ispMode == HOR_INTRA_SUBPARTITIONS ? cu.lheight() >> g_aucLog2[tu.lheight()] : cu.lwidth() >> g_aucLog2[tu.lwidth()];
-          if( subTuCounter == nTus - 1 )
-          {
-            TransformUnit* tuPointer = cu.firstTU;
-            for( int tuIdx = 0; tuIdx < subTuCounter; tuIdx++ )
-            {
-              rootCbfSoFar |= TU::getCbfAtDepth( *tuPointer, COMPONENT_Y, trDepth );
-              tuPointer = tuPointer->next;
-            }
-            if( !rootCbfSoFar )
-            {
-              lastCbfIsInferred = true;
-            }
-          }
-          if( !lastCbfIsInferred )
-          {
-            previousCbf = TU::getPrevTuCbfAtDepth( tu, COMPONENT_Y, partitioner.currTrDepth );
-          }
-        }
-        if( !lastCbfIsInferred )
-        {
-          cbf_comp( cs, TU::getCbfAtDepth( tu, COMPONENT_Y, trDepth ), tu.Y(), trDepth, previousCbf, cu.ispMode );
-        }
-      }
-    }
-
-
-    transform_unit( tu, cuCtx, chromaCbfs );
+    transform_unit( tu, cuCtx, partitioner, subTuCounter);
   }
 }
 
-void CABACWriter::cbf_comp( const CodingStructure& cs, bool cbf, const CompArea& area, unsigned depth, const bool prevCbCbf, const bool useISP )
+void CABACWriter::cbf_comp( const CodingStructure& cs, bool cbf, const CompArea& area, unsigned depth, const bool prevCbf, const bool useISP ) // writes the coded-block flag `cbf` of component area `area` as one context-coded bin
 {
-  const unsigned  ctxId   = DeriveCtx::CtxQtCbf( area.compID, depth, prevCbCbf, useISP && isLuma(area.compID) );
+  unsigned  ctxId = DeriveCtx::CtxQtCbf(area.compID, prevCbf, useISP && isLuma(area.compID)); // default context; replaced below for BDPCM blocks
   const CtxSet&   ctxSet  = Ctx::QtCbf[ area.compID ];
 
+  // BDPCM blocks use a fixed context. The CU is looked up with toChannelType(), not by casting the ComponentID value.
+  if ((area.compID == COMPONENT_Y && cs.getCU(area.pos(), toChannelType(area.compID))->bdpcmMode)
+   || (area.compID != COMPONENT_Y && cs.getCU(area.pos(), toChannelType(area.compID)) != NULL && cs.getCU(area.pos(), toChannelType(area.compID))->bdpcmModeChroma))
+  {
+    // Fixed contexts: 1 for Y and Cb, 2 for any other component (Cr).
+    if (area.compID == COMPONENT_Y || area.compID == COMPONENT_Cb)
+      ctxId = 1;
+    else
+      ctxId = 2;
+    m_BinEncoder.encodeBin(cbf, ctxSet(ctxId));
+  }
+  else
+  {
   m_BinEncoder.encodeBin( cbf, ctxSet( ctxId ) );
+  }
   DTRACE( g_trace_ctx, D_SYNTAX, "cbf_comp() etype=%d pos=(%d,%d) ctx=%d cbf=%d\n", area.compID, area.x, area.y, ctxId, cbf );
 }
 
@@ -2062,14 +2311,20 @@ void CABACWriter::mvd_coding( const Mv &rMvd, int8_t imv )
   int       verMvd = rMvd.getVer();
   if ( imv > 0 )
   {
-    CHECK( (horMvd % 4) != 0 && (verMvd % 4) != 0, "IMV: MVD is not a multiple of 4" );
-    horMvd >>= 2;
-    verMvd >>= 2;
-    if( imv == 2 )//IMV_4PEL
+    CHECK((horMvd % 2) != 0 && (verMvd % 2) != 0, "IMV: MVD is not a multiple of 2");
+    horMvd >>= 1;
+    verMvd >>= 1;
+    if (imv < IMV_HPEL)
     {
-      CHECK( (horMvd % 4) != 0 && (verMvd % 4) != 0, "IMV: MVD is not a multiple of 8" );
-      horMvd >>= 2;
-      verMvd >>= 2;
+      CHECK((horMvd % 2) != 0 && (verMvd % 2) != 0, "IMV: MVD is not a multiple of 4");
+      horMvd >>= 1;
+      verMvd >>= 1;
+      if (imv == IMV_4PEL)//IMV_4PEL
+      {
+        CHECK((horMvd % 4) != 0 && (verMvd % 4) != 0, "IMV: MVD is not a multiple of 16");
+        horMvd >>= 2;
+        verMvd >>= 2;
+      }
     }
   }
   unsigned  horAbs  = unsigned( horMvd < 0 ? -horMvd : horMvd );
@@ -2095,7 +2350,7 @@ void CABACWriter::mvd_coding( const Mv &rMvd, int8_t imv )
   {
     if( horAbs > 1 )
     {
-      exp_golomb_eqprob( horAbs - 2, 1 );
+      m_BinEncoder.encodeRemAbsEP(horAbs - 2, 1, 0, MV_BITS - 1);
     }
     m_BinEncoder.encodeBinEP( (horMvd < 0) );
   }
@@ -2103,7 +2358,7 @@ void CABACWriter::mvd_coding( const Mv &rMvd, int8_t imv )
   {
     if( verAbs > 1 )
     {
-      exp_golomb_eqprob( verAbs - 2, 1 );
+      m_BinEncoder.encodeRemAbsEP(verAbs - 2, 1, 0, MV_BITS - 1);
     }
     m_BinEncoder.encodeBinEP( (verMvd < 0) );
   }
@@ -2119,10 +2374,90 @@ void CABACWriter::mvd_coding( const Mv &rMvd, int8_t imv )
 //    void  cu_qp_delta         ( cu )
 //    void  cu_chroma_qp_offset ( cu )
 //================================================================================
-
-void CABACWriter::transform_unit( const TransformUnit& tu, CUCtx& cuCtx, ChromaCbfs& chromaCbfs )
+void CABACWriter::transform_unit( const TransformUnit& tu, CUCtx& cuCtx, Partitioner& partitioner, const int subTuCounter)
 {
-  CodingUnit& cu        = *tu.cu;
+  const CodingStructure&  cs = *tu.cs;
+  const CodingUnit&       cu = *tu.cu;
+  const UnitArea&         area = partitioner.currArea();
+  const unsigned          trDepth = partitioner.currTrDepth;
+  const bool              chromaCbfISP = area.blocks[COMPONENT_Cb].valid() && cu.ispMode;
+  ChromaCbfs              chromaCbfs;
+  CHECK(tu.depth != trDepth, " transform unit should be not be futher partitioned");
+
+  // cbf_cb & cbf_cr
+  if (area.chromaFormat != CHROMA_400 && area.blocks[COMPONENT_Cb].valid() && (!cu.isSepTree() || partitioner.chType == CHANNEL_TYPE_CHROMA) && (!cu.ispMode || chromaCbfISP))
+  {
+    {
+      unsigned cbfDepth = chromaCbfISP ? trDepth - 1 : trDepth;
+      {
+        chromaCbfs.Cb = TU::getCbfAtDepth(tu, COMPONENT_Cb, trDepth);
+        //if (!(cu.sbtInfo && trDepth == 1))
+        if (!(cu.sbtInfo && tu.noResidual))
+          cbf_comp(cs, chromaCbfs.Cb, area.blocks[COMPONENT_Cb], cbfDepth);
+      }
+
+      {
+        chromaCbfs.Cr = TU::getCbfAtDepth(tu, COMPONENT_Cr, trDepth);
+        //if (!(cu.sbtInfo && trDepth == 1))
+        if (!(cu.sbtInfo && tu.noResidual))
+          cbf_comp(cs, chromaCbfs.Cr, area.blocks[COMPONENT_Cr], cbfDepth, chromaCbfs.Cb);
+      }
+    }
+  }
+  else if (cu.isSepTree())
+  {
+    chromaCbfs = ChromaCbfs(false);
+  }
+
+  if (!isChroma(partitioner.chType))
+  {
+    if (!CU::isIntra(cu) && trDepth == 0 && !chromaCbfs.sigChroma(area.chromaFormat))
+    {
+      CHECK(!TU::getCbfAtDepth(tu, COMPONENT_Y, trDepth), "Luma cbf must be true for inter units with no chroma coeffs");
+    }
+    else if (cu.sbtInfo && tu.noResidual)
+    {
+      CHECK(TU::getCbfAtDepth(tu, COMPONENT_Y, trDepth), "Luma cbf must be false for inter sbt no-residual tu");
+    }
+    else if (cu.sbtInfo && !chromaCbfs.sigChroma(area.chromaFormat))
+    {
+      assert(!tu.noResidual);
+      CHECK(!TU::getCbfAtDepth(tu, COMPONENT_Y, trDepth), "Luma cbf must be true for inter sbt residual tu");
+    }
+    else
+    {
+      bool lumaCbfIsInferredACT = (cu.colorTransform && cu.predMode == MODE_INTRA && trDepth == 0 && !chromaCbfs.sigChroma(area.chromaFormat));
+      CHECK(lumaCbfIsInferredACT && !TU::getCbfAtDepth(tu, COMPONENT_Y, trDepth), "adaptive color transform cannot have all zero coefficients");
+      bool lastCbfIsInferred    = lumaCbfIsInferredACT; // ISP and ACT are mutually exclusive
+      bool previousCbf          = false;
+      bool rootCbfSoFar         = false;
+      if (cu.ispMode)
+      {
+        uint32_t nTus = cu.ispMode == HOR_INTRA_SUBPARTITIONS ? cu.lheight() >> floorLog2(tu.lheight()) : cu.lwidth() >> floorLog2(tu.lwidth());
+        if (subTuCounter == nTus - 1)
+        {
+          TransformUnit* tuPointer = cu.firstTU;
+          for (int tuIdx = 0; tuIdx < subTuCounter; tuIdx++)
+          {
+            rootCbfSoFar |= TU::getCbfAtDepth(*tuPointer, COMPONENT_Y, trDepth);
+            tuPointer = tuPointer->next;
+          }
+          if (!rootCbfSoFar)
+          {
+            lastCbfIsInferred = true;
+          }
+        }
+        if (!lastCbfIsInferred)
+        {
+          previousCbf = TU::getPrevTuCbfAtDepth(tu, COMPONENT_Y, partitioner.currTrDepth);
+        }
+      }
+      if (!lastCbfIsInferred)
+      {
+        cbf_comp(cs, TU::getCbfAtDepth(tu, COMPONENT_Y, trDepth), tu.Y(), trDepth, previousCbf, cu.ispMode);
+      }
+    }
+  }
   bool        lumaOnly  = ( cu.chromaFormat == CHROMA_400 || !tu.blocks[COMPONENT_Cb].valid() );
   bool        cbf[3]    = { TU::getCbf( tu, COMPONENT_Y ), chromaCbfs.Cb, chromaCbfs.Cr };
   bool        cbfLuma   = ( cbf[ COMPONENT_Y ] != 0 );
@@ -2137,25 +2472,37 @@ void CABACWriter::transform_unit( const TransformUnit& tu, CUCtx& cuCtx, ChromaC
     }
     cbfChroma = ( cbf[ COMPONENT_Cb ] || cbf[ COMPONENT_Cr ] );
   }
-  if( cbfLuma || cbfChroma )
+
+  if( ( cu.lwidth() > 64 || cu.lheight() > 64 || cbfLuma || cbfChroma ) &&
+    (!tu.cu->isSepTree() || isLuma(tu.chType)) )
   {
     if( cu.cs->pps->getUseDQP() && !cuCtx.isDQPCoded )
     {
-      if (!CS::isDualITree(*tu.cs) || isLuma(tu.chType))
-      {
-        cu_qp_delta(cu, cuCtx.qp, cu.qp);
-        cuCtx.qp = cu.qp;
-        cuCtx.isDQPCoded = true;
-      }
+      cu_qp_delta(cu, cuCtx.qp, cu.qp);
+      cuCtx.qp = cu.qp;
+      cuCtx.isDQPCoded = true;
     }
-    if( cu.cs->slice->getUseChromaQpAdj() && cbfChroma && !cu.transQuantBypass && !cuCtx.isChromaQpAdjCoded )
+  }
+  if (!cu.isSepTree() || isChroma(tu.chType))   // !DUAL_TREE_LUMA
+  {
+    SizeType channelWidth = !cu.isSepTree() ? cu.lwidth() : cu.chromaSize().width;
+    SizeType channelHeight = !cu.isSepTree() ? cu.lheight() : cu.chromaSize().height;
+
+    if (cu.cs->slice->getUseChromaQpAdj() && (channelWidth > 64 || channelHeight > 64 || cbfChroma) && !cuCtx.isChromaQpAdjCoded)
     {
-      cu_chroma_qp_offset( cu );
+      cu_chroma_qp_offset(cu);
       cuCtx.isChromaQpAdjCoded = true;
     }
+  }
+
+  if( !lumaOnly )
+  {
+    joint_cb_cr( tu, ( cbf[COMPONENT_Cb] ? 2 : 0 ) + ( cbf[COMPONENT_Cr] ? 1 : 0 ) );
+  }
+
     if( cbfLuma )
     {
-      residual_coding( tu, COMPONENT_Y );
+      residual_coding( tu, COMPONENT_Y, &cuCtx );
     }
     if( !lumaOnly )
     {
@@ -2167,8 +2514,7 @@ void CABACWriter::transform_unit( const TransformUnit& tu, CUCtx& cuCtx, ChromaC
         }
         if( cbf[ compID ] )
         {
-          residual_coding( tu, compID );
-        }
+          residual_coding( tu, compID, &cuCtx );
       }
     }
   }
@@ -2208,7 +2554,7 @@ void CABACWriter::cu_chroma_qp_offset( const CodingUnit& cu )
   else
   {
     m_BinEncoder.encodeBin( 1, Ctx::ChromaQpAdjFlag() );
-    int length = cu.cs->pps->getPpsRangeExtension().getChromaQpOffsetListLen();
+    int length = cu.cs->pps->getChromaQpOffsetListLen();
     if( length > 1 )
     {
       unary_max_symbol( qpAdj-1, Ctx::ChromaQpAdjIdc(), Ctx::ChromaQpAdjIdc(), length-1 );
@@ -2230,19 +2576,41 @@ void CABACWriter::cu_chroma_qp_offset( const CodingUnit& cu )
 //    void        residual_coding_subblock( coeffCtx )
 //================================================================================
 
-void CABACWriter::residual_coding( const TransformUnit& tu, ComponentID compID )
+void CABACWriter::joint_cb_cr( const TransformUnit& tu, const int cbfMask )
+{
+  // Nothing is written unless the SPS enables joint Cb-Cr coding.
+  if ( !tu.cu->slice->getSPS()->getJointCbCrEnabledFlag() ) return;
+
+  // A non-zero jointCbCr value has to agree with the chroma cbf mask handed in by the caller.
+  CHECK( tu.jointCbCr && tu.jointCbCr != cbfMask, "wrong value of jointCbCr (" << (int)tu.jointCbCr << " vs " << (int)cbfMask << ")" );
+
+  // The flag is coded when the mask is 3, or for intra CUs whenever the mask is non-zero.
+  const bool flagIsCoded = ( cbfMask == 3 ) || ( cbfMask && CU::isIntra( *tu.cu ) );
+  if( !flagIsCoded ) return;
+  m_BinEncoder.encodeBin( tu.jointCbCr ? 1 : 0, Ctx::JointCbCrFlag( cbfMask - 1 ) );
+}
+
+void CABACWriter::residual_coding( const TransformUnit& tu, ComponentID compID, CUCtx* cuCtx )
 {
   const CodingUnit& cu = *tu.cu;
   DTRACE( g_trace_ctx, D_SYNTAX, "residual_coding() etype=%d pos=(%d,%d) size=%dx%d predMode=%d\n", tu.blocks[compID].compID, tu.blocks[compID].x, tu.blocks[compID].y, tu.blocks[compID].width, tu.blocks[compID].height, cu.predMode );
 
+  if( compID == COMPONENT_Cr && tu.jointCbCr == 3 )
+    return;
+
   // code transform skip and explicit rdpcm mode
-  mts_coding         ( tu, compID );
+  ts_flag            ( tu, compID );
   explicit_rdpcm_mode( tu, compID );
 
-#if HEVC_USE_SIGN_HIDING
+  if (tu.mtsIdx[compID] == MTS_SKIP)
+  {
+    residual_codingTS( tu, compID );
+    return;
+  }
+
   // determine sign hiding
-  bool signHiding  = ( cu.cs->slice->getSignDataHidingEnabledFlag() && !cu.transQuantBypass && tu.rdpcm[compID] == RDPCM_OFF );
-  if(  signHiding && CU::isIntra(cu) && CU::isRDPCMEnabled(cu) && tu.mtsIdx==1 )
+  bool signHiding  = ( cu.cs->picHeader->getSignDataHidingEnabledFlag() && tu.rdpcm[compID] == RDPCM_OFF );
+  if(  signHiding && CU::isIntra(cu) && CU::isRDPCMEnabled(cu) && tu.mtsIdx[compID] == MTS_SKIP)
   {
     const ChannelType chType    = toChannelType( compID );
     const unsigned    intraMode = PU::getFinalIntraMode( *cu.cs->getPU( tu.blocks[compID].pos(), chType ), chType );
@@ -2251,14 +2619,9 @@ void CABACWriter::residual_coding( const TransformUnit& tu, ComponentID compID )
       signHiding = false;
     }
   }
-#endif
 
   // init coeff coding context
-#if HEVC_USE_SIGN_HIDING
   CoeffCodingContext  cctx    ( tu, compID, signHiding );
-#else
-  CoeffCodingContext  cctx    ( tu, compID );
-#endif
   const TCoeff*       coeff   = tu.getCoeffs( compID ).buf;
 
   // determine and set last coeff position and sig group flags
@@ -2276,17 +2639,38 @@ void CABACWriter::residual_coding( const TransformUnit& tu, ComponentID compID )
   CHECK( scanPosLast < 0, "Coefficient coding called for empty TU" );
   cctx.setScanPosLast(scanPosLast);
 
+  if (cuCtx && tu.mtsIdx[compID] != MTS_SKIP && tu.blocks[compID].height >= 4 && tu.blocks[compID].width >= 4)
+  {
+    const int maxLfnstPos = ((tu.blocks[compID].height == 4 && tu.blocks[compID].width == 4) || (tu.blocks[compID].height == 8 && tu.blocks[compID].width == 8)) ? 7 : 15;
+    cuCtx->violatesLfnstConstrained[ toChannelType(compID) ] |= cctx.scanPosLast() > maxLfnstPos;
+  }
+  if (cuCtx && tu.mtsIdx[compID] != MTS_SKIP && tu.blocks[compID].height >= 4 && tu.blocks[compID].width >= 4)
+  {
+    const int lfnstLastScanPosTh = isLuma( compID ) ? LFNST_LAST_SIG_LUMA : LFNST_LAST_SIG_CHROMA;
+    cuCtx->lfnstLastScanPos |= cctx.scanPosLast() >= lfnstLastScanPosTh;
+  }
+#if !JVET_Q0055_MTS_SIGNALLING
+  if( cuCtx && isLuma(compID) && ( cctx.posX(cctx.scanPosLast()) >= 16 || cctx.posY(cctx.scanPosLast()) >= 16 ) )
+  {
+    cuCtx->violatesMtsCoeffConstraint = true;
+  }
+#endif
+  
   // code last coeff position
   last_sig_coeff( cctx, tu, compID );
 
   // code subblocks
-  const int stateTab  = ( tu.cs->slice->getDepQuantEnabledFlag() ? 32040 : 0 );
+  const int stateTab  = ( tu.cs->picHeader->getDepQuantEnabledFlag() ? 32040 : 0 );
   int       state     = 0;
 
+  int ctxBinSampleRatio = (compID == COMPONENT_Y) ? MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT_LUMA : MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT_CHROMA;
+  cctx.regBinLimit = (tu.getTbAreaAfterCoefZeroOut(compID) * ctxBinSampleRatio) >> 4;
+
   for( int subSetId = ( cctx.scanPosLast() >> cctx.log2CGSize() ); subSetId >= 0; subSetId--)
   {
     cctx.initSubblock       ( subSetId, sigGroupFlags[subSetId] );
-    if( ( tu.mtsIdx > 1 || ( tu.cu->sbtInfo != 0 && tu.blocks[ compID ].height <= 32 && tu.blocks[ compID ].width <= 32 ) ) && !tu.cu->transQuantBypass && compID == COMPONENT_Y )
+
+    if( tu.cs->sps->getUseMTS() && tu.cu->sbtInfo != 0 && tu.blocks[ compID ].height <= 32 && tu.blocks[ compID ].width <= 32 && compID == COMPONENT_Y )
     {
       if( ( tu.blocks[ compID ].height == 32 && cctx.cgPosY() >= ( 16 >> cctx.log2CGHeight() ) )
        || ( tu.blocks[ compID ].width  == 32 && cctx.cgPosX() >= ( 16 >> cctx.log2CGWidth()  ) ) )
@@ -2295,80 +2679,74 @@ void CABACWriter::residual_coding( const TransformUnit& tu, ComponentID compID )
       }
     }
     residual_coding_subblock( cctx, coeff, stateTab, state );
-
+    
+#if JVET_Q0055_MTS_SIGNALLING
+    if ( cuCtx && isLuma(compID) && cctx.isSigGroup() && ( cctx.cgPosY() > 3 || cctx.cgPosX() > 3 ) )
+    {
+      cuCtx->violatesMtsCoeffConstraint = true;
+    }
+#endif
   }
-
-
 }
 
-void CABACWriter::mts_coding( const TransformUnit& tu, ComponentID compID )
+void CABACWriter::ts_flag( const TransformUnit& tu, ComponentID compID ) // writes the transform-skip flag of one component
 {
-  const CodingUnit  &cu = *tu.cu;
-  const bool  tsAllowed = TU::isTSAllowed ( tu, compID );
-  const bool mtsAllowed = TU::isMTSAllowed( tu, compID );
-
-  if( !mtsAllowed && !tsAllowed ) return;
-
-  int symbol  = 0;
-  int ctxIdx  = 0;
-
-  if( tsAllowed )
+  int tsFlag = tu.mtsIdx[compID] == MTS_SKIP ? 1 : 0; // 1 when this component's mtsIdx is MTS_SKIP
+  int ctxIdx = isLuma(compID) ? 0 : 1;                // context 0 for luma, 1 for chroma
+  
+  if( TU::isTSAllowed ( tu, compID ) ) // the bin is only written when transform skip is allowed for this block
   {
-    symbol = 1 - ( tu.mtsIdx == 1 ? 1 : 0 );
-    ctxIdx = 6;
-    m_BinEncoder.encodeBin( symbol, Ctx::MTSIndex( ctxIdx ) );
+    m_BinEncoder.encodeBin( tsFlag, Ctx::TransformSkipFlag(ctxIdx));
   }
+  DTRACE( g_trace_ctx, D_SYNTAX, "ts_flag() etype=%d pos=(%d,%d) mtsIdx=%d\n", COMPONENT_Y, tu.cu->lx(), tu.cu->ly(), tsFlag ); // NOTE(review): etype is always COMPONENT_Y and the "mtsIdx" field prints tsFlag - confirm against the reader-side trace before changing
+}
 
-  if( tu.mtsIdx != 1 )
+void CABACWriter::mts_idx( const CodingUnit& cu, CUCtx* cuCtx ) // writes the luma MTS index of the CU's first TU
+{
+  TransformUnit &tu = *cu.firstTU;
+  int        mtsIdx = tu.mtsIdx[COMPONENT_Y];
+  
+  if( CU::isMTSAllowed( cu, COMPONENT_Y ) && cuCtx && !cuCtx->violatesMtsCoeffConstraint && // a null cuCtx means nothing is coded
+      cu.lfnstIdx == 0 && mtsIdx != MTS_SKIP && TU::getCbf(tu, COMPONENT_Y) )
   {
-    if( mtsAllowed )
+    int symbol = mtsIdx != MTS_DCT2_DCT2 ? 1 : 0; // first bin: 0 for MTS_DCT2_DCT2, 1 for any other index
+    int ctxIdx = 0;
+    
+    m_BinEncoder.encodeBin( symbol, Ctx::MTSIdx(ctxIdx));
+    
+    if( symbol )
     {
-      symbol = tu.mtsIdx != 0 ? 1 : 0;
-      ctxIdx = std::min( (int)cu.qtDepth, 5 );
-      m_BinEncoder.encodeBin( symbol, Ctx::MTSIndex( ctxIdx ) );
-
-      if( symbol )
+      ctxIdx = 1; // the remaining bins use contexts 1, 2 and 3 in turn
+      for( int i = 0; i < 3; i++, ctxIdx++ ) // at most three further bins, stopping at the first 0
       {
-        ctxIdx = 7;
-        for( int i = 0; i < 3; i++, ctxIdx++ )
+        symbol = mtsIdx > i + MTS_DST7_DST7 ? 1 : 0; // 1 while mtsIdx lies above MTS_DST7_DST7 + i
+        m_BinEncoder.encodeBin( symbol, Ctx::MTSIdx(ctxIdx));
+        
+        if( !symbol )
         {
-          symbol = tu.mtsIdx > i + 2 ? 1 : 0;
-          m_BinEncoder.encodeBin( symbol, Ctx::MTSIndex( ctxIdx ) );
-
-          if( !symbol )
-          {
-            break;
-          }
+          break;
         }
       }
     }
   }
-
-  DTRACE( g_trace_ctx, D_SYNTAX, "mts_coding() etype=%d pos=(%d,%d) mtsIdx=%d\n", COMPONENT_Y, cu.lx(), cu.ly(), tu.mtsIdx );
+  DTRACE( g_trace_ctx, D_SYNTAX, "mts_idx() etype=%d pos=(%d,%d) mtsIdx=%d\n", COMPONENT_Y, tu.cu->lx(), tu.cu->ly(), mtsIdx); // traced even when no bin was written
 }
 
 void CABACWriter::isp_mode( const CodingUnit& cu )
 {
-  if( !CU::isIntra( cu ) || !isLuma( cu.chType ) || cu.firstPU->multiRefIdx || cu.ipcm )
+  if( !CU::isIntra( cu ) || !isLuma( cu.chType ) || cu.firstPU->multiRefIdx || !cu.cs->sps->getUseISP() || cu.bdpcmMode || !CU::canUseISP( cu, getFirstComponentOfChannel( cu.chType ) ) || cu.colorTransform ) // any of these conditions means no ISP syntax is written for this CU
   {
-    CHECK( cu.ispMode != NOT_INTRA_SUBPARTITIONS, "error: cu.intraSubPartitions != 0" );
+    CHECK( cu.ispMode != NOT_INTRA_SUBPARTITIONS, "cu.ispMode != 0" ); // the CU must not carry an ISP mode in that case
     return;
   }
-  const ISPType allowedSplits = CU::canUseISPSplit( cu, getFirstComponentOfChannel( cu.chType ) );
-  if( allowedSplits == NOT_INTRA_SUBPARTITIONS ) return;
-
-  if( cu.ispMode == NOT_INTRA_SUBPARTITIONS )
+  if ( cu.ispMode == NOT_INTRA_SUBPARTITIONS ) // first bin: 0 = ISP not used, 1 = ISP used
   {
     m_BinEncoder.encodeBin( 0, Ctx::ISPMode( 0 ) );
   }
   else
   {
     m_BinEncoder.encodeBin( 1, Ctx::ISPMode( 0 ) );
-
-    if( allowedSplits == CAN_USE_VER_AND_HORL_SPLITS )
-    {
-      m_BinEncoder.encodeBin( cu.ispMode - 1, Ctx::ISPMode( 1 ) );
-    }
+    m_BinEncoder.encodeBin( cu.ispMode - 1, Ctx::ISPMode( 1 ) ); // second bin carries ispMode - 1 and is now written unconditionally
   }
   DTRACE( g_trace_ctx, D_SYNTAX, "intra_subPartitions() etype=%d pos=(%d,%d) ispIdx=%d\n", cu.chType, cu.blocks[cu.chType].x, cu.blocks[cu.chType].y, (int)cu.ispMode );
 }
@@ -2376,7 +2754,7 @@ void CABACWriter::isp_mode( const CodingUnit& cu )
 void CABACWriter::explicit_rdpcm_mode( const TransformUnit& tu, ComponentID compID )
 {
   const CodingUnit& cu = *tu.cu;
-  if( !CU::isIntra(cu) && CU::isRDPCMEnabled(cu) && ( tu.mtsIdx==1 || cu.transQuantBypass ) )
+  if (!CU::isIntra(cu) && CU::isRDPCMEnabled(cu) && (tu.mtsIdx[compID] == MTS_SKIP))
   {
     ChannelType chType = toChannelType( compID );
     switch( tu.rdpcm[compID] )
@@ -2395,19 +2773,55 @@ void CABACWriter::explicit_rdpcm_mode( const TransformUnit& tu, ComponentID comp
   }
 }
 
+void CABACWriter::residual_lfnst_mode( const CodingUnit& cu, CUCtx& cuCtx ) // writes cu.lfnstIdx with up to two context-coded bins
+{
+  int chIdx = cu.isSepTree() && cu.chType == CHANNEL_TYPE_CHROMA ? 1 : 0; // block index used for the size test below
+  if( ( cu.ispMode && !CU::canUseLfnstWithISP( cu, cu.chType ) ) ||       // ISP in use but LFNST not permitted with it
+      (cu.cs->sps->getUseLFNST() && CU::isIntra(cu) && cu.mipFlag && !allowLfnstWithMip(cu.firstPU->lumaSize())) || // MIP block for which allowLfnstWithMip() is false
+    ( cu.isSepTree() && cu.chType == CHANNEL_TYPE_CHROMA && std::min( cu.blocks[ 1 ].width, cu.blocks[ 1 ].height ) < 4 ) // separate-tree chroma with a side below 4
+    || ( cu.blocks[ chIdx ].lumaSize().width > cu.cs->sps->getMaxTbSize() || cu.blocks[ chIdx ].lumaSize().height > cu.cs->sps->getMaxTbSize() ) // luma size exceeds the maximum transform block size
+    )
+  {
+    return;
+  }
+
+  if( cu.cs->sps->getUseLFNST() && CU::isIntra( cu ) ) // only intra CUs with the SPS tool flag set can carry an index
+  {
+    const bool lumaFlag                   = cu.isSepTree() ? (   isLuma( cu.chType ) ? true : false ) : true; // without a separate tree both channels are considered
+    const bool chromaFlag                 = cu.isSepTree() ? ( isChroma( cu.chType ) ? true : false ) : true;
+          bool nonZeroCoeffNonTsCorner8x8 = ( lumaFlag && cuCtx.violatesLfnstConstrained[CHANNEL_TYPE_LUMA] ) || (chromaFlag && cuCtx.violatesLfnstConstrained[CHANNEL_TYPE_CHROMA] ); // constraint violation recorded in cuCtx for a considered channel
+
+    const bool isTrSkip = TU::getCbf(*cu.firstTU, COMPONENT_Y) && cu.firstTU->mtsIdx[COMPONENT_Y] == MTS_SKIP; // first TU has coded luma using transform skip
+    if( (!cuCtx.lfnstLastScanPos && !cu.ispMode) || nonZeroCoeffNonTsCorner8x8 || isTrSkip ) // any of these means no index is written
+    {
+      return;
+    }
+  }
+  else
+  {
+    return;
+  }
+
+
+  unsigned cctx = 0;            // context of the first bin: 0 for a shared tree,
+  if ( cu.isSepTree() ) cctx++; // 1 for a separate tree
+
+  const uint32_t idxLFNST = cu.lfnstIdx;
+  assert( idxLFNST < 3 ); // only the values 0, 1 and 2 can be represented by the two bins below
+  m_BinEncoder.encodeBin( idxLFNST ? 1 : 0, Ctx::LFNSTIdx( cctx ) ); // first bin: index is non-zero
+
+  if( idxLFNST )
+  {
+    m_BinEncoder.encodeBin( (idxLFNST - 1) ? 1 : 0, Ctx::LFNSTIdx(2)); // second bin separates index 1 (0) from index 2 (1)
+  }
+
+  DTRACE( g_trace_ctx, D_SYNTAX, "residual_lfnst_mode() etype=%d pos=(%d,%d) mode=%d\n", COMPONENT_Y, cu.lx(), cu.ly(), ( int ) cu.lfnstIdx );
+}
 
 void CABACWriter::last_sig_coeff( CoeffCodingContext& cctx, const TransformUnit& tu, ComponentID compID )
 {
   unsigned blkPos = cctx.blockPos( cctx.scanPosLast() );
   unsigned posX, posY;
-#if HEVC_USE_MDCS
-  if( cctx.scanType() == SCAN_VER )
-  {
-    posX  = blkPos / cctx.width();
-    posY  = blkPos - ( posX * cctx.width() );
-  }
-  else
-#endif
   {
     posY  = blkPos / cctx.width();
     posX  = blkPos - ( posY * cctx.width() );
@@ -2420,7 +2834,7 @@ void CABACWriter::last_sig_coeff( CoeffCodingContext& cctx, const TransformUnit&
   unsigned maxLastPosX = cctx.maxLastPosX();
   unsigned maxLastPosY = cctx.maxLastPosY();
 
-  if( ( tu.mtsIdx > 1 || ( tu.cu->sbtInfo != 0 && tu.blocks[ compID ].width <= 32 && tu.blocks[ compID ].height <= 32 ) ) && !tu.cu->transQuantBypass && compID == COMPONENT_Y )
+  if( tu.cs->sps->getUseMTS() && tu.cu->sbtInfo != 0 && tu.blocks[ compID ].width <= 32 && tu.blocks[ compID ].height <= 32 && compID == COMPONENT_Y )
   {
     maxLastPosX = ( tu.blocks[compID].width  == 32 ) ? g_uiGroupIdx[ 15 ] : maxLastPosX;
     maxLastPosY = ( tu.blocks[compID].height == 32 ) ? g_uiGroupIdx[ 15 ] : maxLastPosY;
@@ -2488,15 +2902,12 @@ void CABACWriter::residual_coding_subblock( CoeffCodingContext& cctx, const TCoe
 
   //===== encode absolute values =====
   const int inferSigPos   = nextSigPos != cctx.scanPosLast() ? ( cctx.isNotFirst() ? minSubPos : -1 ) : nextSigPos;
-#if HEVC_USE_SIGN_HIDING
   int       firstNZPos    = nextSigPos;
   int       lastNZPos     = -1;
-#endif
   int       remAbsLevel   = -1;
   int       numNonZero    =  0;
   unsigned  signPattern   =  0;
-  bool      is2x2subblock = ( cctx.log2CGSize() == 2 );
-  int       remRegBins    = ( is2x2subblock ? MAX_NUM_REG_BINS_2x2SUBBLOCK : MAX_NUM_REG_BINS_4x4SUBBLOCK );
+  int       remRegBins    = cctx.regBinLimit;
   int       firstPosMode2 = minSubPos - 1;
 
   for( ; nextSigPos >= minSubPos && remRegBins >= 4; nextSigPos-- )
@@ -2510,16 +2921,18 @@ void CABACWriter::residual_coding_subblock( CoeffCodingContext& cctx, const TCoe
       DTRACE( g_trace_ctx, D_SYNTAX_RESI, "sig_bin() bin=%d ctx=%d\n", sigFlag, sigCtxId );
       remRegBins--;
     }
+    else if( nextSigPos != cctx.scanPosLast() )
+    {
+      cctx.sigCtxIdAbs( nextSigPos, coeff, state ); // required for setting variables that are needed for gtx/par context selection
+    }
 
     if( sigFlag )
     {
       uint8_t&  ctxOff  = ctxOffset[ nextSigPos - minSubPos ];
       ctxOff            = cctx.ctxOffsetAbs();
       numNonZero++;
-#if HEVC_USE_SIGN_HIDING
       firstNZPos  = nextSigPos;
       lastNZPos   = std::max<int>( lastNZPos, nextSigPos );
-#endif
       remAbsLevel = abs( Coeff ) - 1;
 
       if( nextSigPos != cctx.scanPosLast() ) signPattern <<= 1;
@@ -2548,23 +2961,21 @@ void CABACWriter::residual_coding_subblock( CoeffCodingContext& cctx, const TCoe
     state = ( stateTransTable >> ((state<<2)+((Coeff&1)<<1)) ) & 3;
   }
   firstPosMode2 = nextSigPos;
-
+  cctx.regBinLimit = remRegBins;
 
 
   //===== 2nd PASS: Go-rice codes =====
   unsigned ricePar = 0;
   for( int scanPos = firstSigPos; scanPos > firstPosMode2; scanPos-- )
   {
+    int       sumAll = cctx.templateAbsSum(scanPos, coeff, 4);
+    ricePar = g_auiGoRiceParsCoeff[sumAll];
     unsigned absLevel = abs( coeff[ cctx.blockPos( scanPos ) ] );
     if( absLevel >= 4 )
     {
       unsigned rem      = ( absLevel - 4 ) >> 1;
-      m_BinEncoder.encodeRemAbsEP( rem, ricePar, cctx.extPrec(), cctx.maxLog2TrDRange() );
+      m_BinEncoder.encodeRemAbsEP( rem, ricePar, COEF_REMAIN_BIN_REDUCTION, cctx.maxLog2TrDRange() );
       DTRACE( g_trace_ctx, D_SYNTAX_RESI, "rem_val() bin=%d ctx=%d\n", rem, ricePar );
-      if( ricePar < 3 && rem > (3<<ricePar)-1 )
-      {
-        ricePar++;
-      }
     }
   }
 
@@ -2573,26 +2984,24 @@ void CABACWriter::residual_coding_subblock( CoeffCodingContext& cctx, const TCoe
   {
     TCoeff    Coeff     = coeff[ cctx.blockPos( scanPos ) ];
     unsigned  absLevel  = abs( Coeff );
-    int       sumAll    = cctx.templateAbsSum(scanPos, coeff);
+    int       sumAll = cctx.templateAbsSum(scanPos, coeff, 0);
     int       rice      = g_auiGoRiceParsCoeff                        [sumAll];
-    int       pos0      = g_auiGoRicePosCoeff0[std::max(0, state - 1)][sumAll];
+    int       pos0      = g_auiGoRicePosCoeff0(state, rice);
     unsigned  rem       = ( absLevel == 0 ? pos0 : absLevel <= pos0 ? absLevel-1 : absLevel );
-    m_BinEncoder.encodeRemAbsEP( rem, rice, cctx.extPrec(), cctx.maxLog2TrDRange() );
+    m_BinEncoder.encodeRemAbsEP( rem, rice, COEF_REMAIN_BIN_REDUCTION, cctx.maxLog2TrDRange() );
     DTRACE( g_trace_ctx, D_SYNTAX_RESI, "rem_val() bin=%d ctx=%d\n", rem, rice );
     state = ( stateTransTable >> ((state<<2)+((absLevel&1)<<1)) ) & 3;
     if( absLevel )
     {
       numNonZero++;
-#if HEVC_USE_SIGN_HIDING
+      firstNZPos = scanPos;
       lastNZPos   = std::max<int>( lastNZPos, scanPos );
-#endif
       signPattern <<= 1;
       if( Coeff < 0 ) signPattern++;
     }
   }
 
   //===== encode sign's =====
-#if HEVC_USE_SIGN_HIDING
   unsigned numSigns = numNonZero;
   if( cctx.hideSign( firstNZPos, lastNZPos ) )
   {
@@ -2600,11 +3009,155 @@ void CABACWriter::residual_coding_subblock( CoeffCodingContext& cctx, const TCoe
     signPattern >>= 1;
   }
   m_BinEncoder.encodeBinsEP( signPattern, numSigns );
-#else
-  m_BinEncoder.encodeBinsEP( signPattern, numNonZero );
-#endif
 }
 
+void CABACWriter::residual_codingTS( const TransformUnit& tu, ComponentID compID ) // codes all coefficients of one component subblock by subblock, in increasing subblock order
+{
+  DTRACE( g_trace_ctx, D_SYNTAX, "residual_codingTS() etype=%d pos=(%d,%d) size=%dx%d\n", tu.blocks[compID].compID, tu.blocks[compID].x, tu.blocks[compID].y, tu.blocks[compID].width, tu.blocks[compID].height );
+
+  // init coeff coding context (sign hiding off; BDPCM mode taken from the luma or chroma field of the CU)
+  CoeffCodingContext  cctx    ( tu, compID, false, isLuma(compID) ? tu.cu->bdpcmMode : tu.cu->bdpcmModeChroma);
+  const TCoeff*       coeff   = tu.getCoeffs( compID ).buf;
+  int maxCtxBins = (cctx.maxNumCoeff() * 7) >> 2; // context-coded bin budget for the block: 7/4 of the coefficient count, rounded down
+  cctx.setNumCtxBins(maxCtxBins);
+
+  // determine the significance flag of every coefficient group (no last position is derived here)
+  std::bitset<MLS_GRP_NUM> sigGroupFlags;
+  for( int scanPos = 0; scanPos < cctx.maxNumCoeff(); scanPos++)
+  {
+    unsigned blkPos = cctx.blockPos( scanPos );
+    if( coeff[blkPos] )
+    {
+      sigGroupFlags.set( scanPos >> cctx.log2CGSize() ); // mark the group that contains this non-zero coefficient
+    }
+  }
+
+  // code subblocks, from the first group up to the group holding the last scan position
+  for( int subSetId = 0; subSetId <= ( cctx.maxNumCoeff() - 1 ) >> cctx.log2CGSize(); subSetId++ )
+  {
+    cctx.initSubblock         ( subSetId, sigGroupFlags[subSetId] );
+    residual_coding_subblockTS( cctx, coeff );
+  }
+}
+
+void CABACWriter::residual_coding_subblockTS( CoeffCodingContext& cctx, const TCoeff* coeff ) // codes one subblock in three forward passes: context-coded flags, greater-than flags, bypass remainders
+{
+  //===== init =====
+  const int   minSubPos   = cctx.maxSubPos(); // NOTE: despite the name this is the LAST scan position of the subblock; all loops below run upwards
+  int         firstSigPos = cctx.minSubPos(); // first scan position of the subblock
+  int         nextSigPos  = firstSigPos;
+
+  //===== encode significant_coeffgroup_flag =====
+  if( !cctx.isLastSubSet() || !cctx.only1stSigGroup() )
+  {
+    if( cctx.isSigGroup() )
+    {
+        m_BinEncoder.encodeBin( 1, cctx.sigGroupCtxId( true ) );
+        DTRACE( g_trace_ctx, D_SYNTAX_RESI, "ts_sigGroup() bin=%d ctx=%d\n", 1, cctx.sigGroupCtxId() );
+    }
+    else
+    {
+        m_BinEncoder.encodeBin( 0, cctx.sigGroupCtxId( true ) );
+        DTRACE( g_trace_ctx, D_SYNTAX_RESI, "ts_sigGroup() bin=%d ctx=%d\n", 0, cctx.sigGroupCtxId() );
+      return; // group flagged as empty: nothing else is coded for this subblock
+    }
+  }
+
+  //===== encode absolute values =====
+  const int inferSigPos   = minSubPos; // the sig flag at the last position is skipped while no non-zero coefficient has been seen
+  int       remAbsLevel   = -1;
+  int       numNonZero    =  0;
+
+  int rightPixel, belowPixel, modAbsCoeff; // rightPixel/belowPixel are filled by cctx.neighTS()
+
+  int lastScanPosPass1 = -1; // last position handled by pass 1 (-1: none)
+  int lastScanPosPass2 = -1; // last position handled by pass 2 (-1: none)
+  for (; nextSigPos <= minSubPos && cctx.numCtxBins() >= 4; nextSigPos++) // pass 1: runs while at least 4 context-coded bins remain in the budget
+  {
+    TCoeff    Coeff      = coeff[ cctx.blockPos( nextSigPos ) ];
+    unsigned  sigFlag    = ( Coeff != 0 );
+    if( numNonZero || nextSigPos != inferSigPos )
+    {
+        const unsigned sigCtxId = cctx.sigCtxIdAbsTS( nextSigPos, coeff );
+        m_BinEncoder.encodeBin( sigFlag, sigCtxId );
+        DTRACE( g_trace_ctx, D_SYNTAX_RESI, "ts_sig_bin() bin=%d ctx=%d\n", sigFlag, sigCtxId );
+        cctx.decimateNumCtxBins(1); // every context-coded bin is charged against the budget
+    }
+
+    if( sigFlag )
+    {
+      //===== encode sign's =====
+      int sign = Coeff < 0;
+        const unsigned signCtxId = cctx.signCtxIdAbsTS(nextSigPos, coeff, cctx.bdpcm());
+        m_BinEncoder.encodeBin(sign, signCtxId); // in this pass the sign is context coded
+        cctx.decimateNumCtxBins(1);
+      numNonZero++;
+      cctx.neighTS(rightPixel, belowPixel, nextSigPos, coeff);
+      modAbsCoeff = cctx.deriveModCoeff(rightPixel, belowPixel, abs(Coeff), cctx.bdpcm()); // level as remapped by deriveModCoeff() from the two neighbours
+      remAbsLevel = modAbsCoeff - 1;
+
+      unsigned gt1 = !!remAbsLevel; // 1 when the remapped level exceeds 1
+      const unsigned gt1CtxId = cctx.lrg1CtxIdAbsTS(nextSigPos, coeff, cctx.bdpcm());
+        m_BinEncoder.encodeBin(gt1, gt1CtxId);
+        DTRACE(g_trace_ctx, D_SYNTAX_RESI, "ts_gt1_flag() bin=%d ctx=%d\n", gt1, gt1CtxId);
+        cctx.decimateNumCtxBins(1);
+
+      if( gt1 )
+      {
+        remAbsLevel  -= 1;
+          m_BinEncoder.encodeBin( remAbsLevel&1, cctx.parityCtxIdAbsTS() ); // parity of (remapped level - 2)
+          DTRACE( g_trace_ctx, D_SYNTAX_RESI, "ts_par_flag() bin=%d ctx=%d\n", remAbsLevel&1, cctx.parityCtxIdAbsTS() );
+          cctx.decimateNumCtxBins(1);
+      }
+    }
+    lastScanPosPass1 = nextSigPos;
+  }
+
+  int cutoffVal = 2;
+  int numGtBins = 4; // number of greater-than thresholds tested per position in pass 2
+  for (int scanPos = firstSigPos; scanPos <= minSubPos && cctx.numCtxBins() >= 4; scanPos++) // pass 2: same budget condition as pass 1
+  {
+    unsigned absLevel;
+    cctx.neighTS(rightPixel, belowPixel, scanPos, coeff);
+    absLevel = cctx.deriveModCoeff(rightPixel, belowPixel, abs(coeff[cctx.blockPos(scanPos)]), cctx.bdpcm());
+    cutoffVal = 2;
+    for (int i = 0; i < numGtBins; i++) // thresholds 2, 4, 6, 8; a flag is written only while the level reaches the threshold
+    {
+      if (absLevel >= cutoffVal)
+      {
+        unsigned gt2 = (absLevel >= (cutoffVal + 2));
+          m_BinEncoder.encodeBin(gt2, cctx.greaterXCtxIdAbsTS(cutoffVal >> 1));
+          DTRACE(g_trace_ctx, D_SYNTAX_RESI, "ts_gt%d_flag() bin=%d ctx=%d sp=%d coeff=%d\n", i, gt2, cctx.greaterXCtxIdAbsTS(cutoffVal >> 1), scanPos, min<int>(absLevel, cutoffVal + 2));
+          cctx.decimateNumCtxBins(1);
+      }
+      cutoffVal += 2;
+    }
+    lastScanPosPass2 = scanPos;
+  }
+
+  //===== coeff bypass ====
+  for( int scanPos = firstSigPos; scanPos <= minSubPos; scanPos++ ) // pass 3 always covers the whole subblock
+  {
+    unsigned absLevel;
+    cctx.neighTS(rightPixel, belowPixel, scanPos, coeff);
+    cutoffVal = (scanPos <= lastScanPosPass2 ? 10 : (scanPos <= lastScanPosPass1 ? 2 : 0)); // 10: covered by pass 2; 2: covered by pass 1 only; 0: not covered by any context-coded pass
+    absLevel = cctx.deriveModCoeff(rightPixel, belowPixel, abs(coeff[cctx.blockPos(scanPos)]), cctx.bdpcm()||!cutoffVal);
+
+    if( absLevel >= cutoffVal )
+    {
+      int       rice = cctx.templateAbsSumTS( scanPos, coeff );
+      unsigned  rem = scanPos <= lastScanPosPass1 ? (absLevel - cutoffVal) >> 1 : absLevel; // positions skipped by pass 1 code the full level
+      m_BinEncoder.encodeRemAbsEP( rem, rice, COEF_REMAIN_BIN_REDUCTION, cctx.maxLog2TrDRange() );
+      DTRACE( g_trace_ctx, D_SYNTAX_RESI, "ts_rem_val() bin=%d ctx=%d sp=%d\n", rem, rice, scanPos );
+
+      if (absLevel && scanPos > lastScanPosPass1) // sign was not coded in pass 1, so it is written here as a bypass bin
+      {
+        int sign = coeff[cctx.blockPos(scanPos)] < 0;
+        m_BinEncoder.encodeBinEP(sign);
+      }
+    }
+  }
+}
 
 
 
@@ -2712,13 +3265,12 @@ void CABACWriter::exp_golomb_eqprob( unsigned symbol, unsigned count )
   }
   bins <<= 1;
   numBins++;
-  bins = (bins << count) | symbol;
-  numBins += count;
-  CHECK(!( numBins <= 32 ), "Unspecified error");
-  m_BinEncoder.encodeBinsEP( bins, numBins );
+  //CHECK(!( numBins + count <= 32 ), "Unspecified error");
+  m_BinEncoder.encodeBinsEP(bins, numBins);
+  m_BinEncoder.encodeBinsEP(symbol, count);
 }
 
-void CABACWriter::codeAlfCtuEnableFlags( CodingStructure& cs, ChannelType channel, AlfSliceParam* alfParam)
+void CABACWriter::codeAlfCtuEnableFlags( CodingStructure& cs, ChannelType channel, AlfParam* alfParam)
 {
   if( isLuma( channel ) )
   {
@@ -2733,7 +3285,7 @@ void CABACWriter::codeAlfCtuEnableFlags( CodingStructure& cs, ChannelType channe
       codeAlfCtuEnableFlags( cs, COMPONENT_Cr, alfParam );
   }
 }
-void CABACWriter::codeAlfCtuEnableFlags( CodingStructure& cs, ComponentID compID, AlfSliceParam* alfParam)
+void CABACWriter::codeAlfCtuEnableFlags( CodingStructure& cs, ComponentID compID, AlfParam* alfParam)
 {
   uint32_t numCTUs = cs.pcv->sizeInCtus;
 
@@ -2743,11 +3295,11 @@ void CABACWriter::codeAlfCtuEnableFlags( CodingStructure& cs, ComponentID compID
   }
 }
 
-void CABACWriter::codeAlfCtuEnableFlag( CodingStructure& cs, uint32_t ctuRsAddr, const int compIdx, AlfSliceParam* alfParam)
+void CABACWriter::codeAlfCtuEnableFlag( CodingStructure& cs, uint32_t ctuRsAddr, const int compIdx, AlfParam* alfParam)
 {
-  const AlfSliceParam& alfSliceParam = alfParam ? (*alfParam) : cs.aps->getAlfAPSParam();
+  const bool alfComponentEnabled = (alfParam != NULL) ? alfParam->enabledFlag[compIdx] : cs.slice->getTileGroupAlfEnabledFlag((ComponentID)compIdx);
 
-  if( cs.sps->getALFEnabledFlag() && alfSliceParam.enabledFlag[compIdx] )
+  if( cs.sps->getALFEnabledFlag() && alfComponentEnabled )
   {
     const PreCalcValues& pcv = *cs.pcv;
     int                 frame_width_in_ctus = pcv.widthInCtus;
@@ -2755,27 +3307,165 @@ void CABACWriter::codeAlfCtuEnableFlag( CodingStructure& cs, uint32_t ctuRsAddr,
     int                 rx = ctuRsAddr - ry * frame_width_in_ctus;
     const Position      pos( rx * cs.pcv->maxCUWidth, ry * cs.pcv->maxCUHeight );
     const uint32_t          curSliceIdx = cs.slice->getIndependentSliceIdx();
-#if HEVC_TILES_WPP
-    const uint32_t          curTileIdx = cs.picture->tileMap->getTileIdxMap( pos );
-    bool                leftAvail = cs.getCURestricted( pos.offset( -(int)pcv.maxCUWidth, 0 ), curSliceIdx, curTileIdx, CH_L ) ? true : false;
-    bool                aboveAvail = cs.getCURestricted( pos.offset( 0, -(int)pcv.maxCUHeight ), curSliceIdx, curTileIdx, CH_L ) ? true : false;
-#else
-    bool                leftAvail = cs.getCURestricted( pos.offset( -(int)pcv.maxCUWidth, 0 ), curSliceIdx, CH_L ) ? true : false;
-    bool                aboveAvail = cs.getCURestricted( pos.offset( 0, -(int)pcv.maxCUHeight ), curSliceIdx, CH_L ) ? true : false;
-#endif
+    const uint32_t      curTileIdx = cs.pps->getTileIdx( pos );
+    bool                leftAvail = cs.getCURestricted( pos.offset( -(int)pcv.maxCUWidth, 0 ), pos, curSliceIdx, curTileIdx, CH_L ) ? true : false;
+    bool                aboveAvail = cs.getCURestricted( pos.offset( 0, -(int)pcv.maxCUHeight ), pos, curSliceIdx, curTileIdx, CH_L ) ? true : false;
 
     int leftCTUAddr = leftAvail ? ctuRsAddr - 1 : -1;
     int aboveCTUAddr = aboveAvail ? ctuRsAddr - frame_width_in_ctus : -1;
 
-    if( alfSliceParam.enabledFlag[compIdx] )
+    uint8_t* ctbAlfFlag = cs.slice->getPic()->getAlfCtuEnableFlag( compIdx );
+    int ctx = 0;
+    ctx += leftCTUAddr > -1 ? ( ctbAlfFlag[leftCTUAddr] ? 1 : 0 ) : 0;
+    ctx += aboveCTUAddr > -1 ? ( ctbAlfFlag[aboveCTUAddr] ? 1 : 0 ) : 0;
+    m_BinEncoder.encodeBin( ctbAlfFlag[ctuRsAddr], Ctx::ctbAlfFlag( compIdx * 3 + ctx ) );
+  }
+}
+
+// One context-coded bin selects the branch: symbols up to unary_max go through
+// unary_max_eqprob(), larger ones are written as (symbol - unary_max - 1) in 'fixed' bypass bins.
+void CABACWriter::code_unary_fixed( unsigned symbol, unsigned ctxId, unsigned unary_max, unsigned fixed )
+{
+  const bool inUnaryRange = !( symbol > unary_max );
+  m_BinEncoder.encodeBin( inUnaryRange, ctxId );
+  if( !inUnaryRange )
+  {
+    m_BinEncoder.encodeBinsEP( symbol - unary_max - 1, fixed );
+    return;
+  }
+  unary_max_eqprob( symbol, unary_max );
+}
+
+// Codes cu.mipFlag as one context-coded bin. Nothing is written when the CU
+// has no valid luma block or when the SPS has MIP switched off.
+void CABACWriter::mip_flag( const CodingUnit& cu )
+{
+  const bool lumaPresent = cu.Y().valid();
+  if( !lumaPresent || !cu.cs->sps->getUseMIP() )
+  {
+    return;
+  }
+
+  // The context index is derived from the CU by DeriveCtx::CtxMipFlag.
+  const unsigned mipCtx = DeriveCtx::CtxMipFlag( cu );
+  m_BinEncoder.encodeBin( cu.mipFlag, Ctx::MipFlag( mipCtx ) );
+  DTRACE( g_trace_ctx, D_SYNTAX, "mip_flag() pos=(%d,%d) mode=%d\n", cu.lumaPos().x, cu.lumaPos().y, cu.mipFlag ? 1 : 0 );
+}
+
+// Writes the MIP mode of each PU in the CU; nothing is written without a valid luma block.
+void CABACWriter::mip_pred_modes( const CodingUnit& cu )
+{
+  if( cu.Y().valid() )
+  {
+    for( const auto& curPu : CU::traversePUs( cu ) )
+    {
+      mip_pred_mode( curPu );
+    }
+  }
+}
+
+// Writes the MIP transposed flag as a bypass bin, then the luma mode as a truncated binary code.
+void CABACWriter::mip_pred_mode( const PredictionUnit& pu )
+{
+  const auto lumaMode = pu.intraDir[CHANNEL_TYPE_LUMA];
+  m_BinEncoder.encodeBinEP( (pu.mipTransposedFlag ? 1 : 0) );
+  const int modeCount = getNumModesMip( pu.Y() );
+  CHECKD( lumaMode < 0 || lumaMode >= modeCount, "Invalid MIP mode" );
+  xWriteTruncBinCode( lumaMode, modeCount );
+  DTRACE( g_trace_ctx, D_SYNTAX, "mip_pred_mode() pos=(%d,%d) mode=%d transposed=%d\n", pu.lumaPos().x, pu.lumaPos().y, lumaMode, pu.mipTransposedFlag ? 1 : 0 );
+}
+
+void CABACWriter::codeAlfCtuFilterIndex(CodingStructure& cs, uint32_t ctuRsAddr, bool alfEnableLuma) // writes the luma ALF filter-set index of one CTU
+{
+  if ( (!cs.sps->getALFEnabledFlag()) || (!alfEnableLuma)) // nothing to code when ALF is off in the SPS or luma ALF is not enabled
+  {
+    return;
+  }
+
+  uint8_t* ctbAlfFlag = cs.slice->getPic()->getAlfCtuEnableFlag(COMPONENT_Y);
+  if (!ctbAlfFlag[ctuRsAddr]) // luma ALF is switched off for this CTU
+  {
+    return;
+  }
+
+  short* alfCtbFilterIndex = cs.slice->getPic()->getAlfCtbFilterIndex();
+  const unsigned filterSetIdx = alfCtbFilterIndex[ctuRsAddr];
+  unsigned numAps = cs.slice->getTileGroupNumAps();
+  unsigned numAvailableFiltSets = numAps + NUM_FIXED_FILTER_SETS; // fixed sets occupy the low indices, APS sets follow
+  if (numAvailableFiltSets > NUM_FIXED_FILTER_SETS) // true exactly when numAps > 0
+  {
+    int useTemporalFilt = (filterSetIdx >= NUM_FIXED_FILTER_SETS) ? 1 : 0; // 1 when the index selects an APS set
+    m_BinEncoder.encodeBin(useTemporalFilt, Ctx::AlfUseTemporalFilt());
+    if (useTemporalFilt)
+    {
+      CHECK((filterSetIdx - NUM_FIXED_FILTER_SETS) >= (numAvailableFiltSets - NUM_FIXED_FILTER_SETS), "temporal non-latest set");
+      if (numAps > 1) // with a single APS no index is written
+      {
+        xWriteTruncBinCode(filterSetIdx - NUM_FIXED_FILTER_SETS, numAvailableFiltSets - NUM_FIXED_FILTER_SETS);
+      }
+    }
+    else
+    {
+      CHECK(filterSetIdx >= NUM_FIXED_FILTER_SETS, "fixed set larger than temporal");
+      xWriteTruncBinCode(filterSetIdx, NUM_FIXED_FILTER_SETS); // index among the fixed sets
+    }
+  }
+  else
+  {
+    CHECK(filterSetIdx >= NUM_FIXED_FILTER_SETS, "fixed set numavail < num_fixed");
+    xWriteTruncBinCode(filterSetIdx, NUM_FIXED_FILTER_SETS); // only fixed sets exist, so no selector bin precedes the index
+  }
+}
+// Codes the chroma filter alternatives of all CTUs; enable flags come from alfParam, or from the slice when alfParam is null.
+void CABACWriter::codeAlfCtuAlternatives( CodingStructure& cs, ChannelType channel, AlfParam* alfParam)
+{
+  if( !isChroma( channel ) ) return; // only the chroma channel is handled here
+  const bool cbOn = alfParam ? alfParam->enabledFlag[COMPONENT_Cb] : cs.slice->getTileGroupAlfEnabledFlag( COMPONENT_Cb );
+  const bool crOn = alfParam ? alfParam->enabledFlag[COMPONENT_Cr] : cs.slice->getTileGroupAlfEnabledFlag( COMPONENT_Cr );
+  // A null alfParam used to be dereferenced here although the per-CTU writer accepts it.
+  if( cbOn ) codeAlfCtuAlternatives( cs, COMPONENT_Cb, alfParam );
+  if( crOn ) codeAlfCtuAlternatives( cs, COMPONENT_Cr, alfParam );
+}
+// Codes the filter alternative of every CTU whose ALF enable flag is set for compID; luma is ignored.
+void CABACWriter::codeAlfCtuAlternatives( CodingStructure& cs, ComponentID compID, AlfParam* alfParam)
+{
+  if( compID == COMPONENT_Y )
+    return;
+  const uint32_t numCTUs    = cs.pcv->sizeInCtus;
+  const uint8_t* ctbAlfFlag = cs.slice->getPic()->getAlfCtuEnableFlag( compID );
+  for( uint32_t ctuIdx = 0; ctuIdx < numCTUs; ctuIdx++ ) // unsigned index: matches numCTUs and the callee's uint32_t address
+  {
+    if( ctbAlfFlag[ctuIdx] )
+    {
+      codeAlfCtuAlternative( cs, ctuIdx, compID, alfParam );
+    }
+  }
+}
+
+void CABACWriter::codeAlfCtuAlternative( CodingStructure& cs, uint32_t ctuRsAddr, const int compIdx, const AlfParam* alfParam) // writes the chroma filter alternative of one CTU as a truncated unary code
+{
+  if( compIdx == COMPONENT_Y ) // luma is ignored
+    return;
+  int apsIdx = alfParam ? 0 : cs.slice->getTileGroupApsIdChroma(); // only used when alfParam is null
+  const AlfParam& alfParamRef = alfParam ? (*alfParam) : cs.slice->getAlfAPSs()[apsIdx]->getAlfAPSParam(); // NOTE(review): the APS pointer is dereferenced before the enable check below - confirm it is never null when chroma ALF is off
+
+  if( alfParam || (cs.sps->getALFEnabledFlag() && cs.slice->getTileGroupAlfEnabledFlag( (ComponentID)compIdx )) ) // an explicit alfParam bypasses the SPS/slice enable checks
+  {
+    uint8_t* ctbAlfFlag = cs.slice->getPic()->getAlfCtuEnableFlag( compIdx );
+
+    if( ctbAlfFlag[ctuRsAddr] ) // alternatives are only coded for CTUs with ALF enabled
     {
-      uint8_t* ctbAlfFlag = cs.slice->getPic()->getAlfCtuEnableFlag( compIdx );
-      int ctx = 0;
-      ctx += leftCTUAddr > -1 ? ( ctbAlfFlag[leftCTUAddr] ? 1 : 0 ) : 0;
-      ctx += aboveCTUAddr > -1 ? ( ctbAlfFlag[aboveCTUAddr] ? 1 : 0 ) : 0;
-      m_BinEncoder.encodeBin( ctbAlfFlag[ctuRsAddr], Ctx::ctbAlfFlag( compIdx * 3 + ctx ) );
+      const int numAlts = alfParamRef.numAlternativesChroma;
+      uint8_t* ctbAlfAlternative = cs.slice->getPic()->getAlfCtuAlternativeData( compIdx );
+      unsigned numOnes = ctbAlfAlternative[ctuRsAddr]; // the alternative index equals the number of leading 1 bins
+      assert( ctbAlfAlternative[ctuRsAddr] < numAlts );
+      for( int i = 0; i < numOnes; ++i )
+        m_BinEncoder.encodeBin( 1, Ctx::ctbAlfAlternative( compIdx-1 ) ); // context index is compIdx-1, so Cb and Cr use separate contexts
+      if( numOnes < numAlts-1 ) // the terminating 0 is omitted for the last alternative
+        m_BinEncoder.encodeBin( 0, Ctx::ctbAlfAlternative( compIdx-1 ) );
     }
   }
 }
 
+
 //! \}
diff --git a/source/Lib/EncoderLib/CABACWriter.h b/source/Lib/EncoderLib/CABACWriter.h
index bebfb8af099a70fe67191e6524c9207ac279cca7..940fa4c22be37adc3cf9c970d17187ee34b4dca9 100644
--- a/source/Lib/EncoderLib/CABACWriter.h
+++ b/source/Lib/EncoderLib/CABACWriter.h
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2019, ITU/ISO/IEC
+* Copyright (c) 2010-2020, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -74,7 +74,7 @@ public:
   void        end_of_slice              ();
 
   // coding tree unit (clause 7.3.8.2)
-  void        coding_tree_unit          (       CodingStructure&        cs,       const UnitArea&   area,       int (&qps)[2],  unsigned ctuRsAddr,  bool skipSao = false );
+  void        coding_tree_unit          (       CodingStructure&        cs,       const UnitArea&   area,       int (&qps)[2],  unsigned ctuRsAddr,  bool skipSao = false, bool skipAlf = false );
 
   // sao (clause 7.3.8.3)
   void        sao                       ( const Slice&                  slice,    unsigned          ctuRsAddr );
@@ -83,16 +83,16 @@ public:
   // coding (quad)tree (clause 7.3.8.4)
   void        coding_tree               ( const CodingStructure&        cs,       Partitioner&      pm,         CUCtx& cuCtx, Partitioner* pPartitionerChroma = nullptr, CUCtx* pCuCtxChroma = nullptr);
   void        split_cu_mode             ( const PartSplit               split,    const CodingStructure& cs,    Partitioner& pm );
+  void        mode_constraint           ( const PartSplit               split,    const CodingStructure& cs,    Partitioner& pm,    const ModeType modeType );
 
   // coding unit (clause 7.3.8.5)
   void        coding_unit               ( const CodingUnit&             cu,       Partitioner&      pm,         CUCtx& cuCtx );
-  void        cu_transquant_bypass_flag ( const CodingUnit&             cu );
   void        cu_skip_flag              ( const CodingUnit&             cu );
   void        pred_mode                 ( const CodingUnit&             cu );
-  void        pcm_data                  ( const CodingUnit&             cu,       Partitioner&      pm );
-  void        pcm_flag                  ( const CodingUnit&             cu,       Partitioner&      pm );
+  void        bdpcm_mode                ( const CodingUnit&             cu,       const ComponentID compID );
+
   void        cu_pred_data              ( const CodingUnit&             cu );
-  void        cu_gbi_flag               ( const CodingUnit&             cu );
+  void        cu_bcw_flag               ( const CodingUnit&             cu );
   void        extend_ref_line           (const PredictionUnit&          pu );
   void        extend_ref_line           (const CodingUnit&              cu );
   void        intra_luma_pred_modes     ( const CodingUnit&             cu );
@@ -102,12 +102,19 @@ public:
   void        intra_chroma_pred_mode    ( const PredictionUnit&         pu );
   void        cu_residual               ( const CodingUnit&             cu,       Partitioner&      pm,         CUCtx& cuCtx );
   void        rqt_root_cbf              ( const CodingUnit&             cu );
+  void        adaptive_color_transform(const CodingUnit&             cu);
   void        sbt_mode                  ( const CodingUnit&             cu );
   void        end_of_ctu                ( const CodingUnit&             cu,       CUCtx&            cuCtx );
-
+  void        mip_flag                  ( const CodingUnit&             cu );
+  void        mip_pred_modes            ( const CodingUnit&             cu );
+  void        mip_pred_mode             ( const PredictionUnit&         pu );
+  void        cu_palette_info           ( const CodingUnit&             cu,       ComponentID       compBegin,     uint32_t numComp,          CUCtx&       cuCtx);
+  void        cuPaletteSubblockInfo     ( const CodingUnit&             cu,       ComponentID       compBegin,     uint32_t numComp,          int subSetId,               uint32_t& prevRunPos,        unsigned& prevRunType );
+  Pel         writePLTIndex             ( const CodingUnit&             cu,       uint32_t          idx,           PelBuf&  paletteIdx,       PLTtypeBuf&  paletteRunType, int         maxSymbol,   ComponentID compBegin );
   // prediction unit (clause 7.3.8.6)
   void        prediction_unit           ( const PredictionUnit&         pu );
   void        merge_flag                ( const PredictionUnit&         pu );
+  void        merge_data                ( const PredictionUnit&         pu );
   void        affine_flag               ( const CodingUnit&             cu );
   void        subblock_merge_flag       ( const CodingUnit&             cu );
   void        merge_idx                 ( const PredictionUnit&         pu );
@@ -118,55 +125,64 @@ public:
   void        ref_idx                   ( const PredictionUnit&         pu,       RefPicList        eRefList );
   void        mvp_flag                  ( const PredictionUnit&         pu,       RefPicList        eRefList );
 
-  void        MHIntra_flag              ( const PredictionUnit&         pu );
-  void        MHIntra_luma_pred_modes   ( const CodingUnit&             cu );
-  void        triangle_mode             ( const CodingUnit&             cu );
+  void        Ciip_flag              ( const PredictionUnit&         pu );
   void        smvd_mode              ( const PredictionUnit&         pu );
 
-  // pcm samples (clause 7.3.8.7)
-  void        pcm_samples               ( const TransformUnit&          tu );
 
   // transform tree (clause 7.3.8.8)
-  void        transform_tree            ( const CodingStructure&        cs,       Partitioner&      pm,     CUCtx& cuCtx, ChromaCbfs& chromaCbfs, const PartSplit ispType = TU_NO_ISP, const int subTuIdx = -1 );
-  void        cbf_comp                  ( const CodingStructure&        cs,       bool              cbf,    const CompArea& area, unsigned depth, const bool prevCbCbf = false, const bool useISP = false );
+  void        transform_tree            ( const CodingStructure&        cs,       Partitioner&      pm,     CUCtx& cuCtx,                         const PartSplit ispType = TU_NO_ISP, const int subTuIdx = -1 );
+  void        cbf_comp                  ( const CodingStructure&        cs,       bool              cbf,    const CompArea& area, unsigned depth, const bool prevCbf = false, const bool useISP = false );
 
   // mvd coding (clause 7.3.8.9)
   void        mvd_coding                ( const Mv &rMvd, int8_t imv );
   // transform unit (clause 7.3.8.10)
-  void        transform_unit            ( const TransformUnit&          tu,       CUCtx&            cuCtx,  ChromaCbfs& chromaCbfs );
+  void        transform_unit            ( const TransformUnit&          tu,       CUCtx&            cuCtx,  Partitioner& pm,       const int subTuCounter = -1 );
   void        cu_qp_delta               ( const CodingUnit&             cu,       int               predQP, const int8_t qp );
   void        cu_chroma_qp_offset       ( const CodingUnit&             cu );
 
   // residual coding (clause 7.3.8.11)
-  void        residual_coding           ( const TransformUnit&          tu,       ComponentID       compID );
-  void        mts_coding                ( const TransformUnit&          tu,       ComponentID       compID );
+  void        residual_coding           ( const TransformUnit&          tu,       ComponentID       compID, CUCtx* cuCtx = nullptr );
+  void        ts_flag                   ( const TransformUnit&          tu,       ComponentID       compID );
+  void        mts_idx                   ( const CodingUnit&             cu,       CUCtx*            cuCtx  );
+  void        residual_lfnst_mode       ( const CodingUnit&             cu,       CUCtx&            cuCtx );
   void        isp_mode                  ( const CodingUnit&             cu );
   void        explicit_rdpcm_mode       ( const TransformUnit&          tu,       ComponentID       compID );
   void        last_sig_coeff            ( CoeffCodingContext&           cctx,     const TransformUnit& tu, ComponentID       compID );
   void        residual_coding_subblock  ( CoeffCodingContext&           cctx,     const TCoeff*     coeff, const int stateTransTable, int& state );
+  void        residual_codingTS         ( const TransformUnit&          tu,       ComponentID       compID );
+  void        residual_coding_subblockTS( CoeffCodingContext&           cctx,     const TCoeff*     coeff  );
+  void        joint_cb_cr               ( const TransformUnit&          tu,       const int cbfMask );
 
   // cross component prediction (clause 7.3.8.12)
   void        cross_comp_pred           ( const TransformUnit&          tu,       ComponentID       compID );
 
-  void        codeAlfCtuEnableFlags     ( CodingStructure& cs, ChannelType channel, AlfSliceParam* alfParam);
-  void        codeAlfCtuEnableFlags     ( CodingStructure& cs, ComponentID compID, AlfSliceParam* alfParam);
-  void        codeAlfCtuEnableFlag      ( CodingStructure& cs, uint32_t ctuRsAddr, const int compIdx, AlfSliceParam* alfParam = NULL );
+  void        codeAlfCtuEnableFlags     ( CodingStructure& cs, ChannelType channel, AlfParam* alfParam);
+  void        codeAlfCtuEnableFlags     ( CodingStructure& cs, ComponentID compID, AlfParam* alfParam);
+  void        codeAlfCtuEnableFlag      ( CodingStructure& cs, uint32_t ctuRsAddr, const int compIdx, AlfParam* alfParam );
+  void        codeAlfCtuFilterIndex(CodingStructure& cs, uint32_t ctuRsAddr, bool alfEnableLuma);
+
+  void        codeAlfCtuAlternatives     ( CodingStructure& cs, ChannelType channel, AlfParam* alfParam);
+  void        codeAlfCtuAlternatives     ( CodingStructure& cs, ComponentID compID, AlfParam* alfParam);
+  void        codeAlfCtuAlternative      ( CodingStructure& cs, uint32_t ctuRsAddr, const int compIdx, const AlfParam* alfParam = NULL );
 
 private:
   void        unary_max_symbol          ( unsigned symbol, unsigned ctxId0, unsigned ctxIdN, unsigned maxSymbol );
   void        unary_max_eqprob          ( unsigned symbol,                                   unsigned maxSymbol );
   void        exp_golomb_eqprob         ( unsigned symbol, unsigned count );
+  void        code_unary_fixed          ( unsigned symbol, unsigned ctxId, unsigned unary_max, unsigned fixed );
 
   // statistic
   unsigned    get_num_written_bits()    { return m_BinEncoder.getNumWrittenBits(); }
 
   void  xWriteTruncBinCode(uint32_t uiSymbol, uint32_t uiMaxSymbol);
-
+  void        codeScanRotationModeFlag   ( const CodingUnit& cu,     ComponentID compBegin);
+  void        xEncodePLTPredIndicator    ( const CodingUnit& cu,     uint32_t    maxPltSize, ComponentID compBegin);
 private:
   BinEncIf&         m_BinEncoder;
   OutputBitstream*  m_Bitstream;
   Ctx               m_TestCtx;
   EncCu*            m_EncCu;
+  ScanElement*      m_scanOrder;
 };
 
 
diff --git a/source/Lib/EncoderLib/CMakeLists.txt b/source/Lib/EncoderLib/CMakeLists.txt
index 89286b308f417c0aa6795129e38fa0612deba306..2a50346f2c45c4a57d7c73788df086a19645d7f0 100644
--- a/source/Lib/EncoderLib/CMakeLists.txt
+++ b/source/Lib/EncoderLib/CMakeLists.txt
@@ -20,6 +20,10 @@ if( EXTENSION_360_VIDEO )
   target_compile_definitions( ${LIB_NAME} PUBLIC EXTENSION_360_VIDEO=1 )
 endif()
 
+if( EXTENSION_HDRTOOLS )
+  target_compile_definitions( ${LIB_NAME} PUBLIC EXTENSION_HDRTOOLS=1 )
+endif()
+
 if( SET_ENABLE_TRACING )
   if( ENABLE_TRACING )
     target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_TRACING=1 )
diff --git a/source/Lib/EncoderLib/EncAdaptiveLoopFilter.cpp b/source/Lib/EncoderLib/EncAdaptiveLoopFilter.cpp
index 8259f17589ceca365198962e31c3c931e6ba0c66..6de3317a96841f45a070bbe482aac24164d9fed8 100644
--- a/source/Lib/EncoderLib/EncAdaptiveLoopFilter.cpp
+++ b/source/Lib/EncoderLib/EncAdaptiveLoopFilter.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -39,11 +39,370 @@
 #include "CommonLib/Picture.h"
 #include "CommonLib/CodingStructure.h"
 
-#define AlfCtx(c) SubCtx( Ctx::ctbAlfFlag, c )
+#define AlfCtx(c) SubCtx( Ctx::Alf, c)
 std::vector<double> EncAdaptiveLoopFilter::m_lumaLevelToWeightPLUT;
 
-EncAdaptiveLoopFilter::EncAdaptiveLoopFilter()
+void AlfCovariance::getClipMax(const AlfFilterShape& alfShape, int *clip_max) const
+{ // Writes clip_max[0..numCoeff-1]; alfShape is not read in this body.
+  for( int k = 0; k < numCoeff-1; ++k )
+  {
+    clip_max[k] = 0;
+    // Raise clip_max[k] while the next index has the same y[.][k] and the same E[.][0][k][l] for every l.
+    bool inc = true;
+    while( inc && clip_max[k]+1 < numBins && y[clip_max[k]+1][k] == y[clip_max[k]][k] )
+    {
+      for( int l = 0; inc && l < numCoeff; ++l ) // exits at the first l whose E entries differ
+        if( E[clip_max[k]][0][k][l] != E[clip_max[k]+1][0][k][l] )
+        {
+          inc = false;
+        }
+      if( inc )
+      {
+        ++clip_max[k];
+      }
+    }
+  }
+  clip_max[numCoeff-1] = 0; // the last coefficient is never searched
+}
+
+void AlfCovariance::reduceClipCost(const AlfFilterShape& alfShape, int *clip) const
+{ // Lowers entries of clip[] in place while the compared y/E statistics stay equal; alfShape is not read in this body.
+  for( int k = 0; k < numCoeff-1; ++k ) // the last coefficient's clip index is left untouched
+  {
+    bool dec = true;
+    while( dec && clip[k] > 0 && y[clip[k]-1][k] == y[clip[k]][k] )
+    {
+      for( int l = 0; dec && l < numCoeff; ++l ) // compares row k of E at clip[k] and clip[k]-1, using the current clip[l]
+        if( E[clip[k]][clip[l]][k][l] != E[clip[k]-1][clip[l]][k][l] )
+        {
+          dec = false;
+        }
+      if( dec )
+      {
+        --clip[k];
+      }
+    }
+  }
+}
+
+double AlfCovariance::optimizeFilter(const AlfFilterShape& alfShape, int* clip, double *f, bool optimize_clip) const
+{
+  const int size = alfShape.numCoeff;
+  int clip_max[MAX_NUM_ALF_LUMA_COEFF]; // filled by getClipMax only when optimize_clip is true
+  // Returns the lowest calculateError() value accepted below; f is written by every gnsSolveByChol call and clip[] is updated in place when optimize_clip is true.
+  double err_best, err_last;
+
+  TE kE;
+  Ty ky;
+
+  if( optimize_clip )
+  {
+    // Start by looking for min clipping that has no impact => max_clipping
+    getClipMax(alfShape, clip_max);
+    for (int k=0; k<size; ++k)
+    {
+      clip[k] = std::max(clip_max[k], clip[k]); // never start below the index returned by getClipMax
+      clip[k] = std::min(clip[k], numBins-1); // keep the index at or below numBins-1
+    }
+  }
+  // Build and solve the system for the starting clip indices.
+  setEyFromClip( clip, kE, ky, size );
+
+  gnsSolveByChol( kE, ky, f, size );
+  err_best = calculateError( clip, f, size );
+  // step is the index offset tried in both directions per coefficient; 0 (optimize_clip false) skips the search.
+  int step = optimize_clip ? (numBins+1)/2 : 0;
+
+  while( step > 0 )
+  {
+    double err_min = err_best;
+    int idx_min = -1;
+    int inc_min = 0;
+
+    for( int k = 0; k < size-1; ++k ) // the last coefficient's clip index is never moved
+    {
+      if( clip[k] - step >= clip_max[k] ) // trial: lower clip[k] by step
+      {
+        clip[k] -= step;
+        ky[k] = y[clip[k]][k];
+        for( int l = 0; l < size; l++ )
+        {
+          kE[k][l] = E[clip[k]][clip[l]][k][l];
+          kE[l][k] = E[clip[l]][clip[k]][l][k];
+        }
+
+        gnsSolveByChol( kE, ky, f, size );
+        err_last = calculateError( clip, f, size );
+
+        if( err_last < err_min )
+        {
+          err_min = err_last;
+          idx_min = k;
+          inc_min = -step;
+        }
+        clip[k] += step; // undo the trial
+      }
+      if( clip[k] + step < numBins ) // trial: raise clip[k] by step
+      {
+        clip[k] += step;
+        ky[k] = y[clip[k]][k];
+        for( int l = 0; l < size; l++ )
+        {
+          kE[k][l] = E[clip[k]][clip[l]][k][l];
+          kE[l][k] = E[clip[l]][clip[k]][l][k];
+        }
+
+        gnsSolveByChol( kE, ky, f, size );
+        err_last = calculateError( clip, f, size );
+
+        if( err_last < err_min )
+        {
+          err_min = err_last;
+          idx_min = k;
+          inc_min = step;
+        }
+        clip[k] -= step; // undo the trial
+
+      }
+      ky[k] = y[clip[k]][k]; // restore entry k of ky and row/column k of kE for the untouched clip[k]
+      for( int l = 0; l < size; l++ )
+      {
+        kE[k][l] = E[clip[k]][clip[l]][k][l];
+        kE[l][k] = E[clip[l]][clip[k]][l][k];
+      }
+    }
+
+    if( idx_min >= 0 ) // apply only the single best move of this pass, then repeat with the same step
+    {
+      err_best = err_min;
+      clip[idx_min] += inc_min;
+      ky[idx_min] = y[clip[idx_min]][idx_min];
+      for( int l = 0; l < size; l++ )
+      {
+        kE[idx_min][l] = E[clip[idx_min]][clip[l]][idx_min][l];
+        kE[l][idx_min] = E[clip[l]][clip[idx_min]][l][idx_min];
+      }
+    }
+    else
+    {
+      --step; // no move lowered the error: shrink the step by one
+    }
+  }
+
+  if( optimize_clip ) {
+    // test all max: solve once more with clip index 0 for coefficients 0..size-2 (entry size-1 is not reset here)
+    for( int k = 0; k < size-1; ++k )
+    {
+      clip_max[k] = 0;
+    }
+    TE kE_max;
+    Ty ky_max;
+    setEyFromClip( clip_max, kE_max, ky_max, size );
+
+    gnsSolveByChol( kE_max, ky_max, f, size ); // overwrites f with the solution for clip_max
+    err_last = calculateError( clip_max, f, size );
+    if( err_last < err_best )
+    {
+      err_best = err_last;
+      for (int k=0; k<size; ++k)
+      {
+        clip[k] = clip_max[k];
+      }
+    }
+    else
+    {
+      // update clip to reduce coding cost
+      reduceClipCost(alfShape, clip);
+
+      // update f with best solution (f was overwritten by the trial solves above; kE/ky still hold the selected system)
+      gnsSolveByChol( kE, ky, f, size );
+    }
+  }
+
+  return err_best;
+}
+
+double AlfCovariance::calcErrorForCoeffs( const int *clip, const int *coeff, const int numCoeff, const int bitDepth ) const
+{ // Evaluates ( c'Ec / factor - 2 y'c ) / factor for integer coefficients c = coeff, with E and y selected by clip[].
+  double factor = 1 << ( bitDepth - 1 ); // presumably the fixed-point scale of coeff - TODO confirm against callers
+  double error = 0;
+  // The numCoeff parameter (not the member of the same name) bounds both loops.
+  for( int i = 0; i < numCoeff; i++ )   //diagonal
+  {
+    double sum = 0;
+    for( int j = i + 1; j < numCoeff; j++ )
+    {
+      // E[j][i] = E[i][j], sum will be multiplied by 2 later
+      sum += E[clip[i]][clip[j]][i][j] * coeff[j];
+    }
+    error += ( ( E[clip[i]][clip[i]][i][i] * coeff[i] + sum * 2 ) / factor - 2 * y[clip[i]][i] ) * coeff[i];
+  }
+
+  return error / factor;
+}
+
+double AlfCovariance::calculateError( const int *clip, const double *coeff, const int numCoeff ) const
+{ // Returns pixAcc minus the dot product of coeff with the y entries selected by clip[].
+  double sum = 0;
+  for( int i = 0; i < numCoeff; i++ )
+  {
+    sum += coeff[i] * y[clip[i]][i];
+  }
+
+  return pixAcc - sum;
+}
+
+double AlfCovariance::calculateError( const int *clip ) const
+{ // Returns the value of optimizeFilter for the given clip indices; the solved coefficients are discarded.
+  Ty c; // scratch output buffer, not returned to the caller
+
+  return optimizeFilter( clip, c, numCoeff ); // three-argument overload, defined outside this part of the file
+}
+//********************************
+// Cholesky decomposition
+//********************************
+
+#define ROUND(a)  (((a) < 0)? (int)((a) - 0.5) : (int)((a) + 0.5))
+#define REG              0.0001
+#define REG_SQR          0.0000001
+
+// Find filter coeff related: factorises the leading numEq x numEq part of inpMatr into upper-triangular outMatr with outMatr' * outMatr = inpMatr; returns 1 on success, 0 when a diagonal pivot is <= REG_SQR.
+int AlfCovariance::gnsCholeskyDec( TE inpMatr, TE outMatr, int numEq ) const
+{
+  Ty invDiag;  /* Vector of the inverse of diagonal entries of outMatr */
+  // Only entries inpMatr[i][j] with j >= i are read.
+  for( int i = 0; i < numEq; i++ )
+  {
+    for( int j = i; j < numEq; j++ )
+    {
+      /* Compute the scaling factor */
+      double scale = inpMatr[i][j];
+      if( i > 0 )
+      {
+        for( int k = i - 1; k >= 0; k-- )
+        {
+          scale -= outMatr[k][j] * outMatr[k][i];
+        }
+      }
+
+      /* Compute i'th row of outMatr */
+      if( i == j )
+      {
+        if( scale <= REG_SQR ) // if(scale <= 0 )  /* If inpMatr is singular */
+        {
+          return 0; // outMatr is left partially filled
+        }
+        else              /* Normal operation */
+          invDiag[i] = 1.0 / ( outMatr[i][i] = sqrt( scale ) );
+      }
+      else
+      {
+        outMatr[i][j] = scale * invDiag[i]; /* Upper triangular part          */
+        outMatr[j][i] = 0.0;              /* Lower triangular part set to 0 */
+      }
+    }
+  }
+  return 1; /* Signal that Cholesky factorization is successfully performed */
+}
+
+void AlfCovariance::gnsTransposeBacksubstitution( TE U, double* rhs, double* x, int order ) const
+{ // Solves U' * x = rhs for x, reading only U[j][i] with j <= i; x[0] is written unconditionally, so order >= 1 is expected.
+  /* Backsubstitution starts */
+  x[0] = rhs[0] / U[0][0];               /* First row of U'                   */
+  for( int i = 1; i < order; i++ )
+  {         /* For the rows 1..order-1           */
+
+    double sum = 0; //Holds backsubstitution from already handled rows
+
+    for( int j = 0; j < i; j++ ) /* Backsubst already solved unknowns */
+    {
+      sum += x[j] * U[j][i];
+    }
+
+    x[i] = ( rhs[i] - sum ) / U[i][i];       /* i'th component of solution vect.  */
+  }
+}
+
+void AlfCovariance::gnsBacksubstitution( TE R, double* z, int size, double* A ) const
+{ // Solves R * A = z for A by back substitution, reading only R[i][j] with j >= i.
+  size--; // from here on, size is the index of the last row
+  A[size] = z[size] / R[size][size];
+
+  for( int i = size - 1; i >= 0; i-- )
+  {
+    double sum = 0;
+
+    for( int j = i + 1; j <= size; j++ ) // contributions of the already solved unknowns
+    {
+      sum += R[i][j] * A[j];
+    }
+
+    A[i] = ( z[i] - sum ) / R[i][i];
+  }
+}
+
+int AlfCovariance::gnsSolveByChol( const int *clip, double *x, int numEq ) const
+{ // Convenience overload: builds the system for clip[] via setEyFromClip, then solves it into x.
+  TE LHS;
+  Ty rhs;
+
+  setEyFromClip( clip, LHS, rhs, numEq );
+  return gnsSolveByChol( LHS, rhs, x, numEq ); // forwards the result of the (LHS, rhs) overload
+}
+
+int AlfCovariance::gnsSolveByChol( TE LHS, double* rhs, double *x, int numEq ) const
+{
+  Ty aux;     /* Auxiliary vector */
+  TE U;    /* Upper triangular Cholesky factor of LHS */
+  // Returns 1 when x was solved (directly or after regularisation), 0 when both factorisations failed and x was zero-filled.
+  int res = 1;  // Signal that Cholesky factorization is successfully performed
+
+                /* The equation to be solved is LHSx = rhs */
+
+                /* Compute upper triangular U such that U'*U = LHS */
+  if( gnsCholeskyDec( LHS, U, numEq ) ) /* If Cholesky decomposition has been successful */
+  {
+    /* Now, the equation is  U'*U*x = rhs, where U is upper triangular
+    * Solve U'*aux = rhs for aux
+    */
+    gnsTransposeBacksubstitution( U, rhs, aux, numEq );
+
+    /* The equation is now U*x = aux, solve it for x (the solution vector; optimizeFilter passes its filter f here) */
+    gnsBacksubstitution( U, aux, numEq, x );
+
+  }
+  else /* LHS was singular */
+  {
+    res = 0; // overwritten below by the result of the second factorisation attempt
+
+    /* Regularize LHS */
+    for( int i = 0; i < numEq; i++ )
+    {
+      LHS[i][i] += REG; // NOTE(review): written through the parameter - if TE is an array type this alters the caller's matrix; confirm
+    }
+
+    /* Compute upper triangular U such that U'*U = regularized LHS */
+    res = gnsCholeskyDec( LHS, U, numEq );
+
+    if( !res )
+    {
+      std::memset( x, 0, sizeof( double )*numEq ); // still not factorisable: hand back an all-zero solution
+      return 0;
+    }
+
+    /* Solve  U'*aux = rhs for aux */
+    gnsTransposeBacksubstitution( U, rhs, aux, numEq );
+
+    /* Solve U*x = aux for x */
+    gnsBacksubstitution( U, aux, numEq, x );
+  }
+  return res;
+}
+//////////////////////////////////////////////////////////////////////////////////////////
+
+EncAdaptiveLoopFilter::EncAdaptiveLoopFilter( int& apsIdStart )
   : m_CABACEstimator( nullptr )
+  , m_apsIdStart( apsIdStart )
 {
   for( int i = 0; i < MAX_NUM_COMPONENT; i++ )
   {
@@ -53,21 +412,23 @@ EncAdaptiveLoopFilter::EncAdaptiveLoopFilter()
   {
     m_alfCovarianceFrame[i] = nullptr;
   }
-  m_filterCoeffQuant = nullptr;
   m_filterCoeffSet = nullptr;
+  m_filterClippSet = nullptr;
   m_diffFilterCoeff = nullptr;
 
   m_alfWSSD = 0;
 }
 
-void EncAdaptiveLoopFilter::create( const int picWidth, const int picHeight, const ChromaFormat chromaFormatIDC, const int maxCUWidth, const int maxCUHeight, const int maxCUDepth, const int inputBitDepth[MAX_NUM_CHANNEL_TYPE], const int internalBitDepth[MAX_NUM_CHANNEL_TYPE] )
+void EncAdaptiveLoopFilter::create( const EncCfg* encCfg, const int picWidth, const int picHeight, const ChromaFormat chromaFormatIDC, const int maxCUWidth, const int maxCUHeight, const int maxCUDepth, const int inputBitDepth[MAX_NUM_CHANNEL_TYPE], const int internalBitDepth[MAX_NUM_CHANNEL_TYPE] )
 {
   AdaptiveLoopFilter::create( picWidth, picHeight, chromaFormatIDC, maxCUWidth, maxCUHeight, maxCUDepth, inputBitDepth );
+  CHECK( encCfg == nullptr, "encCfg must not be null" );
+  m_encCfg = encCfg;
 
   for( int channelIdx = 0; channelIdx < MAX_NUM_CHANNEL_TYPE; channelIdx++ )
   {
     ChannelType chType = (ChannelType)channelIdx;
-    int numClasses = channelIdx ? 1 : MAX_NUM_ALF_CLASSES;
+    int numClasses = channelIdx ? MAX_NUM_ALF_ALTERNATIVES_CHROMA : MAX_NUM_ALF_CLASSES;
     m_alfCovarianceFrame[chType] = new AlfCovariance*[m_filterShapes[chType].size()];
     for( int i = 0; i != m_filterShapes[chType].size(); i++ )
     {
@@ -82,6 +443,16 @@ void EncAdaptiveLoopFilter::create( const int picWidth, const int picHeight, con
   for( int compIdx = 0; compIdx < MAX_NUM_COMPONENT; compIdx++ )
   {
     m_ctuEnableFlagTmp[compIdx] = new uint8_t[m_numCTUsInPic];
+    m_ctuEnableFlagTmp2[compIdx] = new uint8_t[m_numCTUsInPic];
+    if( isLuma( ComponentID(compIdx) ) )
+    {
+      m_ctuAlternativeTmp[compIdx] = nullptr;
+    }
+    else
+    {
+      m_ctuAlternativeTmp[compIdx] = new uint8_t[m_numCTUsInPic];
+      std::fill_n( m_ctuAlternativeTmp[compIdx], m_numCTUsInPic, 0 );
+    }
     ChannelType chType = toChannelType( ComponentID( compIdx ) );
     int numClasses = compIdx ? 1 : MAX_NUM_ALF_CLASSES;
 
@@ -103,25 +474,39 @@ void EncAdaptiveLoopFilter::create( const int picWidth, const int picHeight, con
 
   for( int i = 0; i != m_filterShapes[COMPONENT_Y].size(); i++ )
   {
-    for( int j = 0; j <= MAX_NUM_ALF_CLASSES; j++ )
+    for (int j = 0; j <= MAX_NUM_ALF_CLASSES + 1; j++)
     {
       m_alfCovarianceMerged[i][j].create( m_filterShapes[COMPONENT_Y][i].numCoeff );
     }
   }
 
-  m_filterCoeffQuant = new int[MAX_NUM_ALF_LUMA_COEFF];
-  m_filterCoeffSet = new int*[MAX_NUM_ALF_CLASSES];
+  m_filterCoeffSet = new int*[std::max(MAX_NUM_ALF_CLASSES, MAX_NUM_ALF_ALTERNATIVES_CHROMA)];
+  m_filterClippSet = new int*[std::max(MAX_NUM_ALF_CLASSES, MAX_NUM_ALF_ALTERNATIVES_CHROMA)];
   m_diffFilterCoeff = new int*[MAX_NUM_ALF_CLASSES];
 
   for( int i = 0; i < MAX_NUM_ALF_CLASSES; i++ )
   {
     m_filterCoeffSet[i] = new int[MAX_NUM_ALF_LUMA_COEFF];
+    m_filterClippSet[i] = new int[MAX_NUM_ALF_LUMA_COEFF];
     m_diffFilterCoeff[i] = new int[MAX_NUM_ALF_LUMA_COEFF];
   }
+
+
+  m_ctbDistortionFixedFilter = new double[m_numCTUsInPic];
+  for (int comp = 0; comp < MAX_NUM_COMPONENT; comp++)
+  {
+    m_ctbDistortionUnfilter[comp] = new double[m_numCTUsInPic];
+  }
+  m_alfCtbFilterSetIndexTmp.resize(m_numCTUsInPic);
+  memset(m_clipDefaultEnc, 0, sizeof(m_clipDefaultEnc));
 }
 
 void EncAdaptiveLoopFilter::destroy()
 {
+  if (!m_created)
+  {
+    return;
+  }
   for( int channelIdx = 0; channelIdx < MAX_NUM_CHANNEL_TYPE; channelIdx++ )
   {
     if( m_alfCovarianceFrame[channelIdx] )
@@ -150,12 +535,24 @@ void EncAdaptiveLoopFilter::destroy()
       m_ctuEnableFlagTmp[compIdx] = nullptr;
     }
 
-    if( m_alfCovariance[compIdx] )
+    if( m_ctuEnableFlagTmp2[compIdx] )
     {
-      ChannelType chType = toChannelType( ComponentID( compIdx ) );
-      int numClasses = compIdx ? 1 : MAX_NUM_ALF_CLASSES;
+      delete[] m_ctuEnableFlagTmp2[compIdx];
+      m_ctuEnableFlagTmp2[compIdx] = nullptr;
+    }
 
-      for( int i = 0; i != m_filterShapes[chType].size(); i++ )
+    if( m_ctuAlternativeTmp[compIdx] )
+    {
+      delete[] m_ctuAlternativeTmp[compIdx];
+      m_ctuAlternativeTmp[compIdx] = nullptr;
+    }
+
+    if( m_alfCovariance[compIdx] )
+    {
+      ChannelType chType = toChannelType( ComponentID( compIdx ) );
+      int numClasses = compIdx ? 1 : MAX_NUM_ALF_CLASSES;
+
+      for( int i = 0; i != m_filterShapes[chType].size(); i++ )
       {
         for( int j = 0; j < m_numCTUsInPic; j++ )
         {
@@ -178,7 +575,7 @@ void EncAdaptiveLoopFilter::destroy()
 
   for( int i = 0; i != m_filterShapes[COMPONENT_Y].size(); i++ )
   {
-    for( int j = 0; j <= MAX_NUM_ALF_CLASSES; j++ )
+    for (int j = 0; j <= MAX_NUM_ALF_CLASSES + 1; j++)
     {
       m_alfCovarianceMerged[i][j].destroy();
     }
@@ -195,6 +592,17 @@ void EncAdaptiveLoopFilter::destroy()
     m_filterCoeffSet = nullptr;
   }
 
+  if( m_filterClippSet )
+  {
+    for( int i = 0; i < MAX_NUM_ALF_CLASSES; i++ )
+    {
+      delete[] m_filterClippSet[i];
+      m_filterClippSet[i] = nullptr;
+    }
+    delete[] m_filterClippSet;
+    m_filterClippSet = nullptr;
+  }
+
   if( m_diffFilterCoeff )
   {
     for( int i = 0; i < MAX_NUM_ALF_CLASSES; i++ )
@@ -206,28 +614,57 @@ void EncAdaptiveLoopFilter::destroy()
     m_diffFilterCoeff = nullptr;
   }
 
-  delete[] m_filterCoeffQuant;
-  m_filterCoeffQuant = nullptr;
 
+  delete[] m_ctbDistortionFixedFilter;
+  m_ctbDistortionFixedFilter = nullptr;
+  for (int comp = 0; comp < MAX_NUM_COMPONENT; comp++)
+  {
+    delete[] m_ctbDistortionUnfilter[comp];
+    m_ctbDistortionUnfilter[comp] = nullptr;
+  }
   AdaptiveLoopFilter::destroy();
 }
-
-void EncAdaptiveLoopFilter::initCABACEstimator( CABACEncoder* cabacEncoder, CtxCache* ctxCache, Slice* pcSlice )
+void EncAdaptiveLoopFilter::initCABACEstimator( CABACEncoder* cabacEncoder, CtxCache* ctxCache, Slice* pcSlice
+, ParameterSetMap<APS>* apsMap )
 {
+  m_apsMap = apsMap; // kept for later use; ALFProcess dereferences m_apsMap (clear/getPS) on its IRAP path, so it must be non-null by then
   m_CABACEstimator = cabacEncoder->getCABACEstimator( pcSlice->getSPS() );
   m_CtxCache = ctxCache;
   m_CABACEstimator->initCtxModels( *pcSlice );
   m_CABACEstimator->resetBits();
 }
 
-void EncAdaptiveLoopFilter::ALFProcess( CodingStructure& cs, const double *lambdas,
+void EncAdaptiveLoopFilter::ALFProcess(CodingStructure& cs, const double *lambdas
 #if ENABLE_QPA
-                                        const double lambdaChromaWeight,
+                                       , const double lambdaChromaWeight
 #endif
-                                        AlfSliceParam& alfSliceParam )
+                                      )
 {
+  int layerIdx = cs.vps == nullptr ? 0 : cs.vps->getGeneralLayerIdx( cs.slice->getPic()->layerId );
+
+   // IRAP AU is assumed
+  if( !layerIdx && ( cs.slice->getPendingRasInit() || cs.slice->isIDRorBLA() ) )
+  {
+    memset(cs.slice->getAlfAPSs(), 0, sizeof(*cs.slice->getAlfAPSs())*ALF_CTB_MAX_NUM_APS);
+    m_apsIdStart = ALF_CTB_MAX_NUM_APS;
+
+    m_apsMap->clear();
+    for (int i = 0; i < ALF_CTB_MAX_NUM_APS; i++)
+    {
+      APS* alfAPS = m_apsMap->getPS((i << NUM_APS_TYPE_LEN) + ALF_APS);
+      m_apsMap->clearChangedFlag((i << NUM_APS_TYPE_LEN) + ALF_APS);
+      if (alfAPS)
+      {
+        alfAPS->getAlfAPSParam().reset();
+        alfAPS = nullptr;
+      }
+    }
+  }
+  AlfParam alfParam;
+  alfParam.reset();
+  const TempCtx  ctxStart(m_CtxCache, AlfCtx(m_CABACEstimator->getCtx()));
   // set available filter shapes
-  alfSliceParam.filterShapes = m_filterShapes;
+  alfParam.filterShapes = m_filterShapes;
 
   // set clipping range
   m_clpRngs = cs.slice->getClpRngs();
@@ -236,10 +673,11 @@ void EncAdaptiveLoopFilter::ALFProcess( CodingStructure& cs, const double *lambd
   for( int compIdx = 0; compIdx < MAX_NUM_COMPONENT; compIdx++ )
   {
     m_ctuEnableFlag[compIdx] = cs.picture->getAlfCtuEnableFlag( compIdx );
+    m_ctuAlternative[compIdx] = cs.picture->getAlfCtuAlternativeData( compIdx );
   }
 
   // reset ALF parameters
-  alfSliceParam.reset();
+  alfParam.reset();
   int shiftLuma = 2 * DISTORTION_PRECISION_ADJUSTMENT(m_inputBitDepth[CHANNEL_TYPE_LUMA]);
   int shiftChroma = 2 * DISTORTION_PRECISION_ADJUSTMENT(m_inputBitDepth[CHANNEL_TYPE_CHROMA]);
   m_lambda[COMPONENT_Y] = lambdas[COMPONENT_Y] * double(1 << shiftLuma);
@@ -254,30 +692,110 @@ void EncAdaptiveLoopFilter::ALFProcess( CodingStructure& cs, const double *lambd
 
   // derive classification
   const CPelBuf& recLuma = recYuv.get( COMPONENT_Y );
-  Area blk( 0, 0, recLuma.width, recLuma.height );
-  deriveClassification( m_classifier, recLuma, blk );
-  Area blkPCM(0, 0, recLuma.width, recLuma.height);
-  resetPCMBlkClassInfo(cs, m_classifier, recLuma, blkPCM);
+  const PreCalcValues& pcv = *cs.pcv;
+  bool clipTop = false, clipBottom = false, clipLeft = false, clipRight = false;
+  int numHorVirBndry = 0, numVerVirBndry = 0;
+  int horVirBndryPos[] = { 0, 0, 0 };
+  int verVirBndryPos[] = { 0, 0, 0 };
+
+  for( int yPos = 0; yPos < pcv.lumaHeight; yPos += pcv.maxCUHeight )
+  {
+    for( int xPos = 0; xPos < pcv.lumaWidth; xPos += pcv.maxCUWidth )
+    {
+      const int width = ( xPos + pcv.maxCUWidth > pcv.lumaWidth ) ? ( pcv.lumaWidth - xPos ) : pcv.maxCUWidth;
+      const int height = ( yPos + pcv.maxCUHeight > pcv.lumaHeight ) ? ( pcv.lumaHeight - yPos ) : pcv.maxCUHeight;
+      int rasterSliceAlfPad = 0;
+      if (isCrossedByVirtualBoundaries( cs, xPos, yPos, width, height, clipTop, clipBottom, clipLeft, clipRight, numHorVirBndry, numVerVirBndry, horVirBndryPos, verVirBndryPos, rasterSliceAlfPad ) )
+      {
+        int yStart = yPos;
+        for( int i = 0; i <= numHorVirBndry; i++ )
+        {
+          const int yEnd = i == numHorVirBndry ? yPos + height : horVirBndryPos[i];
+          const int h = yEnd - yStart;
+          const bool clipT = ( i == 0 && clipTop ) || ( i > 0 ) || ( yStart == 0 );
+          const bool clipB = ( i == numHorVirBndry && clipBottom ) || ( i < numHorVirBndry ) || ( yEnd == pcv.lumaHeight );
+          int xStart = xPos;
+          for( int j = 0; j <= numVerVirBndry; j++ )
+          {
+            const int xEnd = j == numVerVirBndry ? xPos + width : verVirBndryPos[j];
+            const int w = xEnd - xStart;
+            const bool clipL = ( j == 0 && clipLeft ) || ( j > 0 ) || ( xStart == 0 );
+            const bool clipR = ( j == numVerVirBndry && clipRight ) || ( j < numVerVirBndry ) || ( xEnd == pcv.lumaWidth );
+            const int wBuf = w + (clipL ? 0 : MAX_ALF_PADDING_SIZE) + (clipR ? 0 : MAX_ALF_PADDING_SIZE);
+            const int hBuf = h + (clipT ? 0 : MAX_ALF_PADDING_SIZE) + (clipB ? 0 : MAX_ALF_PADDING_SIZE);
+            PelUnitBuf buf = m_tempBuf2.subBuf( UnitArea( cs.area.chromaFormat, Area( 0, 0, wBuf, hBuf ) ) );
+            buf.copyFrom( recYuv.subBuf( UnitArea( cs.area.chromaFormat, Area( xStart - (clipL ? 0 : MAX_ALF_PADDING_SIZE), yStart - (clipT ? 0 : MAX_ALF_PADDING_SIZE), wBuf, hBuf ) ) ) );
+            // pad top-left unavailable samples for raster slice
+            if ( xStart == xPos && yStart == yPos && ( rasterSliceAlfPad & 1 ) )
+            {
+              buf.padBorderPel( MAX_ALF_PADDING_SIZE, 1 );
+            }
+
+            // pad bottom-right unavailable samples for raster slice
+            if ( xEnd == xPos + width && yEnd == yPos + height && ( rasterSliceAlfPad & 2 ) )
+            {
+              buf.padBorderPel( MAX_ALF_PADDING_SIZE, 2 );
+            }
+            buf.extendBorderPel( MAX_ALF_PADDING_SIZE );
+            buf = buf.subBuf( UnitArea ( cs.area.chromaFormat, Area( clipL ? 0 : MAX_ALF_PADDING_SIZE, clipT ? 0 : MAX_ALF_PADDING_SIZE, w, h ) ) );
+
+            const Area blkSrc( 0, 0, w, h );
+            const Area blkDst( xStart, yStart, w, h );
+            deriveClassification( m_classifier, buf.get(COMPONENT_Y), blkDst, blkSrc );
+
+            xStart = xEnd;
+          }
+
+          yStart = yEnd;
+        }
+      }
+      else
+      {
+        Area blk( xPos, yPos, width, height );
+        deriveClassification( m_classifier, recLuma, blk, blk );
+      }
+    }
+  }
 
   // get CTB stats for filtering
-  deriveStatsForFiltering( orgYuv, recYuv );
+  deriveStatsForFiltering( orgYuv, recYuv, cs );
 
+  for (int ctbIdx = 0; ctbIdx < m_numCTUsInPic; ctbIdx++)
+  {
+    cs.slice->getPic()->getAlfCtbFilterIndex()[ctbIdx] = NUM_FIXED_FILTER_SETS;
+  }
+  // consider using new filter (only)
+  alfParam.newFilterFlag[CHANNEL_TYPE_LUMA] = true;
+  alfParam.newFilterFlag[CHANNEL_TYPE_CHROMA] = true;
+  cs.slice->setTileGroupNumAps(1); // Only new filter for RD cost optimization
   // derive filter (luma)
-  alfEncoder( cs, alfSliceParam, orgYuv, recYuv, cs.getRecoBuf(), CHANNEL_TYPE_LUMA
+  alfEncoder( cs, alfParam, orgYuv, recYuv, cs.getRecoBuf(), CHANNEL_TYPE_LUMA
 #if ENABLE_QPA
             , lambdaChromaWeight
 #endif
             );
 
   // derive filter (chroma)
-  if( alfSliceParam.enabledFlag[COMPONENT_Y] )
   {
-    alfEncoder( cs, alfSliceParam, orgYuv, recYuv, cs.getRecoBuf(), CHANNEL_TYPE_CHROMA
+    alfEncoder( cs, alfParam, orgYuv, recYuv, cs.getRecoBuf(), CHANNEL_TYPE_CHROMA
 #if ENABLE_QPA
               , lambdaChromaWeight
 #endif
               );
   }
+
+  // let alfEncoderCtb decide now
+  alfParam.newFilterFlag[CHANNEL_TYPE_LUMA] = false;
+  alfParam.newFilterFlag[CHANNEL_TYPE_CHROMA] = false;
+  cs.slice->setTileGroupNumAps(0);
+  m_CABACEstimator->getCtx() = AlfCtx(ctxStart);
+  alfEncoderCtb(cs, alfParam
+#if ENABLE_QPA
+    , lambdaChromaWeight
+#endif
+  );
+
+  alfReconstructor(cs, recYuv);
 }
 
 double EncAdaptiveLoopFilter::deriveCtbAlfEnableFlags( CodingStructure& cs, const int iShapeIdx, ChannelType channel,
@@ -288,41 +806,97 @@ double EncAdaptiveLoopFilter::deriveCtbAlfEnableFlags( CodingStructure& cs, cons
 {
   TempCtx        ctxTempStart( m_CtxCache );
   TempCtx        ctxTempBest( m_CtxCache );
+  TempCtx        ctxTempAltStart( m_CtxCache );
+  TempCtx        ctxTempAltBest( m_CtxCache );
   const ComponentID compIDFirst = isLuma( channel ) ? COMPONENT_Y : COMPONENT_Cb;
   const ComponentID compIDLast = isLuma( channel ) ? COMPONENT_Y : COMPONENT_Cr;
+  const int numAlts = isLuma( channel ) ? 1 : m_alfParamTemp.numAlternativesChroma;
 
   double cost = 0;
   distUnfilter = 0;
 
-  setEnableFlag(m_alfSliceParamTemp, channel, true);
+  setEnableFlag(m_alfParamTemp, channel, true);
 #if ENABLE_QPA
-  CHECK ((chromaWeight > 0.0) && (cs.slice->getSliceCurStartCtuTsAddr() != 0), "incompatible start CTU address, must be 0");
+  CHECK ((chromaWeight > 0.0) && (cs.slice->getFirstCtuRsAddrInSlice() != 0), "incompatible start CTU address, must be 0");
 #endif
 
+  reconstructCoeff(m_alfParamTemp, channel, true, isLuma(channel));
+  for( int altIdx = 0; altIdx < (isLuma(channel) ? 1 : MAX_NUM_ALF_ALTERNATIVES_CHROMA); altIdx++)
+  {
+    for (int classIdx = 0; classIdx < (isLuma(channel) ? MAX_NUM_ALF_CLASSES : 1); classIdx++)
+    {
+      for (int i = 0; i < (isLuma(channel) ? MAX_NUM_ALF_LUMA_COEFF : MAX_NUM_ALF_CHROMA_COEFF); i++)
+      {
+        m_filterCoeffSet[isLuma(channel) ? classIdx : altIdx][i] = isLuma(channel) ? m_coeffFinal[classIdx * MAX_NUM_ALF_LUMA_COEFF + i] : m_chromaCoeffFinal[altIdx][i];
+        m_filterClippSet[isLuma(channel) ? classIdx : altIdx][i] = isLuma(channel) ? m_clippFinal[classIdx * MAX_NUM_ALF_LUMA_COEFF + i] : m_chromaClippFinal[altIdx][i];
+      }
+    }
+  }
+
   for( int ctuIdx = 0; ctuIdx < m_numCTUsInPic; ctuIdx++ )
   {
     for( int compID = compIDFirst; compID <= compIDLast; compID++ )
     {
-      double distUnfilterCtu = getUnfilteredDistortion( m_alfCovariance[compID][iShapeIdx][ctuIdx], numClasses );
-
-      ctxTempStart = AlfCtx( m_CABACEstimator->getCtx() );
-      m_CABACEstimator->resetBits();
-      m_ctuEnableFlag[compID][ctuIdx] = 1;
-      m_CABACEstimator->codeAlfCtuEnableFlag( cs, ctuIdx, compID, &m_alfSliceParamTemp );
-      double costOn = distUnfilterCtu + getFilteredDistortion( m_alfCovariance[compID][iShapeIdx][ctuIdx], numClasses, m_alfSliceParamTemp.numLumaFilters - 1, numCoeff );
 #if ENABLE_QPA
       const double ctuLambda = chromaWeight > 0.0 ? (isLuma (channel) ? cs.picture->m_uEnerHpCtu[ctuIdx] : cs.picture->m_uEnerHpCtu[ctuIdx] / chromaWeight) : m_lambda[compID];
 #else
       const double ctuLambda = m_lambda[compID];
 #endif
-      costOn += ctuLambda * FracBitsScale*(double)m_CABACEstimator->getEstFracBits();
+
+      double distUnfilterCtu = getUnfilteredDistortion( m_alfCovariance[compID][iShapeIdx][ctuIdx], numClasses );
+
+      ctxTempStart = AlfCtx( m_CABACEstimator->getCtx() );
+      m_CABACEstimator->resetBits();
+      m_ctuEnableFlag[compID][ctuIdx] = 1;
+      m_CABACEstimator->codeAlfCtuEnableFlag( cs, ctuIdx, compID, &m_alfParamTemp );
+      if( isLuma( channel ) )
+      {
+        // Evaluate cost of signaling filter set index for convergence of filters enabled flag / filter derivation
+        assert( cs.slice->getPic()->getAlfCtbFilterIndex()[ctuIdx] == NUM_FIXED_FILTER_SETS );
+        assert( cs.slice->getTileGroupNumAps() == 1 );
+        m_CABACEstimator->codeAlfCtuFilterIndex(cs, ctuIdx, &m_alfParamTemp.enabledFlag[COMPONENT_Y]);
+      }
+      double costOn = distUnfilterCtu + ctuLambda * FRAC_BITS_SCALE * m_CABACEstimator->getEstFracBits();
+
       ctxTempBest = AlfCtx( m_CABACEstimator->getCtx() );
+      if( isLuma( channel ) )
+      {
+        costOn += getFilteredDistortion( m_alfCovariance[compID][iShapeIdx][ctuIdx], numClasses, m_alfParamTemp.numLumaFilters - 1, numCoeff );
+      }
+      else
+      {
+        double bestAltCost = MAX_DOUBLE;
+        int bestAltIdx = -1;
+        ctxTempAltStart = AlfCtx( ctxTempBest );
+        for( int altIdx = 0; altIdx < numAlts; ++altIdx )
+        {
+          if( altIdx )
+            m_CABACEstimator->getCtx() = AlfCtx( ctxTempAltStart );
+          m_CABACEstimator->resetBits();
+          m_ctuAlternative[compID][ctuIdx] = altIdx;
+          m_CABACEstimator->codeAlfCtuAlternative( cs, ctuIdx, compID, &m_alfParamTemp );
+          double r_altCost = ctuLambda * FRAC_BITS_SCALE * m_CABACEstimator->getEstFracBits();
+
+          double altDist = 0.;
+          altDist += m_alfCovariance[compID][iShapeIdx][ctuIdx][0].calcErrorForCoeffs(  m_filterClippSet[altIdx], m_filterCoeffSet[altIdx], numCoeff, m_NUM_BITS );
+
+          double altCost = altDist + r_altCost;
+          if( altCost < bestAltCost )
+          {
+            bestAltCost = altCost;
+            bestAltIdx = altIdx;
+            ctxTempBest = AlfCtx( m_CABACEstimator->getCtx() );
+          }
+        }
+        m_ctuAlternative[compID][ctuIdx] = bestAltIdx;
+        costOn += bestAltCost;
+      }
 
       m_CABACEstimator->getCtx() = AlfCtx( ctxTempStart );
       m_CABACEstimator->resetBits();
       m_ctuEnableFlag[compID][ctuIdx] = 0;
-      m_CABACEstimator->codeAlfCtuEnableFlag( cs, ctuIdx, compID, &m_alfSliceParamTemp);
-      double costOff = distUnfilterCtu + ctuLambda * FracBitsScale*(double)m_CABACEstimator->getEstFracBits();
+      m_CABACEstimator->codeAlfCtuEnableFlag( cs, ctuIdx, compID, &m_alfParamTemp);
+      double costOff = distUnfilterCtu + ctuLambda * FRAC_BITS_SCALE * m_CABACEstimator->getEstFracBits();
 
       if( costOn < costOff )
       {
@@ -341,15 +915,13 @@ double EncAdaptiveLoopFilter::deriveCtbAlfEnableFlags( CodingStructure& cs, cons
 
   if( isChroma( channel ) )
   {
-    setEnableFlag(m_alfSliceParamTemp, channel, m_ctuEnableFlag);
-    const int alfChromaIdc = m_alfSliceParamTemp.enabledFlag[COMPONENT_Cb] * 2 + m_alfSliceParamTemp.enabledFlag[COMPONENT_Cr];
-    cost += lengthTruncatedUnary(alfChromaIdc, 3) * m_lambda[channel];
+    setEnableFlag(m_alfParamTemp, channel, m_ctuEnableFlag);
   }
 
   return cost;
 }
 
-void EncAdaptiveLoopFilter::alfEncoder( CodingStructure& cs, AlfSliceParam& alfSliceParam, const PelUnitBuf& orgUnitBuf, const PelUnitBuf& recExtBuf, const PelUnitBuf& recBuf, const ChannelType channel
+void EncAdaptiveLoopFilter::alfEncoder( CodingStructure& cs, AlfParam& alfParam, const PelUnitBuf& orgUnitBuf, const PelUnitBuf& recExtBuf, const PelUnitBuf& recBuf, const ChannelType channel
 #if ENABLE_QPA
                                       , const double lambdaChromaWeight // = 0.0
 #endif
@@ -360,134 +932,145 @@ void EncAdaptiveLoopFilter::alfEncoder( CodingStructure& cs, AlfSliceParam& alfS
 
   double costMin = MAX_DOUBLE;
 
-  std::vector<AlfFilterShape>& alfFilterShape = alfSliceParam.filterShapes[channel];
-  const ComponentID compIDFirst = isLuma( channel ) ? COMPONENT_Y : COMPONENT_Cb;
-  const ComponentID compIDLast = isLuma( channel ) ? COMPONENT_Y : COMPONENT_Cr;
+  std::vector<AlfFilterShape>& alfFilterShape = alfParam.filterShapes[channel];
+  m_bitsNewFilter[channel] = 0;
   const int numClasses = isLuma( channel ) ? MAX_NUM_ALF_CLASSES : 1;
   int uiCoeffBits = 0;
 
   for( int iShapeIdx = 0; iShapeIdx < alfFilterShape.size(); iShapeIdx++ )
   {
-    m_alfSliceParamTemp = alfSliceParam;
+    m_alfParamTemp = alfParam;
     //1. get unfiltered distortion
+    if( isChroma(channel) )
+      m_alfParamTemp.numAlternativesChroma = 1;
     double cost = getUnfilteredDistortion( m_alfCovarianceFrame[channel][iShapeIdx], channel );
     cost /= 1.001; // slight preference for unfiltered choice
 
     if( cost < costMin )
     {
       costMin = cost;
-      setEnableFlag( alfSliceParam, channel, false );
+      setEnableFlag( alfParam, channel, false );
       // no CABAC signalling
       ctxBest = AlfCtx( ctxStart );
       setCtuEnableFlag( m_ctuEnableFlagTmp, channel, 0 );
+      if( isChroma(channel) )
+        setCtuAlternativeChroma( m_ctuAlternativeTmp, 0 );
     }
 
-    //2. all CTUs are on
-    setEnableFlag( m_alfSliceParamTemp, channel, true );
-    m_CABACEstimator->getCtx() = AlfCtx( ctxStart );
-    setCtuEnableFlag( m_ctuEnableFlag, channel, 1 );
-    cost = getFilterCoeffAndCost( cs, 0, channel, false, iShapeIdx, uiCoeffBits );
+#if JVET_Q0249_ALF_CHROMA_CLIPFLAG
+     const int nonLinearFlagMax =
+      ( isLuma( channel ) ? m_encCfg->getUseNonLinearAlfLuma() : m_encCfg->getUseNonLinearAlfChroma()) // With JVET_Q0249 the chroma non-linear flag is a single per-channel flag, so it is searched here as for luma
+      ? 2 : 1;
+#else
+    const int nonLinearFlagMax =
+      ( isLuma( channel ) ? m_encCfg->getUseNonLinearAlfLuma() : 0 ) // For chroma, the non-linear flag is checked for each alternative filter
+      ? 2 : 1;
+#endif
 
-    if( cost < costMin )
+    for( int nonLinearFlag = 0; nonLinearFlag < nonLinearFlagMax; nonLinearFlag++ )
     {
-      costMin = cost;
-      copyAlfSliceParam( alfSliceParam, m_alfSliceParamTemp, channel );
-      ctxBest = AlfCtx( m_CABACEstimator->getCtx() );
-      setCtuEnableFlag( m_ctuEnableFlagTmp, channel, 1 );
-    }
-
-    //3. CTU decision
-    double distUnfilter = 0;
-    const int iterNum = isLuma(channel) ? (2 * 4 + 1) : (2 * 2 + 1);
-
-    for( int iter = 0; iter < iterNum; iter++ )
+    for( int numAlternatives = isLuma( channel ) ? 1 : getMaxNumAlternativesChroma(); numAlternatives > 0; numAlternatives-- )
     {
-      if ((iter & 0x01) == 0)
-      {
-        m_CABACEstimator->getCtx() = AlfCtx(ctxStart);
-        cost = m_lambda[channel] * uiCoeffBits;
-        cost += deriveCtbAlfEnableFlags(cs, iShapeIdx, channel,
-#if ENABLE_QPA
-                                        lambdaChromaWeight,
+      if( isChroma( channel ) )
+        m_alfParamTemp.numAlternativesChroma = numAlternatives;
+      //2. all CTUs are on
+      setEnableFlag( m_alfParamTemp, channel, true );
+#if JVET_Q0249_ALF_CHROMA_CLIPFLAG
+      m_alfParamTemp.nonLinearFlag[channel] = nonLinearFlag;
+#else
+      if (isLuma(channel))
+        m_alfParamTemp.nonLinearFlag[channel][0] = nonLinearFlag;
 #endif
-                                        numClasses, alfFilterShape[iShapeIdx].numCoeff, distUnfilter);
-        if (cost < costMin)
-        {
-          costMin = cost;
-          ctxBest = AlfCtx(m_CABACEstimator->getCtx());
-          copyCtuEnableFlag(m_ctuEnableFlagTmp, m_ctuEnableFlag, channel);
-          copyAlfSliceParam(alfSliceParam, m_alfSliceParamTemp, channel);
-        }
-      }
-      else
+      m_CABACEstimator->getCtx() = AlfCtx( ctxStart );
+      setCtuEnableFlag( m_ctuEnableFlag, channel, 1 );
+      // all alternatives are on
+      if( isChroma( channel ) )
+        initCtuAlternativeChroma( m_ctuAlternative );
+      cost = getFilterCoeffAndCost( cs, 0, channel, true, iShapeIdx, uiCoeffBits );
+
+      if( cost < costMin )
       {
-        // unfiltered distortion is added due to some CTBs may not use filter
-        cost = getFilterCoeffAndCost(cs, distUnfilter, channel, true, iShapeIdx, uiCoeffBits);
+        m_bitsNewFilter[channel] = uiCoeffBits;
+        costMin = cost;
+        copyAlfParam( alfParam, m_alfParamTemp, channel );
+        ctxBest = AlfCtx( m_CABACEstimator->getCtx() );
+        setCtuEnableFlag( m_ctuEnableFlagTmp, channel, 1 );
+        if( isChroma(channel) )
+          copyCtuAlternativeChroma( m_ctuAlternativeTmp, m_ctuAlternative );
       }
-    }//for iter
-  }//for shapeIdx
-  m_CABACEstimator->getCtx() = AlfCtx( ctxBest );
-  copyCtuEnableFlag( m_ctuEnableFlag, m_ctuEnableFlagTmp, channel );
-
-  //filtering
-  reconstructCoeff( alfSliceParam, channel, isLuma( channel ) );
 
-  for( int compIdx = compIDFirst; compIdx <= compIDLast; compIdx++ )
-  {
-    ComponentID compID = (ComponentID)compIdx;
-    if( alfSliceParam.enabledFlag[compID] )
-    {
-      const PreCalcValues& pcv = *cs.pcv;
-      int ctuIdx = 0;
-      const int chromaScaleX = getComponentScaleX( compID, recBuf.chromaFormat );
-      const int chromaScaleY = getComponentScaleY( compID, recBuf.chromaFormat );
-      AlfFilterType filterType = isLuma( compID ) ? ALF_FILTER_7 : ALF_FILTER_5;
-      short* coeff = isLuma( compID ) ? m_coeffFinal : alfSliceParam.chromaCoeff;
+      //3. CTU decision
+      double distUnfilter = 0;
+      double prevItCost = MAX_DOUBLE;
+      const int iterNum = isLuma(channel) ? (2 * 4 + 1) : (2 * (2 + m_alfParamTemp.numAlternativesChroma - 1) + 1);
 
-      for( int yPos = 0; yPos < pcv.lumaHeight; yPos += pcv.maxCUHeight )
+      for( int iter = 0; iter < iterNum; iter++ )
       {
-        for( int xPos = 0; xPos < pcv.lumaWidth; xPos += pcv.maxCUWidth )
+        if ((iter & 0x01) == 0)
         {
-          const int width = ( xPos + pcv.maxCUWidth > pcv.lumaWidth ) ? ( pcv.lumaWidth - xPos ) : pcv.maxCUWidth;
-          const int height = ( yPos + pcv.maxCUHeight > pcv.lumaHeight ) ? ( pcv.lumaHeight - yPos ) : pcv.maxCUHeight;
-          Area blk( xPos >> chromaScaleX, yPos >> chromaScaleY, width >> chromaScaleX, height >> chromaScaleY );
-
-          if( m_ctuEnableFlag[compID][ctuIdx] )
+          m_CABACEstimator->getCtx() = AlfCtx(ctxStart);
+          cost = m_lambda[channel] * uiCoeffBits;
+          cost += deriveCtbAlfEnableFlags(cs, iShapeIdx, channel,
+#if ENABLE_QPA
+                                          lambdaChromaWeight,
+#endif
+                                          numClasses, alfFilterShape[iShapeIdx].numCoeff, distUnfilter);
+          if (cost < costMin)
           {
-            if( filterType == ALF_FILTER_5 )
-            {
-              m_filter5x5Blk( m_classifier, recBuf, recExtBuf, blk, compID, coeff, m_clpRngs.comp[compIdx], cs );
-            }
-            else if( filterType == ALF_FILTER_7 )
-            {
-              m_filter7x7Blk( m_classifier, recBuf, recExtBuf, blk, compID, coeff, m_clpRngs.comp[compIdx], cs );
-           }
-            else
-            {
-              CHECK( 0, "Wrong ALF filter type" );
-            }
+            m_bitsNewFilter[channel] = uiCoeffBits;
+            costMin = cost;
+            ctxBest = AlfCtx(m_CABACEstimator->getCtx());
+            copyCtuEnableFlag(m_ctuEnableFlagTmp, m_ctuEnableFlag, channel);
+            if( isChroma(channel) )
+              copyCtuAlternativeChroma( m_ctuAlternativeTmp, m_ctuAlternative );
+            copyAlfParam(alfParam, m_alfParamTemp, channel);
+          }
+          else if ( cost >= prevItCost  )
+          {
+            // High probability that we have converged or we are diverging
+            break;
           }
-          ctuIdx++;
+          prevItCost = cost;
         }
-      }
+        else
+        {
+          // unfiltered distortion is added due to some CTBs may not use filter
+          // no need to reset CABAC here, since uiCoeffBits is not affected
+          /*cost = */getFilterCoeffAndCost( cs, distUnfilter, channel, true, iShapeIdx, uiCoeffBits );
+        }
+      }//for iter
+      // Decrease number of alternatives and reset ctu params and filters
     }
-  }
+    }// for nonLinearFlag
+  }//for shapeIdx
+  m_CABACEstimator->getCtx() = AlfCtx( ctxBest );
+  if( isChroma(channel) )
+    copyCtuAlternativeChroma( m_ctuAlternative, m_ctuAlternativeTmp );
+  copyCtuEnableFlag( m_ctuEnableFlag, m_ctuEnableFlagTmp, channel );
 }
 
-void EncAdaptiveLoopFilter::copyAlfSliceParam( AlfSliceParam& alfSliceParamDst, AlfSliceParam& alfSliceParamSrc, ChannelType channel )
+void EncAdaptiveLoopFilter::copyAlfParam( AlfParam& alfParamDst, AlfParam& alfParamSrc, ChannelType channel ) // copies the ALF parameters selected by 'channel' from alfParamSrc into alfParamDst
 {
   if( isLuma( channel ) )
   {
-    memcpy( &alfSliceParamDst, &alfSliceParamSrc, sizeof( AlfSliceParam ) );
+    memcpy( &alfParamDst, &alfParamSrc, sizeof( AlfParam ) ); // luma: byte copy of the whole struct, chroma members included; NOTE(review): assumes AlfParam is safe to memcpy - confirm
   }
   else
   {
-    alfSliceParamDst.enabledFlag[COMPONENT_Cb] = alfSliceParamSrc.enabledFlag[COMPONENT_Cb];
-    alfSliceParamDst.enabledFlag[COMPONENT_Cr] = alfSliceParamSrc.enabledFlag[COMPONENT_Cr];
-    memcpy( alfSliceParamDst.chromaCoeff, alfSliceParamSrc.chromaCoeff, sizeof( alfSliceParamDst.chromaCoeff ) );
+    alfParamDst.enabledFlag[COMPONENT_Cb] = alfParamSrc.enabledFlag[COMPONENT_Cb]; // chroma: only the chroma-related members are copied, luma members of dst stay untouched
+    alfParamDst.enabledFlag[COMPONENT_Cr] = alfParamSrc.enabledFlag[COMPONENT_Cr];
+    alfParamDst.numAlternativesChroma = alfParamSrc.numAlternativesChroma; // number of chroma filter alternatives
+#if JVET_Q0249_ALF_CHROMA_CLIPFLAG
+    alfParamDst.nonLinearFlag[CHANNEL_TYPE_CHROMA] = alfParamSrc.nonLinearFlag[CHANNEL_TYPE_CHROMA]; // one flag for the whole chroma channel
+#else
+    memcpy( alfParamDst.nonLinearFlag[CHANNEL_TYPE_CHROMA], alfParamSrc.nonLinearFlag[CHANNEL_TYPE_CHROMA], sizeof( alfParamDst.nonLinearFlag[CHANNEL_TYPE_CHROMA] ) ); // array of flags, indexed by alternative elsewhere in this file
+#endif
+    memcpy( alfParamDst.chromaCoeff, alfParamSrc.chromaCoeff, sizeof( alfParamDst.chromaCoeff ) ); // chroma filter coefficients, all alternatives
+    memcpy( alfParamDst.chromaClipp, alfParamSrc.chromaClipp, sizeof( alfParamDst.chromaClipp ) ); // chroma clipping parameters, all alternatives
   }
 }
-double EncAdaptiveLoopFilter::getFilterCoeffAndCost( CodingStructure& cs, double distUnfilter, ChannelType channel, bool bReCollectStat, int iShapeIdx, int& uiCoeffBits )
+
+double EncAdaptiveLoopFilter::getFilterCoeffAndCost( CodingStructure& cs, double distUnfilter, ChannelType channel, bool bReCollectStat, int iShapeIdx, int& uiCoeffBits, bool onlyFilterCost )
 {
   //collect stat based on CTU decision
   if( bReCollectStat )
@@ -497,108 +1080,119 @@ double EncAdaptiveLoopFilter::getFilterCoeffAndCost( CodingStructure& cs, double
 
   double dist = distUnfilter;
   uiCoeffBits = 0;
-  int uiSliceFlag = 0;
-  AlfFilterShape& alfFilterShape = m_alfSliceParamTemp.filterShapes[channel][iShapeIdx];
+  AlfFilterShape& alfFilterShape = m_alfParamTemp.filterShapes[channel][iShapeIdx];
   //get filter coeff
   if( isLuma( channel ) )
   {
+#if JVET_Q0249_ALF_CHROMA_CLIPFLAG
+    std::fill_n(m_alfClipMerged[iShapeIdx][0][0], MAX_NUM_ALF_LUMA_COEFF*MAX_NUM_ALF_CLASSES*MAX_NUM_ALF_CLASSES, m_alfParamTemp.nonLinearFlag[channel] ? AlfNumClippingValues[CHANNEL_TYPE_LUMA] / 2 : 0);
+#else
+    std::fill_n(m_alfClipMerged[iShapeIdx][0][0], MAX_NUM_ALF_LUMA_COEFF*MAX_NUM_ALF_CLASSES*MAX_NUM_ALF_CLASSES, m_alfParamTemp.nonLinearFlag[channel][0] ? AlfNumClippingValues[CHANNEL_TYPE_LUMA] / 2 : 0);
+#endif
+    // Reset Merge Tmp Cov
+    m_alfCovarianceMerged[iShapeIdx][MAX_NUM_ALF_CLASSES].reset(AlfNumClippingValues[channel]);
+    m_alfCovarianceMerged[iShapeIdx][MAX_NUM_ALF_CLASSES + 1].reset(AlfNumClippingValues[channel]);
     //distortion
-    dist += mergeFiltersAndCost( m_alfSliceParamTemp, alfFilterShape, m_alfCovarianceFrame[channel][iShapeIdx], m_alfCovarianceMerged[iShapeIdx], uiCoeffBits );
+    dist += mergeFiltersAndCost( m_alfParamTemp, alfFilterShape, m_alfCovarianceFrame[channel][iShapeIdx], m_alfCovarianceMerged[iShapeIdx], m_alfClipMerged[iShapeIdx], uiCoeffBits );
   }
   else
   {
     //distortion
-    dist += m_alfCovarianceFrame[channel][iShapeIdx][0].pixAcc + deriveCoeffQuant( m_filterCoeffQuant, m_alfCovarianceFrame[channel][iShapeIdx][0].E, m_alfCovarianceFrame[channel][iShapeIdx][0].y, alfFilterShape.numCoeff, alfFilterShape.weights, m_NUM_BITS, true );
-    memcpy( m_filterCoeffSet[0], m_filterCoeffQuant, sizeof( *m_filterCoeffQuant ) * alfFilterShape.numCoeff );
-    //setEnableFlag( m_alfSliceParamTemp, channel, m_ctuEnableFlag );
-    const int alfChromaIdc = m_alfSliceParamTemp.enabledFlag[COMPONENT_Cb] * 2 + m_alfSliceParamTemp.enabledFlag[COMPONENT_Cr];
-    for( int i = 0; i < MAX_NUM_ALF_CHROMA_COEFF; i++ )
-    {
-      m_alfSliceParamTemp.chromaCoeff[i] = m_filterCoeffQuant[i];
-    }
-    uiCoeffBits += getCoeffRate( m_alfSliceParamTemp, true );
-    uiSliceFlag = lengthTruncatedUnary(alfChromaIdc, 3);
-  }
-
-  double rate = uiCoeffBits + uiSliceFlag;
-  m_CABACEstimator->resetBits();
-  m_CABACEstimator->codeAlfCtuEnableFlags( cs, channel, &m_alfSliceParamTemp);
-  rate += FracBitsScale * (double)m_CABACEstimator->getEstFracBits();
-  return dist + m_lambda[channel] * rate;
-}
-
-int EncAdaptiveLoopFilter::getCoeffRate( AlfSliceParam& alfSliceParam, bool isChroma )
-{
-  int iBits = 0;
-  if( !isChroma )
-  {
-    iBits++;                                               // alf_coefficients_delta_flag
-    if( !alfSliceParam.alfLumaCoeffDeltaFlag )
+    for( int altIdx = 0; altIdx < m_alfParamTemp.numAlternativesChroma; ++altIdx )
     {
-      if( alfSliceParam.numLumaFilters > 1 )
-      {
-        iBits++;                                           // coeff_delta_pred_mode_flag
-      }
-    }
-  }
+      assert(alfFilterShape.numCoeff == m_alfCovarianceFrame[channel][iShapeIdx][altIdx].numCoeff);
+      AlfParam bestSliceParam;
+      double bestCost = MAX_DOUBLE;
+      double bestDist = MAX_DOUBLE;
+      int bestCoeffBits = 0;
+      const int nonLinearFlagMax = m_encCfg->getUseNonLinearAlfChroma() ? 2 : 1;
 
-  memset( m_bitsCoeffScan, 0, sizeof( m_bitsCoeffScan ) );
-  AlfFilterShape alfShape( isChroma ? 5 : 7 );
-  const int maxGolombIdx = AdaptiveLoopFilter::getMaxGolombIdx( alfShape.filterType );
-  const short* coeff = isChroma ? alfSliceParam.chromaCoeff : alfSliceParam.lumaCoeff;
-  const int numFilters = isChroma ? 1 : alfSliceParam.numLumaFilters;
-
-  // vlc for all
-  for( int ind = 0; ind < numFilters; ++ind )
-  {
-    if( isChroma || !alfSliceParam.alfLumaCoeffDeltaFlag || alfSliceParam.alfLumaCoeffFlag[ind] )
-    {
-      for( int i = 0; i < alfShape.numCoeff - 1; i++ )
+      for( int nonLinearFlag = 0; nonLinearFlag < nonLinearFlagMax; nonLinearFlag++ )
       {
-        int coeffVal = abs( coeff[ind * MAX_NUM_ALF_LUMA_COEFF + i] );
+#if JVET_Q0249_ALF_CHROMA_CLIPFLAG
+        int currentNonLinearFlag = m_alfParamTemp.nonLinearFlag[channel] ? 1 : 0;
+        if (nonLinearFlag != currentNonLinearFlag)
+        {
+          continue;
+        }
+#else
+        m_alfParamTemp.nonLinearFlag[channel][altIdx] = nonLinearFlag;
+#endif
 
-        for( int k = 1; k < 15; k++ )
+        std::fill_n(m_filterClippSet[altIdx], MAX_NUM_ALF_CHROMA_COEFF, nonLinearFlag ? AlfNumClippingValues[CHANNEL_TYPE_CHROMA] / 2 : 0 );
+        double dist = m_alfCovarianceFrame[channel][iShapeIdx][altIdx].pixAcc + deriveCoeffQuant( m_filterClippSet[altIdx], m_filterCoeffSet[altIdx], m_alfCovarianceFrame[channel][iShapeIdx][altIdx], alfFilterShape, m_NUM_BITS, nonLinearFlag );
+        for( int i = 0; i < MAX_NUM_ALF_CHROMA_COEFF; i++ )
+        {
+          m_alfParamTemp.chromaCoeff[altIdx][i] = m_filterCoeffSet[altIdx][i];
+          m_alfParamTemp.chromaClipp[altIdx][i] = m_filterClippSet[altIdx][i];
+        }
+        int coeffBits = getChromaCoeffRate( m_alfParamTemp, altIdx );
+        double cost = dist + m_lambda[channel] * coeffBits;
+        if( cost < bestCost )
         {
-          m_bitsCoeffScan[alfShape.golombIdx[i]][k] += lengthGolomb( coeffVal, k );
+          bestCost = cost;
+          bestDist = dist;
+          bestCoeffBits = coeffBits;
+          bestSliceParam = m_alfParamTemp;
         }
       }
+      uiCoeffBits += bestCoeffBits;
+      dist += bestDist;
+      m_alfParamTemp = bestSliceParam;
     }
+    uiCoeffBits += lengthUvlc( m_alfParamTemp.numAlternativesChroma-1 );
+#if JVET_Q0249_ALF_CHROMA_CLIPFLAG
+    uiCoeffBits++;
+#else
+    uiCoeffBits += m_alfParamTemp.numAlternativesChroma; // non-linear flags
+#endif
   }
-
-  int kMin = getGolombKMin( alfShape, numFilters, m_kMinTab, m_bitsCoeffScan );
-
-  // Golomb parameters
-  iBits += lengthUvlc( kMin - 1 );  // "min_golomb_order"
-  int golombOrderIncreaseFlag = 0;
-
-  for( int idx = 0; idx < maxGolombIdx; idx++ )
+  if (onlyFilterCost)
   {
-    golombOrderIncreaseFlag = ( m_kMinTab[idx] != kMin ) ? 1 : 0;
-    CHECK( !( m_kMinTab[idx] <= kMin + 1 ), "ALF Golomb parameter not consistent" );
-    iBits += golombOrderIncreaseFlag;                           //golomb_order_increase_flag
-    kMin = m_kMinTab[idx];
+    return dist + m_lambda[channel] * uiCoeffBits;
   }
-
-  if( !isChroma )
+  double rate = uiCoeffBits;
+  m_CABACEstimator->resetBits();
+  m_CABACEstimator->codeAlfCtuEnableFlags( cs, channel, &m_alfParamTemp);
+  for( int ctuIdx = 0; ctuIdx < m_numCTUsInPic; ctuIdx++ )
   {
-    if( alfSliceParam.alfLumaCoeffDeltaFlag )
+    if( isLuma( channel ) )
     {
-      iBits += numFilters;             //filter_coefficient_flag[i]
+      // Evaluate cost of signaling filter set index for convergence of filters enabled flag / filter derivation
+      assert( cs.slice->getPic()->getAlfCtbFilterIndex()[ctuIdx] == NUM_FIXED_FILTER_SETS );
+      assert( cs.slice->getTileGroupNumAps() == 1 );
+      m_CABACEstimator->codeAlfCtuFilterIndex(cs, ctuIdx, &m_alfParamTemp.enabledFlag[COMPONENT_Y]);
     }
   }
+  m_CABACEstimator->codeAlfCtuAlternatives( cs, channel, &m_alfParamTemp );
+  rate += FRAC_BITS_SCALE * m_CABACEstimator->getEstFracBits();
+  return dist + m_lambda[channel] * rate;
+}
+
+int EncAdaptiveLoopFilter::getChromaCoeffRate( AlfParam& alfParam, int altIdx ) // returns the bit count for chroma alternative altIdx of alfParam; side effect: zeroes chromaClipp entries whose coefficient is zero
+{
+  int iBits = 0;
 
+  AlfFilterShape alfShape(5); // shape argument 5 is the one the replaced code used for chroma
   // Filter coefficients
-  for( int ind = 0; ind < numFilters; ++ind )
+  for( int i = 0; i < alfShape.numCoeff - 1; i++ ) // the last coefficient is not counted
   {
-    if( !isChroma && !alfSliceParam.alfLumaCoeffFlag[ind] && alfSliceParam.alfLumaCoeffDeltaFlag )
-    {
-      continue;
-    }
-
-    for( int i = 0; i < alfShape.numCoeff - 1; i++ )
+    iBits += lengthGolomb( alfParam.chromaCoeff[altIdx][i], 3 );  // alf_coeff_chroma[altIdx][i]
+  }
+#if JVET_Q0249_ALF_CHROMA_CLIPFLAG
+  if( alfParam.nonLinearFlag[CHANNEL_TYPE_CHROMA] ) // flag is read from the same parameter set as the coefficients
+#else
+  if( alfParam.nonLinearFlag[CHANNEL_TYPE_CHROMA][altIdx] ) // flag is read from the same parameter set as the coefficients
+#endif
+  {
+    for (int i = 0; i < alfShape.numCoeff - 1; i++)
     {
-      iBits += lengthGolomb( coeff[ind* MAX_NUM_ALF_LUMA_COEFF + i], m_kMinTab[alfShape.golombIdx[i]] );  // alf_coeff_chroma[i], alf_coeff_luma_delta[i][j]
+      if( alfParam.chromaCoeff[altIdx][i] == 0 )
+      {
+        alfParam.chromaClipp[altIdx][i] = 0; // normalise: a zero coefficient gets clip value 0
+      }
     }
+    iBits += ((alfShape.numCoeff - 1) << 1); // 2 bits for each of the (numCoeff - 1) clip entries
   }
   return iBits;
 }
@@ -612,7 +1206,7 @@ double EncAdaptiveLoopFilter::getUnfilteredDistortion( AlfCovariance* cov, Chann
   }
   else
   {
-    dist = getUnfilteredDistortion( cov, 1 ) + lengthTruncatedUnary( 0, 3 ) * m_lambda[COMPONENT_Cb];
+    dist = getUnfilteredDistortion( cov, 1 );
   }
   return dist;
 }
@@ -633,14 +1227,13 @@ double EncAdaptiveLoopFilter::getFilteredDistortion( AlfCovariance* cov, const i
 
   for( int classIdx = 0; classIdx < numClasses; classIdx++ )
   {
-    int filterIdx = numClasses == 1 ? 0 : m_filterIndices[numFiltersMinus1][classIdx];
-    dist += calcErrorForCoeffs( cov[classIdx].E, cov[classIdx].y, m_filterCoeffSet[filterIdx], numCoeff, m_NUM_BITS );
+    dist += cov[classIdx].calcErrorForCoeffs(m_filterClippSet[classIdx], m_filterCoeffSet[classIdx], numCoeff, m_NUM_BITS);
   }
 
   return dist;
 }
 
-double EncAdaptiveLoopFilter::mergeFiltersAndCost( AlfSliceParam& alfSliceParam, AlfFilterShape& alfShape, AlfCovariance* covFrame, AlfCovariance* covMerged, int& uiCoeffBits )
+double EncAdaptiveLoopFilter::mergeFiltersAndCost( AlfParam& alfParam, AlfFilterShape& alfShape, AlfCovariance* covFrame, AlfCovariance* covMerged, int clipMerged[MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_LUMA_COEFF], int& uiCoeffBits )
 {
   int numFiltersBest = 0;
   int numFilters = MAX_NUM_ALF_CLASSES;
@@ -648,16 +1241,16 @@ double EncAdaptiveLoopFilter::mergeFiltersAndCost( AlfSliceParam& alfSliceParam,
   static double errorForce0CoeffTab[MAX_NUM_ALF_CLASSES][2];
 
   double cost, cost0, dist, distForce0, costMin = MAX_DOUBLE;
-  int predMode = 0, bestPredMode = 0, coeffBits, coeffBitsForce0;
+  int coeffBits, coeffBitsForce0;
 
-  mergeClasses( covFrame, covMerged, MAX_NUM_ALF_CLASSES, m_filterIndices );
+  mergeClasses( alfShape, covFrame, covMerged, clipMerged, MAX_NUM_ALF_CLASSES, m_filterIndices );
 
   while( numFilters >= 1 )
   {
-    dist = deriveFilterCoeffs( covFrame, covMerged, alfShape, m_filterIndices[numFilters - 1], numFilters, errorForce0CoeffTab );
+    dist = deriveFilterCoeffs(covFrame, covMerged, clipMerged, alfShape, m_filterIndices[numFilters - 1], numFilters, errorForce0CoeffTab, alfParam);
     // filter coeffs are stored in m_filterCoeffSet
     distForce0 = getDistForce0( alfShape, numFilters, errorForce0CoeffTab, codedVarBins );
-    coeffBits = deriveFilterCoefficientsPredictionMode( alfShape, m_filterCoeffSet, m_diffFilterCoeff, numFilters, predMode );
+    coeffBits = deriveFilterCoefficientsPredictionMode( alfShape, m_filterCoeffSet, m_diffFilterCoeff, numFilters );
     coeffBitsForce0 = getCostFilterCoeffForce0( alfShape, m_filterCoeffSet, numFilters, codedVarBins );
 
     cost = dist + m_lambda[COMPONENT_Y] * coeffBits;
@@ -672,177 +1265,111 @@ double EncAdaptiveLoopFilter::mergeFiltersAndCost( AlfSliceParam& alfSliceParam,
     {
       costMin = cost;
       numFiltersBest = numFilters;
-      bestPredMode = predMode;
     }
     numFilters--;
   }
 
-  dist = deriveFilterCoeffs( covFrame, covMerged, alfShape, m_filterIndices[numFiltersBest - 1], numFiltersBest, errorForce0CoeffTab );
-  coeffBits = deriveFilterCoefficientsPredictionMode( alfShape, m_filterCoeffSet, m_diffFilterCoeff, numFiltersBest, predMode );
+  dist = deriveFilterCoeffs( covFrame, covMerged, clipMerged, alfShape, m_filterIndices[numFiltersBest - 1], numFiltersBest, errorForce0CoeffTab, alfParam );
+  coeffBits = deriveFilterCoefficientsPredictionMode( alfShape, m_filterCoeffSet, m_diffFilterCoeff, numFiltersBest );
   distForce0 = getDistForce0( alfShape, numFiltersBest, errorForce0CoeffTab, codedVarBins );
   coeffBitsForce0 = getCostFilterCoeffForce0( alfShape, m_filterCoeffSet, numFiltersBest, codedVarBins );
 
   cost = dist + m_lambda[COMPONENT_Y] * coeffBits;
   cost0 = distForce0 + m_lambda[COMPONENT_Y] * coeffBitsForce0;
 
-  alfSliceParam.numLumaFilters = numFiltersBest;
+  alfParam.numLumaFilters = numFiltersBest;
   double distReturn;
   if (cost <= cost0)
   {
     distReturn = dist;
-    alfSliceParam.alfLumaCoeffDeltaFlag = 0;
+    alfParam.alfLumaCoeffDeltaFlag = 0;
     uiCoeffBits = coeffBits;
-    alfSliceParam.alfLumaCoeffDeltaPredictionFlag = bestPredMode;
   }
   else
   {
     distReturn = distForce0;
-    alfSliceParam.alfLumaCoeffDeltaFlag = 1;
+    alfParam.alfLumaCoeffDeltaFlag = 1;
     uiCoeffBits = coeffBitsForce0;
-    memcpy( alfSliceParam.alfLumaCoeffFlag, codedVarBins, sizeof( codedVarBins ) );
-    alfSliceParam.alfLumaCoeffDeltaPredictionFlag = 0;
+    memcpy( alfParam.alfLumaCoeffFlag, codedVarBins, sizeof( codedVarBins ) );
 
     for( int varInd = 0; varInd < numFiltersBest; varInd++ )
     {
       if( codedVarBins[varInd] == 0 )
       {
         memset( m_filterCoeffSet[varInd], 0, sizeof( int )*MAX_NUM_ALF_LUMA_COEFF );
+        memset( m_filterClippSet[varInd], 0, sizeof( int )*MAX_NUM_ALF_LUMA_COEFF );
       }
     }
   }
 
-  for( int ind = 0; ind < alfSliceParam.numLumaFilters; ++ind )
+  for( int ind = 0; ind < alfParam.numLumaFilters; ++ind )
   {
     for( int i = 0; i < alfShape.numCoeff; i++ )
     {
-      if( alfSliceParam.alfLumaCoeffDeltaPredictionFlag )
-      {
-        alfSliceParam.lumaCoeff[ind * MAX_NUM_ALF_LUMA_COEFF + i] = m_diffFilterCoeff[ind][i];
-      }
-      else
-      {
-        alfSliceParam.lumaCoeff[ind * MAX_NUM_ALF_LUMA_COEFF + i] = m_filterCoeffSet[ind][i];
-      }
+      alfParam.lumaCoeff[ind * MAX_NUM_ALF_LUMA_COEFF + i] = m_filterCoeffSet[ind][i];
+      alfParam.lumaClipp[ind * MAX_NUM_ALF_LUMA_COEFF + i] = m_filterClippSet[ind][i];
     }
   }
 
-  memcpy( alfSliceParam.filterCoeffDeltaIdx, m_filterIndices[numFiltersBest - 1], sizeof( short ) * MAX_NUM_ALF_CLASSES );
-  uiCoeffBits += getNonFilterCoeffRate( alfSliceParam );
+  memcpy( alfParam.filterCoeffDeltaIdx, m_filterIndices[numFiltersBest - 1], sizeof( short ) * MAX_NUM_ALF_CLASSES );
+  uiCoeffBits += getNonFilterCoeffRate( alfParam );
   return distReturn;
 }
 
-int EncAdaptiveLoopFilter::getNonFilterCoeffRate( AlfSliceParam& alfSliceParam )
+int EncAdaptiveLoopFilter::getNonFilterCoeffRate( AlfParam& alfParam )
 {
-  int len = 1   // alf_coefficients_delta_flag
-          + lengthTruncatedUnary( 0, 3 )    // chroma_idc = 0, it is signalled when ALF is enabled for luma
-          + getTBlength( alfSliceParam.numLumaFilters - 1, MAX_NUM_ALF_CLASSES );   //numLumaFilters
+  int len = 0   // alf_coefficients_delta_flag
+          + 2                                          // slice_alf_chroma_idc                     u(2)
+          + lengthUvlc (alfParam.numLumaFilters - 1);  // alf_luma_num_filters_signalled_minus1   ue(v)
 
-  if( alfSliceParam.numLumaFilters > 1 )
+  if( alfParam.numLumaFilters > 1 )
   {
+    const int coeffLength = ceilLog2(alfParam.numLumaFilters);
     for( int i = 0; i < MAX_NUM_ALF_CLASSES; i++ )
     {
-      len += getTBlength( (int)alfSliceParam.filterCoeffDeltaIdx[i], alfSliceParam.numLumaFilters );  //filter_coeff_delta[i]
+      len += coeffLength;                              // alf_luma_coeff_delta_idx   u(v)
     }
   }
   return len;
 }
 
-int EncAdaptiveLoopFilter::lengthTruncatedUnary( int symbol, int maxSymbol )
-{
-  if( maxSymbol == 0 )
-  {
-    return 0;
-  }
-
-  bool codeLast = ( maxSymbol > symbol );
-  int bins = 0;
-  int numBins = 0;
-  while( symbol-- )
-  {
-    bins <<= 1;
-    bins++;
-    numBins++;
-  }
-  if( codeLast )
-  {
-    bins <<= 1;
-    numBins++;
-  }
-
-  return numBins;
-}
-
-int EncAdaptiveLoopFilter::getTBlength( int uiSymbol, const int uiMaxSymbol )
-{
-  int uiThresh;
-  if( uiMaxSymbol > 256 )
-  {
-    int uiThreshVal = 1 << 8;
-    uiThresh = 8;
-    while( uiThreshVal <= uiMaxSymbol )
-    {
-      uiThresh++;
-      uiThreshVal <<= 1;
-    }
-    uiThresh--;
-  }
-  else
-  {
-    uiThresh = g_tbMax[uiMaxSymbol];
-  }
-
-  int uiVal = 1 << uiThresh;
-  assert( uiVal <= uiMaxSymbol );
-  assert( ( uiVal << 1 ) > uiMaxSymbol );
-  assert( uiSymbol < uiMaxSymbol );
-  int b = uiMaxSymbol - uiVal;
-  assert( b < uiVal );
-  if( uiSymbol < uiVal - b )
-  {
-    return uiThresh;
-  }
-  else
-  {
-    return uiThresh + 1;
-  }
-}
 
 int EncAdaptiveLoopFilter::getCostFilterCoeffForce0( AlfFilterShape& alfShape, int **pDiffQFilterCoeffIntPP, const int numFilters, bool* codedVarBins )
 {
-  const int maxGolombIdx = getMaxGolombIdx( alfShape.filterType );
-  memset( m_bitsCoeffScan, 0, sizeof( m_bitsCoeffScan ) );
-
+  int len = 0;
+  // Filter coefficients
   for( int ind = 0; ind < numFilters; ++ind )
   {
-    if( !codedVarBins[ind] )
+    if( codedVarBins[ind] )
     {
-      continue;
+      for( int i = 0; i < alfShape.numCoeff - 1; i++ )
+      {
+        len += lengthGolomb( abs( pDiffQFilterCoeffIntPP[ind][i] ), 3 ); // alf_coeff_luma_delta[i][j]
+      }
     }
-    for( int i = 0; i < alfShape.numCoeff - 1; i++ )
+    else
     {
-      int coeffVal = abs( pDiffQFilterCoeffIntPP[ind][i] );
-      for( int k = 1; k < 15; k++ )
+      for (int i = 0; i < alfShape.numCoeff - 1; i++)
       {
-        m_bitsCoeffScan[alfShape.golombIdx[i]][k] += lengthGolomb( coeffVal, k );
+        len += lengthGolomb(0, 3); // alf_coeff_luma_delta[i][j]
       }
     }
   }
-
-  int kMin = getGolombKMin( alfShape, numFilters, m_kMinTab, m_bitsCoeffScan );
-
-  // Coding parameters
-  int len = kMin           //min_golomb_order
-          + maxGolombIdx   //golomb_order_increase_flag
-          + numFilters;    //filter_coefficient_flag[i]
-
-  // Filter coefficients
-  for( int ind = 0; ind < numFilters; ++ind )
+#if JVET_Q0249_ALF_CHROMA_CLIPFLAG
+  if( m_alfParamTemp.nonLinearFlag[CHANNEL_TYPE_LUMA] )
+#else
+  if( m_alfParamTemp.nonLinearFlag[CHANNEL_TYPE_LUMA][0] )
+#endif
   {
-    if( codedVarBins[ind] )
+    for (int ind = 0; ind < numFilters; ++ind)
     {
-      for( int i = 0; i < alfShape.numCoeff - 1; i++ )
+      for (int i = 0; i < alfShape.numCoeff - 1; i++)
       {
-        len += lengthGolomb( abs( pDiffQFilterCoeffIntPP[ind][i] ), m_kMinTab[alfShape.golombIdx[i]] ); // alf_coeff_luma_delta[i][j]
+        if (!abs(pDiffQFilterCoeffIntPP[ind][i]))
+        {
+          m_filterClippSet[ind][i] = 0;
+        }
+        len += 2;
       }
     }
   }
@@ -850,168 +1377,94 @@ int EncAdaptiveLoopFilter::getCostFilterCoeffForce0( AlfFilterShape& alfShape, i
   return len;
 }
 
-int EncAdaptiveLoopFilter::deriveFilterCoefficientsPredictionMode( AlfFilterShape& alfShape, int **filterSet, int** filterCoeffDiff, const int numFilters, int& predMode )
+int EncAdaptiveLoopFilter::deriveFilterCoefficientsPredictionMode( AlfFilterShape& alfShape, int **filterSet, int** filterCoeffDiff, const int numFilters )
+{
+  return (m_alfParamTemp.nonLinearFlag[CHANNEL_TYPE_LUMA] ? getCostFilterClipp(alfShape, filterSet, numFilters) : 0) + getCostFilterCoeff(alfShape, filterSet, numFilters);
+}
+
+int EncAdaptiveLoopFilter::getCostFilterCoeff( AlfFilterShape& alfShape, int **pDiffQFilterCoeffIntPP, const int numFilters )
 {
-  int ratePredMode0 = getCostFilterCoeff( alfShape, filterSet, numFilters );
+  return lengthFilterCoeffs( alfShape, numFilters, pDiffQFilterCoeffIntPP );  // alf_coeff_luma_delta[i][j];
+}
 
-  for( int ind = 0; ind < numFilters; ++ind )
+int EncAdaptiveLoopFilter::getCostFilterClipp( AlfFilterShape& alfShape, int **pDiffQFilterCoeffIntPP, const int numFilters )
+{
+  for (int filterIdx = 0; filterIdx < numFilters; ++filterIdx)
   {
-    if( ind == 0 )
-    {
-      memcpy( filterCoeffDiff[ind], filterSet[ind], sizeof( int ) * alfShape.numCoeff );
-    }
-    else
+    for (int i = 0; i < alfShape.numCoeff - 1; i++)
     {
-      for( int i = 0; i < alfShape.numCoeff; i++ )
+      if (!abs(pDiffQFilterCoeffIntPP[filterIdx][i]))
       {
-        filterCoeffDiff[ind][i] = filterSet[ind][i] - filterSet[ind - 1][i];
+        m_filterClippSet[filterIdx][i] = 0;
       }
     }
   }
-
-  int ratePredMode1 = getCostFilterCoeff( alfShape, filterCoeffDiff, numFilters );
-
-  predMode = ( ratePredMode1 < ratePredMode0 && numFilters > 1 ) ? 1 : 0;
-
-  return ( numFilters > 1 ? 1 : 0 )        // coeff_delta_pred_mode_flag
-       + ( predMode ? ratePredMode1 : ratePredMode0 ); // min_golomb_order, golomb_order_increase_flag, alf_coeff_luma_delta
+  return (numFilters * (alfShape.numCoeff - 1)) << 1;
 }
 
-int EncAdaptiveLoopFilter::getCostFilterCoeff( AlfFilterShape& alfShape, int **pDiffQFilterCoeffIntPP, const int numFilters )
+int EncAdaptiveLoopFilter::lengthFilterCoeffs( AlfFilterShape& alfShape, const int numFilters, int **FilterCoeff )
 {
-  const int maxGolombIdx = getMaxGolombIdx( alfShape.filterType );
-
-  memset( m_bitsCoeffScan, 0, sizeof( m_bitsCoeffScan ) );
+  int bitCnt = 0;
 
   for( int ind = 0; ind < numFilters; ++ind )
   {
     for( int i = 0; i < alfShape.numCoeff - 1; i++ )
     {
-      int coeffVal = abs( pDiffQFilterCoeffIntPP[ind][i] );
-      for( int k = 1; k < 15; k++ )
-      {
-        m_bitsCoeffScan[alfShape.golombIdx[i]][k] += lengthGolomb( coeffVal, k );
-      }
-    }
-  }
-
-  int kMin = getGolombKMin( alfShape, numFilters, m_kMinTab, m_bitsCoeffScan );
-
-  // Coding parameters
-  int len = kMin           //min_golomb_order
-          + maxGolombIdx;  //golomb_order_increase_flag
-
-  // Filter coefficients
-  len += lengthFilterCoeffs( alfShape, numFilters, pDiffQFilterCoeffIntPP, m_kMinTab );  // alf_coeff_luma_delta[i][j]
-
-  return len;
-}
-
-int EncAdaptiveLoopFilter::lengthFilterCoeffs( AlfFilterShape& alfShape, const int numFilters, int **FilterCoeff, int* kMinTab )
-{
-  int bitCnt = 0;
-
-  for( int ind = 0; ind < numFilters; ++ind )
-  {
-    for( int i = 0; i < alfShape.numCoeff - 1; i++ )
-    {
-      bitCnt += lengthGolomb( abs( FilterCoeff[ind][i] ), kMinTab[alfShape.golombIdx[i]] );
+      bitCnt += lengthGolomb( abs( FilterCoeff[ind][i] ), 3 );
     }
   }
   return bitCnt;
 }
 
+
 double EncAdaptiveLoopFilter::getDistForce0( AlfFilterShape& alfShape, const int numFilters, double errorTabForce0Coeff[MAX_NUM_ALF_CLASSES][2], bool* codedVarBins )
 {
   static int bitsVarBin[MAX_NUM_ALF_CLASSES];
 
-  memset( m_bitsCoeffScan, 0, sizeof( m_bitsCoeffScan ) );
   for( int ind = 0; ind < numFilters; ++ind )
   {
+    bitsVarBin[ind] = 0;
     for( int i = 0; i < alfShape.numCoeff - 1; i++ )
     {
-      int coeffVal = abs( m_filterCoeffSet[ind][i] );
-      for( int k = 1; k < 15; k++ )
-      {
-        m_bitsCoeffScan[alfShape.golombIdx[i]][k] += lengthGolomb( coeffVal, k );
-      }
+      bitsVarBin[ind] += lengthGolomb( abs( m_filterCoeffSet[ind][i] ), 3 );
     }
   }
 
-  getGolombKMin( alfShape, numFilters, m_kMinTab, m_bitsCoeffScan );
-
-  for( int ind = 0; ind < numFilters; ++ind )
+  int zeroBitsVarBin = 0; // must restart from 0 on every call: a static here would keep accumulating the zero-filter bit cost across calls
+  for (int i = 0; i < alfShape.numCoeff - 1; i++)
   {
-    bitsVarBin[ind] = 0;
-    for( int i = 0; i < alfShape.numCoeff - 1; i++ )
-    {
-      bitsVarBin[ind] += lengthGolomb( abs( m_filterCoeffSet[ind][i] ), m_kMinTab[alfShape.golombIdx[i]] );
-    }
+    zeroBitsVarBin += lengthGolomb(0, 3);
   }
-
-  double distForce0 = getDistCoeffForce0( codedVarBins, errorTabForce0Coeff, bitsVarBin, numFilters );
-
-  return distForce0;
-}
-
-int EncAdaptiveLoopFilter::getGolombKMin( AlfFilterShape& alfShape, const int numFilters, int kMinTab[MAX_NUM_ALF_LUMA_COEFF], int bitsCoeffScan[m_MAX_SCAN_VAL][m_MAX_EXP_GOLOMB] )
-{
-  int kStart;
-  const int maxGolombIdx = getMaxGolombIdx( alfShape.filterType );
-
-  int minBitsKStart = MAX_INT;
-  int minKStart = -1;
-
-  for( int k = 1; k < 8; k++ )
+#if JVET_Q0249_ALF_CHROMA_CLIPFLAG
+  if( m_alfParamTemp.nonLinearFlag[CHANNEL_TYPE_LUMA] )
+#else
+  if( m_alfParamTemp.nonLinearFlag[CHANNEL_TYPE_LUMA][0] )
+#endif
   {
-    int bitsKStart = 0; kStart = k;
-    for( int scanPos = 0; scanPos < maxGolombIdx; scanPos++ )
+    for (int ind = 0; ind < numFilters; ++ind)
     {
-      int kMin = kStart;
-      int minBits = bitsCoeffScan[scanPos][kMin];
-
-      if( bitsCoeffScan[scanPos][kStart + 1] < minBits )
+      for (int i = 0; i < alfShape.numCoeff - 1; i++)
       {
-        kMin = kStart + 1;
-        minBits = bitsCoeffScan[scanPos][kMin];
+        if (!abs(m_filterCoeffSet[ind][i]))
+        {
+          m_filterClippSet[ind][i] = 0;
+        }
       }
-      kStart = kMin;
-      bitsKStart += minBits;
-    }
-    if( bitsKStart < minBitsKStart )
-    {
-      minBitsKStart = bitsKStart;
-      minKStart = k;
     }
   }
 
-  kStart = minKStart;
-  for( int scanPos = 0; scanPos < maxGolombIdx; scanPos++ )
-  {
-    int kMin = kStart;
-    int minBits = bitsCoeffScan[scanPos][kMin];
-
-    if( bitsCoeffScan[scanPos][kStart + 1] < minBits )
-    {
-      kMin = kStart + 1;
-      minBits = bitsCoeffScan[scanPos][kMin];
-    }
-
-    kMinTab[scanPos] = kMin;
-    kStart = kMin;
-  }
+  double distForce0 = getDistCoeffForce0( codedVarBins, errorTabForce0Coeff, bitsVarBin, zeroBitsVarBin, numFilters);
 
-  return minKStart;
+  return distForce0;
 }
-
-double EncAdaptiveLoopFilter::getDistCoeffForce0( bool* codedVarBins, double errorForce0CoeffTab[MAX_NUM_ALF_CLASSES][2], int* bitsVarBin, const int numFilters )
+double EncAdaptiveLoopFilter::getDistCoeffForce0( bool* codedVarBins, double errorForce0CoeffTab[MAX_NUM_ALF_CLASSES][2], int* bitsVarBin, int zeroBitsVarBin, const int numFilters)
 {
   double distForce0 = 0;
   std::memset( codedVarBins, 0, sizeof( *codedVarBins ) * MAX_NUM_ALF_CLASSES );
 
   for( int filtIdx = 0; filtIdx < numFilters; filtIdx++ )
   {
-    double costDiff = errorForce0CoeffTab[filtIdx][0] - ( errorForce0CoeffTab[filtIdx][1] + m_lambda[COMPONENT_Y] * bitsVarBin[filtIdx] );
+    double costDiff = (errorForce0CoeffTab[filtIdx][0] + m_lambda[COMPONENT_Y] * zeroBitsVarBin) - (errorForce0CoeffTab[filtIdx][1] + m_lambda[COMPONENT_Y] * bitsVarBin[filtIdx]);
     codedVarBins[filtIdx] = costDiff > 0 ? true : false;
     distForce0 += errorForce0CoeffTab[filtIdx][codedVarBins[filtIdx] ? 1 : 0];
   }
@@ -1035,209 +1488,111 @@ int EncAdaptiveLoopFilter::lengthUvlc( int uiCode )
   return ( uiLength >> 1 ) + ( ( uiLength + 1 ) >> 1 );
 }
 
-int EncAdaptiveLoopFilter::lengthGolomb( int coeffVal, int k )
+int EncAdaptiveLoopFilter::lengthGolomb( int coeffVal, int k, bool signed_coeff )
 {
-  int m = 2 << ( k - 1 );
-  int q = coeffVal / m;
-  if( coeffVal != 0 )
+  int numBins = 0;
+  unsigned int symbol = abs(coeffVal);
+  while (symbol >= (unsigned int)(1 << k))
   {
-    return q + 2 + k;
+    numBins++;
+    symbol -= 1 << k;
+    k++;
   }
-  else
+  numBins += ( k + 1) ;
+  if (signed_coeff && coeffVal != 0)
   {
-    return q + 1 + k;
+    numBins++;
   }
+  return numBins;
 }
 
-double EncAdaptiveLoopFilter::deriveFilterCoeffs( AlfCovariance* cov, AlfCovariance* covMerged, AlfFilterShape& alfShape, short* filterIndices, int numFilters, double errorTabForce0Coeff[MAX_NUM_ALF_CLASSES][2] )
+double EncAdaptiveLoopFilter::deriveFilterCoeffs( AlfCovariance* cov, AlfCovariance* covMerged, int clipMerged[MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_LUMA_COEFF], AlfFilterShape& alfShape, short* filterIndices, int numFilters, double errorTabForce0Coeff[MAX_NUM_ALF_CLASSES][2], AlfParam& alfParam )
 {
   double error = 0.0;
   AlfCovariance& tmpCov = covMerged[MAX_NUM_ALF_CLASSES];
+
+
+
   for( int filtIdx = 0; filtIdx < numFilters; filtIdx++ )
   {
     tmpCov.reset();
+    bool found_clip = false;
     for( int classIdx = 0; classIdx < MAX_NUM_ALF_CLASSES; classIdx++ )
     {
       if( filterIndices[classIdx] == filtIdx )
       {
         tmpCov += cov[classIdx];
+        if( !found_clip )
+        {
+          found_clip = true; // clip should be at the address of the shortest one
+          memcpy(m_filterClippSet[filtIdx], clipMerged[numFilters-1][classIdx], sizeof(int[MAX_NUM_ALF_LUMA_COEFF]));
+        }
       }
     }
 
     // Find coeffcients
-    errorTabForce0Coeff[filtIdx][1] = tmpCov.pixAcc + deriveCoeffQuant( m_filterCoeffQuant, tmpCov.E, tmpCov.y, alfShape.numCoeff, alfShape.weights, m_NUM_BITS );
+    assert(alfShape.numCoeff == tmpCov.numCoeff);
+    errorTabForce0Coeff[filtIdx][1] = tmpCov.pixAcc + deriveCoeffQuant( m_filterClippSet[filtIdx], m_filterCoeffSet[filtIdx], tmpCov, alfShape, m_NUM_BITS, false );
     errorTabForce0Coeff[filtIdx][0] = tmpCov.pixAcc;
     error += errorTabForce0Coeff[filtIdx][1];
-
-    // store coeff
-    memcpy( m_filterCoeffSet[filtIdx], m_filterCoeffQuant, sizeof( int )*alfShape.numCoeff );
   }
   return error;
 }
 
-double EncAdaptiveLoopFilter::deriveCoeffQuant( int *filterCoeffQuant, double **E, double *y, const int numCoeff, std::vector<int>& weights, const int bitDepth, const bool bChroma )
+double EncAdaptiveLoopFilter::deriveCoeffQuant( int *filterClipp, int *filterCoeffQuant, const AlfCovariance& cov, const AlfFilterShape& shape, const int bitDepth, const bool optimizeClip )
 {
   const int factor = 1 << ( bitDepth - 1 );
-  static int filterCoeffQuantMod[MAX_NUM_ALF_LUMA_COEFF];
+  const int max_value = factor - 1;
+  const int min_value = -factor + 1;
+
+const int numCoeff = shape.numCoeff;
   static double filterCoeff[MAX_NUM_ALF_LUMA_COEFF];
 
-  gnsSolveByChol( E, y, filterCoeff, numCoeff );
+  cov.optimizeFilter( shape, filterClipp, filterCoeff, optimizeClip );
   roundFiltCoeff( filterCoeffQuant, filterCoeff, numCoeff, factor );
-  const int targetCoeffSumInt = 0;
-  int quantCoeffSum = 0;
-  for( int i = 0; i < numCoeff; i++ )
-  {
-    quantCoeffSum += weights[i] * filterCoeffQuant[i];
-  }
-
-  int count = 0;
-  while( quantCoeffSum != targetCoeffSumInt && count < 10 )
-  {
-    int sign = quantCoeffSum > targetCoeffSumInt ? 1 : -1;
-    int diff = ( quantCoeffSum - targetCoeffSumInt ) * sign;
-
-    double errMin = MAX_DOUBLE;
-    int minInd = -1;
-
-    for( int k = 0; k < numCoeff; k++ )
-    {
-      if( weights[k] <= diff )
-      {
-        memcpy( filterCoeffQuantMod, filterCoeffQuant, sizeof( int ) * numCoeff );
-
-        filterCoeffQuantMod[k] -= sign;
-        double error = calcErrorForCoeffs( E, y, filterCoeffQuantMod, numCoeff, bitDepth );
-
-        if( error < errMin )
-        {
-          errMin = error;
-          minInd = k;
-        }
-      }
-    }
-
-    if( minInd != -1 )
-    {
-      filterCoeffQuant[minInd] -= sign;
-    }
-
-    quantCoeffSum = 0;
-    for( int i = 0; i < numCoeff; i++ )
-    {
-      quantCoeffSum += weights[i] * filterCoeffQuant[i];
-    }
-    ++count;
-  }
-  if( count == 10 )
-  {
-    memset( filterCoeffQuant, 0, sizeof( int ) * numCoeff );
-  }
-
-  int max_value = factor - 1;
-  int min_value = -factor;
 
   for ( int i = 0; i < numCoeff - 1; i++ )
   {
     filterCoeffQuant[i] = std::min( max_value, std::max( min_value, filterCoeffQuant[i] ) );
-    filterCoeff[i] = filterCoeffQuant[i] / double( factor );
-  }
-
-  quantCoeffSum = 0;
-  for ( int i = 0; i < numCoeff - 1; i++ )
-  {
-    quantCoeffSum += weights[i] * filterCoeffQuant[i];
-    filterCoeff[i] = filterCoeffQuant[i] / double(factor);
   }
-  filterCoeffQuant[numCoeff - 1] = -quantCoeffSum;
-  filterCoeff[numCoeff - 1] = filterCoeffQuant[numCoeff - 1] / double(factor);
-
+  filterCoeffQuant[numCoeff - 1] = 0;
 
-  //Restrict the range of the center coefficient
-  int max_value_center = (2 * factor - 1) - factor;
-  int min_value_center = 0 - factor;
+  int modified=1;
 
-  filterCoeffQuant[numCoeff - 1] = std::min(max_value_center, std::max(min_value_center, filterCoeffQuant[numCoeff - 1]));
-  filterCoeff[numCoeff - 1] = filterCoeffQuant[numCoeff - 1] / double(factor);
-
-  int coeffQuantAdjust[MAX_NUM_ALF_LUMA_COEFF];
-  int adjustedTotalCoeff = (numCoeff - 1) << 1;
-
-  count = 0;
-  quantCoeffSum += filterCoeffQuant[numCoeff - 1];
-  while (quantCoeffSum != targetCoeffSumInt && count < 15)
+  double errRef=cov.calcErrorForCoeffs( filterClipp, filterCoeffQuant, numCoeff, bitDepth );
+  while( modified )
   {
-    int sign = quantCoeffSum > targetCoeffSumInt ? 1 : -1;
-    int diff = (quantCoeffSum - targetCoeffSumInt) * sign;
-
-    if (diff > 4 * adjustedTotalCoeff)     sign = sign * 8;
-    else if (diff > 2 * adjustedTotalCoeff)     sign = sign * 4;
-    else if (diff >     adjustedTotalCoeff)     sign = sign * 2;
-
-    double errMin = MAX_DOUBLE;
-    int    minInd = -1;
-
-    for (int k = 0; k < numCoeff - 1; k++)
+    modified=0;
+    for( int sign: {1, -1} )
     {
-      memcpy(coeffQuantAdjust, filterCoeffQuant, sizeof(int) * numCoeff);
-
-      coeffQuantAdjust[k] -= sign;
+      double errMin = MAX_DOUBLE;
+      int minInd = -1;
 
-      if (coeffQuantAdjust[k] <= max_value && coeffQuantAdjust[k] >= min_value)
+      for( int k = 0; k < numCoeff-1; k++ )
       {
-        double error = calcErrorForCoeffs(E, y, coeffQuantAdjust, numCoeff, bitDepth);
+        if( filterCoeffQuant[k] - sign > max_value || filterCoeffQuant[k] - sign < min_value )
+          continue;
 
-        if (error < errMin)
+        filterCoeffQuant[k] -= sign;
+
+        double error = cov.calcErrorForCoeffs( filterClipp, filterCoeffQuant, numCoeff, bitDepth );
+        if( error < errMin )
         {
           errMin = error;
           minInd = k;
         }
+        filterCoeffQuant[k] += sign;
+      }
+      if( errMin < errRef )
+      {
+        filterCoeffQuant[minInd] -= sign;
+        modified++;
+        errRef = errMin;
       }
     }
-
-    if (minInd != -1)
-    {
-      filterCoeffQuant[minInd] -= sign;
-      quantCoeffSum -= (weights[minInd] * sign);
-    }
-
-    ++count;
-  }
-
-  if (quantCoeffSum != targetCoeffSumInt)
-  {
-    memset(filterCoeffQuant, 0, sizeof(int) * numCoeff);
-  }
-
-  for (int i = 0; i < numCoeff - 1; i++)
-  {
-    CHECK(filterCoeffQuant[i] > max_value || filterCoeffQuant[i] < min_value, "filterCoeffQuant[i]>max_value || filterCoeffQuant[i]<min_value");
-    filterCoeff[i] = filterCoeffQuant[i] / double(factor);
-  }
-  CHECK(filterCoeffQuant[numCoeff - 1] > max_value_center || filterCoeffQuant[numCoeff - 1] < min_value_center, "filterCoeffQuant[numCoeff-1]>max_value_center || filterCoeffQuant[numCoeff-1]<min_value_center");
-  filterCoeff[numCoeff - 1] = filterCoeffQuant[numCoeff - 1] / double(factor);
-
-
-  double error = calcErrorForCoeffs( E, y, filterCoeffQuant, numCoeff, bitDepth );
-  return error;
-}
-
-double EncAdaptiveLoopFilter::calcErrorForCoeffs( double **E, double *y, int *coeff, const int numCoeff, const int bitDepth )
-{
-  double factor = 1 << ( bitDepth - 1 );
-  double error = 0;
-
-  for( int i = 0; i < numCoeff; i++ )   //diagonal
-  {
-    double sum = 0;
-    for( int j = i + 1; j < numCoeff; j++ )
-    {
-      // E[j][i] = E[i][j], sum will be multiplied by 2 later
-      sum += E[i][j] * coeff[j];
-    }
-    error += ( ( E[i][i] * coeff[i] + sum * 2 ) / factor - 2 * y[i] ) * coeff[i];
   }
 
-  return error / factor;
+  return errRef;
 }
 
 void EncAdaptiveLoopFilter::roundFiltCoeff( int *filterCoeffQuant, double *filterCoeff, const int numCoeff, const int factor )
@@ -1249,8 +1604,12 @@ void EncAdaptiveLoopFilter::roundFiltCoeff( int *filterCoeffQuant, double *filte
   }
 }
 
-void EncAdaptiveLoopFilter::mergeClasses( AlfCovariance* cov, AlfCovariance* covMerged, const int numClasses, short filterIndices[MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_CLASSES] )
+void EncAdaptiveLoopFilter::mergeClasses( const AlfFilterShape& alfShape, AlfCovariance* cov, AlfCovariance* covMerged, int clipMerged[MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_LUMA_COEFF], const int numClasses, short filterIndices[MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_CLASSES] )
 {
+  static int tmpClip[MAX_NUM_ALF_LUMA_COEFF];
+  static int bestMergeClip[MAX_NUM_ALF_LUMA_COEFF];
+  static double err[MAX_NUM_ALF_CLASSES];
+  static double bestMergeErr;
   static bool availableClass[MAX_NUM_ALF_CLASSES];
   static uint8_t indexList[MAX_NUM_ALF_CLASSES];
   static uint8_t indexListTemp[MAX_NUM_ALF_CLASSES];
@@ -1264,14 +1623,43 @@ void EncAdaptiveLoopFilter::mergeClasses( AlfCovariance* cov, AlfCovariance* cov
     indexList[i] = i;
     availableClass[i] = true;
     covMerged[i] = cov[i];
+#if JVET_Q0249_ALF_CHROMA_CLIPFLAG
+    covMerged[i].numBins = m_alfParamTemp.nonLinearFlag[CHANNEL_TYPE_LUMA] ? AlfNumClippingValues[COMPONENT_Y] : 1;
+#else
+    covMerged[i].numBins = m_alfParamTemp.nonLinearFlag[CHANNEL_TYPE_LUMA][0] ? AlfNumClippingValues[COMPONENT_Y] : 1;
+#endif
   }
 
   // Try merging different covariance matrices
 
   // temporal AlfCovariance structure is allocated as the last element in covMerged array, the size of covMerged is MAX_NUM_ALF_CLASSES + 1
   AlfCovariance& tmpCov = covMerged[MAX_NUM_ALF_CLASSES];
+#if JVET_Q0249_ALF_CHROMA_CLIPFLAG
+  tmpCov.numBins = m_alfParamTemp.nonLinearFlag[CHANNEL_TYPE_LUMA] ? AlfNumClippingValues[COMPONENT_Y] : 1;
+#else
+  tmpCov.numBins = m_alfParamTemp.nonLinearFlag[CHANNEL_TYPE_LUMA][0] ? AlfNumClippingValues[COMPONENT_Y] : 1;
+#endif
+
+  // init Clip
+  for( int i = 0; i < numClasses; i++ )
+  {
+#if JVET_Q0249_ALF_CHROMA_CLIPFLAG
+    std::fill_n(clipMerged[numRemaining-1][i], MAX_NUM_ALF_LUMA_COEFF, m_alfParamTemp.nonLinearFlag[CHANNEL_TYPE_LUMA] ? AlfNumClippingValues[CHANNEL_TYPE_LUMA] / 2 : 0);
+    if ( m_alfParamTemp.nonLinearFlag[CHANNEL_TYPE_LUMA] )
+#else
+    std::fill_n(clipMerged[numRemaining-1][i], MAX_NUM_ALF_LUMA_COEFF, m_alfParamTemp.nonLinearFlag[CHANNEL_TYPE_LUMA][0] ? AlfNumClippingValues[CHANNEL_TYPE_LUMA] / 2 : 0);
+    if ( m_alfParamTemp.nonLinearFlag[CHANNEL_TYPE_LUMA][0] )
+#endif
+    {
+      err[i] = covMerged[i].optimizeFilterClip( alfShape, clipMerged[numRemaining-1][i] );
+    }
+    else
+    {
+      err[i] = covMerged[i].calculateError( clipMerged[numRemaining-1][i] );
+    }
+  }
 
-  while( numRemaining > 2 )
+  while( numRemaining >= 2 )
   {
     double errorMin = std::numeric_limits<double>::max();
     int bestToMergeIdx1 = 0, bestToMergeIdx2 = 1;
@@ -1284,14 +1672,25 @@ void EncAdaptiveLoopFilter::mergeClasses( AlfCovariance* cov, AlfCovariance* cov
         {
           if( availableClass[j] )
           {
-            double error1 = calculateError( covMerged[i] );
-            double error2 = calculateError( covMerged[j] );
+            double error1 = err[i];
+            double error2 = err[j];
 
             tmpCov.add( covMerged[i], covMerged[j] );
-            double error = calculateError( tmpCov ) - error1 - error2;
+            for( int l = 0; l < MAX_NUM_ALF_LUMA_COEFF; ++l )
+            {
+              tmpClip[l] = (clipMerged[numRemaining-1][i][l] + clipMerged[numRemaining-1][j][l] + 1 ) >> 1;
+            }
+#if JVET_Q0249_ALF_CHROMA_CLIPFLAG
+            double errorMerged = m_alfParamTemp.nonLinearFlag[CHANNEL_TYPE_LUMA] ? tmpCov.optimizeFilterClip(alfShape, tmpClip) : tmpCov.calculateError(tmpClip);
+#else
+            double errorMerged = m_alfParamTemp.nonLinearFlag[CHANNEL_TYPE_LUMA][0] ? tmpCov.optimizeFilterClip( alfShape, tmpClip ) : tmpCov.calculateError( tmpClip );
+#endif
+            double error = errorMerged - error1 - error2;
 
             if( error < errorMin )
             {
+              bestMergeErr = errorMerged;
+              memcpy(bestMergeClip, tmpClip, sizeof(bestMergeClip));
               errorMin = error;
               bestToMergeIdx1 = i;
               bestToMergeIdx2 = j;
@@ -1302,6 +1701,9 @@ void EncAdaptiveLoopFilter::mergeClasses( AlfCovariance* cov, AlfCovariance* cov
     }
 
     covMerged[bestToMergeIdx1] += covMerged[bestToMergeIdx2];
+    memcpy(clipMerged[numRemaining-2], clipMerged[numRemaining-1], sizeof(int[MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_LUMA_COEFF]));
+    memcpy(clipMerged[numRemaining-2][bestToMergeIdx1], bestMergeClip, sizeof(bestMergeClip));
+    err[bestToMergeIdx1] = bestMergeErr;
     availableClass[bestToMergeIdx2] = false;
 
     for( int i = 0; i < numClasses; i++ )
@@ -1352,36 +1754,46 @@ void EncAdaptiveLoopFilter::mergeClasses( AlfCovariance* cov, AlfCovariance* cov
 void EncAdaptiveLoopFilter::getFrameStats( ChannelType channel, int iShapeIdx )
 {
   int numClasses = isLuma( channel ) ? MAX_NUM_ALF_CLASSES : 1;
-  for( int i = 0; i < numClasses; i++ )
-  {
-    m_alfCovarianceFrame[channel][iShapeIdx][i].reset();
-  }
-  if( isLuma( channel ) )
-  {
-    getFrameStat( m_alfCovarianceFrame[CHANNEL_TYPE_LUMA][iShapeIdx], m_alfCovariance[COMPONENT_Y][iShapeIdx], m_ctuEnableFlag[COMPONENT_Y], numClasses );
-  }
-  else
+  int numAlternatives = isLuma( channel ) ? 1 : m_alfParamTemp.numAlternativesChroma;
+  // When calling this function m_ctuEnableFlag shall be set to 0 for CTUs using alternative APS
+  // Here we compute frame stats for building new alternative filters
+  for( int altIdx = 0; altIdx < numAlternatives; ++altIdx )
   {
-    getFrameStat( m_alfCovarianceFrame[CHANNEL_TYPE_CHROMA][iShapeIdx], m_alfCovariance[COMPONENT_Cb][iShapeIdx], m_ctuEnableFlag[COMPONENT_Cb], numClasses );
-    getFrameStat( m_alfCovarianceFrame[CHANNEL_TYPE_CHROMA][iShapeIdx], m_alfCovariance[COMPONENT_Cr][iShapeIdx], m_ctuEnableFlag[COMPONENT_Cr], numClasses );
+    for( int i = 0; i < numClasses; i++ )
+    {
+      m_alfCovarianceFrame[channel][iShapeIdx][isLuma( channel ) ? i : altIdx].reset(AlfNumClippingValues[channel]);
+    }
+    if( isLuma( channel ) )
+    {
+      getFrameStat( m_alfCovarianceFrame[CHANNEL_TYPE_LUMA][iShapeIdx], m_alfCovariance[COMPONENT_Y][iShapeIdx], m_ctuEnableFlag[COMPONENT_Y], nullptr, numClasses, altIdx );
+    }
+    else
+    {
+      getFrameStat( m_alfCovarianceFrame[CHANNEL_TYPE_CHROMA][iShapeIdx], m_alfCovariance[COMPONENT_Cb][iShapeIdx], m_ctuEnableFlag[COMPONENT_Cb], m_ctuAlternative[COMPONENT_Cb], numClasses, altIdx );
+      getFrameStat( m_alfCovarianceFrame[CHANNEL_TYPE_CHROMA][iShapeIdx], m_alfCovariance[COMPONENT_Cr][iShapeIdx], m_ctuEnableFlag[COMPONENT_Cr], m_ctuAlternative[COMPONENT_Cr], numClasses, altIdx );
+    }
   }
 }
 
-void EncAdaptiveLoopFilter::getFrameStat( AlfCovariance* frameCov, AlfCovariance** ctbCov, uint8_t* ctbEnableFlags, const int numClasses )
+void EncAdaptiveLoopFilter::getFrameStat( AlfCovariance* frameCov, AlfCovariance** ctbCov, uint8_t* ctbEnableFlags, uint8_t* ctbAltIdx, const int numClasses, int altIdx )
 {
-  for( int i = 0; i < m_numCTUsInPic; i++ )
+  const ChannelType channel = (!ctbAltIdx ? CHANNEL_TYPE_LUMA : CHANNEL_TYPE_CHROMA);
+  for( int ctuIdx = 0; ctuIdx < m_numCTUsInPic; ctuIdx++ )
   {
-    if( ctbEnableFlags[i] )
+    if( ctbEnableFlags[ctuIdx]  )
     {
-      for( int j = 0; j < numClasses; j++ )
+      for( int classIdx = 0; classIdx < numClasses; classIdx++ )
       {
-        frameCov[j] += ctbCov[i][j];
+        if( isLuma( channel ) || altIdx == ctbAltIdx[ctuIdx] )
+        {
+          frameCov[isLuma( channel ) ? classIdx : altIdx] += ctbCov[ctuIdx][classIdx];
+        }
       }
     }
   }
 }
 
-void EncAdaptiveLoopFilter::deriveStatsForFiltering( PelUnitBuf& orgYuv, PelUnitBuf& recYuv )
+void EncAdaptiveLoopFilter::deriveStatsForFiltering( PelUnitBuf& orgYuv, PelUnitBuf& recYuv, CodingStructure& cs )
 {
   int ctuRsAddr = 0;
   const int numberOfComponents = getNumberValidComponents( m_chromaFormat );
@@ -1398,7 +1810,7 @@ void EncAdaptiveLoopFilter::deriveStatsForFiltering( PelUnitBuf& orgYuv, PelUnit
       {
         for( int ctuIdx = 0; ctuIdx < m_numCTUsInPic; ctuIdx++ )
         {
-          m_alfCovariance[compIdx][shape][ctuIdx][classIdx].reset();
+          m_alfCovariance[compIdx][shape][ctuIdx][classIdx].reset(AlfNumClippingValues[toChannelType( compID )]);
         }
       }
     }
@@ -1409,23 +1821,115 @@ void EncAdaptiveLoopFilter::deriveStatsForFiltering( PelUnitBuf& orgYuv, PelUnit
   for( int channelIdx = 0; channelIdx < numberOfChannels; channelIdx++ )
   {
     const ChannelType channelID = ChannelType( channelIdx );
+    const int numAlts = channelID == CHANNEL_TYPE_LUMA ? 1 : MAX_NUM_ALF_ALTERNATIVES_CHROMA;
     const int numClasses = isLuma( channelID ) ? MAX_NUM_ALF_CLASSES : 1;
 
+    for( int altIdx = 0; altIdx < numAlts; ++altIdx )
     for( int shape = 0; shape != m_filterShapes[channelIdx].size(); shape++ )
     {
       for( int classIdx = 0; classIdx < numClasses; classIdx++ )
       {
-        m_alfCovarianceFrame[channelIdx][shape][classIdx].reset();
+        m_alfCovarianceFrame[channelIdx][shape][isLuma( channelID ) ? classIdx : altIdx].reset(AlfNumClippingValues[channelID]);
       }
     }
   }
 
+  const PreCalcValues& pcv = *cs.pcv;
+  bool clipTop = false, clipBottom = false, clipLeft = false, clipRight = false;
+  int numHorVirBndry = 0, numVerVirBndry = 0;
+  int horVirBndryPos[] = { 0, 0, 0 };
+  int verVirBndryPos[] = { 0, 0, 0 };
+
   for( int yPos = 0; yPos < m_picHeight; yPos += m_maxCUHeight )
   {
     for( int xPos = 0; xPos < m_picWidth; xPos += m_maxCUWidth )
     {
       const int width = ( xPos + m_maxCUWidth > m_picWidth ) ? ( m_picWidth - xPos ) : m_maxCUWidth;
       const int height = ( yPos + m_maxCUHeight > m_picHeight ) ? ( m_picHeight - yPos ) : m_maxCUHeight;
+      int rasterSliceAlfPad = 0;
+      if( isCrossedByVirtualBoundaries( cs, xPos, yPos, width, height, clipTop, clipBottom, clipLeft, clipRight, numHorVirBndry, numVerVirBndry, horVirBndryPos, verVirBndryPos, rasterSliceAlfPad ) )
+      {
+        int yStart = yPos;
+        for( int i = 0; i <= numHorVirBndry; i++ )
+        {
+          const int yEnd = i == numHorVirBndry ? yPos + height : horVirBndryPos[i];
+          const int h = yEnd - yStart;
+          const bool clipT = ( i == 0 && clipTop ) || ( i > 0 ) || ( yStart == 0 );
+          const bool clipB = ( i == numHorVirBndry && clipBottom ) || ( i < numHorVirBndry ) || ( yEnd == pcv.lumaHeight );
+          int xStart = xPos;
+          for( int j = 0; j <= numVerVirBndry; j++ )
+          {
+            const int xEnd = j == numVerVirBndry ? xPos + width : verVirBndryPos[j];
+            const int w = xEnd - xStart;
+            const bool clipL = ( j == 0 && clipLeft ) || ( j > 0 ) || ( xStart == 0 );
+            const bool clipR = ( j == numVerVirBndry && clipRight ) || ( j < numVerVirBndry ) || ( xEnd == pcv.lumaWidth );
+            const int wBuf = w + (clipL ? 0 : MAX_ALF_PADDING_SIZE) + (clipR ? 0 : MAX_ALF_PADDING_SIZE);
+            const int hBuf = h + (clipT ? 0 : MAX_ALF_PADDING_SIZE) + (clipB ? 0 : MAX_ALF_PADDING_SIZE);
+            PelUnitBuf recBuf = m_tempBuf2.subBuf( UnitArea( cs.area.chromaFormat, Area( 0, 0, wBuf, hBuf ) ) );
+            recBuf.copyFrom( recYuv.subBuf( UnitArea( cs.area.chromaFormat, Area( xStart - (clipL ? 0 : MAX_ALF_PADDING_SIZE), yStart - (clipT ? 0 : MAX_ALF_PADDING_SIZE), wBuf, hBuf ) ) ) );
+            // pad top-left unavailable samples for raster slice
+            if ( xStart == xPos && yStart == yPos && ( rasterSliceAlfPad & 1 ) )
+            {
+              recBuf.padBorderPel( MAX_ALF_PADDING_SIZE, 1 );
+            }
+
+            // pad bottom-right unavailable samples for raster slice
+            if ( xEnd == xPos + width && yEnd == yPos + height && ( rasterSliceAlfPad & 2 ) )
+            {
+              recBuf.padBorderPel( MAX_ALF_PADDING_SIZE, 2 );
+            }
+            recBuf.extendBorderPel( MAX_ALF_PADDING_SIZE );
+            recBuf = recBuf.subBuf( UnitArea ( cs.area.chromaFormat, Area( clipL ? 0 : MAX_ALF_PADDING_SIZE, clipT ? 0 : MAX_ALF_PADDING_SIZE, w, h ) ) );
+
+            const UnitArea area( m_chromaFormat, Area( 0, 0, w, h ) );
+            const UnitArea areaDst( m_chromaFormat, Area( xStart, yStart, w, h ) );
+            for( int compIdx = 0; compIdx < numberOfComponents; compIdx++ )
+            {
+              const ComponentID compID = ComponentID( compIdx );
+              const CompArea& compArea = area.block( compID );
+
+              int  recStride = recBuf.get( compID ).stride;
+              Pel* rec = recBuf.get( compID ).bufAt( compArea );
+
+              int  orgStride = orgYuv.get(compID).stride;
+              Pel* org = orgYuv.get(compID).bufAt(xStart >> ::getComponentScaleX(compID, m_chromaFormat), yStart >> ::getComponentScaleY(compID, m_chromaFormat));
+              ChannelType chType = toChannelType( compID );
+
+              for( int shape = 0; shape != m_filterShapes[chType].size(); shape++ )
+              {
+              const CompArea& compAreaDst = areaDst.block( compID );
+                getBlkStats(m_alfCovariance[compIdx][shape][ctuRsAddr], m_filterShapes[chType][shape], compIdx ? nullptr : m_classifier, org, orgStride, rec, recStride, compAreaDst, compArea, chType
+                  , ((compIdx == 0) ? m_alfVBLumaCTUHeight : m_alfVBChmaCTUHeight)
+                  , (compIdx == 0) ? m_alfVBLumaPos : m_alfVBChmaPos
+                );
+              }
+            }
+
+            xStart = xEnd;
+          }
+
+          yStart = yEnd;
+        }
+
+        for( int compIdx = 0; compIdx < numberOfComponents; compIdx++ )
+        {
+          const ComponentID compID = ComponentID( compIdx );
+
+          ChannelType chType = toChannelType( compID );
+
+          for( int shape = 0; shape != m_filterShapes[chType].size(); shape++ )
+          {
+            const int numClasses = isLuma( compID ) ? MAX_NUM_ALF_CLASSES : 1;
+
+            for( int classIdx = 0; classIdx < numClasses; classIdx++ )
+            {
+              m_alfCovarianceFrame[chType][shape][isLuma( compID ) ? classIdx : 0] += m_alfCovariance[compIdx][shape][ctuRsAddr][classIdx];
+            }
+          }
+        }
+      }
+      else
+      {
       const UnitArea area( m_chromaFormat, Area( xPos, yPos, width, height ) );
 
       for( int compIdx = 0; compIdx < numberOfComponents; compIdx++ )
@@ -1443,40 +1947,49 @@ void EncAdaptiveLoopFilter::deriveStatsForFiltering( PelUnitBuf& orgYuv, PelUnit
 
         for( int shape = 0; shape != m_filterShapes[chType].size(); shape++ )
         {
-          getBlkStats( m_alfCovariance[compIdx][shape][ctuRsAddr], m_filterShapes[chType][shape], compIdx ? nullptr : m_classifier, org, orgStride, rec, recStride, compArea );
+          getBlkStats(m_alfCovariance[compIdx][shape][ctuRsAddr], m_filterShapes[chType][shape], compIdx ? nullptr : m_classifier, org, orgStride, rec, recStride, compArea, compArea, chType
+            , ((compIdx == 0) ? m_alfVBLumaCTUHeight : m_alfVBChmaCTUHeight)
+            , (compIdx == 0) ? m_alfVBLumaPos : m_alfVBChmaPos
+          );
+
 
           const int numClasses = isLuma( compID ) ? MAX_NUM_ALF_CLASSES : 1;
 
           for( int classIdx = 0; classIdx < numClasses; classIdx++ )
           {
-            m_alfCovarianceFrame[chType][shape][classIdx] += m_alfCovariance[compIdx][shape][ctuRsAddr][classIdx];
+            m_alfCovarianceFrame[chType][shape][isLuma( compID ) ? classIdx : 0] += m_alfCovariance[compIdx][shape][ctuRsAddr][classIdx];
           }
         }
       }
+      }
       ctuRsAddr++;
     }
   }
 }
 
-void EncAdaptiveLoopFilter::getBlkStats( AlfCovariance* alfCovariace, const AlfFilterShape& shape, AlfClassifier** classifier, Pel* org, const int orgStride, Pel* rec, const int recStride, const CompArea& area )
+void EncAdaptiveLoopFilter::getBlkStats(AlfCovariance* alfCovariance, const AlfFilterShape& shape, AlfClassifier** classifier, Pel* org, const int orgStride, Pel* rec, const int recStride, const CompArea& areaDst, const CompArea& area, const ChannelType channel, int vbCTUHeight, int vbPos)
+
+
 {
-  static int ELocal[MAX_NUM_ALF_LUMA_COEFF];
+  static int ELocal[MAX_NUM_ALF_LUMA_COEFF][MaxAlfNumClippingValues];
 
+  const int numBins = AlfNumClippingValues[channel];
   int transposeIdx = 0;
   int classIdx = 0;
 
   for( int i = 0; i < area.height; i++ )
   {
+    int vbDistance = ((areaDst.y + i) % vbCTUHeight) - vbPos;
     for( int j = 0; j < area.width; j++ )
     {
-      if( classifier && classifier[area.y + i][area.x + j].classIdx == m_ALF_UNUSED_CLASSIDX && classifier[area.y + i][area.x + j].transposeIdx == m_ALF_UNUSED_TRANSPOSIDX )
+      if( classifier && classifier[areaDst.y + i][areaDst.x + j].classIdx == m_ALF_UNUSED_CLASSIDX && classifier[areaDst.y + i][areaDst.x + j].transposeIdx == m_ALF_UNUSED_TRANSPOSIDX )
       {
         continue;
       }
-      std::memset( ELocal, 0, shape.numCoeff * sizeof( int ) );
+      std::memset( ELocal, 0, sizeof( ELocal ) );
       if( classifier )
       {
-        AlfClassifier& cl = classifier[area.y + i][area.x + j];
+        AlfClassifier& cl = classifier[areaDst.y + i][areaDst.x + j];
         transposeIdx = cl.transposeIdx;
         classIdx = cl.classIdx;
       }
@@ -1487,31 +2000,46 @@ void EncAdaptiveLoopFilter::getBlkStats( AlfCovariance* alfCovariace, const AlfF
         weight = m_lumaLevelToWeightPLUT[org[j]];
       }
       int yLocal = org[j] - rec[j];
-      calcCovariance( ELocal, rec + j, recStride, shape.pattern.data(), shape.filterLength >> 1, transposeIdx );
+      calcCovariance(ELocal, rec + j, recStride, shape, transposeIdx, channel, vbDistance);
       for( int k = 0; k < shape.numCoeff; k++ )
       {
         for( int l = k; l < shape.numCoeff; l++ )
         {
-          if (m_alfWSSD)
+          for( int b0 = 0; b0 < numBins; b0++ )
           {
-            alfCovariace[classIdx].E[k][l] += weight * (double)(ELocal[k] * ELocal[l]);
+            for( int b1 = 0; b1 < numBins; b1++ )
+            {
+              if (m_alfWSSD)
+              {
+                alfCovariance[classIdx].E[b0][b1][k][l] += weight * (double)(ELocal[k][b0] * ELocal[l][b1]);
+              }
+              else
+              {
+                alfCovariance[classIdx].E[b0][b1][k][l] += ELocal[k][b0] * ELocal[l][b1];
+              }
+            }
           }
-          else
-          alfCovariace[classIdx].E[k][l] += ELocal[k] * ELocal[l];
         }
-        if (m_alfWSSD)
+        for( int b = 0; b < numBins; b++ )
         {
-          alfCovariace[classIdx].y[k] += weight * (double)(ELocal[k] * yLocal);
+          if (m_alfWSSD)
+          {
+            alfCovariance[classIdx].y[b][k] += weight * (double)(ELocal[k][b] * yLocal);
+          }
+          else
+          {
+            alfCovariance[classIdx].y[b][k] += ELocal[k][b] * yLocal;
+          }
         }
-        else
-        alfCovariace[classIdx].y[k] += ELocal[k] * yLocal;
       }
       if (m_alfWSSD)
       {
-        alfCovariace[classIdx].pixAcc += weight * (double)(yLocal * yLocal);
+        alfCovariance[classIdx].pixAcc += weight * (double)(yLocal * yLocal);
       }
       else
-      alfCovariace[classIdx].pixAcc += yLocal * yLocal;
+      {
+        alfCovariance[classIdx].pixAcc += yLocal * yLocal;
+      }
     }
     org += orgStride;
     rec += recStride;
@@ -1524,31 +2052,61 @@ void EncAdaptiveLoopFilter::getBlkStats( AlfCovariance* alfCovariace, const AlfF
     {
       for( int l = 0; l < k; l++ )
       {
-        alfCovariace[classIdx].E[k][l] = alfCovariace[classIdx].E[l][k];
+        for( int b0 = 0; b0 < numBins; b0++ )
+        {
+          for( int b1 = 0; b1 < numBins; b1++ )
+          {
+            alfCovariance[classIdx].E[b0][b1][k][l] = alfCovariance[classIdx].E[b1][b0][l][k];
+          }
+        }
       }
     }
   }
 }
 
-void EncAdaptiveLoopFilter::calcCovariance( int *ELocal, const Pel *rec, const int stride, const int *filterPattern, const int halfFilterLength, const int transposeIdx )
+void EncAdaptiveLoopFilter::calcCovariance(int ELocal[MAX_NUM_ALF_LUMA_COEFF][MaxAlfNumClippingValues], const Pel *rec, const int stride, const AlfFilterShape& shape, const int transposeIdx, const ChannelType channel, int vbDistance)
 {
+  int clipTopRow = -4;
+  int clipBotRow = 4;
+  if (vbDistance >= -3 && vbDistance < 0)
+  {
+    clipBotRow = -vbDistance - 1;
+    clipTopRow = -clipBotRow; // symmetric
+  }
+  else if (vbDistance >= 0 && vbDistance < 3)
+  {
+    clipTopRow = -vbDistance;
+    clipBotRow = -clipTopRow; // symmetric
+  }
+  const int *filterPattern = shape.pattern.data();
+  const int halfFilterLength = shape.filterLength >> 1;
+  const Pel* clip = m_alfClippingValues[channel];
+  const int numBins = AlfNumClippingValues[channel];
+
   int k = 0;
 
+  const short curr = rec[0];
+
   if( transposeIdx == 0 )
   {
     for( int i = -halfFilterLength; i < 0; i++ )
     {
-      const Pel* rec0 = rec + i * stride;
-      const Pel* rec1 = rec - i * stride;
-
-      for( int j = -halfFilterLength - i; j <= halfFilterLength + i; j++ )
+      const Pel* rec0 = rec + std::max(i, clipTopRow) * stride;
+      const Pel* rec1 = rec - std::max(i, -clipBotRow) * stride;
+      for( int j = -halfFilterLength - i; j <= halfFilterLength + i; j++, k++ )
       {
-        ELocal[filterPattern[k++]] += rec0[j] + rec1[-j];
+        for( int b = 0; b < numBins; b++ )
+        {
+          ELocal[filterPattern[k]][b] += clipALF(clip[b], curr, rec0[j], rec1[-j]);
+        }
       }
     }
-    for( int j = -halfFilterLength; j < 0; j++ )
+    for( int j = -halfFilterLength; j < 0; j++, k++ )
     {
-      ELocal[filterPattern[k++]] += rec[j] + rec[-j];
+      for( int b = 0; b < numBins; b++ )
+      {
+        ELocal[filterPattern[k]][b] += clipALF(clip[b], curr, rec[j], rec[-j]);
+      }
     }
   }
   else if( transposeIdx == 1 )
@@ -1557,32 +2115,43 @@ void EncAdaptiveLoopFilter::calcCovariance( int *ELocal, const Pel *rec, const i
     {
       const Pel* rec0 = rec + j;
       const Pel* rec1 = rec - j;
-
-      for( int i = -halfFilterLength - j; i <= halfFilterLength + j; i++ )
+      for (int i = -halfFilterLength - j; i <= halfFilterLength + j; i++, k++)
       {
-        ELocal[filterPattern[k++]] += rec0[i * stride] + rec1[-i * stride];
-      }
+        for (int b = 0; b < numBins; b++)
+        {
+          ELocal[filterPattern[k]][b] += clipALF(clip[b], curr, rec0[std::max(i, clipTopRow) * stride], rec1[-std::max(i, -clipBotRow) * stride]);
+        }
     }
-    for( int i = -halfFilterLength; i < 0; i++ )
+    }
+    for (int i = -halfFilterLength; i < 0; i++, k++)
     {
-      ELocal[filterPattern[k++]] += rec[i*stride] + rec[-i * stride];
+      for (int b = 0; b < numBins; b++)
+      {
+        ELocal[filterPattern[k]][b] += clipALF(clip[b], curr, rec[std::max(i, clipTopRow) * stride], rec[-std::max(i, -clipBotRow) * stride]);
+      }
     }
   }
   else if( transposeIdx == 2 )
   {
     for( int i = -halfFilterLength; i < 0; i++ )
     {
-      const Pel* rec0 = rec + i * stride;
-      const Pel* rec1 = rec - i * stride;
+      const Pel* rec0 = rec + std::max(i, clipTopRow) * stride;
+      const Pel* rec1 = rec - std::max(i, -clipBotRow) * stride;
 
-      for( int j = halfFilterLength + i; j >= -halfFilterLength - i; j-- )
+      for( int j = halfFilterLength + i; j >= -halfFilterLength - i; j--, k++ )
       {
-        ELocal[filterPattern[k++]] += rec0[j] + rec1[-j];
+        for( int b = 0; b < numBins; b++ )
+        {
+          ELocal[filterPattern[k]][b] += clipALF(clip[b], curr, rec0[j], rec1[-j]);
+        }
       }
     }
-    for( int j = -halfFilterLength; j < 0; j++ )
+    for( int j = -halfFilterLength; j < 0; j++, k++ )
     {
-      ELocal[filterPattern[k++]] += rec[j] + rec[-j];
+      for( int b = 0; b < numBins; b++ )
+      {
+        ELocal[filterPattern[k]][b] += clipALF(clip[b], curr, rec[j], rec[-j]);
+      }
     }
   }
   else
@@ -1591,221 +2160,814 @@ void EncAdaptiveLoopFilter::calcCovariance( int *ELocal, const Pel *rec, const i
     {
       const Pel* rec0 = rec + j;
       const Pel* rec1 = rec - j;
-
-      for( int i = halfFilterLength + j; i >= -halfFilterLength - j; i-- )
+      for (int i = halfFilterLength + j; i >= -halfFilterLength - j; i--, k++)
       {
-        ELocal[filterPattern[k++]] += rec0[i * stride] + rec1[-i * stride];
+        for (int b = 0; b < numBins; b++)
+        {
+          ELocal[filterPattern[k]][b] += clipALF(clip[b], curr, rec0[std::max(i, clipTopRow) * stride], rec1[-std::max(i, -clipBotRow) * stride]);
+        }
       }
     }
-    for( int i = -halfFilterLength; i < 0; i++ )
+    for (int i = -halfFilterLength; i < 0; i++, k++)
     {
-      ELocal[filterPattern[k++]] += rec[i*stride] + rec[-i * stride];
+      for (int b = 0; b < numBins; b++)
+      {
+        ELocal[filterPattern[k]][b] += clipALF(clip[b], curr, rec[std::max(i, clipTopRow) * stride], rec[-std::max(i, -clipBotRow) * stride]);
+      }
     }
-  }
-  ELocal[filterPattern[k++]] += rec[0];
-}
 
-
-
-double EncAdaptiveLoopFilter::calculateError( AlfCovariance& cov )
-{
-  static double c[MAX_NUM_ALF_COEFF];
-
-  gnsSolveByChol( cov.E, cov.y, c, cov.numCoeff );
-
-  double sum = 0;
-  for( int i = 0; i < cov.numCoeff; i++ )
+  }
+  for( int b = 0; b < numBins; b++ )
   {
-    sum += c[i] * cov.y[i];
+    ELocal[filterPattern[k]][b] += curr;
   }
-
-  return cov.pixAcc - sum;
 }
 
-//********************************
-// Cholesky decomposition
-//********************************
 
-#define ROUND(a)  (((a) < 0)? (int)((a) - 0.5) : (int)((a) + 0.5))
-#define REG              0.0001
-#define REG_SQR          0.0000001
 
-//Find filter coeff related
-int EncAdaptiveLoopFilter::gnsCholeskyDec( double **inpMatr, double outMatr[MAX_NUM_ALF_COEFF][MAX_NUM_ALF_COEFF], int numEq )
+void EncAdaptiveLoopFilter::setEnableFlag( AlfParam& alfSlicePara, ChannelType channel, bool val )
 {
-  static double invDiag[MAX_NUM_ALF_COEFF];  /* Vector of the inverse of diagonal entries of outMatr */
+  if( channel == CHANNEL_TYPE_LUMA )
+  {
+    alfSlicePara.enabledFlag[COMPONENT_Y] = val;
+  }
+  else
+  {
+    alfSlicePara.enabledFlag[COMPONENT_Cb] = alfSlicePara.enabledFlag[COMPONENT_Cr] = val;
+  }
+}
 
-  for( int i = 0; i < numEq; i++ )
+void EncAdaptiveLoopFilter::setEnableFlag( AlfParam& alfSlicePara, ChannelType channel, uint8_t** ctuFlags )
+{
+  const ComponentID compIDFirst = isLuma( channel ) ? COMPONENT_Y : COMPONENT_Cb;
+  const ComponentID compIDLast = isLuma( channel ) ? COMPONENT_Y : COMPONENT_Cr;
+  for( int compId = compIDFirst; compId <= compIDLast; compId++ )
   {
-    for( int j = i; j < numEq; j++ )
+    alfSlicePara.enabledFlag[compId] = false;
+    for( int i = 0; i < m_numCTUsInPic; i++ )
     {
-      /* Compute the scaling factor */
-      double scale = inpMatr[i][j];
-      if( i > 0 )
-      {
-        for( int k = i - 1; k >= 0; k-- )
-        {
-          scale -= outMatr[k][j] * outMatr[k][i];
-        }
-      }
-
-      /* Compute i'th row of outMatr */
-      if( i == j )
-      {
-        if( scale <= REG_SQR ) // if(scale <= 0 )  /* If inpMatr is singular */
-        {
-          return 0;
-        }
-        else              /* Normal operation */
-          invDiag[i] = 1.0 / ( outMatr[i][i] = sqrt( scale ) );
-      }
-      else
+      if( ctuFlags[compId][i] )
       {
-        outMatr[i][j] = scale * invDiag[i]; /* Upper triangular part          */
-        outMatr[j][i] = 0.0;              /* Lower triangular part set to 0 */
+        alfSlicePara.enabledFlag[compId] = true;
+        break;
       }
     }
   }
-  return 1; /* Signal that Cholesky factorization is successfully performed */
 }
 
-void EncAdaptiveLoopFilter::gnsTransposeBacksubstitution( double U[MAX_NUM_ALF_COEFF][MAX_NUM_ALF_COEFF], double* rhs, double* x, int order )
+void EncAdaptiveLoopFilter::copyCtuEnableFlag( uint8_t** ctuFlagsDst, uint8_t** ctuFlagsSrc, ChannelType channel )
 {
-  /* Backsubstitution starts */
-  x[0] = rhs[0] / U[0][0];               /* First row of U'                   */
-  for( int i = 1; i < order; i++ )
-  {         /* For the rows 1..order-1           */
-
-    double sum = 0; //Holds backsubstitution from already handled rows
-
-    for( int j = 0; j < i; j++ ) /* Backsubst already solved unknowns */
-    {
-      sum += x[j] * U[j][i];
-    }
+  if( isLuma( channel ) )
+  {
+    memcpy( ctuFlagsDst[COMPONENT_Y], ctuFlagsSrc[COMPONENT_Y], sizeof( uint8_t ) * m_numCTUsInPic );
+  }
+  else
+  {
+    memcpy( ctuFlagsDst[COMPONENT_Cb], ctuFlagsSrc[COMPONENT_Cb], sizeof( uint8_t ) * m_numCTUsInPic );
+    memcpy( ctuFlagsDst[COMPONENT_Cr], ctuFlagsSrc[COMPONENT_Cr], sizeof( uint8_t ) * m_numCTUsInPic );
+  }
+}
 
-    x[i] = ( rhs[i] - sum ) / U[i][i];       /* i'th component of solution vect.  */
+void EncAdaptiveLoopFilter::setCtuEnableFlag( uint8_t** ctuFlags, ChannelType channel, uint8_t val )
+{
+  if( isLuma( channel ) )
+  {
+    memset( ctuFlags[COMPONENT_Y], val, sizeof( uint8_t ) * m_numCTUsInPic );
+  }
+  else
+  {
+    memset( ctuFlags[COMPONENT_Cb], val, sizeof( uint8_t ) * m_numCTUsInPic );
+    memset( ctuFlags[COMPONENT_Cr], val, sizeof( uint8_t ) * m_numCTUsInPic );
   }
 }
 
-void EncAdaptiveLoopFilter::gnsBacksubstitution( double R[MAX_NUM_ALF_COEFF][MAX_NUM_ALF_COEFF], double* z, int size, double* A )
+std::vector<int> EncAdaptiveLoopFilter::getAvaiApsIdsLuma(CodingStructure& cs, int &newApsId)
 {
-  size--;
-  A[size] = z[size] / R[size][size];
+  APS** apss = cs.slice->getAlfAPSs();
+  for (int i = 0; i < ALF_CTB_MAX_NUM_APS; i++)
+  {
+    apss[i] = m_apsMap->getPS((i << NUM_APS_TYPE_LEN) + ALF_APS);
+  }
 
-  for( int i = size - 1; i >= 0; i-- )
+  std::vector<int> result;
+  int apsIdChecked = 0, curApsId = m_apsIdStart;
+  if (curApsId < ALF_CTB_MAX_NUM_APS)
   {
-    double sum = 0;
+    while (apsIdChecked < ALF_CTB_MAX_NUM_APS && !cs.slice->isIntra() && result.size() < ALF_CTB_MAX_NUM_APS && !cs.slice->getPendingRasInit() && !cs.slice->isIDRorBLA())
+    {
+      APS* curAPS = cs.slice->getAlfAPSs()[curApsId];
 
-    for( int j = i + 1; j <= size; j++ )
+      if( curAPS && curAPS->getLayerId() == cs.slice->getPic()->layerId && curAPS->getTemporalId() <= cs.slice->getTLayer() && curAPS->getAlfAPSParam().newFilterFlag[CHANNEL_TYPE_LUMA] )
+      {
+        result.push_back(curApsId);
+      }
+      apsIdChecked++;
+      curApsId = (curApsId + 1) % ALF_CTB_MAX_NUM_APS;
+    }
+  }
+  cs.slice->setTileGroupNumAps((int)result.size());
+  cs.slice->setAlfAPSs(result);
+  newApsId = m_apsIdStart - 1;
+  if (newApsId < 0)
+  {
+    newApsId = ALF_CTB_MAX_NUM_APS - 1;
+  }
+  CHECK(newApsId >= ALF_CTB_MAX_NUM_APS, "Wrong APS index assignment in getAvaiApsIdsLuma");
+  return result;
+}
+void  EncAdaptiveLoopFilter::initDistortion()
+{
+  for (int comp = 0; comp < MAX_NUM_COMPONENT; comp++)
+  {
+    for (int ctbIdx = 0; ctbIdx < m_numCTUsInPic; ctbIdx++)
     {
-      sum += R[i][j] * A[j];
+      m_ctbDistortionUnfilter[comp][ctbIdx] = getUnfilteredDistortion(m_alfCovariance[comp][0][ctbIdx], comp == 0 ? MAX_NUM_ALF_CLASSES : 1);
     }
-
-    A[i] = ( z[i] - sum ) / R[i][i];
   }
 }
-
-int EncAdaptiveLoopFilter::gnsSolveByChol( double **LHS, double *rhs, double *x, int numEq )
+void  EncAdaptiveLoopFilter::alfEncoderCtb(CodingStructure& cs, AlfParam& alfParamNewFilters
+#if ENABLE_QPA
+  , const double lambdaChromaWeight
+#endif
+)
 {
-  static double aux[MAX_NUM_ALF_COEFF];     /* Auxiliary vector */
-  static double U[MAX_NUM_ALF_COEFF][MAX_NUM_ALF_COEFF];    /* Upper triangular Cholesky factor of LHS */
-  int res = 1;  // Signal that Cholesky factorization is successfully performed
-
-                /* The equation to be solved is LHSx = rhs */
+  TempCtx        ctxStart(m_CtxCache, AlfCtx(m_CABACEstimator->getCtx()));
+  TempCtx        ctxBest(m_CtxCache);
+  TempCtx        ctxTempStart(m_CtxCache);
+  TempCtx        ctxTempBest(m_CtxCache);
+  TempCtx        ctxTempAltStart( m_CtxCache );
+  TempCtx        ctxTempAltBest( m_CtxCache );
+  AlfParam  alfParamNewFiltersBest = alfParamNewFilters;
+  APS**          apss = cs.slice->getAlfAPSs();
+  short*     alfCtbFilterSetIndex = cs.picture->getAlfCtbFilterIndex();
+  bool     hasNewFilters[2] = { alfParamNewFilters.enabledFlag[COMPONENT_Y] , alfParamNewFilters.enabledFlag[COMPONENT_Cb] || alfParamNewFilters.enabledFlag[COMPONENT_Cr] };
+  initDistortion();
+
+  //luma
+  m_alfParamTemp = alfParamNewFilters;
+  setCtuEnableFlag(m_ctuEnableFlag, CHANNEL_TYPE_LUMA, 1);
+  getFrameStats(CHANNEL_TYPE_LUMA, 0);
+  setCtuEnableFlag(m_ctuEnableFlag, CHANNEL_TYPE_LUMA, 0);
+  double costOff = getUnfilteredDistortion(m_alfCovarianceFrame[CHANNEL_TYPE_LUMA][0], CHANNEL_TYPE_LUMA);
+
+  int newApsId;
+  std::vector<int> apsIds = getAvaiApsIdsLuma(cs, newApsId);
+  std::vector<int> bestApsIds;
+  double costMin = MAX_DOUBLE;
+  reconstructCoeffAPSs(cs, true, false, true);
 
-                /* Compute upper triangular U such that U'*U = LHS */
-  if( gnsCholeskyDec( LHS, U, numEq ) ) /* If Cholesky decomposition has been successful */
+  int numLoops = hasNewFilters[CHANNEL_TYPE_LUMA] ? 2 : 1;
+  for (int useNewFilter = 0; useNewFilter < numLoops; useNewFilter++)
   {
-    /* Now, the equation is  U'*U*x = rhs, where U is upper triangular
-    * Solve U'*aux = rhs for aux
-    */
-    gnsTransposeBacksubstitution( U, rhs, aux, numEq );
+    int bitsNewFilter = 0;
+    if (useNewFilter == 1)
+    {
+      if (!hasNewFilters[CHANNEL_TYPE_LUMA])
+      {
+        continue;
+      }
+      else
+      {
+        bitsNewFilter = m_bitsNewFilter[CHANNEL_TYPE_LUMA];
+        reconstructCoeff(alfParamNewFilters, CHANNEL_TYPE_LUMA, true, true);
+      }
+    }
+    int numIter = useNewFilter ? 2 : 1;
+    for (int numTemporalAps = 0; numTemporalAps <= apsIds.size(); numTemporalAps++)
+    {
+      if (numTemporalAps + useNewFilter >= ALF_CTB_MAX_NUM_APS)
+      {
+        continue;
+      }
+      cs.slice->setTileGroupNumAps(numTemporalAps + useNewFilter);
+      int numFilterSet = NUM_FIXED_FILTER_SETS + numTemporalAps + useNewFilter;
+      if (numTemporalAps == apsIds.size() && numTemporalAps > 0 && useNewFilter && newApsId == apsIds.back()) //the last temporal APS id is taken by the new filter set, so that temporal APS becomes unavailable
+      {
+        continue;
+      }
+      for (int iter = 0; iter < numIter; iter++)
+      {
+        m_alfParamTemp = alfParamNewFilters;
+        m_alfParamTemp.enabledFlag[CHANNEL_TYPE_LUMA] = true;
+        double curCost = 3 * m_lambda[CHANNEL_TYPE_LUMA];
+        if (iter > 0)  //re-derive new filter-set
+        {
+          double dDistOrgNewFilter = 0;
+          int blocksUsingNewFilter = 0;
+          for (int ctbIdx = 0; ctbIdx < m_numCTUsInPic; ctbIdx++)
+          {
+            if (m_ctuEnableFlag[COMPONENT_Y][ctbIdx] && alfCtbFilterSetIndex[ctbIdx] != NUM_FIXED_FILTER_SETS)
+            {
+              m_ctuEnableFlag[COMPONENT_Y][ctbIdx] = 0;
+            }
+            else if (m_ctuEnableFlag[COMPONENT_Y][ctbIdx] && alfCtbFilterSetIndex[ctbIdx] == NUM_FIXED_FILTER_SETS)
+            {
+              blocksUsingNewFilter++;
+              dDistOrgNewFilter += m_ctbDistortionUnfilter[COMPONENT_Y][ctbIdx];
+              for (int classIdx = 0; classIdx < MAX_NUM_ALF_CLASSES; classIdx++)
+              {
+                short* pCoeff = m_coeffFinal;
+                short* pClipp = m_clippFinal;
+                for (int i = 0; i < MAX_NUM_ALF_LUMA_COEFF; i++)
+                {
+                  m_filterTmp[i] = pCoeff[classIdx * MAX_NUM_ALF_LUMA_COEFF + i];
+                  m_clipTmp[i] = pClipp[classIdx * MAX_NUM_ALF_LUMA_COEFF + i];
+                }
+                dDistOrgNewFilter += m_alfCovariance[COMPONENT_Y][0][ctbIdx][classIdx].calcErrorForCoeffs(m_clipTmp, m_filterTmp, MAX_NUM_ALF_LUMA_COEFF, m_NUM_BITS);
+              }
+            }
+          }
+          if (blocksUsingNewFilter > 0 && blocksUsingNewFilter < m_numCTUsInPic)
+          {
+            int bitNL[2] = { 0, 0 };
+            double errNL[2] = { 0.0, 0.0 };
+#if JVET_Q0249_ALF_CHROMA_CLIPFLAG
+            m_alfParamTemp.nonLinearFlag[CHANNEL_TYPE_LUMA] = 1;
+#else
+            m_alfParamTemp.nonLinearFlag[CHANNEL_TYPE_LUMA][0] = 1;
+#endif
+            if (m_encCfg->getUseNonLinearAlfLuma())
+            {
+              errNL[1] = getFilterCoeffAndCost(cs, 0, CHANNEL_TYPE_LUMA, true, 0, bitNL[1], true);
+              m_alfParamTempNL = m_alfParamTemp;
+            }
+            else
+            {
+              errNL[1] = MAX_DOUBLE;
+            }
+#if JVET_Q0249_ALF_CHROMA_CLIPFLAG
+            m_alfParamTemp.nonLinearFlag[CHANNEL_TYPE_LUMA] = 0;
+#else
+            m_alfParamTemp.nonLinearFlag[CHANNEL_TYPE_LUMA][0] = 0;
+#endif
+            errNL[0] = getFilterCoeffAndCost(cs, 0, CHANNEL_TYPE_LUMA, true, 0, bitNL[0], true);
 
-    /* The equation is now U*x = aux, solve it for x (new motion coefficients) */
-    gnsBacksubstitution( U, aux, numEq, x );
+            int bitsNewFilterTempLuma = bitNL[0];
+            double err = errNL[0];
+            if (errNL[1]  < errNL[0])
+            {
+              err = errNL[1];
+              bitsNewFilterTempLuma = bitNL[1];
+              m_alfParamTemp = m_alfParamTempNL;
+            }
+            if (dDistOrgNewFilter + m_lambda[CHANNEL_TYPE_LUMA] * m_bitsNewFilter[CHANNEL_TYPE_LUMA] < err) //re-derived filter is not good, skip
+            {
+              continue;
+            }
+            reconstructCoeff(m_alfParamTemp, CHANNEL_TYPE_LUMA, true, true);
+            bitsNewFilter = bitsNewFilterTempLuma;
+          }
+          else //either no CTB or every CTB uses the new filter, skip
+          {
+            continue;
+          }
+        }
 
+        m_CABACEstimator->getCtx() = ctxStart;
+        for (int ctbIdx = 0; ctbIdx < m_numCTUsInPic; ctbIdx++)
+        {
+          double distUnfilterCtb = m_ctbDistortionUnfilter[COMPONENT_Y][ctbIdx];
+          //ctb on
+          m_ctuEnableFlag[COMPONENT_Y][ctbIdx] = 1;
+          double         costOn = MAX_DOUBLE;
+          ctxTempStart = AlfCtx(m_CABACEstimator->getCtx());
+          int iBestFilterSetIdx = 0;
+          for (int filterSetIdx = 0; filterSetIdx < numFilterSet; filterSetIdx++)
+          {
+            //rate
+            m_CABACEstimator->getCtx() = AlfCtx(ctxTempStart);
+            m_CABACEstimator->resetBits();
+            m_CABACEstimator->codeAlfCtuEnableFlag(cs, ctbIdx, COMPONENT_Y, &m_alfParamTemp);
+            alfCtbFilterSetIndex[ctbIdx] = filterSetIdx;
+            m_CABACEstimator->codeAlfCtuFilterIndex(cs, ctbIdx, &m_alfParamTemp.enabledFlag[COMPONENT_Y]);
+            double rateOn = FRAC_BITS_SCALE * m_CABACEstimator->getEstFracBits();
+            //distortion
+            double dist = distUnfilterCtb;
+            for (int classIdx = 0; classIdx < MAX_NUM_ALF_CLASSES; classIdx++)
+            {
+              if (filterSetIdx < NUM_FIXED_FILTER_SETS)
+              {
+                int filterIdx = m_classToFilterMapping[filterSetIdx][classIdx];
+                dist += m_alfCovariance[COMPONENT_Y][0][ctbIdx][classIdx].calcErrorForCoeffs(m_clipDefaultEnc, m_fixedFilterSetCoeff[filterIdx], MAX_NUM_ALF_LUMA_COEFF, m_NUM_BITS);
+              }
+              else
+              {
+                short *pCoeff;
+                short *pClipp;
+                if (useNewFilter && filterSetIdx == NUM_FIXED_FILTER_SETS)
+                {
+                  pCoeff = m_coeffFinal;
+                  pClipp = m_clippFinal;
+                }
+                else if (useNewFilter)
+                {
+                  pCoeff = m_coeffApsLuma[filterSetIdx - 1 - NUM_FIXED_FILTER_SETS];
+                  pClipp = m_clippApsLuma[filterSetIdx - 1 - NUM_FIXED_FILTER_SETS];
+                }
+                else
+                {
+                  pCoeff = m_coeffApsLuma[filterSetIdx - NUM_FIXED_FILTER_SETS];
+                  pClipp = m_clippApsLuma[filterSetIdx - NUM_FIXED_FILTER_SETS];
+                }
+                for (int i = 0; i < MAX_NUM_ALF_LUMA_COEFF; i++)
+                {
+                  m_filterTmp[i] = pCoeff[classIdx * MAX_NUM_ALF_LUMA_COEFF + i];
+                  m_clipTmp[i] = pClipp[classIdx * MAX_NUM_ALF_LUMA_COEFF + i];
+                }
+                dist += m_alfCovariance[COMPONENT_Y][0][ctbIdx][classIdx].calcErrorForCoeffs(m_clipTmp, m_filterTmp, MAX_NUM_ALF_LUMA_COEFF, m_NUM_BITS);
+              }
+            }
+            //cost
+            double costOnTmp = dist + m_lambda[COMPONENT_Y] * rateOn;
+            if (costOnTmp < costOn)
+            {
+              ctxTempBest = AlfCtx(m_CABACEstimator->getCtx());
+              costOn = costOnTmp;
+              iBestFilterSetIdx = filterSetIdx;
+            }
+          }
+          //ctb off
+          m_ctuEnableFlag[COMPONENT_Y][ctbIdx] = 0;
+          //rate
+          m_CABACEstimator->getCtx() = AlfCtx(ctxTempStart);
+          m_CABACEstimator->resetBits();
+          m_CABACEstimator->codeAlfCtuEnableFlag(cs, ctbIdx, COMPONENT_Y, &m_alfParamTemp);
+          //cost
+          double costOff =
+            distUnfilterCtb + m_lambda[COMPONENT_Y] * FRAC_BITS_SCALE * m_CABACEstimator->getEstFracBits();
+          if (costOn < costOff)
+          {
+            m_CABACEstimator->getCtx() = AlfCtx(ctxTempBest);
+            m_ctuEnableFlag[COMPONENT_Y][ctbIdx] = 1;
+            alfCtbFilterSetIndex[ctbIdx] = iBestFilterSetIdx;
+            curCost += costOn;
+          }
+          else
+          {
+            m_ctuEnableFlag[COMPONENT_Y][ctbIdx] = 0;
+            curCost += costOff;
+          }
+        } //for(ctbIdx)
+        int tmpBits = bitsNewFilter + 3 * (numFilterSet - NUM_FIXED_FILTER_SETS);
+        curCost += tmpBits * m_lambda[COMPONENT_Y];
+        if (curCost < costMin)
+        {
+          costMin = curCost;
+          bestApsIds.resize(numFilterSet - NUM_FIXED_FILTER_SETS);
+          for (int i = 0; i < bestApsIds.size(); i++)
+          {
+            if (i == 0 && useNewFilter)
+            {
+              bestApsIds[i] = newApsId;
+            }
+            else
+            {
+              bestApsIds[i] = apsIds[i - useNewFilter];
+            }
+          }
+          alfParamNewFiltersBest = m_alfParamTemp;
+          ctxBest = AlfCtx(m_CABACEstimator->getCtx());
+          copyCtuEnableFlag(m_ctuEnableFlagTmp, m_ctuEnableFlag, CHANNEL_TYPE_LUMA);
+          for (int ctuIdx = 0; ctuIdx < m_numCTUsInPic; ctuIdx++)
+          {
+            m_alfCtbFilterSetIndexTmp[ctuIdx] = alfCtbFilterSetIndex[ctuIdx];
+          }
+          alfParamNewFiltersBest.newFilterFlag[CHANNEL_TYPE_LUMA] = useNewFilter;
+        }
+      }//for (int iter = 0; iter < numIter; iter++)
+    }// for (int numTemporalAps = 0; numTemporalAps < apsIds.size(); numTemporalAps++)
+  }//for (int useNewFilter = 0; useNewFilter <= 1; useNewFilter++)
+
+  if (costOff <= costMin)
+  {
+    cs.slice->resetTileGroupAlfEnabledFlag();
+    cs.slice->setTileGroupNumAps(0);
+    setCtuEnableFlag(m_ctuEnableFlag, CHANNEL_TYPE_LUMA, 0);
+    setCtuEnableFlag(m_ctuEnableFlag, CHANNEL_TYPE_CHROMA, 0);
+    return;
   }
-  else /* LHS was singular */
+  else
   {
-    res = 0;
+    cs.slice->setTileGroupAlfEnabledFlag(COMPONENT_Y, true);
+    cs.slice->setTileGroupNumAps((int)bestApsIds.size());
+    cs.slice->setAlfAPSs(bestApsIds);
+    copyCtuEnableFlag(m_ctuEnableFlag, m_ctuEnableFlagTmp, CHANNEL_TYPE_LUMA);
+    for (int ctuIdx = 0; ctuIdx < m_numCTUsInPic; ctuIdx++)
+    {
+      alfCtbFilterSetIndex[ctuIdx] = m_alfCtbFilterSetIndexTmp[ctuIdx];
+    }
+    if (alfParamNewFiltersBest.newFilterFlag[CHANNEL_TYPE_LUMA])
+    {
+      APS* newAPS = m_apsMap->getPS((newApsId << NUM_APS_TYPE_LEN) + ALF_APS);
+      if (newAPS == NULL)
+      {
+        newAPS = m_apsMap->allocatePS((newApsId << NUM_APS_TYPE_LEN) + ALF_APS);
+        newAPS->setAPSId(newApsId);
+        newAPS->setAPSType(ALF_APS);
+      }
+      newAPS->setAlfAPSParam(alfParamNewFiltersBest);
+      newAPS->setTemporalId( cs.slice->getTLayer() );
+      newAPS->getAlfAPSParam().newFilterFlag[CHANNEL_TYPE_CHROMA] = false;
+      m_apsMap->setChangedFlag((newApsId << NUM_APS_TYPE_LEN) + ALF_APS);
+      m_apsIdStart = newApsId;
+    }
 
-    /* Regularize LHS */
-    for( int i = 0; i < numEq; i++ )
+    std::vector<int> apsIds = cs.slice->getTileGroupApsIdLuma();
+    for (int i = 0; i < (int)cs.slice->getTileGroupNumAps(); i++)
     {
-      LHS[i][i] += REG;
+      apss[apsIds[i]] = m_apsMap->getPS((apsIds[i] << NUM_APS_TYPE_LEN) + ALF_APS);
     }
+  }
 
-    /* Compute upper triangular U such that U'*U = regularized LHS */
-    res = gnsCholeskyDec( LHS, U, numEq );
+  //chroma
+  m_alfParamTemp = alfParamNewFiltersBest;
+  if( m_alfParamTemp.numAlternativesChroma < 1 )
+  {
+    m_alfParamTemp.numAlternativesChroma = 1;
+  }
+  setCtuAlternativeChroma( m_ctuAlternative, 0 );
+  setCtuEnableFlag(m_ctuEnableFlag, CHANNEL_TYPE_CHROMA, 1);
+  getFrameStats(CHANNEL_TYPE_CHROMA, 0);
+  costOff = getUnfilteredDistortion(m_alfCovarianceFrame[CHANNEL_TYPE_CHROMA][0], CHANNEL_TYPE_CHROMA);
+  costMin = MAX_DOUBLE;
+  m_CABACEstimator->getCtx() = AlfCtx(ctxBest);
+  ctxStart = AlfCtx(m_CABACEstimator->getCtx());
+  int newApsIdChroma = -1;
+  if (alfParamNewFiltersBest.newFilterFlag[CHANNEL_TYPE_LUMA] && (alfParamNewFiltersBest.enabledFlag[COMPONENT_Cb] || alfParamNewFiltersBest.enabledFlag[COMPONENT_Cr]))
+  {
+    newApsIdChroma = newApsId;
+  }
+  else if (alfParamNewFiltersBest.enabledFlag[COMPONENT_Cb] || alfParamNewFiltersBest.enabledFlag[COMPONENT_Cr])
+  {
+    int curId = m_apsIdStart;
+    while (newApsIdChroma < 0)
+    {
+      curId--;
+      if (curId < 0)
+      {
+        curId = ALF_CTB_MAX_NUM_APS - 1;
+      }
+      if (std::find(bestApsIds.begin(), bestApsIds.end(), curId) == bestApsIds.end())
+      {
+        newApsIdChroma = curId;
+      }
+    }
+  }
+  for (int curApsId = 0; curApsId < ALF_CTB_MAX_NUM_APS; curApsId++)
+  {
+    if ((cs.slice->getPendingRasInit() || cs.slice->isIDRorBLA() || cs.slice->isIntra()) && curApsId != newApsIdChroma)
+    {
+      continue;
+    }
+    APS* curAPS = m_apsMap->getPS((curApsId << NUM_APS_TYPE_LEN) + ALF_APS);
 
-    if( !res )
+    if( curAPS && curAPS->getLayerId() != cs.slice->getPic()->layerId )
     {
-      std::memset( x, 0, sizeof( double )*numEq );
-      return 0;
+      continue;
     }
 
-    /* Solve  U'*aux = rhs for aux */
-    gnsTransposeBacksubstitution( U, rhs, aux, numEq );
+    double curCost = m_lambda[CHANNEL_TYPE_CHROMA] * 3;
+    if (curApsId == newApsIdChroma)
+    {
+      m_alfParamTemp = alfParamNewFilters;
+      curCost += m_lambda[CHANNEL_TYPE_CHROMA] * m_bitsNewFilter[CHANNEL_TYPE_CHROMA];
+    }
+    else if (curAPS && curAPS->getTemporalId() <= cs.slice->getTLayer() && curAPS->getAlfAPSParam().newFilterFlag[CHANNEL_TYPE_CHROMA])
+    {
+      m_alfParamTemp = curAPS->getAlfAPSParam();
+    }
+    else
+    {
+      continue;
+    }
+    reconstructCoeff(m_alfParamTemp, CHANNEL_TYPE_CHROMA, true, true);
+    m_CABACEstimator->getCtx() = AlfCtx(ctxStart);
+    for (int compId = 1; compId < MAX_NUM_COMPONENT; compId++)
+    {
+      m_alfParamTemp.enabledFlag[compId] = true;
+      for (int ctbIdx = 0; ctbIdx < m_numCTUsInPic; ctbIdx++)
+      {
+        double distUnfilterCtu = m_ctbDistortionUnfilter[compId][ctbIdx];
+        //cost on
+        m_ctuEnableFlag[compId][ctbIdx] = 1;
+        ctxTempStart = AlfCtx(m_CABACEstimator->getCtx());
+        //rate
+        m_CABACEstimator->getCtx() = AlfCtx(ctxTempStart);
+        m_CABACEstimator->resetBits();
+        //ctb flag
+        m_CABACEstimator->codeAlfCtuEnableFlag(cs, ctbIdx, compId, &m_alfParamTemp);
+        double rateOn = FRAC_BITS_SCALE * m_CABACEstimator->getEstFracBits();
+#if ENABLE_QPA
+        const double ctuLambda = lambdaChromaWeight > 0.0 ? cs.picture->m_uEnerHpCtu[ctbIdx] / lambdaChromaWeight : m_lambda[compId];
+#else
+        const double ctuLambda = m_lambda[compId];
+#endif
+        double dist = MAX_DOUBLE;
+        int numAlts = m_alfParamTemp.numAlternativesChroma;
+        ctxTempBest = AlfCtx( m_CABACEstimator->getCtx() );
+        double bestAltRate = 0;
+        double bestAltCost = MAX_DOUBLE;
+        int bestAltIdx = -1;
+        ctxTempAltStart = AlfCtx( ctxTempBest );
+        for( int altIdx = 0; altIdx < numAlts; ++altIdx )
+        {
+          if( altIdx )
+            m_CABACEstimator->getCtx() = AlfCtx( ctxTempAltStart );
+          m_CABACEstimator->resetBits();
+          m_ctuAlternative[compId][ctbIdx] = altIdx;
+          m_CABACEstimator->codeAlfCtuAlternative( cs, ctbIdx, compId, &m_alfParamTemp );
+          double altRate   = FRAC_BITS_SCALE * m_CABACEstimator->getEstFracBits();
+          double r_altCost = ctuLambda * altRate;
+
+          //distortion
+          for (int i = 0; i < MAX_NUM_ALF_CHROMA_COEFF; i++)
+          {
+            m_filterTmp[i] = m_chromaCoeffFinal[altIdx][i];
+            m_clipTmp[i] = m_chromaClippFinal[altIdx][i];
+          }
+          double altDist = m_alfCovariance[compId][0][ctbIdx][0].calcErrorForCoeffs( m_clipTmp, m_filterTmp, MAX_NUM_ALF_CHROMA_COEFF, m_NUM_BITS );
+          double altCost = altDist + r_altCost;
+          if( altCost < bestAltCost )
+          {
+            bestAltCost = altCost;
+            bestAltIdx = altIdx;
+            bestAltRate = altRate;
+            ctxTempBest = AlfCtx( m_CABACEstimator->getCtx() );
+            dist = altDist;
+          }
+        }
+        m_ctuAlternative[compId][ctbIdx] = bestAltIdx;
+        rateOn += bestAltRate;
+        dist += distUnfilterCtu;
+        //cost
+        double costOn = dist + ctuLambda * rateOn;
+        //cost off
+        m_ctuEnableFlag[compId][ctbIdx] = 0;
+        //rate
+        m_CABACEstimator->getCtx() = AlfCtx(ctxTempStart);
+        m_CABACEstimator->resetBits();
+        m_CABACEstimator->codeAlfCtuEnableFlag(cs, ctbIdx, compId, &m_alfParamTemp);
+        //cost
+        double costOff = distUnfilterCtu + m_lambda[compId] * FRAC_BITS_SCALE * m_CABACEstimator->getEstFracBits();
+        if (costOn < costOff)
+        {
+          m_CABACEstimator->getCtx() = AlfCtx(ctxTempBest);
+          m_ctuEnableFlag[compId][ctbIdx] = 1;
+          curCost += costOn;
+        }
+        else
+        {
+          m_ctuEnableFlag[compId][ctbIdx] = 0;
+          curCost += costOff;
+        }
+      }
+    }
+    //chroma idc
+    setEnableFlag(m_alfParamTemp, CHANNEL_TYPE_CHROMA, m_ctuEnableFlag);
 
-    /* Solve U*x = aux for x */
-    gnsBacksubstitution( U, aux, numEq, x );
+    if (curCost < costMin)
+    {
+      costMin = curCost;
+      cs.slice->setTileGroupApsIdChroma(curApsId);
+      cs.slice->setTileGroupAlfEnabledFlag(COMPONENT_Cb, m_alfParamTemp.enabledFlag[COMPONENT_Cb]);
+      cs.slice->setTileGroupAlfEnabledFlag(COMPONENT_Cr, m_alfParamTemp.enabledFlag[COMPONENT_Cr]);
+      copyCtuEnableFlag(m_ctuEnableFlagTmp, m_ctuEnableFlag, CHANNEL_TYPE_CHROMA);
+      copyCtuAlternativeChroma(m_ctuAlternativeTmp, m_ctuAlternative);
+    }
   }
-  return res;
-}
-//////////////////////////////////////////////////////////////////////////////////////////
-void EncAdaptiveLoopFilter::setEnableFlag( AlfSliceParam& alfSlicePara, ChannelType channel, bool val )
-{
-  if( channel == CHANNEL_TYPE_LUMA )
+  if (costOff < costMin)
   {
-    alfSlicePara.enabledFlag[COMPONENT_Y] = val;
+    cs.slice->setTileGroupAlfEnabledFlag(COMPONENT_Cb, false);
+    cs.slice->setTileGroupAlfEnabledFlag(COMPONENT_Cr, false);
+    setCtuEnableFlag(m_ctuEnableFlag, CHANNEL_TYPE_CHROMA, 0);
   }
   else
   {
-    alfSlicePara.enabledFlag[COMPONENT_Cb] = alfSlicePara.enabledFlag[COMPONENT_Cr] = val;
+    copyCtuEnableFlag(m_ctuEnableFlag, m_ctuEnableFlagTmp, CHANNEL_TYPE_CHROMA);
+    copyCtuAlternativeChroma(m_ctuAlternative, m_ctuAlternativeTmp);
+    if (cs.slice->getTileGroupApsIdChroma() == newApsIdChroma)  //new filter
+    {
+      APS* newAPS = m_apsMap->getPS((newApsIdChroma << NUM_APS_TYPE_LEN) + ALF_APS);
+      if (newAPS == NULL)
+      {
+        newAPS = m_apsMap->allocatePS((newApsIdChroma << NUM_APS_TYPE_LEN) + ALF_APS);
+        newAPS->setAPSType(ALF_APS);
+        newAPS->setAPSId(newApsIdChroma);
+        newAPS->getAlfAPSParam().reset();
+      }
+      newAPS->getAlfAPSParam().newFilterFlag[CHANNEL_TYPE_CHROMA] = true;
+      if (!alfParamNewFiltersBest.newFilterFlag[CHANNEL_TYPE_LUMA])
+      {
+        newAPS->getAlfAPSParam().newFilterFlag[CHANNEL_TYPE_LUMA] = false;
+      }
+      newAPS->getAlfAPSParam().numAlternativesChroma = alfParamNewFilters.numAlternativesChroma;
+#if JVET_Q0249_ALF_CHROMA_CLIPFLAG
+      newAPS->getAlfAPSParam().nonLinearFlag[CHANNEL_TYPE_CHROMA] = alfParamNewFilters.nonLinearFlag[CHANNEL_TYPE_CHROMA];
+#else
+      for( int altIdx = 0; altIdx < MAX_NUM_ALF_ALTERNATIVES_CHROMA; ++altIdx )
+        newAPS->getAlfAPSParam().nonLinearFlag[CHANNEL_TYPE_CHROMA][altIdx] = alfParamNewFilters.nonLinearFlag[CHANNEL_TYPE_CHROMA][altIdx];
+#endif
+      newAPS->setTemporalId( cs.slice->getTLayer() );
+      for (int altIdx = 0; altIdx  < MAX_NUM_ALF_ALTERNATIVES_CHROMA; ++altIdx )
+      for (int i = 0; i < MAX_NUM_ALF_CHROMA_COEFF; i++)
+      {
+        newAPS->getAlfAPSParam().chromaCoeff[altIdx][i] = alfParamNewFilters.chromaCoeff[altIdx][i];
+        newAPS->getAlfAPSParam().chromaClipp[altIdx][i] = alfParamNewFilters.chromaClipp[altIdx][i];
+      }
+      m_apsMap->setChangedFlag((newApsIdChroma << NUM_APS_TYPE_LEN) + ALF_APS);
+      m_apsIdStart = newApsIdChroma;
+    }
+    apss[cs.slice->getTileGroupApsIdChroma()] = m_apsMap->getPS((cs.slice->getTileGroupApsIdChroma() << NUM_APS_TYPE_LEN) + ALF_APS);
   }
 }
 
-void EncAdaptiveLoopFilter::setEnableFlag( AlfSliceParam& alfSlicePara, ChannelType channel, uint8_t** ctuFlags )
+void EncAdaptiveLoopFilter::alfReconstructor(CodingStructure& cs, const PelUnitBuf& recExtBuf) // Runs m_filter7x7Blk (luma) / m_filter5x5Blk (chroma) per CTU, passing cs.getRecoBufRef() plus recExtBuf or a padded copy of it.
 {
-  const ComponentID compIDFirst = isLuma( channel ) ? COMPONENT_Y : COMPONENT_Cb;
-  const ComponentID compIDLast = isLuma( channel ) ? COMPONENT_Y : COMPONENT_Cr;
-  for( int compId = compIDFirst; compId <= compIDLast; compId++ )
+  if (!cs.slice->getTileGroupAlfEnabledFlag(COMPONENT_Y)) // nothing is filtered (chroma included) unless the slice-level luma flag is set
   {
-    alfSlicePara.enabledFlag[compId] = false;
-    for( int i = 0; i < m_numCTUsInPic; i++ )
+    return;
+  }
+  reconstructCoeffAPSs(cs, true, cs.slice->getTileGroupAlfEnabledFlag(COMPONENT_Cb) || cs.slice->getTileGroupAlfEnabledFlag(COMPONENT_Cr), false); // third argument is true when either chroma component is enabled for the slice
+  short* alfCtuFilterIndex = cs.slice->getPic()->getAlfCtbFilterIndex(); // per-CTU filter set index, read with ctuIdx below
+  PelUnitBuf& recBuf = cs.getRecoBufRef();
+  const PreCalcValues& pcv = *cs.pcv;
+
+  int ctuIdx = 0; // CTU counter, incremented once per inner-loop iteration (row by row, left to right)
+  bool clipTop = false, clipBottom = false, clipLeft = false, clipRight = false;
+  int numHorVirBndry = 0, numVerVirBndry = 0;
+  int horVirBndryPos[] = { 0, 0, 0 };
+  int verVirBndryPos[] = { 0, 0, 0 };
+  for (int yPos = 0; yPos < pcv.lumaHeight; yPos += pcv.maxCUHeight)
+  {
+    for (int xPos = 0; xPos < pcv.lumaWidth; xPos += pcv.maxCUWidth)
     {
-      if( ctuFlags[compId][i] )
+      const int width = (xPos + pcv.maxCUWidth > pcv.lumaWidth) ? (pcv.lumaWidth - xPos) : pcv.maxCUWidth; // CTU width cropped at the right picture edge
+      const int height = (yPos + pcv.maxCUHeight > pcv.lumaHeight) ? (pcv.lumaHeight - yPos) : pcv.maxCUHeight; // CTU height cropped at the bottom picture edge
+
+      bool ctuEnableFlag = m_ctuEnableFlag[COMPONENT_Y][ctuIdx]; // ends up true if any component is enabled for this CTU
+      for (int compIdx = 1; compIdx < MAX_NUM_COMPONENT; compIdx++)
       {
-        alfSlicePara.enabledFlag[compId] = true;
-        break;
+        ctuEnableFlag |= m_ctuEnableFlag[compIdx][ctuIdx] > 0;
+      }
+      int rasterSliceAlfPad = 0;
+      if ( ctuEnableFlag && isCrossedByVirtualBoundaries( cs, xPos, yPos, width, height, clipTop, clipBottom, clipLeft, clipRight, numHorVirBndry, numVerVirBndry, horVirBndryPos, verVirBndryPos, rasterSliceAlfPad ) ) // split path: the CTU is filtered in sub-blocks delimited by horVirBndryPos / verVirBndryPos
+      {
+        int yStart = yPos;
+        for (int i = 0; i <= numHorVirBndry; i++) // numHorVirBndry boundaries give numHorVirBndry + 1 horizontal strips
+        {
+          const int yEnd = i == numHorVirBndry ? yPos + height : horVirBndryPos[i]; // last strip ends at the CTU bottom
+          const int h = yEnd - yStart;
+          const bool clipT = (i == 0 && clipTop) || (i > 0) || (yStart == 0); // when set, no padding rows are taken above this strip (see hBuf and the copy offset)
+          const bool clipB = (i == numHorVirBndry && clipBottom) || (i < numHorVirBndry ) || (yEnd == pcv.lumaHeight);
+          int xStart = xPos;
+          for (int j = 0; j <= numVerVirBndry; j++)
+          {
+            const int xEnd = j == numVerVirBndry ? xPos + width : verVirBndryPos[j];
+            const int w = xEnd - xStart;
+            const bool clipL = (j == 0 && clipLeft) || (j > 0) || (xStart == 0);
+            const bool clipR = (j == numVerVirBndry && clipRight) || (j < numVerVirBndry ) || (xEnd == pcv.lumaWidth);
+            const int wBuf = w + (clipL ? 0 : MAX_ALF_PADDING_SIZE) + (clipR ? 0 : MAX_ALF_PADDING_SIZE); // sub-block width plus padding on each non-clipped side
+            const int hBuf = h + (clipT ? 0 : MAX_ALF_PADDING_SIZE) + (clipB ? 0 : MAX_ALF_PADDING_SIZE); // sub-block height plus padding on each non-clipped side
+            PelUnitBuf buf = m_tempBuf2.subBuf(UnitArea(cs.area.chromaFormat, Area(0, 0, wBuf, hBuf))); // scratch window at the origin of m_tempBuf2
+            buf.copyFrom(recExtBuf.subBuf(UnitArea(cs.area.chromaFormat, Area(xStart - (clipL ? 0 : MAX_ALF_PADDING_SIZE), yStart - (clipT ? 0 : MAX_ALF_PADDING_SIZE), wBuf, hBuf))));
+            // pad top-left unavailable samples for raster slice
+            if ( xStart == xPos && yStart == yPos && ( rasterSliceAlfPad & 1 ) ) // bit 0 is tested only for the first sub-block of the CTU
+            {
+              buf.padBorderPel( MAX_ALF_PADDING_SIZE, 1 );
+            }
+
+            // pad bottom-right unavailable samples for raster slice
+            if ( xEnd == xPos + width && yEnd == yPos + height && ( rasterSliceAlfPad & 2 ) ) // bit 1 is tested only for the last sub-block of the CTU
+            {
+              buf.padBorderPel( MAX_ALF_PADDING_SIZE, 2 );
+            }
+            buf.extendBorderPel(MAX_ALF_PADDING_SIZE);
+            buf = buf.subBuf(UnitArea(cs.area.chromaFormat, Area(clipL ? 0 : MAX_ALF_PADDING_SIZE, clipT ? 0 : MAX_ALF_PADDING_SIZE, w, h))); // re-window onto the w x h interior, skipping any copied padding
+
+            if (m_ctuEnableFlag[COMPONENT_Y][ctuIdx])
+            {
+              const Area blkSrc(0, 0, w, h); // source coordinates are relative to the scratch buffer
+              const Area blkDst(xStart, yStart, w, h); // destination coordinates are picture positions
+              short filterSetIndex = alfCtuFilterIndex[ctuIdx];
+              short *coeff;
+              short *clip;
+              if (filterSetIndex >= NUM_FIXED_FILTER_SETS) // indices at or above NUM_FIXED_FILTER_SETS select the APS tables, lower ones the fixed tables
+              {
+                coeff = m_coeffApsLuma[filterSetIndex - NUM_FIXED_FILTER_SETS];
+                clip = m_clippApsLuma[filterSetIndex - NUM_FIXED_FILTER_SETS];
+              }
+              else
+              {
+                coeff = m_fixedFilterSetCoeffDec[filterSetIndex];
+                clip = m_clipDefault;
+              }
+              m_filter7x7Blk(m_classifier, recBuf, buf, blkDst, blkSrc, COMPONENT_Y, coeff, clip, m_clpRngs.comp[COMPONENT_Y], cs
+                , m_alfVBLumaCTUHeight
+                , m_alfVBLumaPos
+              );
+            }
+
+            for (int compIdx = 1; compIdx < MAX_NUM_COMPONENT; compIdx++)
+            {
+              ComponentID compID = ComponentID(compIdx);
+              const int chromaScaleX = getComponentScaleX(compID, recBuf.chromaFormat);
+              const int chromaScaleY = getComponentScaleY(compID, recBuf.chromaFormat);
+              if (m_ctuEnableFlag[compIdx][ctuIdx])
+              {
+                const Area blkSrc(0, 0, w >> chromaScaleX, h >> chromaScaleY); // luma-sized sub-block scaled down to chroma resolution
+                const Area blkDst(xStart >> chromaScaleX, yStart >> chromaScaleY, w >> chromaScaleX, h >> chromaScaleY);
+                const int alt_num = m_ctuAlternative[compID][ctuIdx]; // per-CTU chroma alternative, used to index the coefficient and clipping tables
+                m_filter5x5Blk(m_classifier, recBuf, buf, blkDst, blkSrc, compID, m_chromaCoeffFinal[alt_num], m_chromaClippFinal[alt_num], m_clpRngs.comp[compIdx], cs
+                  , m_alfVBChmaCTUHeight
+                  , m_alfVBChmaPos
+                );
+              }
+            }
+
+            xStart = xEnd;
+          }
+
+          yStart = yEnd;
+        }
+      }
+      else // unsplit path; also taken when no component is enabled, in which case both checks below filter nothing
+      {
+
+      const UnitArea area(cs.area.chromaFormat, Area(xPos, yPos, width, height)); // NOTE(review): 'area' is not referenced again inside this branch
+      if (m_ctuEnableFlag[COMPONENT_Y][ctuIdx])
+      {
+        Area blk(xPos, yPos, width, height); // same area is passed for both block arguments; the input buffer here is recExtBuf itself
+        short filterSetIndex = alfCtuFilterIndex[ctuIdx];
+        short *coeff;
+        short *clip;
+        if (filterSetIndex >= NUM_FIXED_FILTER_SETS) // same table selection as in the split path
+        {
+          coeff = m_coeffApsLuma[filterSetIndex - NUM_FIXED_FILTER_SETS];
+          clip = m_clippApsLuma[filterSetIndex - NUM_FIXED_FILTER_SETS];
+        }
+        else
+        {
+          coeff = m_fixedFilterSetCoeffDec[filterSetIndex];
+          clip = m_clipDefault;
+        }
+        m_filter7x7Blk(m_classifier, recBuf, recExtBuf, blk, blk, COMPONENT_Y, coeff, clip, m_clpRngs.comp[COMPONENT_Y], cs
+          , m_alfVBLumaCTUHeight
+          , m_alfVBLumaPos
+        );
+      }
+
+      for (int compIdx = 1; compIdx < MAX_NUM_COMPONENT; compIdx++)
+      {
+        ComponentID compID = ComponentID(compIdx);
+        const int chromaScaleX = getComponentScaleX(compID, recBuf.chromaFormat);
+        const int chromaScaleY = getComponentScaleY(compID, recBuf.chromaFormat);
+        if (m_ctuEnableFlag[compIdx][ctuIdx])
+        {
+          Area blk(xPos >> chromaScaleX, yPos >> chromaScaleY, width >> chromaScaleX, height >> chromaScaleY); // CTU area scaled down to chroma resolution
+          const int alt_num = m_ctuAlternative[compID][ctuIdx];
+          m_filter5x5Blk(m_classifier, recBuf, recExtBuf, blk, blk, compID, m_chromaCoeffFinal[alt_num], m_chromaClippFinal[alt_num], m_clpRngs.comp[compIdx], cs
+            , m_alfVBChmaCTUHeight
+            , m_alfVBChmaPos
+          );
+        }
+      }
       }
+      ctuIdx++; // advance for every CTU, whichever branch was taken
     }
   }
 }
 
-void EncAdaptiveLoopFilter::copyCtuEnableFlag( uint8_t** ctuFlagsDst, uint8_t** ctuFlagsSrc, ChannelType channel )
+void EncAdaptiveLoopFilter::copyCtuAlternativeChroma( uint8_t* ctuAltsDst[MAX_NUM_COMPONENT], uint8_t* ctuAltsSrc[MAX_NUM_COMPONENT] ) // Copies m_numCTUsInPic entries for Cb and for Cr from src to dst; the COMPONENT_Y slot is not accessed.
 {
-  if( isLuma( channel ) )
-  {
-    memcpy( ctuFlagsDst[COMPONENT_Y], ctuFlagsSrc[COMPONENT_Y], sizeof( uint8_t ) * m_numCTUsInPic );
-  }
-  else
-  {
-    memcpy( ctuFlagsDst[COMPONENT_Cb], ctuFlagsSrc[COMPONENT_Cb], sizeof( uint8_t ) * m_numCTUsInPic );
-    memcpy( ctuFlagsDst[COMPONENT_Cr], ctuFlagsSrc[COMPONENT_Cr], sizeof( uint8_t ) * m_numCTUsInPic );
-  }
+  std::copy_n( ctuAltsSrc[COMPONENT_Cb], m_numCTUsInPic, ctuAltsDst[COMPONENT_Cb] ); // argument order is (source, count, destination)
+  std::copy_n( ctuAltsSrc[COMPONENT_Cr], m_numCTUsInPic, ctuAltsDst[COMPONENT_Cr] ); // same for Cr
 }
 
-void EncAdaptiveLoopFilter::setCtuEnableFlag( uint8_t** ctuFlags, ChannelType channel, uint8_t val )
+void EncAdaptiveLoopFilter::setCtuAlternativeChroma( uint8_t* ctuAlts[MAX_NUM_COMPONENT], uint8_t val ) // Sets every Cb and Cr entry (m_numCTUsInPic each) to val; the COMPONENT_Y slot is not accessed.
 {
-  if( isLuma( channel ) )
+  std::fill_n( ctuAlts[COMPONENT_Cb], m_numCTUsInPic, val ); // argument order is (destination, count, value)
+  std::fill_n( ctuAlts[COMPONENT_Cr], m_numCTUsInPic, val ); // same for Cr
+}
+
+void EncAdaptiveLoopFilter::initCtuAlternativeChroma( uint8_t* ctuAlts[MAX_NUM_COMPONENT] ) // Assigns non-decreasing alternative indices over the CTUs, driven by m_alfParamTemp.numAlternativesChroma; Cb and Cr get the same pattern.
+{
+  uint8_t altIdx = 0; // alternative written to the current CTU; starts at 0 for each component
+  for( int ctuIdx = 0; ctuIdx < m_numCTUsInPic; ++ctuIdx )
   {
-    memset( ctuFlags[COMPONENT_Y], val, sizeof( uint8_t ) * m_numCTUsInPic );
+    ctuAlts[COMPONENT_Cb][ctuIdx] = altIdx;
+    if( (ctuIdx+1) * m_alfParamTemp.numAlternativesChroma >= (altIdx+1)*m_numCTUsInPic ) // step once (ctuIdx+1)/m_numCTUsInPic reaches (altIdx+1)/numAlternativesChroma; at most one step per CTU
+      ++altIdx;
   }
-  else
+  altIdx = 0; // restart so Cr receives the same sequence as Cb
+  for( int ctuIdx = 0; ctuIdx < m_numCTUsInPic; ++ctuIdx )
   {
-    memset( ctuFlags[COMPONENT_Cb], val, sizeof( uint8_t ) * m_numCTUsInPic );
-    memset( ctuFlags[COMPONENT_Cr], val, sizeof( uint8_t ) * m_numCTUsInPic );
+    ctuAlts[COMPONENT_Cr][ctuIdx] = altIdx;
+    if( (ctuIdx+1) * m_alfParamTemp.numAlternativesChroma >= (altIdx+1)*m_numCTUsInPic ) // same stepping rule as the Cb loop
+      ++altIdx;
   }
 }
 
+int EncAdaptiveLoopFilter::getMaxNumAlternativesChroma( ) // Returns the configured maximum number of chroma alternatives, capped at 2 * m_numCTUsInPic.
+{
+  return std::min<int>( m_numCTUsInPic * 2, m_encCfg->getMaxNumAlfAlternativesChroma() ); // explicit <int> makes both arguments compare as int
+}
diff --git a/source/Lib/EncoderLib/EncAdaptiveLoopFilter.h b/source/Lib/EncoderLib/EncAdaptiveLoopFilter.h
index d2b02d902520a609957a6f9f0a30f953093df072..d2ceb026fe13f0c41ea4a5ec403b568965120afb 100644
--- a/source/Lib/EncoderLib/EncAdaptiveLoopFilter.h
+++ b/source/Lib/EncoderLib/EncAdaptiveLoopFilter.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -41,62 +41,52 @@
 #include "CommonLib/AdaptiveLoopFilter.h"
 
 #include "CABACWriter.h"
+#include "EncCfg.h"
 
 struct AlfCovariance
 {
+  static constexpr int MaxAlfNumClippingValues = AdaptiveLoopFilter::MaxAlfNumClippingValues; // local alias, used as the default num_bins of create()
+  using TE = double[MAX_NUM_ALF_LUMA_COEFF][MAX_NUM_ALF_LUMA_COEFF]; // square matrix of doubles at the maximum coefficient count
+  using Ty = double[MAX_NUM_ALF_LUMA_COEFF]; // vector of doubles at the maximum coefficient count
+  using TKE = TE[AdaptiveLoopFilter::MaxAlfNumClippingValues][AdaptiveLoopFilter::MaxAlfNumClippingValues]; // one TE per pair of bin indices, accessed as E[b0][b1][j][i]
+  using TKy = Ty[AdaptiveLoopFilter::MaxAlfNumClippingValues]; // one Ty per bin index, accessed as y[b][j]
+
   int numCoeff;
-  double *y;
-  double **E;
+  int numBins; // upper bound of the bin loops in add() and operator-=
+  TKy y; // fixed-size member array (replaces the former heap pointer)
+  TKE E; // fixed-size member array (replaces the former heap pointer-to-pointer)
   double pixAcc;
 
   AlfCovariance() {}
   ~AlfCovariance() {}
 
-  void create( int size )
+  void create( int size, int num_bins = MaxAlfNumClippingValues ) // Records the dimensions and zeroes y and E; no allocation, since both are member arrays. NOTE(review): pixAcc is not set here, only reset() zeroes it.
   {
     numCoeff = size;
-
-    y = new double[numCoeff];
-    E = new double*[numCoeff];
-
-    for( int i = 0; i < numCoeff; i++ )
-    {
-      E[i] = new double[numCoeff];
-    }
+    numBins = num_bins;
+    std::memset( y, 0, sizeof( y ) ); // sizeof the array type: clears all of y, not only numBins x numCoeff entries
+    std::memset( E, 0, sizeof( E ) ); // likewise clears all of E
   }
 
   void destroy()
   {
-    for( int i = 0; i < numCoeff; i++ )
-    {
-      delete[] E[i];
-      E[i] = nullptr;
-    }
-
-    delete[] E;
-    E = nullptr;
-
-    delete[] y;
-    y = nullptr;
   }
 
-  void reset()
+  void reset( int num_bins = -1 ) // Zeroes pixAcc, y and E; a positive num_bins also replaces numBins, while the default -1 leaves it unchanged.
   {
+    if ( num_bins > 0 ) // zero and negative values are ignored
+      numBins = num_bins;
     pixAcc = 0;
-    std::memset( y, 0, sizeof( *y ) * numCoeff );
-    for( int i = 0; i < numCoeff; i++ )
-    {
-      std::memset( E[i], 0, sizeof( *E[i] ) * numCoeff );
-    }
+    std::memset( y, 0, sizeof( y ) ); // clears the whole member array
+    std::memset( E, 0, sizeof( E ) ); // clears the whole member array
   }
 
   const AlfCovariance& operator=( const AlfCovariance& src )
   {
-    for( int i = 0; i < numCoeff; i++ )
-    {
-      std::memcpy( E[i], src.E[i], sizeof( *E[i] ) * numCoeff );
-    }
-    std::memcpy( y, src.y, sizeof( *y ) * numCoeff );
+    numCoeff = src.numCoeff;
+    numBins = src.numBins;
+    std::memcpy( E, src.E, sizeof( E ) );
+    std::memcpy( y, src.y, sizeof( y ) );
     pixAcc = src.pixAcc;
 
     return *this;
@@ -104,26 +94,52 @@ struct AlfCovariance
 
   void add( const AlfCovariance& lhs, const AlfCovariance& rhs )
   {
-    for( int j = 0; j < numCoeff; j++ )
+    numCoeff = lhs.numCoeff; // dimensions come from lhs; rhs is presumably the same size — TODO confirm at call sites
+    numBins = lhs.numBins;
+    for( int b0 = 0; b0 < numBins; b0++ ) // E: element-wise sum over every (b0, b1) bin pair
     {
-      for( int i = 0; i < numCoeff; i++ )
+      for( int b1 = 0; b1 < numBins; b1++ )
       {
-        E[j][i] = lhs.E[j][i] + rhs.E[j][i];
+        for( int j = 0; j < numCoeff; j++ )
+        {
+          for( int i = 0; i < numCoeff; i++ )
+          {
+            E[b0][b1][j][i] = lhs.E[b0][b1][j][i] + rhs.E[b0][b1][j][i];
+          }
+        }
+      }
+    }
+    for( int b = 0; b < numBins; b++ ) // y: element-wise sum over every bin
+    {
+      for( int j = 0; j < numCoeff; j++ )
+      {
+        y[b][j] = lhs.y[b][j] + rhs.y[b][j];
       }
-      y[j] = lhs.y[j] + rhs.y[j];
     }
     pixAcc = lhs.pixAcc + rhs.pixAcc;
   }
 
   const AlfCovariance& operator+= ( const AlfCovariance& src )
   {
-    for( int j = 0; j < numCoeff; j++ )
+    for( int b0 = 0; b0 < numBins; b0++ )
     {
-      for( int i = 0; i < numCoeff; i++ )
+      for( int b1 = 0; b1 < numBins; b1++ )
       {
-        E[j][i] += src.E[j][i];
+        for( int j = 0; j < numCoeff; j++ )
+        {
+          for( int i = 0; i < numCoeff; i++ )
+          {
+            E[b0][b1][j][i] += src.E[b0][b1][j][i];
+          }
+        }
+      }
+    }
+    for( int b = 0; b < numBins; b++ )
+    {
+      for( int j = 0; j < numCoeff; j++ )
+      {
+        y[b][j] += src.y[b][j];
       }
-      y[j] += src.y[j];
     }
     pixAcc += src.pixAcc;
 
@@ -132,88 +148,157 @@ struct AlfCovariance
 
   const AlfCovariance& operator-= ( const AlfCovariance& src )
   {
-    for( int j = 0; j < numCoeff; j++ )
+    for( int b0 = 0; b0 < numBins; b0++ ) // bounds are this object's numBins / numCoeff, not src's
+    {
+      for( int b1 = 0; b1 < numBins; b1++ )
+      {
+        for( int j = 0; j < numCoeff; j++ )
+        {
+          for( int i = 0; i < numCoeff; i++ )
+          {
+            E[b0][b1][j][i] -= src.E[b0][b1][j][i];
+          }
+        }
+      }
+    }
+    for( int b = 0; b < numBins; b++ ) // y: element-wise subtraction over every bin
     {
-      for( int i = 0; i < numCoeff; i++ )
+      for( int j = 0; j < numCoeff; j++ )
       {
-        E[j][i] -= src.E[j][i];
+        y[b][j] -= src.y[b][j];
       }
-      y[j] -= src.y[j];
     }
     pixAcc -= src.pixAcc;
 
     return *this;
   }
+
+  void setEyFromClip(const int* clip, TE _E, Ty _y, int size) const // Fills the first size x size entries of _E and size entries of _y, picking for each coefficient k the bin clip[k].
+  {
+    for (int k=0; k<size; k++)
+    {
+      _y[k] = y[clip[k]][k]; // y entry from the bin chosen for coefficient k
+      for (int l=0; l<size; l++)
+      {
+        _E[k][l] = E[clip[k]][clip[l]][k][l]; // E entry from the bin pair chosen for coefficients k and l
+      }
+    }
+  }
+
+  double optimizeFilter(const int* clip, double *f, int size) const // Calls gnsSolveByChol to fill f for the given clip indices, then returns calculateError for that f.
+  {
+    gnsSolveByChol( clip, f, size ); // its int return value is not checked here
+    return calculateError( clip, f ); // NOTE(review): this two-argument overload forwards numCoeff, not 'size'
+  }
+
+  double optimizeFilter(const AlfFilterShape& alfShape, int* clip, double *f, bool optimize_clip) const;
+  double optimizeFilterClip(const AlfFilterShape& alfShape, int* clip) const // Wrapper around optimizeFilter(alfShape, clip, f, true) that returns its value without exposing the coefficients.
+  {
+    Ty f; // scratch coefficient array, dropped when the function returns
+    return optimizeFilter(alfShape, clip, f, true); // final argument is optimize_clip
+  }
+
+  double calculateError( const int *clip ) const;
+  double calculateError( const int *clip, const double *coeff ) const { return calculateError(clip, coeff, numCoeff); }
+  double calculateError( const int *clip, const double *coeff, const int numCoeff ) const;
+  double calcErrorForCoeffs( const int *clip, const int *coeff, const int numCoeff, const int bitDepth ) const;
+
+  void getClipMax(const AlfFilterShape& alfShape, int *clip_max) const;
+  void reduceClipCost(const AlfFilterShape& alfShape, int *clip) const;
+
+private:
+  // Cholesky decomposition
+
+  int  gnsSolveByChol( const int *clip, double *x, int numEq ) const;
+  int  gnsSolveByChol( TE LHS, double* rhs, double *x, int numEq ) const;
+  void gnsBacksubstitution( TE R, double* z, int size, double* A ) const;
+  void gnsTransposeBacksubstitution( TE U, double* rhs, double* x, int order ) const;
+  int  gnsCholeskyDec( TE inpMatr, TE outMatr, int numEq ) const;
 };
 
 class EncAdaptiveLoopFilter : public AdaptiveLoopFilter
 {
 public:
-  static constexpr int   m_MAX_SCAN_VAL = 11;
-  static constexpr int   m_MAX_EXP_GOLOMB = 16;
-  int m_alfWSSD;
   inline void           setAlfWSSD(int alfWSSD) { m_alfWSSD = alfWSSD; }
   static std::vector<double>  m_lumaLevelToWeightPLUT;
   inline std::vector<double>& getLumaLevelWeightTable() { return m_lumaLevelToWeightPLUT; }
 
 private:
+  int                    m_alfWSSD;
+  const EncCfg*          m_encCfg;
   AlfCovariance***       m_alfCovariance[MAX_NUM_COMPONENT];          // [compIdx][shapeIdx][ctbAddr][classIdx]
-  AlfCovariance**        m_alfCovarianceFrame[MAX_NUM_CHANNEL_TYPE];   // [CHANNEL][shapeIdx][classIdx]
-  uint8_t*                 m_ctuEnableFlagTmp[MAX_NUM_COMPONENT];
+  AlfCovariance**        m_alfCovarianceFrame[MAX_NUM_CHANNEL_TYPE];   // [CHANNEL][shapeIdx][lumaClassIdx/chromaAltIdx]
+  uint8_t*               m_ctuEnableFlagTmp[MAX_NUM_COMPONENT];
+  uint8_t*               m_ctuEnableFlagTmp2[MAX_NUM_COMPONENT];
+  uint8_t*               m_ctuAlternativeTmp[MAX_NUM_COMPONENT];
 
   //for RDO
-  AlfSliceParam          m_alfSliceParamTemp;
-  AlfCovariance          m_alfCovarianceMerged[ALF_NUM_OF_FILTER_TYPES][MAX_NUM_ALF_CLASSES + 1];
+  AlfParam               m_alfParamTemp;
+  ParameterSetMap<APS>*  m_apsMap;
+  AlfCovariance          m_alfCovarianceMerged[ALF_NUM_OF_FILTER_TYPES][MAX_NUM_ALF_CLASSES + 2];
+  int                    m_alfClipMerged[ALF_NUM_OF_FILTER_TYPES][MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_LUMA_COEFF];
   CABACWriter*           m_CABACEstimator;
   CtxCache*              m_CtxCache;
   double                 m_lambda[MAX_NUM_COMPONENT];
-  const double           FracBitsScale = 1.0 / double( 1 << SCALE_BITS );
 
-  int*                   m_filterCoeffQuant;
-  int**                  m_filterCoeffSet;
+  int**                  m_filterCoeffSet; // [lumaClassIdx/chromaAltIdx][coeffIdx]
+  int**                  m_filterClippSet; // [lumaClassIdx/chromaAltIdx][coeffIdx]
   int**                  m_diffFilterCoeff;
-  int                    m_kMinTab[MAX_NUM_ALF_LUMA_COEFF];
-  int                    m_bitsCoeffScan[m_MAX_SCAN_VAL][m_MAX_EXP_GOLOMB];
   short                  m_filterIndices[MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_CLASSES];
+  unsigned               m_bitsNewFilter[MAX_NUM_CHANNEL_TYPE];
+  int&                   m_apsIdStart;
+  double                 *m_ctbDistortionFixedFilter;
+  double                 *m_ctbDistortionUnfilter[MAX_NUM_COMPONENT];
+  std::vector<short>     m_alfCtbFilterSetIndexTmp;
+  AlfParam               m_alfParamTempNL;
+  int                    m_clipDefaultEnc[MAX_NUM_ALF_LUMA_COEFF];
+  int                    m_filterTmp[MAX_NUM_ALF_LUMA_COEFF];
+  int                    m_clipTmp[MAX_NUM_ALF_LUMA_COEFF];
 
 public:
-  EncAdaptiveLoopFilter();
+  EncAdaptiveLoopFilter( int& apsIdStart );
   virtual ~EncAdaptiveLoopFilter() {}
-
-  void ALFProcess( CodingStructure& cs, const double *lambdas,
+  void  initDistortion();
+  std::vector<int> getAvaiApsIdsLuma(CodingStructure& cs, int &newApsId);
+  void  alfEncoderCtb(CodingStructure& cs, AlfParam& alfParamNewFilters
 #if ENABLE_QPA
-                   const double lambdaChromaWeight,
+    , const double lambdaChromaWeight
 #endif
-                   AlfSliceParam& alfSliceParam );
-  void initCABACEstimator( CABACEncoder* cabacEncoder, CtxCache* ctxCache, Slice* pcSlice );
-  void create( const int picWidth, const int picHeight, const ChromaFormat chromaFormatIDC, const int maxCUWidth, const int maxCUHeight, const int maxCUDepth, const int inputBitDepth[MAX_NUM_CHANNEL_TYPE], const int internalBitDepth[MAX_NUM_CHANNEL_TYPE] );
+  );
+  void   alfReconstructor(CodingStructure& cs, const PelUnitBuf& recExtBuf);
+  void ALFProcess(CodingStructure& cs, const double *lambdas
+#if ENABLE_QPA
+    , const double lambdaChromaWeight
+#endif
+  );
+  void initCABACEstimator( CABACEncoder* cabacEncoder, CtxCache* ctxCache, Slice* pcSlice, ParameterSetMap<APS>* apsMap );
+  void create( const EncCfg* encCfg, const int picWidth, const int picHeight, const ChromaFormat chromaFormatIDC, const int maxCUWidth, const int maxCUHeight, const int maxCUDepth, const int inputBitDepth[MAX_NUM_CHANNEL_TYPE], const int internalBitDepth[MAX_NUM_CHANNEL_TYPE] );
   void destroy();
-  static int lengthGolomb( int coeffVal, int k );
-  static int getGolombKMin( AlfFilterShape& alfShape, const int numFilters, int kMinTab[MAX_NUM_ALF_LUMA_COEFF], int bitsCoeffScan[m_MAX_SCAN_VAL][m_MAX_EXP_GOLOMB] );
+  static int lengthGolomb( int coeffVal, int k, bool signed_coeff=true );
+  void setApsIdStart( int i) { m_apsIdStart = i; }
 
 private:
-  void   alfEncoder( CodingStructure& cs, AlfSliceParam& alfSliceParam, const PelUnitBuf& orgUnitBuf, const PelUnitBuf& recExtBuf, const PelUnitBuf& recBuf, const ChannelType channel
+  void   alfEncoder( CodingStructure& cs, AlfParam& alfParam, const PelUnitBuf& orgUnitBuf, const PelUnitBuf& recExtBuf, const PelUnitBuf& recBuf, const ChannelType channel
 #if ENABLE_QPA
                    , const double lambdaChromaWeight = 0.0
 #endif
                    );
 
-  void   copyAlfSliceParam( AlfSliceParam& alfSliceParamDst, AlfSliceParam& alfSliceParamSrc, ChannelType channel );
-  double mergeFiltersAndCost( AlfSliceParam& alfSliceParam, AlfFilterShape& alfShape, AlfCovariance* covFrame, AlfCovariance* covMerged, int& uiCoeffBits );
+  void   copyAlfParam( AlfParam& alfParamDst, AlfParam& alfParamSrc, ChannelType channel );
+  double mergeFiltersAndCost( AlfParam& alfParam, AlfFilterShape& alfShape, AlfCovariance* covFrame, AlfCovariance* covMerged, int clipMerged[MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_LUMA_COEFF], int& uiCoeffBits );
 
   void   getFrameStats( ChannelType channel, int iShapeIdx );
-  void   getFrameStat( AlfCovariance* frameCov, AlfCovariance** ctbCov, uint8_t* ctbEnableFlags, const int numClasses );
-  void   deriveStatsForFiltering( PelUnitBuf& orgYuv, PelUnitBuf& recYuv );
-  void   getBlkStats( AlfCovariance* alfCovariace, const AlfFilterShape& shape, AlfClassifier** classifier, Pel* org, const int orgStride, Pel* rec, const int recStride, const CompArea& area );
-  void   calcCovariance( int *ELocal, const Pel *rec, const int stride, const int *filterPattern, const int halfFilterLength, const int transposeIdx );
-  void   mergeClasses( AlfCovariance* cov, AlfCovariance* covMerged, const int numClasses, short filterIndices[MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_CLASSES] );
-
-  double calculateError( AlfCovariance& cov );
-  double calcErrorForCoeffs( double **E, double *y, int *coeff, const int numCoeff, const int bitDepth );
-  double getFilterCoeffAndCost( CodingStructure& cs, double distUnfilter, ChannelType channel, bool bReCollectStat, int iShapeIdx, int& uiCoeffBits );
-  double deriveFilterCoeffs( AlfCovariance* cov, AlfCovariance* covMerged, AlfFilterShape& alfShape, short* filterIndices, int numFilters, double errorTabForce0Coeff[MAX_NUM_ALF_CLASSES][2] );
-  int    deriveFilterCoefficientsPredictionMode( AlfFilterShape& alfShape, int **filterSet, int** filterCoeffDiff, const int numFilters, int& predMode );
-  double deriveCoeffQuant( int *filterCoeffQuant, double **E, double *y, const int numCoeff, std::vector<int>& weights, const int bitDepth, const bool bChroma = false );
+  void   getFrameStat( AlfCovariance* frameCov, AlfCovariance** ctbCov, uint8_t* ctbEnableFlags, uint8_t* ctbAltIdx, const int numClasses, int altIdx );
+  void   deriveStatsForFiltering( PelUnitBuf& orgYuv, PelUnitBuf& recYuv, CodingStructure& cs );
+  void   getBlkStats(AlfCovariance* alfCovariace, const AlfFilterShape& shape, AlfClassifier** classifier, Pel* org, const int orgStride, Pel* rec, const int recStride, const CompArea& areaDst, const CompArea& area, const ChannelType channel, int vbCTUHeight, int vbPos);
+  void   calcCovariance(int ELocal[MAX_NUM_ALF_LUMA_COEFF][MaxAlfNumClippingValues], const Pel *rec, const int stride, const AlfFilterShape& shape, const int transposeIdx, const ChannelType channel, int vbDistance);
+  void   mergeClasses(const AlfFilterShape& alfShape, AlfCovariance* cov, AlfCovariance* covMerged, int clipMerged[MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_LUMA_COEFF], const int numClasses, short filterIndices[MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_CLASSES]);
+
+
+  double getFilterCoeffAndCost( CodingStructure& cs, double distUnfilter, ChannelType channel, bool bReCollectStat, int iShapeIdx, int& uiCoeffBits, bool onlyFilterCost = false );
+  double deriveFilterCoeffs(AlfCovariance* cov, AlfCovariance* covMerged, int clipMerged[MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_LUMA_COEFF], AlfFilterShape& alfShape, short* filterIndices, int numFilters, double errorTabForce0Coeff[MAX_NUM_ALF_CLASSES][2], AlfParam& alfParam);
+  int    deriveFilterCoefficientsPredictionMode( AlfFilterShape& alfShape, int **filterSet, int** filterCoeffDiff, const int numFilters );
+  double deriveCoeffQuant( int *filterClipp, int *filterCoeffQuant, const AlfCovariance& cov, const AlfFilterShape& shape, const int bitDepth, const bool optimizeClip );
   double deriveCtbAlfEnableFlags( CodingStructure& cs, const int iShapeIdx, ChannelType channel,
 #if ENABLE_QPA
                                   const double chromaWeight,
@@ -221,32 +306,29 @@ private:
                                   const int numClasses, const int numCoeff, double& distUnfilter );
   void   roundFiltCoeff( int *filterCoeffQuant, double *filterCoeff, const int numCoeff, const int factor );
 
-  double getDistCoeffForce0( bool* codedVarBins, double errorForce0CoeffTab[MAX_NUM_ALF_CLASSES][2], int* bitsVarBin, const int numFilters );
-  int    lengthTruncatedUnary( int symbol, int maxSymbol );
+  double getDistCoeffForce0( bool* codedVarBins, double errorForce0CoeffTab[MAX_NUM_ALF_CLASSES][2], int* bitsVarBin, int zeroBitsVarBin, const int numFilters);
   int    lengthUvlc( int uiCode );
-  int    getNonFilterCoeffRate( AlfSliceParam& alfSliceParam );
-  int    getTBlength( int uiSymbol, const int uiMaxSymbol );
+  int    getNonFilterCoeffRate( AlfParam& alfParam );
 
   int    getCostFilterCoeffForce0( AlfFilterShape& alfShape, int **pDiffQFilterCoeffIntPP, const int numFilters, bool* codedVarBins );
   int    getCostFilterCoeff( AlfFilterShape& alfShape, int **pDiffQFilterCoeffIntPP, const int numFilters );
-  int    lengthFilterCoeffs( AlfFilterShape& alfShape, const int numFilters, int **FilterCoeff, int* kMinTab );
+  int    getCostFilterClipp( AlfFilterShape& alfShape, int **pDiffQFilterCoeffIntPP, const int numFilters );
+  int    lengthFilterCoeffs( AlfFilterShape& alfShape, const int numFilters, int **FilterCoeff );
   double getDistForce0( AlfFilterShape& alfShape, const int numFilters, double errorTabForce0Coeff[MAX_NUM_ALF_CLASSES][2], bool* codedVarBins );
-  int    getCoeffRate( AlfSliceParam& alfSliceParam, bool isChroma );
+  int    getChromaCoeffRate( AlfParam& alfParam, int altIdx );
 
   double getUnfilteredDistortion( AlfCovariance* cov, ChannelType channel );
   double getUnfilteredDistortion( AlfCovariance* cov, const int numClasses );
   double getFilteredDistortion( AlfCovariance* cov, const int numClasses, const int numFiltersMinus1, const int numCoeff );
 
-  // Cholesky decomposition
-  int  gnsSolveByChol( double **LHS, double *rhs, double *x, int numEq );
-  void gnsBacksubstitution( double R[MAX_NUM_ALF_COEFF][MAX_NUM_ALF_COEFF], double* z, int size, double* A );
-  void gnsTransposeBacksubstitution( double U[MAX_NUM_ALF_COEFF][MAX_NUM_ALF_COEFF], double* rhs, double* x, int order );
-  int  gnsCholeskyDec( double **inpMatr, double outMatr[MAX_NUM_ALF_COEFF][MAX_NUM_ALF_COEFF], int numEq );
-
-  void setEnableFlag( AlfSliceParam& alfSlicePara, ChannelType channel, bool val );
-  void setEnableFlag( AlfSliceParam& alfSlicePara, ChannelType channel, uint8_t** ctuFlags );
+  void setEnableFlag( AlfParam& alfSlicePara, ChannelType channel, bool val );
+  void setEnableFlag( AlfParam& alfSlicePara, ChannelType channel, uint8_t** ctuFlags );
   void setCtuEnableFlag( uint8_t** ctuFlags, ChannelType channel, uint8_t val );
   void copyCtuEnableFlag( uint8_t** ctuFlagsDst, uint8_t** ctuFlagsSrc, ChannelType channel );
+  void initCtuAlternativeChroma( uint8_t* ctuAlts[MAX_NUM_COMPONENT] );
+  void setCtuAlternativeChroma( uint8_t* ctuAlts[MAX_NUM_COMPONENT], uint8_t val );
+  void copyCtuAlternativeChroma( uint8_t* ctuAltsDst[MAX_NUM_COMPONENT], uint8_t* ctuAltsSrc[MAX_NUM_COMPONENT] );
+  int getMaxNumAlternativesChroma( );
 };
 
 
diff --git a/source/Lib/EncoderLib/EncCfg.h b/source/Lib/EncoderLib/EncCfg.h
index 41d8a29e4af1b8f928b33957767b91d53da9c68e..f27bb3c1001942819569517a46d851507e6be885 100644
--- a/source/Lib/EncoderLib/EncCfg.h
+++ b/source/Lib/EncoderLib/EncCfg.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -47,6 +47,10 @@
 
 #include "CommonLib/Unit.h"
 
+#if JVET_O0756_CALCULATE_HDRMETRICS
+#include "HDRLib/inc/DistortionMetric.H"
+#endif
+
 struct GOPEntry
 {
   int m_POC;
@@ -64,16 +68,15 @@ struct GOPEntry
   int m_betaOffsetDiv2;
   int m_temporalId;
   bool m_refPic;
-  int m_numRefPicsActive;
   int8_t m_sliceType;
-  int m_numRefPics;
-  int m_referencePics[MAX_NUM_REF_PICS];
-  int m_usedByCurrPic[MAX_NUM_REF_PICS];
-  int m_interRPSPrediction;
-  int m_deltaRPS;
-  int m_numRefIdc;
-  int m_refIdc[MAX_NUM_REF_PICS+1];
+  int m_numRefPicsActive0;
+  int m_numRefPics0;
+  int m_deltaRefPics0[MAX_NUM_REF_PICS];
+  int m_numRefPicsActive1;
+  int m_numRefPics1;
+  int m_deltaRefPics1[MAX_NUM_REF_PICS];
   bool m_isEncoded;
+  bool m_ltrp_in_slice_header_flag;
   GOPEntry()
   : m_POC(-1)
   , m_QPOffset(0)
@@ -90,21 +93,47 @@ struct GOPEntry
   , m_betaOffsetDiv2(0)
   , m_temporalId(0)
   , m_refPic(false)
-  , m_numRefPicsActive(0)
   , m_sliceType('P')
-  , m_numRefPics(0)
-  , m_interRPSPrediction(false)
-  , m_deltaRPS(0)
-  , m_numRefIdc(0)
-  , m_isEncoded(false)
+    , m_numRefPicsActive0(0)
+    , m_numRefPics0(0)
+    , m_numRefPicsActive1(0)
+    , m_numRefPics1(0)
+    , m_isEncoded(false)
+    , m_ltrp_in_slice_header_flag(false)
   {
-    ::memset( m_referencePics, 0, sizeof(m_referencePics) );
-    ::memset( m_usedByCurrPic, 0, sizeof(m_usedByCurrPic) );
-    ::memset( m_refIdc,        0, sizeof(m_refIdc) );
+    ::memset(m_deltaRefPics0, 0, sizeof(m_deltaRefPics0));
+    ::memset(m_deltaRefPics1, 0, sizeof(m_deltaRefPics1));
+  }
+};
+
+struct RPLEntry  // Plain record with constructor-defaulted fields. NOTE(review): presumably one reference-picture-list entry per GOP position - confirm against users.
+{
+  int m_POC;  // defaults to -1 (see constructor)
+  int m_temporalId;  // defaults to 0
+  bool m_refPic;  // defaults to false
+  int m_numRefPicsActive;  // defaults to 0
+  int8_t m_sliceType;  // held as a character code; defaults to 'P'
+  int m_numRefPics;  // defaults to 0. NOTE(review): presumably the number of valid entries in m_deltaRefPics - confirm
+  int m_deltaRefPics[MAX_NUM_REF_PICS];  // fixed-capacity array, zero-filled by the constructor body
+  bool m_isEncoded;  // defaults to false
+  bool m_ltrp_in_slice_header_flag;  // defaults to false
+  RPLEntry()  // default constructor: gives every member a defined initial value
+    : m_POC(-1)
+    , m_temporalId(0)
+    , m_refPic(false)
+    , m_numRefPicsActive(0)
+    , m_sliceType('P')
+    , m_numRefPics(0)
+    , m_isEncoded(false)
+    , m_ltrp_in_slice_header_flag(false)
+  {
+    ::memset(m_deltaRefPics, 0, sizeof(m_deltaRefPics));  // sizeof is taken on the array itself, so the whole array is cleared
   }
 };
 
 std::istringstream &operator>>(std::istringstream &in, GOPEntry &entry);     //input
+
+
 //! \ingroup EncoderLib
 //! \{
 
@@ -139,53 +168,69 @@ protected:
   uint32_t  m_maxChromaFormatConstraintIdc;
   bool      m_bFrameConstraintFlag;
   bool      m_bNoQtbttDualTreeIntraConstraintFlag;
+  bool      m_noPartitionConstraintsOverrideConstraintFlag;
   bool      m_bNoSaoConstraintFlag;
   bool      m_bNoAlfConstraintFlag;
-  bool      m_bNoPcmConstraintFlag;
   bool      m_bNoRefWraparoundConstraintFlag;
   bool      m_bNoTemporalMvpConstraintFlag;
   bool      m_bNoSbtmvpConstraintFlag;
   bool      m_bNoAmvrConstraintFlag;
   bool      m_bNoBdofConstraintFlag;
+  bool      m_noDmvrConstraintFlag;
   bool      m_bNoCclmConstraintFlag;
   bool      m_bNoMtsConstraintFlag;
+  bool      m_noSbtConstraintFlag;
   bool      m_bNoAffineMotionConstraintFlag;
-  bool      m_bNoGbiConstraintFlag;
-  bool      m_bNoMhIntraConstraintFlag;
+  bool      m_bNoBcwConstraintFlag;
+  bool      m_noIbcConstraintFlag;
+  bool      m_bNoCiipConstraintFlag;
+  bool      m_noFPelMmvdConstraintFlag;
   bool      m_bNoTriangleConstraintFlag;
   bool      m_bNoLadfConstraintFlag;
-  bool      m_bNoCurrPicRefConstraintFlag;
+  bool      m_noTransformSkipConstraintFlag;
+  bool      m_noBDPCMConstraintFlag;
+  bool      m_noJointCbCrConstraintFlag;
   bool      m_bNoQpDeltaConstraintFlag;
   bool      m_bNoDepQuantConstraintFlag;
   bool      m_bNoSignDataHidingConstraintFlag;
+  bool      m_noTrailConstraintFlag;
+  bool      m_noStsaConstraintFlag;
+  bool      m_noRaslConstraintFlag;
+  bool      m_noRadlConstraintFlag;
+  bool      m_noIdrConstraintFlag;
+  bool      m_noCraConstraintFlag;
+  bool      m_noGdrConstraintFlag;
+  bool      m_noApsConstraintFlag;
 
   /* profile & level */
   Profile::Name m_profile;
   Level::Tier   m_levelTier;
   Level::Name   m_level;
+  std::vector<uint32_t>      m_subProfile;
+  uint8_t       m_numSubProfile;
   bool m_progressiveSourceFlag;
   bool m_interlacedSourceFlag;
   bool m_nonPackedConstraintFlag;
   bool m_frameOnlyConstraintFlag;
-  uint32_t              m_bitDepthConstraintValue;
-  ChromaFormat      m_chromaFormatConstraintValue;
-  bool              m_intraConstraintFlag;
-  bool              m_onePictureOnlyConstraintFlag;
-  bool              m_lowerBitRateConstraintFlag;
+  bool m_intraConstraintFlag;
 
   //====== Coding Structure ========
-  uint32_t      m_uiIntraPeriod;                    // TODO: make this an int - it can be -1!
+  int       m_uiIntraPeriod;                        // needs to be signed to allow '-1' for no intra period
   uint32_t      m_uiDecodingRefreshType;            ///< the type of decoding refresh employed for the random access.
-#if JCTVC_Y0038_PARAMS
   bool      m_rewriteParamSets;
-#endif
+  bool      m_idrRefParamList;
   int       m_iGOPSize;
+  RPLEntry  m_RPLList0[MAX_GOP];
+  RPLEntry  m_RPLList1[MAX_GOP];
+  int       m_numRPLList0;
+  int       m_numRPLList1;
   GOPEntry  m_GOPList[MAX_GOP];
-  int       m_extraRPSs;
   int       m_maxDecPicBuffering[MAX_TLAYER];
   int       m_numReorderPics[MAX_TLAYER];
+  int       m_drapPeriod;
 
   int       m_iQP;                              //  if (AdaptiveQP == OFF)
+  ChromaQpMappingTableParams m_chromaQpMappingTableParams;
 #if X0038_LAMBDA_FROM_QP_CAPABILITY
   int       m_intraQPOffset;                    ///< QP offset for intra slice (integer)
   int       m_lambdaFromQPEnable;               ///< enable lambda derivation from QP
@@ -199,11 +244,23 @@ protected:
 
   int       m_maxTempLayer;                      ///< Max temporal layer
   unsigned  m_CTUSize;
+  bool                  m_subPicPresentFlag;
+  unsigned              m_numSubPics;
+  uint32_t              m_subPicCtuTopLeftX[MAX_NUM_SUB_PICS];
+  uint32_t              m_subPicCtuTopLeftY[MAX_NUM_SUB_PICS];
+  uint32_t              m_subPicWidth[MAX_NUM_SUB_PICS];
+  uint32_t              m_subPicHeight[MAX_NUM_SUB_PICS];
+  uint32_t              m_subPicTreatedAsPicFlag[MAX_NUM_SUB_PICS];
+  uint32_t              m_loopFilterAcrossSubpicEnabledFlag[MAX_NUM_SUB_PICS];
+  bool                  m_subPicIdPresentFlag;
+  bool                  m_subPicIdSignallingPresentFlag;
+  unsigned              m_subPicIdLen;
+  uint32_t              m_subPicId[MAX_NUM_SUB_PICS];
   bool      m_useSplitConsOverride;
   unsigned  m_uiMinQT[3]; //0: I slice; 1: P/B slice, 2: I slice chroma
-  unsigned  m_uiMaxBTDepth;
-  unsigned  m_uiMaxBTDepthI;
-  unsigned  m_uiMaxBTDepthIChroma;
+  unsigned  m_uiMaxMTTHierarchyDepth;
+  unsigned  m_uiMaxMTTHierarchyDepthI;
+  unsigned  m_uiMaxMTTHierarchyDepthIChroma;
   bool      m_dualITree;
   unsigned  m_maxCUWidth;
   unsigned  m_maxCUHeight;
@@ -211,21 +268,28 @@ protected:
   unsigned  m_log2DiffMaxMinCodingBlockSize;
 
   int       m_LMChroma;
-  bool      m_cclmCollocatedChromaFlag;
+  bool      m_horCollocatedChromaFlag;
+  bool      m_verCollocatedChromaFlag;
   int       m_IntraMTS;
   int       m_InterMTS;
-  int       m_IntraMTSMaxCand;
-  int       m_InterMTSMaxCand;
+  int       m_MTSIntraMaxCand;
+  int       m_MTSInterMaxCand;
   int       m_ImplicitMTS;
   bool      m_SBT;                                ///< Sub-Block Transform for inter blocks
+  int       m_SBTFast64WidthTh;                   ///< Enable size-64 SBT in encoder RDO check for HD and above sequences
+
+  bool      m_LFNST;
+  bool      m_useFastLFNST;
   int       m_SubPuMvpMode;
   bool      m_Affine;
   bool      m_AffineType;
+  bool      m_PROF;
   bool      m_BIO;
 
+  bool      m_SMVD;
   bool      m_compositeRefEnabled;        //composite reference
-  bool      m_GBi;
-  bool      m_GBiFast;
+  bool      m_bcw;
+  bool      m_BcwFast;
 #if LUMA_ADAPTIVE_DEBLOCKING_FILTER_QP_OFFSET
   bool      m_LadfEnabled;
   int       m_LadfNumIntervals;
@@ -233,13 +297,19 @@ protected:
   int       m_LadfIntervalLowerBound[MAX_LADF_INTERVALS];
 #endif
 
-  bool      m_MHIntra;
+  bool      m_ciip;
   bool      m_Triangle;
   bool      m_allowDisFracMMVD;
   bool      m_AffineAmvr;
   bool      m_HashME;
   bool      m_AffineAmvrEncOpt;
   bool      m_DMVR;
+  bool      m_MMVD;
+  int       m_MmvdDisNum;
+  bool      m_rgbFormat;
+  bool      m_useColorTrans;
+  unsigned  m_PLTMode;
+  bool      m_JointCbCrMode;
   unsigned  m_IBCMode;
   unsigned  m_IBCLocalSearchRangeX;
   unsigned  m_IBCLocalSearchRangeY;
@@ -252,10 +322,16 @@ protected:
   unsigned  m_wrapAroundOffset;
 
   // ADD_NEW_TOOL : (encoder lib) add tool enabling flags and associated parameters here
-  bool      m_lumaReshapeEnable;
+  bool      m_loopFilterAcrossVirtualBoundariesDisabledFlag;
+  unsigned  m_numVerVirtualBoundaries;
+  unsigned  m_numHorVirtualBoundaries;
+  unsigned  m_virtualBoundariesPosX[3];
+  unsigned  m_virtualBoundariesPosY[3];
+  bool      m_lmcsEnabled;
   unsigned  m_reshapeSignalType;
   unsigned  m_intraCMD;
   ReshapeCW m_reshapeCW;
+  int       m_CSoffset;
   bool      m_encDbOpt;
   bool      m_useFastLCTU;
   bool      m_useFastMrg;
@@ -263,10 +339,14 @@ protected:
   bool      m_useAMaxBT;
   bool      m_e0023FastEnc;
   bool      m_contentBasedFastQtbt;
-
-#if MAX_TB_SIZE_SIGNALLING
+  bool      m_useNonLinearAlfLuma;
+  bool      m_useNonLinearAlfChroma;
+  unsigned  m_maxNumAlfAlternativesChroma;
+  bool      m_MRL;
+  bool      m_MIP;
+  bool      m_useFastMIP;
+  int       m_fastLocalDualTreeMode;
   uint32_t  m_log2MaxTbSize;
-#endif
 
   //====== Loop/Deblock Filter ========
   bool      m_bLoopFilterDisable;
@@ -285,9 +365,7 @@ protected:
   int       m_maxNumOffsetsPerPic;
   bool      m_saoCtuBoundary;
 
-#if K0238_SAO_GREEDY_MERGE_ENCODING
   bool      m_saoGreedyMergeEnc;
-#endif
   //====== Motion search ========
   bool      m_bDisableIntraPUsInInterSlices;
   MESearchMethod m_motionEstimationSearchMethod;
@@ -307,6 +385,8 @@ protected:
   int       m_chromaCrQpOffset;                 //  Chroma Cr Qp Offset (0:default)
   int       m_chromaCbQpOffsetDualTree;         //  Chroma Cb QP Offset for dual tree
   int       m_chromaCrQpOffsetDualTree;         //  Chroma Cr Qp Offset for dual tree
+  int       m_chromaCbCrQpOffset;               //  QP Offset for the joint Cb-Cr mode
+  int       m_chromaCbCrQpOffsetDualTree;       //  QP Offset for the joint Cb-Cr mode in dual tree
 #if ER_CHROMA_QP_WCG_PPS
   WCGChromaQPControl m_wcgChromaQpControl;                    ///< Wide-colour-gamut chroma QP control.
 #endif
@@ -347,6 +427,8 @@ protected:
   uint32_t      m_log2SaoOffsetScale[MAX_NUM_CHANNEL_TYPE];
   bool      m_useTransformSkip;
   bool      m_useTransformSkipFast;
+  bool      m_useChromaTS;
+  int       m_useBDPCM;
   uint32_t      m_log2MaxTransformSkipBlockSize;
   bool      m_transformSkipRotationEnabledFlag;
   bool      m_transformSkipContextEnabledFlag;
@@ -359,122 +441,177 @@ protected:
   int*      m_aidQP;
   uint32_t      m_uiDeltaQpRD;
   bool      m_bFastDeltaQP;
+  bool      m_ISP;
   bool      m_useFastISP;
 
-  bool      m_bUseConstrainedIntraPred;
   bool      m_bFastUDIUseMPMEnabled;
   bool      m_bFastMEForGenBLowDelayEnabled;
   bool      m_bUseBLambdaForNonKeyLowDelayPictures;
-  bool      m_usePCM;
-  int       m_PCMBitDepth[MAX_NUM_CHANNEL_TYPE];
-  uint32_t      m_pcmLog2MaxSize;
-  uint32_t      m_uiPCMLog2MinSize;
-  //====== Slice ========
-  SliceConstraint m_sliceMode;
-  int       m_sliceArgument;
-  //====== Dependent Slice ========
-  SliceConstraint m_sliceSegmentMode;
-  int       m_sliceSegmentArgument;
-  bool      m_bLFCrossSliceBoundaryFlag;
-
-  bool      m_bPCMInputBitDepthFlag;
-  bool      m_bPCMFilterDisableFlag;
-  bool      m_intraSmoothingDisabledFlag;
-#if HEVC_TILES_WPP
-  bool      m_loopFilterAcrossTilesEnabledFlag;
-  bool      m_tileUniformSpacingFlag;
-  int       m_iNumColumnsMinus1;
-  int       m_iNumRowsMinus1;
-  std::vector<int> m_tileColumnWidth;
-  std::vector<int> m_tileRowHeight;
+  bool      m_gopBasedTemporalFilterEnabled;
+  bool      m_noPicPartitionFlag;                             ///< no picture partitioning flag (single tile, single slice)
+  std::vector<uint32_t> m_tileColumnWidth;                    ///< tile column widths in units of CTUs (last column width will be repeated uniformly to cover any remaining picture width)
+  std::vector<uint32_t> m_tileRowHeight;                      ///< tile row heights in units of CTUs (last row height will be repeated uniformly to cover any remaining picture height)
+  bool      m_rectSliceFlag;                                  ///< indicates if using rectangular or raster-scan slices
+  uint32_t  m_numSlicesInPic;                                 ///< number of rectangular slices in the picture (raster-scan slice specified at slice level)
+  bool      m_tileIdxDeltaPresentFlag;                        ///< rectangular slice tile index delta present flag
+  std::vector<RectSlice> m_rectSlices;                        ///< list of rectangular slice syntax parameters
+  std::vector<uint32_t> m_rasterSliceSize;                    ///< raster-scan slice sizes in units of tiles
+  bool      m_bLFCrossTileBoundaryFlag;                       ///< 1: filter across tile boundaries  0: do not filter across tile boundaries
+  bool      m_bLFCrossSliceBoundaryFlag;                      ///< 1: filter across slice boundaries 0: do not filter across slice boundaries
 
+  bool      m_intraSmoothingDisabledFlag;
+  //====== Sub-picture and Slices ========
+  bool      m_singleSlicePerSubPicFlag;
   bool      m_entropyCodingSyncEnabledFlag;
-#endif
+
 
   HashType  m_decodedPictureHashSEIType;
   bool      m_bufferingPeriodSEIEnabled;
   bool      m_pictureTimingSEIEnabled;
-  bool      m_recoveryPointSEIEnabled;
-  bool      m_toneMappingInfoSEIEnabled;
-  int       m_toneMapId;
-  bool      m_toneMapCancelFlag;
-  bool      m_toneMapPersistenceFlag;
-  int       m_codedDataBitDepth;
-  int       m_targetBitDepth;
-  int       m_modelId;
-  int       m_minValue;
-  int       m_maxValue;
-  int       m_sigmoidMidpoint;
-  int       m_sigmoidWidth;
-  int       m_numPivots;
-  int       m_cameraIsoSpeedIdc;
-  int       m_cameraIsoSpeedValue;
-  int       m_exposureIndexIdc;
-  int       m_exposureIndexValue;
-  bool      m_exposureCompensationValueSignFlag;
-  int       m_exposureCompensationValueNumerator;
-  int       m_exposureCompensationValueDenomIdc;
-  int       m_refScreenLuminanceWhite;
-  int       m_extendedRangeWhiteLevel;
-  int       m_nominalBlackLevelLumaCodeValue;
-  int       m_nominalWhiteLevelLumaCodeValue;
-  int       m_extendedWhiteLevelLumaCodeValue;
-  int*      m_startOfCodedInterval;
-  int*      m_codedPivotValue;
-  int*      m_targetPivotValue;
+  bool      m_frameFieldInfoSEIEnabled;
+  bool      m_dependentRAPIndicationSEIEnabled;
   bool      m_framePackingSEIEnabled;
   int       m_framePackingSEIType;
   int       m_framePackingSEIId;
   int       m_framePackingSEIQuincunx;
   int       m_framePackingSEIInterpretation;
-  bool      m_segmentedRectFramePackingSEIEnabled;
-  bool      m_segmentedRectFramePackingSEICancel;
-  int       m_segmentedRectFramePackingSEIType;
-  bool      m_segmentedRectFramePackingSEIPersistence;
-  int       m_displayOrientationSEIAngle;
-  bool      m_temporalLevel0IndexSEIEnabled;
-  bool      m_gradualDecodingRefreshInfoEnabled;
-  int       m_noDisplaySEITLayer;
+  bool      m_bpDeltasGOPStructure;
   bool      m_decodingUnitInfoSEIEnabled;
+#if HEVC_SEI
   bool      m_SOPDescriptionSEIEnabled;
   bool      m_scalableNestingSEIEnabled;
   bool      m_tmctsSEIEnabled;
+#endif
+  bool      m_erpSEIEnabled;
+  bool      m_erpSEICancelFlag;
+  bool      m_erpSEIPersistenceFlag;
+  bool      m_erpSEIGuardBandFlag;
+  uint32_t  m_erpSEIGuardBandType;
+  uint32_t  m_erpSEILeftGuardBandWidth;
+  uint32_t  m_erpSEIRightGuardBandWidth;
+  bool      m_sphereRotationSEIEnabled;
+  bool      m_sphereRotationSEICancelFlag;
+  bool      m_sphereRotationSEIPersistenceFlag;
+  int       m_sphereRotationSEIYaw;
+  int       m_sphereRotationSEIPitch;
+  int       m_sphereRotationSEIRoll;
+  bool      m_omniViewportSEIEnabled;
+  uint32_t  m_omniViewportSEIId;
+  bool      m_omniViewportSEICancelFlag;
+  bool      m_omniViewportSEIPersistenceFlag;
+  uint32_t  m_omniViewportSEICntMinus1;
+  std::vector<int>      m_omniViewportSEIAzimuthCentre;
+  std::vector<int>      m_omniViewportSEIElevationCentre;
+  std::vector<int>      m_omniViewportSEITiltCentre;
+  std::vector<uint32_t> m_omniViewportSEIHorRange;
+  std::vector<uint32_t> m_omniViewportSEIVerRange;
+  bool                  m_rwpSEIEnabled;
+  bool                  m_rwpSEIRwpCancelFlag;
+  bool                  m_rwpSEIRwpPersistenceFlag;
+  bool                  m_rwpSEIConstituentPictureMatchingFlag;
+  int                   m_rwpSEINumPackedRegions;
+  int                   m_rwpSEIProjPictureWidth;
+  int                   m_rwpSEIProjPictureHeight;
+  int                   m_rwpSEIPackedPictureWidth;
+  int                   m_rwpSEIPackedPictureHeight;
+  std::vector<uint8_t>  m_rwpSEIRwpTransformType;
+  std::vector<bool>     m_rwpSEIRwpGuardBandFlag;
+  std::vector<uint32_t> m_rwpSEIProjRegionWidth;
+  std::vector<uint32_t> m_rwpSEIProjRegionHeight;
+  std::vector<uint32_t> m_rwpSEIRwpSEIProjRegionTop;
+  std::vector<uint32_t> m_rwpSEIProjRegionLeft;
+  std::vector<uint16_t> m_rwpSEIPackedRegionWidth;
+  std::vector<uint16_t> m_rwpSEIPackedRegionHeight;
+  std::vector<uint16_t> m_rwpSEIPackedRegionTop;
+  std::vector<uint16_t> m_rwpSEIPackedRegionLeft;
+  std::vector<uint8_t>  m_rwpSEIRwpLeftGuardBandWidth;
+  std::vector<uint8_t>  m_rwpSEIRwpRightGuardBandWidth;
+  std::vector<uint8_t>  m_rwpSEIRwpTopGuardBandHeight;
+  std::vector<uint8_t>  m_rwpSEIRwpBottomGuardBandHeight;
+  std::vector<bool>     m_rwpSEIRwpGuardBandNotUsedForPredFlag;
+  std::vector<uint8_t>  m_rwpSEIRwpGuardBandType;
+  bool                 m_gcmpSEIEnabled;
+  bool                 m_gcmpSEICancelFlag;
+  bool                 m_gcmpSEIPersistenceFlag;
+  uint8_t              m_gcmpSEIPackingType;
+  uint8_t              m_gcmpSEIMappingFunctionType;
+  std::vector<uint8_t> m_gcmpSEIFaceIndex;
+  std::vector<uint8_t> m_gcmpSEIFaceRotation;
+  std::vector<double>  m_gcmpSEIFunctionCoeffU;
+  std::vector<bool>    m_gcmpSEIFunctionUAffectedByVFlag;
+  std::vector<double>  m_gcmpSEIFunctionCoeffV;
+  std::vector<bool>    m_gcmpSEIFunctionVAffectedByUFlag;
+  bool                 m_gcmpSEIGuardBandFlag;
+  bool                 m_gcmpSEIGuardBandBoundaryType;
+  uint8_t              m_gcmpSEIGuardBandSamplesMinus1;
+  bool m_subpicureLevelInfoSEIEnabled;
+  bool                  m_sampleAspectRatioInfoSEIEnabled;
+  bool                  m_sariCancelFlag;
+  bool                  m_sariPersistenceFlag;
+  int                   m_sariAspectRatioIdc;
+  int                   m_sariSarWidth;
+  int                   m_sariSarHeight;
   bool      m_MCTSEncConstraint;
-  bool      m_timeCodeSEIEnabled;
-  int       m_timeCodeSEINumTs;
-  SEITimeSet   m_timeSetArray[MAX_TIMECODE_SEI_SETS];
-  bool      m_kneeSEIEnabled;
-  int       m_kneeSEIId;
-  bool      m_kneeSEICancelFlag;
-  bool      m_kneeSEIPersistenceFlag;
-  int       m_kneeSEIInputDrange;
-  int       m_kneeSEIInputDispLuminance;
-  int       m_kneeSEIOutputDrange;
-  int       m_kneeSEIOutputDispLuminance;
-  int       m_kneeSEINumKneePointsMinus1;
-  int*      m_kneeSEIInputKneePoint;
-  int*      m_kneeSEIOutputKneePoint;
-  std::string m_colourRemapSEIFileRoot;          ///< SEI Colour Remapping File (initialized from external file)
   SEIMasteringDisplay m_masteringDisplay;
 #if U0033_ALTERNATIVE_TRANSFER_CHARACTERISTICS_SEI
   bool      m_alternativeTransferCharacteristicsSEIEnabled;
   uint8_t     m_preferredTransferCharacteristics;
 #endif
-  bool      m_greenMetadataInfoSEIEnabled;
-  uint8_t     m_greenMetadataType;
-  uint8_t     m_xsdMetricType;
+  // film grain characteristics SEI
+  bool      m_fgcSEIEnabled;
+  bool      m_fgcSEICancelFlag;
+  bool      m_fgcSEIPersistenceFlag;
+  uint8_t   m_fgcSEIModelID;
+  bool      m_fgcSEISepColourDescPresentFlag;
+  uint8_t   m_fgcSEIBlendingModeID;
+  uint8_t   m_fgcSEILog2ScaleFactor;
+  bool      m_fgcSEICompModelPresent[MAX_NUM_COMPONENT];
+// content light level (cll) SEI
+  bool      m_cllSEIEnabled;
+  uint16_t  m_cllSEIMaxContentLevel;
+  uint16_t  m_cllSEIMaxPicAvgLevel;
+// ambient viewing environment (ave) SEI
+  bool      m_aveSEIEnabled;
+  uint32_t  m_aveSEIAmbientIlluminance;
+  uint16_t  m_aveSEIAmbientLightX;
+  uint16_t  m_aveSEIAmbientLightY;
+// content colour volume (ccv) SEI
+  bool      m_ccvSEIEnabled;
+  bool      m_ccvSEICancelFlag;
+  bool      m_ccvSEIPersistenceFlag;
+  bool      m_ccvSEIPrimariesPresentFlag;
+  bool      m_ccvSEIMinLuminanceValuePresentFlag;
+  bool      m_ccvSEIMaxLuminanceValuePresentFlag;
+  bool      m_ccvSEIAvgLuminanceValuePresentFlag;
+  double    m_ccvSEIPrimariesX[MAX_NUM_COMPONENT];
+  double    m_ccvSEIPrimariesY[MAX_NUM_COMPONENT];
+  double    m_ccvSEIMinLuminanceValue;
+  double    m_ccvSEIMaxLuminanceValue;
+  double    m_ccvSEIAvgLuminanceValue;
   //====== Weighted Prediction ========
   bool      m_useWeightedPred;       //< Use of Weighting Prediction (P_SLICE)
   bool      m_useWeightedBiPred;    //< Use of Bi-directional Weighting Prediction (B_SLICE)
   WeightedPredictionMethod m_weightedPredictionMethod;
-  uint32_t      m_log2ParallelMergeLevelMinus2;       ///< Parallel merge estimation region
   uint32_t      m_maxNumMergeCand;                    ///< Maximum number of merge candidates
   uint32_t      m_maxNumAffineMergeCand;              ///< Maximum number of affine merge candidates
-#if HEVC_USE_SCALING_LISTS
+  uint32_t      m_maxNumTriangleCand;
+  uint32_t      m_maxNumIBCMergeCand;                 ///< Max number of IBC merge candidates
   ScalingListMode m_useScalingListId;             ///< Using quantization matrix i.e. 0=off, 1=default, 2=file.
   std::string m_scalingListFileName;              ///< quantization matrix file name
-#endif
+  bool      m_sliceLevelRpl;                      ///< code reference picture lists in slice headers rather than picture header
+  bool      m_sliceLevelDblk;                     ///< code deblocking filter parameters in slice headers rather than picture header
+  bool      m_sliceLevelSao;                      ///< code SAO parameters in slice headers rather than picture header
+  bool      m_sliceLevelAlf;                      ///< code ALF parameters in slice headers rather than picture header
+  bool      m_disableScalingMatrixForLfnstBlks;
   int       m_TMVPModeId;
+  bool      m_constantSliceHeaderParamsEnabledFlag;
+  int       m_PPSDepQuantEnabledIdc;
+  int       m_PPSRefPicListSPSIdc0;
+  int       m_PPSRefPicListSPSIdc1;
+  int       m_PPSMvdL1ZeroIdc;
+  int       m_PPSCollocatedFromL0Idc;
+  uint32_t  m_PPSSixMinusMaxNumMergeCandPlus1;
+  uint32_t  m_PPSMaxNumMergeCandMinusMaxNumTriangleCandPlus1;
   bool      m_DepQuantEnabledFlag;
   bool      m_SignDataHidingEnabledFlag;
   bool      m_RCEnableRateControl;
@@ -489,29 +626,26 @@ protected:
   uint32_t      m_RCCpbSize;
   double    m_RCInitialCpbFullness;
 #endif
-  bool      m_TransquantBypassEnabledFlag;                    ///< transquant_bypass_enabled_flag setting in PPS.
-  bool      m_CUTransquantBypassFlagForce;                    ///< if transquant_bypass_enabled_flag, then, if true, all CU transquant bypass flags will be set to true.
-
   CostMode  m_costMode;                                       ///< The cost function to use, primarily when considering lossless coding.
 
-#if HEVC_VPS
   VPS       m_cVPS;
-#endif
+  DPS       m_dps;
+  bool      m_decodingParameterSetEnabled;                   ///< enable decoding parameter set
   bool      m_recalculateQPAccordingToLambda;                 ///< recalculate QP value according to the lambda value
+#if HEVC_SEI
   int       m_activeParameterSetsSEIEnabled;                  ///< enable active parameter set SEI message
+#endif
+  bool      m_hrdParametersPresentFlag;                       ///< enable generation of HRD parameters
   bool      m_vuiParametersPresentFlag;                       ///< enable generation of VUI parameters
   bool      m_aspectRatioInfoPresentFlag;                     ///< Signals whether aspect_ratio_idc is present
+#if HEVC_SEI
   bool      m_chromaResamplingFilterHintEnabled;              ///< Signals whether chroma sampling filter hint data is present
   int       m_chromaResamplingHorFilterIdc;                   ///< Specifies the Index of filter to use
   int       m_chromaResamplingVerFilterIdc;                   ///< Specifies the Index of filter to use
+#endif
   int       m_aspectRatioIdc;                                 ///< aspect_ratio_idc
   int       m_sarWidth;                                       ///< horizontal size of the sample aspect ratio
   int       m_sarHeight;                                      ///< vertical size of the sample aspect ratio
-  bool      m_overscanInfoPresentFlag;                        ///< Signals whether overscan_appropriate_flag is present
-  bool      m_overscanAppropriateFlag;                        ///< Indicates whether conformant decoded pictures are suitable for display using overscan
-  bool      m_videoSignalTypePresentFlag;                     ///< Signals whether video_format, video_full_range_flag, and colour_description_present_flag are present
-  int       m_videoFormat;                                    ///< Indicates representation of pictures
-  bool      m_videoFullRangeFlag;                             ///< Indicates the black level and range of luma and chroma signals
   bool      m_colourDescriptionPresentFlag;                   ///< Signals whether colour_primaries, transfer_characteristics and matrix_coefficients are present
   int       m_colourPrimaries;                                ///< Indicates chromaticity coordinates of the source primaries
   int       m_transferCharacteristics;                        ///< Indicates the opto-electronic transfer characteristics of the source
@@ -519,25 +653,11 @@ protected:
   bool      m_chromaLocInfoPresentFlag;                       ///< Signals whether chroma_sample_loc_type_top_field and chroma_sample_loc_type_bottom_field are present
   int       m_chromaSampleLocTypeTopField;                    ///< Specifies the location of chroma samples for top field
   int       m_chromaSampleLocTypeBottomField;                 ///< Specifies the location of chroma samples for bottom field
-  bool      m_neutralChromaIndicationFlag;                    ///< Indicates that the value of all decoded chroma samples is equal to 1<<(BitDepthCr-1)
-  Window    m_defaultDisplayWindow;                           ///< Represents the default display window parameters
-  bool      m_frameFieldInfoPresentFlag;                      ///< Indicates that pic_struct and other field coding related values are present in picture timing SEI messages
-  bool      m_pocProportionalToTimingFlag;                    ///< Indicates that the POC value is proportional to the output time w.r.t. first picture in CVS
-  int       m_numTicksPocDiffOneMinus1;                       ///< Number of ticks minus 1 that for a POC difference of one
-  bool      m_bitstreamRestrictionFlag;                       ///< Signals whether bitstream restriction parameters are present
-#if HEVC_TILES_WPP
-  bool      m_tilesFixedStructureFlag;                        ///< Indicates that each active picture parameter set has the same values of the syntax elements related to tiles
-#endif
-  bool      m_motionVectorsOverPicBoundariesFlag;             ///< Indicates that no samples outside the picture boundaries are used for inter prediction
-  int       m_minSpatialSegmentationIdc;                      ///< Indicates the maximum size of the spatial segments in the pictures in the coded video sequence
-  int       m_maxBytesPerPicDenom;                            ///< Indicates a number of bytes not exceeded by the sum of the sizes of the VCL NAL units associated with any coded picture
-  int       m_maxBitsPerMinCuDenom;                           ///< Indicates an upper bound for the number of bits of coding_unit() data
-  int       m_log2MaxMvLengthHorizontal;                      ///< Indicate the maximum absolute value of a decoded horizontal MV component in quarter-pel luma units
-  int       m_log2MaxMvLengthVertical;                        ///< Indicate the maximum absolute value of a decoded vertical MV component in quarter-pel luma units
-
-#if HEVC_USE_INTRA_SMOOTHING_T32 || HEVC_USE_INTRA_SMOOTHING_T64
-  bool      m_useStrongIntraSmoothing;                        ///< enable the use of strong intra smoothing (bi_linear interpolation) for 32x32 blocks when reference samples are flat.
-#endif
+  int       m_chromaSampleLocType;                            ///< Specifies the location of chroma samples for progressive content
+  bool      m_overscanInfoPresentFlag;                        ///< Signals whether overscan_appropriate_flag is present
+  bool      m_overscanAppropriateFlag;                        ///< Indicates whether conformant decoded pictures are suitable for display using overscan
+  bool      m_videoFullRangeFlag;                             ///< Indicates the black level and range of luma and chroma signals
+
   bool      m_bEfficientFieldIRAPEnabled;                     ///< enable to code fields in a specific, potentially more efficient, order.
   bool      m_bHarmonizeGopFirstFieldCoupleEnabled;
 
@@ -561,23 +681,32 @@ protected:
   int         m_numSplitThreads;
   bool        m_forceSingleSplitThread;
 #endif
-#if ENABLE_WPP_PARALLELISM
-  int         m_numWppThreads;
-  int         m_numWppExtraLines;
-  bool        m_ensureWppBitEqual;
-#endif
 
   bool        m_alf;                                          ///< Adaptive Loop Filter
+#if JVET_O0756_CALCULATE_HDRMETRICS
+  double                       m_whitePointDeltaE[hdrtoolslib::NB_REF_WHITE];
+  double                       m_maxSampleValue;
+  hdrtoolslib::SampleRange     m_sampleRange;
+  hdrtoolslib::ColorPrimaries  m_colorPrimaries;
+  bool                         m_enableTFunctionLUT;
+  hdrtoolslib::ChromaLocation  m_chromaLocation[2];
+  int                          m_chromaUPFilter;
+  int                          m_cropOffsetLeft;
+  int                          m_cropOffsetTop;
+  int                          m_cropOffsetRight;
+  int                          m_cropOffsetBottom;
+  bool                         m_calculateHdrMetrics;
+#endif
+  double      m_scalingRatioHor;
+  double      m_scalingRatioVer;
+  bool        m_rprEnabled;
+  int         m_switchPocPeriod;
+  int         m_upscaledOutput;
+  int         m_numRefLayers[MAX_VPS_LAYERS];
 
 public:
   EncCfg()
- #if HEVC_TILES_WPP
-  : m_tileColumnWidth()
-  , m_tileRowHeight()
-#endif
   {
-    m_PCMBitDepth[CHANNEL_TYPE_LUMA]=8;
-    m_PCMBitDepth[CHANNEL_TYPE_CHROMA]=8;
   }
 
   virtual ~EncCfg()
@@ -585,7 +714,8 @@ public:
 
   void setProfile(Profile::Name profile) { m_profile = profile; }
   void setLevel(Level::Tier tier, Level::Name level) { m_levelTier = tier; m_level = level; }
-
+  void setNumSubProfile( uint8_t numSubProfile) { m_numSubProfile = numSubProfile; m_subProfile.resize(m_numSubProfile); } ///< stores the count and resizes m_subProfile to that many entries
+  void setSubProfile( int i, uint32_t subProfile) { m_subProfile[i] = subProfile; } ///< writes entry i of m_subProfile; i is not range-checked here. NOTE(review): presumably setNumSubProfile() must have sized the container first - confirm
   bool      getIntraOnlyConstraintFlag() const { return m_bIntraOnlyConstraintFlag; }
   void      setIntraOnlyConstraintFlag(bool bVal) { m_bIntraOnlyConstraintFlag = bVal; }
   uint32_t  getMaxBitDepthConstraintIdc() const { return m_maxBitDepthConstraintIdc; }
@@ -596,12 +726,12 @@ public:
   void      setFrameConstraintFlag(bool bVal) { m_bFrameConstraintFlag = bVal; }
   bool      getNoQtbttDualTreeIntraConstraintFlag() const { return m_bNoQtbttDualTreeIntraConstraintFlag; }
   void      setNoQtbttDualTreeIntraConstraintFlag(bool bVal) { m_bNoQtbttDualTreeIntraConstraintFlag = bVal; }
+  bool      getNoPartitionConstraintsOverrideConstraintFlag() const { return m_noPartitionConstraintsOverrideConstraintFlag; }
+  void      setNoPartitionConstraintsOverrideConstraintFlag(bool bVal) { m_noPartitionConstraintsOverrideConstraintFlag = bVal; }
   bool      getNoSaoConstraintFlag() const { return m_bNoSaoConstraintFlag; }
   void      setNoSaoConstraintFlag(bool bVal) { m_bNoSaoConstraintFlag = bVal; }
   bool      getNoAlfConstraintFlag() const { return m_bNoAlfConstraintFlag; }
   void      setNoAlfConstraintFlag(bool bVal) { m_bNoAlfConstraintFlag = bVal; }
-  bool      getNoPcmConstraintFlag() const { return m_bNoPcmConstraintFlag; }
-  void      setNoPcmConstraintFlag(bool bVal) { m_bNoPcmConstraintFlag = bVal; }
   bool      getNoRefWraparoundConstraintFlag() const { return m_bNoRefWraparoundConstraintFlag; }
   void      setNoRefWraparoundConstraintFlag(bool bVal) { m_bNoRefWraparoundConstraintFlag = bVal; }
   bool      getNoTemporalMvpConstraintFlag() const { return m_bNoTemporalMvpConstraintFlag; }
@@ -612,28 +742,57 @@ public:
   void      setNoAmvrConstraintFlag(bool bVal) { m_bNoAmvrConstraintFlag = bVal; }
   bool      getNoBdofConstraintFlag() const { return m_bNoBdofConstraintFlag; }
   void      setNoBdofConstraintFlag(bool bVal) { m_bNoBdofConstraintFlag = bVal; }
+  bool      getNoDmvrConstraintFlag() const { return m_noDmvrConstraintFlag; }
+  void      setNoDmvrConstraintFlag(bool bVal) { m_noDmvrConstraintFlag = bVal; }
   bool      getNoCclmConstraintFlag() const { return m_bNoCclmConstraintFlag; }
   void      setNoCclmConstraintFlag(bool bVal) { m_bNoCclmConstraintFlag = bVal; }
   bool      getNoMtsConstraintFlag() const { return m_bNoMtsConstraintFlag; }
   void      setNoMtsConstraintFlag(bool bVal) { m_bNoMtsConstraintFlag = bVal; }
+  bool      getNoSbtConstraintFlag() const { return m_noSbtConstraintFlag; }
+  void      setNoSbtConstraintFlag(bool bVal) { m_noSbtConstraintFlag = bVal; }
   bool      getNoAffineMotionConstraintFlag() const { return m_bNoAffineMotionConstraintFlag; }
   void      setNoAffineMotionConstraintFlag(bool bVal) { m_bNoAffineMotionConstraintFlag = bVal; }
-  bool      getNoGbiConstraintFlag() const { return m_bNoGbiConstraintFlag; }
-  void      setNoGbiConstraintFlag(bool bVal) { m_bNoGbiConstraintFlag = bVal; }
-  bool      getNoMhIntraConstraintFlag() const { return m_bNoMhIntraConstraintFlag; }
-  void      setNoMhIntraConstraintFlag(bool bVal) { m_bNoMhIntraConstraintFlag = bVal; }
+  bool      getNoBcwConstraintFlag() const { return m_bNoBcwConstraintFlag; }
+  void      setNoBcwConstraintFlag(bool bVal) { m_bNoBcwConstraintFlag = bVal; }
+  bool      getNoIbcConstraintFlag() const { return m_noIbcConstraintFlag; }
+  void      setNoIbcConstraintFlag(bool bVal) { m_noIbcConstraintFlag = bVal; }
+  bool      getNoCiipConstraintFlag() const { return m_bNoCiipConstraintFlag; }
+  void      setNoCiipConstraintFlag(bool bVal) { m_bNoCiipConstraintFlag = bVal; }
+  bool      getNoFPelMmvdConstraintFlag() const { return m_noFPelMmvdConstraintFlag; }
+  void      setNoFPelMmvdConstraintFlag(bool bVal) { m_noFPelMmvdConstraintFlag = bVal; }
   bool      getNoTriangleConstraintFlag() const { return m_bNoTriangleConstraintFlag; }
   void      setNoTriangleConstraintFlag(bool bVal) { m_bNoTriangleConstraintFlag = bVal; }
   bool      getNoLadfConstraintFlag() const { return m_bNoLadfConstraintFlag; }
   void      setNoLadfConstraintFlag(bool bVal) { m_bNoLadfConstraintFlag = bVal; }
-  bool      getNoCurrPicRefConstraintFlag() const { return m_bNoCurrPicRefConstraintFlag; }
-  void      setNoCurrPicRefConstraintFlag(bool bVal) { m_bNoCurrPicRefConstraintFlag = bVal; }
+  bool      getNoTransformSkipConstraintFlag() const { return m_noTransformSkipConstraintFlag; }
+  void      setNoTransformSkipConstraintFlag(bool bVal) { m_noTransformSkipConstraintFlag = bVal; }
+  bool      getNoBDPCMConstraintFlag() const { return m_noBDPCMConstraintFlag; }
+  void      setNoBDPCMConstraintFlag(bool bVal) { m_noBDPCMConstraintFlag = bVal; }
+  bool      getNoJointCbCrConstraintFlag() const { return m_noJointCbCrConstraintFlag; }
+  void      setNoJointCbCrConstraintFlag(bool bVal) { m_noJointCbCrConstraintFlag = bVal; }
   bool      getNoQpDeltaConstraintFlag() const { return m_bNoQpDeltaConstraintFlag; }
   void      setNoQpDeltaConstraintFlag(bool bVal) { m_bNoQpDeltaConstraintFlag = bVal; }
   bool      getNoDepQuantConstraintFlag() const { return m_bNoDepQuantConstraintFlag; }
   void      setNoDepQuantConstraintFlag(bool bVal) { m_bNoDepQuantConstraintFlag = bVal; }
   bool      getNoSignDataHidingConstraintFlag() const { return m_bNoSignDataHidingConstraintFlag; }
   void      setNoSignDataHidingConstraintFlag(bool bVal) { m_bNoSignDataHidingConstraintFlag = bVal; }
+  bool      getNoTrailConstraintFlag() const { return m_noTrailConstraintFlag; }
+  void      setNoTrailConstraintFlag(bool bVal) { m_noTrailConstraintFlag = bVal; }
+  bool      getNoStsaConstraintFlag() const { return m_noStsaConstraintFlag; }
+  void      setNoStsaConstraintFlag(bool bVal) { m_noStsaConstraintFlag = bVal; }
+  bool      getNoRaslConstraintFlag() const { return m_noRaslConstraintFlag; }
+  void      setNoRaslConstraintFlag(bool bVal) { m_noRaslConstraintFlag = bVal; }
+  bool      getNoRadlConstraintFlag() const { return m_noRadlConstraintFlag; }
+  void      setNoRadlConstraintFlag(bool bVal) { m_noRadlConstraintFlag = bVal; }
+  bool      getNoIdrConstraintFlag() const { return m_noIdrConstraintFlag; }
+  void      setNoIdrConstraintFlag(bool bVal) { m_noIdrConstraintFlag = bVal; }
+  bool      getNoCraConstraintFlag() const { return m_noCraConstraintFlag; }
+  void      setNoCraConstraintFlag(bool bVal) { m_noCraConstraintFlag = bVal; }
+  bool      getNoGdrConstraintFlag() const { return m_noGdrConstraintFlag; }
+  void      setNoGdrConstraintFlag(bool bVal) { m_noGdrConstraintFlag = bVal; }
+  bool      getNoApsConstraintFlag() const { return m_noApsConstraintFlag; }
+  void      setNoApsConstraintFlag(bool bVal) { m_noApsConstraintFlag = bVal; }
+
 
   void      setFrameRate                    ( int   i )      { m_iFrameRate = i; }
   void      setFrameSkip                    ( uint32_t  i )      { m_FrameSkip = i; }
@@ -662,24 +821,46 @@ public:
   void      setCabacZeroWordPaddingEnabled(bool value)       { m_cabacZeroWordPaddingEnabled = value; }
 
   //====== Coding Structure ========
-  void      setIntraPeriod                  ( int   i )      { m_uiIntraPeriod = (uint32_t)i; }
+  void      setIntraPeriod                  (int   i)        { m_uiIntraPeriod = i;                   }
   void      setDecodingRefreshType          ( int   i )      { m_uiDecodingRefreshType = (uint32_t)i; }
-#if JCTVC_Y0038_PARAMS
   void      setReWriteParamSets             ( bool  b )      { m_rewriteParamSets = b; }
-#endif
+  void      setIDRRefParamListPresent       ( bool  b )      { m_idrRefParamList  = b; }
+  bool      getIDRRefParamListPresent       ()        const  { return m_idrRefParamList; }
   void      setGOPSize                      ( int   i )      { m_iGOPSize = i; }
-  void      setGopList                      ( const GOPEntry GOPList[MAX_GOP] ) {  for ( int i = 0; i < MAX_GOP; i++ ) m_GOPList[i] = GOPList[i]; }
-  void      setExtraRPSs                    ( int   i )      { m_extraRPSs = i; }
+  void      setGopList(const GOPEntry GOPList[MAX_GOP]) { for (int i = 0; i < MAX_GOP; i++) m_GOPList[i] = GOPList[i]; }
   const GOPEntry &getGOPEntry               ( int   i ) const { return m_GOPList[i]; }
-  void      setEncodedFlag                  ( int  i, bool value )  { m_GOPList[i].m_isEncoded = value; }
+  void      setRPLList0(const RPLEntry RPLList[MAX_GOP])
+  { // copies all MAX_GOP entries into m_RPLList0 and counts those whose m_POC is not -1
+    m_numRPLList0 = 0;
+    for (int entryIdx = 0; entryIdx < MAX_GOP; ++entryIdx)
+    {
+      m_RPLList0[entryIdx] = RPLList[entryIdx];
+      m_numRPLList0 += (m_RPLList0[entryIdx].m_POC != -1) ? 1 : 0;
+    }
+  }
+  void      setRPLList1(const RPLEntry RPLList[MAX_GOP])
+  { // copies all MAX_GOP entries into m_RPLList1 and counts those whose m_POC is not -1
+    m_numRPLList1 = 0;
+    for (int entryIdx = 0; entryIdx < MAX_GOP; ++entryIdx)
+    {
+      m_RPLList1[entryIdx] = RPLList[entryIdx];
+      m_numRPLList1 += (m_RPLList1[entryIdx].m_POC != -1) ? 1 : 0;
+    }
+  }
+  const RPLEntry &getRPLEntry(int L01, int idx) const { if (L01 != 0) { return m_RPLList1[idx]; } return m_RPLList0[idx]; } ///< entry idx of list 0 when L01 == 0, otherwise of list 1
+  int       getRPLCandidateSize(int L01) const { if (L01 != 0) { return m_numRPLList1; } return m_numRPLList0; } ///< entry count recorded for list 0 when L01 == 0, otherwise for list 1
+  void      setEncodedFlag(uint32_t  i, bool value) { m_GOPList[i].m_isEncoded = value; m_RPLList0[i].m_isEncoded = value; m_RPLList1[i].m_isEncoded = value; } ///< sets m_isEncoded on entry i of the GOP list and of both RPL lists
   void      setMaxDecPicBuffering           ( uint32_t u, uint32_t tlayer ) { m_maxDecPicBuffering[tlayer] = u;    }
   void      setNumReorderPics               ( int  i, uint32_t tlayer ) { m_numReorderPics[tlayer] = i;    }
+  void      setDrapPeriod                   (int drapPeriod) { m_drapPeriod = drapPeriod; }
 
   void      setBaseQP                       ( int   i )      { m_iQP = i; }
 #if X0038_LAMBDA_FROM_QP_CAPABILITY
   void      setIntraQPOffset                ( int   i )         { m_intraQPOffset = i; }
   void      setLambdaFromQPEnable           ( bool  b )         { m_lambdaFromQPEnable = b; }
 #endif
+  void      setChromaQpMappingTableParams   (const ChromaQpMappingTableParams &params) { m_chromaQpMappingTableParams = params; }
+
   void      setPad                          ( int*  iPad                   )      { for ( int i = 0; i < 2; i++ ) m_aiPad[i] = iPad[i]; }
 
   int       getMaxRefPicNum                 ()                              { return m_iMaxRefPicNum;           }
@@ -690,21 +871,52 @@ public:
 
   void      setCTUSize                      ( unsigned  u )      { m_CTUSize  = u; }
   void      setMinQTSizes                   ( unsigned* minQT)   { m_uiMinQT[0] = minQT[0]; m_uiMinQT[1] = minQT[1]; m_uiMinQT[2] = minQT[2]; }
-  void      setMaxBTDepth                   ( unsigned uiMaxBTDepth, unsigned uiMaxBTDepthI, unsigned uiMaxBTDepthIChroma )
-                                                             { m_uiMaxBTDepth = uiMaxBTDepth; m_uiMaxBTDepthI = uiMaxBTDepthI; m_uiMaxBTDepthIChroma = uiMaxBTDepthIChroma; }
-  unsigned  getMaxBTDepth                   ()         const { return m_uiMaxBTDepth; }
-  unsigned  getMaxBTDepthI                  ()         const { return m_uiMaxBTDepthI; }
-  unsigned  getMaxBTDepthIChroma            ()         const { return m_uiMaxBTDepthIChroma; }
+  void      setMaxMTTHierarchyDepth         ( unsigned uiMaxMTTHierarchyDepth, unsigned uiMaxMTTHierarchyDepthI, unsigned uiMaxMTTHierarchyDepthIChroma )
+                                                             { m_uiMaxMTTHierarchyDepth = uiMaxMTTHierarchyDepth; m_uiMaxMTTHierarchyDepthI = uiMaxMTTHierarchyDepthI; m_uiMaxMTTHierarchyDepthIChroma = uiMaxMTTHierarchyDepthIChroma; }
+  unsigned  getMaxMTTHierarchyDepth         ()         const { return m_uiMaxMTTHierarchyDepth; }
+  unsigned  getMaxMTTHierarchyDepthI        ()         const { return m_uiMaxMTTHierarchyDepthI; }
+  unsigned  getMaxMTTHierarchyDepthIChroma  ()         const { return m_uiMaxMTTHierarchyDepthIChroma; }
   int       getCTUSize                      ()         const { return m_CTUSize; }
   void      setUseSplitConsOverride         (bool  n)        { m_useSplitConsOverride = n; }
   bool      getUseSplitConsOverride         ()         const { return m_useSplitConsOverride; }
   void      setDualITree                    ( bool b )       { m_dualITree = b; }
   bool      getDualITree                    ()         const { return m_dualITree; }
+  void      setSubPicPresentFlag                        (bool b)                    { m_subPicPresentFlag = b; } ///< sub-picture setters follow; the indexed ones take the value first and the index i second
+  void      setNumSubPics                               (uint32_t u)                { m_numSubPics = u; }
+  void      setSubPicCtuTopLeftX                        (uint32_t u, int i)         { m_subPicCtuTopLeftX[i] = u; } ///< writes entry i; i is not range-checked here
+  void      setSubPicCtuTopLeftY                        (uint32_t u, int i)         { m_subPicCtuTopLeftY[i] = u; } ///< writes entry i; i is not range-checked here
+  void      setSubPicWidth                              (uint32_t u, int i)         { m_subPicWidth[i] = u; } ///< writes entry i; i is not range-checked here
+  void      setSubPicHeight                             (uint32_t u, int i)         { m_subPicHeight[i] = u; } ///< writes entry i; i is not range-checked here
+  void      setSubPicTreatedAsPicFlag                   (bool b, int i)             { m_subPicTreatedAsPicFlag[i] = b; } ///< writes entry i; i is not range-checked here
+  void      setLoopFilterAcrossSubpicEnabledFlag        (uint32_t u, int i)         { m_loopFilterAcrossSubpicEnabledFlag[i] = u; } ///< writes entry i; the flag is passed as uint32_t, not bool
+  void      setSubPicIdPresentFlag                      (bool b)                    { m_subPicIdPresentFlag = b; }
+  void      setSubPicIdSignallingPresentFlag            (bool b)                    { m_subPicIdSignallingPresentFlag = b; }
+  void      setSubPicIdLen                              (uint32_t u)                { m_subPicIdLen = u; }
+  void      setSubPicId                                 (uint32_t b, int i)         { m_subPicId[i] = b; } ///< writes entry i; parameter b carries the uint32_t id value
+
+  bool      getSubPicPresentFlag                        ()                    const { return m_subPicPresentFlag; } ///< sub-picture getters follow; all are read-only and const-qualified
+  uint32_t  getNumSubPics                               ()                    const { return m_numSubPics; }
+  uint32_t  getSubPicCtuTopLeftX                        (int i)               const { return m_subPicCtuTopLeftX[i]; } ///< reads entry i; i is not range-checked here
+  uint32_t  getSubPicCtuTopLeftY                        (int i)               const { return m_subPicCtuTopLeftY[i]; } ///< reads entry i; i is not range-checked here
+  uint32_t  getSubPicWidth                              (int i)               const { return m_subPicWidth[i]; } ///< reads entry i; i is not range-checked here
+  uint32_t  getSubPicHeight                             (int i)               const { return m_subPicHeight[i]; } ///< reads entry i; i is not range-checked here
+  bool      getSubPicTreatedAsPicFlag                   (int i)               const { return m_subPicTreatedAsPicFlag[i]; } ///< reads entry i; i is not range-checked here
+  uint32_t  getLoopFilterAcrossSubpicEnabledFlag        (int i)               const { return m_loopFilterAcrossSubpicEnabledFlag[i]; } ///< reads entry i; returned as uint32_t, not bool
+  bool      getSubPicIdPresentFlag                      ()                    const { return m_subPicIdPresentFlag; }
+  bool      getSubPicIdSignallingPresentFlag            ()                    const { return m_subPicIdSignallingPresentFlag; }
+  uint32_t  getSubPicIdLen                              ()                    const { return m_subPicIdLen; }
+  uint32_t  getSubPicId                                 (int i)               const { return m_subPicId[i]; } ///< reads entry i; i is not range-checked here
+  void      setLFNST                        ( bool b )       { m_LFNST = b; }
+  bool      getLFNST()                                 const { return m_LFNST; }
+  void      setUseFastLFNST                 ( bool b )       { m_useFastLFNST = b; }
+  bool      getUseFastLFNST()                          const { return m_useFastLFNST; }
 
   void      setUseLMChroma                  ( int n )        { m_LMChroma = n; }
   int       getUseLMChroma()                           const { return m_LMChroma; }
-  void      setCclmCollocatedChromaFlag     ( bool b )       { m_cclmCollocatedChromaFlag = b; }
-  bool      getCclmCollocatedChromaFlag     ()         const { return m_cclmCollocatedChromaFlag; }
+  void      setHorCollocatedChromaFlag( bool b )             { m_horCollocatedChromaFlag = b; }
+  bool      getHorCollocatedChromaFlag()               const { return m_horCollocatedChromaFlag; }
+  void      setVerCollocatedChromaFlag( bool b )             { m_verCollocatedChromaFlag = b; }
+  bool      getVerCollocatedChromaFlag()               const { return m_verCollocatedChromaFlag; }
 
   void      setSubPuMvpMode(int n)          { m_SubPuMvpMode = n; }
   bool      getSubPuMvpMode()         const { return m_SubPuMvpMode; }
@@ -713,13 +925,15 @@ public:
   bool      getAffine                       ()         const { return m_Affine; }
   void      setAffineType( bool b )                          { m_AffineType = b; }
   bool      getAffineType()                            const { return m_AffineType; }
+  void      setPROF                         (bool b)         { m_PROF = b; }
+  bool      getPROF                         ()         const { return m_PROF; }
   void      setBIO(bool b)                                   { m_BIO = b; }
   bool      getBIO()                                   const { return m_BIO; }
 
-  void      setIntraMTSMaxCand              ( unsigned u )   { m_IntraMTSMaxCand = u; }
-  unsigned  getIntraMTSMaxCand              ()         const { return m_IntraMTSMaxCand; }
-  void      setInterMTSMaxCand              ( unsigned u )   { m_InterMTSMaxCand = u; }
-  unsigned  getInterMTSMaxCand              ()         const { return m_InterMTSMaxCand; }
+  void      setMTSIntraMaxCand              ( unsigned u )   { m_MTSIntraMaxCand = u; }
+  unsigned  getMTSIntraMaxCand              ()         const { return m_MTSIntraMaxCand; }
+  void      setMTSInterMaxCand              ( unsigned u )   { m_MTSInterMaxCand = u; }
+  unsigned  getMTSInterMaxCand              ()         const { return m_MTSInterMaxCand; }
   void      setIntraMTS                     ( bool b )       { m_IntraMTS = b; }
   bool      getIntraMTS                     ()         const { return m_IntraMTS; }
   void      setInterMTS                     ( bool b )       { m_InterMTS = b; }
@@ -729,12 +943,17 @@ public:
   void      setUseSBT                       ( bool b )       { m_SBT = b; }
   bool      getUseSBT                       ()         const { return m_SBT; }
 
+  void      setSBTFast64WidthTh             ( int  b )       { m_SBTFast64WidthTh = b; }
+  int       getSBTFast64WidthTh             ()         const { return m_SBTFast64WidthTh; }
+
   void      setUseCompositeRef              (bool b)         { m_compositeRefEnabled = b; }
   bool      getUseCompositeRef              ()         const { return m_compositeRefEnabled; }
-  void      setUseGBi                       ( bool b )       { m_GBi = b; }
-  bool      getUseGBi                       ()         const { return m_GBi; }
-  void      setUseGBiFast                   ( uint32_t b )   { m_GBiFast = b; }
-  bool      getUseGBiFast                   ()         const { return m_GBiFast; }
+  void      setUseSMVD                      ( bool b )       { m_SMVD = b; }
+  bool      getUseSMVD                      ()         const { return m_SMVD; }
+  void      setUseBcw                       ( bool b )       { m_bcw = b; }
+  bool      getUseBcw                       ()         const { return m_bcw; }
+  void      setUseBcwFast                   ( uint32_t b )   { m_BcwFast = b; } ///< NOTE(review): takes uint32_t while getUseBcwFast() returns bool - confirm the declared type of m_BcwFast
+  bool      getUseBcwFast                   ()         const { return m_BcwFast; } ///< returns m_BcwFast converted to bool
 
 #if LUMA_ADAPTIVE_DEBLOCKING_FILTER_QP_OFFSET
   void      setUseLadf                      ( bool b )       { m_LadfEnabled = b; }
@@ -748,8 +967,8 @@ public:
 
 #endif
 
-  void      setUseMHIntra                   ( bool b )       { m_MHIntra = b; }
-  bool      getUseMHIntra                   ()         const { return m_MHIntra; }
+  void      setUseCiip                   ( bool b )       { m_ciip = b; }
+  bool      getUseCiip                   ()         const { return m_ciip; }
   void      setUseTriangle                  ( bool b )       { m_Triangle = b; }
   bool      getUseTriangle                  ()         const { return m_Triangle; }
   void      setAllowDisFracMMVD             ( bool b )       { m_allowDisFracMMVD = b;    }
@@ -762,7 +981,18 @@ public:
   bool      getUseAffineAmvrEncOpt          ()         const { return m_AffineAmvrEncOpt; }
   void      setDMVR                      ( bool b )       { m_DMVR = b; }
   bool      getDMVR                      ()         const { return m_DMVR; }
-
+  void      setMMVD                         (bool b)         { m_MMVD = b;    }
+  bool      getMMVD                         ()         const { return m_MMVD; }
+  void      setMmvdDisNum                   ( int b )        { m_MmvdDisNum = b; }
+  int       getMmvdDisNum                   ()         const { return m_MmvdDisNum; }
+  void      setRGBFormatFlag(bool value) { m_rgbFormat = value; }
+  bool      getRGBFormatFlag()                         const { return m_rgbFormat; }
+  void      setUseColorTrans(bool value) { m_useColorTrans = value; }
+  bool      getUseColorTrans()                         const { return m_useColorTrans; }
+  void      setPLTMode                   ( unsigned n)    { m_PLTMode = n; }
+  unsigned  getPLTMode                   ()         const { return m_PLTMode; }
+  void      setJointCbCr                    ( bool b )       { m_JointCbCrMode = b; }
+  bool      getJointCbCr                    ()         const { return m_JointCbCrMode; }
   void      setIBCMode                      (unsigned n)     { m_IBCMode = n; }
   unsigned  getIBCMode                      ()         const { return m_IBCMode; }
   void      setIBCLocalSearchRangeX         (unsigned n)     { m_IBCLocalSearchRangeX = n; }
@@ -784,15 +1014,28 @@ public:
   unsigned  getWrapAroundOffset             ()         const { return m_wrapAroundOffset; }
 
   // ADD_NEW_TOOL : (encoder lib) add access functions here
-
-  void      setReshaper                     ( bool b )                   { m_lumaReshapeEnable = b; }
-  bool      getReshaper                     () const                     { return m_lumaReshapeEnable; }
+  void      setLoopFilterAcrossVirtualBoundariesDisabledFlag( bool b ) { m_loopFilterAcrossVirtualBoundariesDisabledFlag = b; }
+  bool      getLoopFilterAcrossVirtualBoundariesDisabledFlag() const { return m_loopFilterAcrossVirtualBoundariesDisabledFlag; }
+  void      setNumVerVirtualBoundaries      ( unsigned u )   { m_numVerVirtualBoundaries = u; }
+  unsigned  getNumVerVirtualBoundaries      ()         const { return m_numVerVirtualBoundaries; }
+  void      setNumHorVirtualBoundaries      ( unsigned u )   { m_numHorVirtualBoundaries = u; }
+  unsigned  getNumHorVirtualBoundaries      ()         const { return m_numHorVirtualBoundaries; }
+  void      setVirtualBoundariesPosX        ( unsigned u, unsigned idx ) { m_virtualBoundariesPosX[idx] = u; } ///< value u first, index idx second; idx is not range-checked here
+  unsigned  getVirtualBoundariesPosX        ( unsigned idx ) const { return m_virtualBoundariesPosX[idx]; } ///< reads entry idx; idx is not range-checked here
+  void      setVirtualBoundariesPosY        ( unsigned u, unsigned idx ) { m_virtualBoundariesPosY[idx] = u; } ///< value u first, index idx second; idx is not range-checked here
+  unsigned  getVirtualBoundariesPosY        ( unsigned idx ) const { return m_virtualBoundariesPosY[idx]; } ///< reads entry idx; idx is not range-checked here
+  void      setUseISP                       ( bool b )       { m_ISP = b; }
+  bool      getUseISP                       ()         const { return m_ISP; }
+  void      setLmcs                         ( bool b )                   { m_lmcsEnabled = b; }
+  bool      getLmcs                         () const                     { return m_lmcsEnabled; }
   void      setReshapeSignalType            ( uint32_t signalType )      { m_reshapeSignalType = signalType; }
   uint32_t  getReshapeSignalType            () const                     { return m_reshapeSignalType; }
   void      setReshapeIntraCMD              (uint32_t intraCMD)          { m_intraCMD = intraCMD; }
   uint32_t  getReshapeIntraCMD              ()                           { return m_intraCMD; }
   void      setReshapeCW                    (const ReshapeCW &reshapeCW) { m_reshapeCW = reshapeCW; }
   const ReshapeCW& getReshapeCW             ()                           { return m_reshapeCW; }
+  void      setReshapeCSoffset              (int CSoffset)          { m_CSoffset = CSoffset; }
+  int       getReshapeCSoffset              ()                const { return m_CSoffset; } ///< returns the value stored by setReshapeCSoffset(); const so it is callable on a const EncCfg
   void      setMaxCUWidth                   ( uint32_t  u )      { m_maxCUWidth  = u; }
   uint32_t      getMaxCUWidth                   () const         { return m_maxCUWidth; }
   void      setMaxCUHeight                  ( uint32_t  u )      { m_maxCUHeight = u; }
@@ -816,10 +1059,22 @@ public:
   bool      getUseE0023FastEnc              () const         { return m_e0023FastEnc; }
   void      setUseContentBasedFastQtbt      ( bool b )       { m_contentBasedFastQtbt = b; }
   bool      getUseContentBasedFastQtbt      () const         { return m_contentBasedFastQtbt; }
+  void      setUseNonLinearAlfLuma          ( bool b )       { m_useNonLinearAlfLuma = b; } // luma and chroma non-linear ALF are toggled independently
+  bool      getUseNonLinearAlfLuma          () const         { return m_useNonLinearAlfLuma; }
+  void      setUseNonLinearAlfChroma        ( bool b )       { m_useNonLinearAlfChroma = b; }
+  bool      getUseNonLinearAlfChroma        () const         { return m_useNonLinearAlfChroma; }
+  void      setMaxNumAlfAlternativesChroma  ( uint32_t u )   { m_maxNumAlfAlternativesChroma = u; } // stored as given; no upper bound is enforced here
+  uint32_t  getMaxNumAlfAlternativesChroma  () const         { return m_maxNumAlfAlternativesChroma; }
+  void      setUseMRL                       ( bool b )       { m_MRL = b; } // MRL on/off (presumably multiple reference line intra -- confirm)
+  bool      getUseMRL                       () const         { return m_MRL; }
+  void      setUseMIP                       ( bool b )       { m_MIP = b; } // MIP on/off (presumably matrix-based intra prediction -- confirm)
+  bool      getUseMIP                       () const         { return m_MIP; }
+  void      setUseFastMIP                   ( bool b )       { m_useFastMIP = b; } // separate flag; setting it does not touch m_MIP
+  bool      getUseFastMIP                   () const         { return m_useFastMIP; }
+  void     setFastLocalDualTreeMode         ( int i )        { m_fastLocalDualTreeMode = i; } // integer mode, not a flag; value meanings are not visible here
+  int      getFastLocalDualTreeMode         () const         { return m_fastLocalDualTreeMode; }
 
-#if MAX_TB_SIZE_SIGNALLING
   void      setLog2MaxTbSize                ( uint32_t  u )   { m_log2MaxTbSize = u; }
-#endif
 
   //====== Loop/Deblock Filter ========
   void      setLoopFilterDisable            ( bool  b )      { m_bLoopFilterDisable       = b; }
@@ -853,6 +1108,9 @@ public:
   void      setChromaCrQpOffsetDualTree     ( int   i )      { m_chromaCrQpOffsetDualTree = i; }
   int       getChromaCbQpOffsetDualTree     ()         const { return m_chromaCbQpOffsetDualTree; }
   int       getChromaCrQpOffsetDualTree     ()         const { return m_chromaCrQpOffsetDualTree; }
+  void      setChromaCbCrQpOffset           ( int   i )      { m_chromaCbCrQpOffset = i; } // setter only in this hunk; no matching getter is added here
+  void      setChromaCbCrQpOffsetDualTree   ( int   i )      { m_chromaCbCrQpOffsetDualTree = i; } // dual-tree value is stored separately from m_chromaCbCrQpOffset
+  int       getChromaCbCrQpOffsetDualTree   ()         const { return m_chromaCbCrQpOffsetDualTree; }
 #if ER_CHROMA_QP_WCG_PPS
   void      setWCGChromaQpControl           ( const WCGChromaQPControl &ctrl )     { m_wcgChromaQpControl = ctrl; }
   const WCGChromaQPControl &getWCGChromaQPControl () const { return m_wcgChromaQpControl; }
@@ -907,12 +1165,11 @@ public:
   //==== Coding Structure ========
   uint32_t      getIntraPeriod                  () const     { return  m_uiIntraPeriod; }
   uint32_t      getDecodingRefreshType          () const     { return  m_uiDecodingRefreshType; }
-#if JCTVC_Y0038_PARAMS
   bool      getReWriteParamSets             ()  const    { return m_rewriteParamSets; }
-#endif
   int       getGOPSize                      () const     { return  m_iGOPSize; }
   int       getMaxDecPicBuffering           (uint32_t tlayer) { return m_maxDecPicBuffering[tlayer]; }
   int       getNumReorderPics               (uint32_t tlayer) { return m_numReorderPics[tlayer]; }
+  int       getDrapPeriod                   () const { return m_drapPeriod; } // const: pure read, matches getGOPSize/getIntraPeriod
 #if X0038_LAMBDA_FROM_QP_CAPABILITY
   int       getIntraQPOffset                () const    { return  m_intraQPOffset; }
   int       getLambdaFromQPEnable           () const    { return  m_lambdaFromQPEnable; }
@@ -960,6 +1217,7 @@ public:
   //==== Tool list ========
   void      setBitDepth( const ChannelType chType, int internalBitDepthForChannel ) { m_bitDepth[chType] = internalBitDepthForChannel; }
   void      setInputBitDepth( const ChannelType chType, int internalBitDepthForChannel ) { m_inputBitDepth[chType] = internalBitDepthForChannel; }
+  int*      getInputBitDepth()                              { return m_inputBitDepth; } // hands out the storage indexed by chType in setInputBitDepth; writes through the pointer change the config
   void      setUseASR                       ( bool  b )     { m_bUseASR     = b; }
   void      setUseHADME                     ( bool  b )     { m_bUseHADME   = b; }
   void      setUseRDOQ                      ( bool  b )     { m_useRDOQ    = b; }
@@ -973,21 +1231,15 @@ public:
   void      setUseFastDecisionForMerge      ( bool  b )     { m_useFastDecisionForMerge = b; }
   void      setUseCbfFastMode               ( bool  b )     { m_bUseCbfFastMode = b; }
   void      setUseEarlySkipDetection        ( bool  b )     { m_useEarlySkipDetection = b; }
-  void      setUseConstrainedIntraPred      ( bool  b )     { m_bUseConstrainedIntraPred = b; }
   void      setFastUDIUseMPMEnabled         ( bool  b )     { m_bFastUDIUseMPMEnabled = b; }
   void      setFastMEForGenBLowDelayEnabled ( bool  b )     { m_bFastMEForGenBLowDelayEnabled = b; }
   void      setUseBLambdaForNonKeyLowDelayPictures ( bool b ) { m_bUseBLambdaForNonKeyLowDelayPictures = b; }
 
-  void      setPCMInputBitDepthFlag         ( bool  b )     { m_bPCMInputBitDepthFlag = b; }
-  void      setPCMFilterDisableFlag         ( bool  b )     {  m_bPCMFilterDisableFlag = b; }
-  void      setUsePCM                       ( bool  b )     {  m_usePCM = b;               }
-  void      setPCMBitDepth( const ChannelType chType, int pcmBitDepthForChannel ) { m_PCMBitDepth[chType] = pcmBitDepthForChannel; }
-  void      setPCMLog2MaxSize               ( uint32_t u )      { m_pcmLog2MaxSize = u;      }
-  void      setPCMLog2MinSize               ( uint32_t u )     { m_uiPCMLog2MinSize = u;      }
   void      setdQPs                         ( int*  p )     { m_aidQP       = p; }
   void      setDeltaQpRD                    ( uint32_t  u )     {m_uiDeltaQpRD  = u; }
   void      setFastDeltaQp                  ( bool  b )     {m_bFastDeltaQP = b; }
   int       getBitDepth                     (const ChannelType chType) const { return m_bitDepth[chType]; }
+  int*      getBitDepth                     ()      { return m_bitDepth; } // overload without chType: mutable access to the whole m_bitDepth storage, unlike the per-channel const getter
   bool      getUseASR                       ()      { return m_bUseASR;     }
   bool      getUseHADME                     ()      { return m_bUseHADME;   }
   bool      getUseRDOQ                      ()      { return m_useRDOQ;    }
@@ -1001,15 +1253,11 @@ public:
   bool      getUseFastDecisionForMerge      () const{ return m_useFastDecisionForMerge; }
   bool      getUseCbfFastMode               () const{ return m_bUseCbfFastMode; }
   bool      getUseEarlySkipDetection        () const{ return m_useEarlySkipDetection; }
-  bool      getUseConstrainedIntraPred      ()      { return m_bUseConstrainedIntraPred; }
   bool      getFastUDIUseMPMEnabled         ()      { return m_bFastUDIUseMPMEnabled; }
   bool      getFastMEForGenBLowDelayEnabled ()      { return m_bFastMEForGenBLowDelayEnabled; }
   bool      getUseBLambdaForNonKeyLowDelayPictures () { return m_bUseBLambdaForNonKeyLowDelayPictures; }
-  bool      getPCMInputBitDepthFlag         ()      { return m_bPCMInputBitDepthFlag;   }
-  bool      getPCMFilterDisableFlag         ()      { return m_bPCMFilterDisableFlag;   }
-  bool      getUsePCM                       ()      { return m_usePCM;                 }
-  uint32_t      getPCMLog2MaxSize               ()      { return m_pcmLog2MaxSize;  }
-  uint32_t      getPCMLog2MinSize               ()      { return  m_uiPCMLog2MinSize;  }
+  void  setGopBasedTemporalFilterEnabled(bool flag) { m_gopBasedTemporalFilterEnabled = flag; } // on/off switch, stored as given
+  bool  getGopBasedTemporalFilterEnabled() const    { return m_gopBasedTemporalFilterEnabled; } // const: pure read, usable through const references
 
   bool      getCrossComponentPredictionEnabledFlag     ()                const { return m_crossComponentPredictionEnabledFlag;   }
   void      setCrossComponentPredictionEnabledFlag     (const bool value)      { m_crossComponentPredictionEnabledFlag = value;  }
@@ -1023,6 +1271,12 @@ public:
   void setTransformSkipRotationEnabledFlag             (const bool value)  { m_transformSkipRotationEnabledFlag = value; }
   bool getTransformSkipContextEnabledFlag              ()            const { return m_transformSkipContextEnabledFlag;  }
   void setTransformSkipContextEnabledFlag              (const bool value)  { m_transformSkipContextEnabledFlag = value; }
+  bool getUseChromaTS                                  () const { return m_useChromaTS; } // const like the neighbouring transform-skip getters
+  void setUseChromaTS                                  (bool b) { m_useChromaTS = b; }
+  int  getUseBDPCM                                     () const   { return m_useBDPCM; } // int, not bool: the setter accepts an int mode value
+  void setUseBDPCM                                     ( int b )  { m_useBDPCM = b;    }
+  bool getUseJointCbCr                                 () const   { return m_JointCbCrMode; } // reads m_JointCbCrMode; const so it works on const objects
+  void setUseJointCbCr                                 (bool b)   { m_JointCbCrMode = b; }
   bool getPersistentRiceAdaptationEnabledFlag          ()                 const { return m_persistentRiceAdaptationEnabledFlag;  }
   void setPersistentRiceAdaptationEnabledFlag          (const bool value)       { m_persistentRiceAdaptationEnabledFlag = value; }
   bool getCabacBypassAlignmentEnabledFlag              ()       const      { return m_cabacBypassAlignmentEnabledFlag;  }
@@ -1035,26 +1289,37 @@ public:
   void setLog2MaxTransformSkipBlockSize                ( uint32_t u )    { m_log2MaxTransformSkipBlockSize  = u;       }
   bool getIntraSmoothingDisabledFlag               ()      const { return m_intraSmoothingDisabledFlag; }
   void setIntraSmoothingDisabledFlag               (bool bValue) { m_intraSmoothingDisabledFlag=bValue; }
-  bool getUseFastISP                                   ()         { return m_useFastISP;    }
+  bool getUseFastISP                                   () const   { return m_useFastISP;    } // const-qualified: usable through const references
   void setUseFastISP                                   ( bool b ) { m_useFastISP  = b;   }
 
   const int* getdQPs                        () const { return m_aidQP;       }
   uint32_t      getDeltaQpRD                    () const { return m_uiDeltaQpRD; }
   bool      getFastDeltaQp                  () const { return m_bFastDeltaQP; }
 
-  //====== Slice ========
-  void  setSliceMode                   ( SliceConstraint  i )        { m_sliceMode = i;              }
-  void  setSliceArgument               ( int  i )                    { m_sliceArgument = i;          }
-  SliceConstraint getSliceMode         () const                      { return m_sliceMode;           }
-  int   getSliceArgument               ()                            { return m_sliceArgument;       }
-  //====== Dependent Slice ========
-  void  setSliceSegmentMode            ( SliceConstraint  i )        { m_sliceSegmentMode = i;       }
-  void  setSliceSegmentArgument        ( int  i )                    { m_sliceSegmentArgument = i;   }
-  SliceConstraint getSliceSegmentMode  () const                      { return m_sliceSegmentMode;    }
-  int   getSliceSegmentArgument        ()                            { return m_sliceSegmentArgument;}
-  void      setLFCrossSliceBoundaryFlag     ( bool   bValue  )       { m_bLFCrossSliceBoundaryFlag = bValue; }
-  bool      getLFCrossSliceBoundaryFlag     ()                       { return m_bLFCrossSliceBoundaryFlag;   }
-
+  //====== Tiles and Slices ========
+  void      setNoPicPartitionFlag( bool b )                                { m_noPicPartitionFlag = b;              }
+  bool      getNoPicPartitionFlag() const                                  { return m_noPicPartitionFlag;           } // const: pure read
+  void      setTileColWidths( const std::vector<uint32_t>& tileColWidths ) { m_tileColumnWidth = tileColWidths;     } // by const-ref: a single copy into the member
+  const     std::vector<uint32_t>*   getTileColWidths() const              { return &m_tileColumnWidth;             } // address of the member, never null; not a copy
+  void      setTileRowHeights( const std::vector<uint32_t>& tileRowHeights ) { m_tileRowHeight = tileRowHeights;      } // by const-ref: a single copy into the member
+  const     std::vector<uint32_t>*   getTileRowHeights() const             { return &m_tileRowHeight;               } // address of the member, never null; not a copy
+  void      setRectSliceFlag( bool b )                                     { m_rectSliceFlag = b;                   }
+  bool      getRectSliceFlag() const                                       { return m_rectSliceFlag;                } // const: pure read
+  void      setNumSlicesInPic( uint32_t u )                                { m_numSlicesInPic = u;                  } // stored as given; not checked against the slice vectors
+  uint32_t  getNumSlicesInPic() const                                      { return m_numSlicesInPic;               } // const: pure read
+  void      setTileIdxDeltaPresentFlag( bool b )                           { m_tileIdxDeltaPresentFlag = b;         }
+  bool      getTileIdxDeltaPresentFlag() const                             { return m_tileIdxDeltaPresentFlag;      } // const: pure read
+  void      setRectSlices( const std::vector<RectSlice>& rectSlices )      { m_rectSlices = rectSlices;             } // by const-ref: a single copy into the member
+  const     std::vector<RectSlice>*   getRectSlices() const                { return &m_rectSlices;                  } // address of the member, never null; not a copy
+  void      setRasterSliceSizes( const std::vector<uint32_t>& rasterSliceSizes ) { m_rasterSliceSize = rasterSliceSizes;  } // by const-ref: a single copy into the member
+  const     std::vector<uint32_t>*   getRasterSliceSizes() const           { return &m_rasterSliceSize;             } // address of the member, never null; not a copy
+  void      setLFCrossTileBoundaryFlag( bool b )                           { m_bLFCrossTileBoundaryFlag = b;        }
+  bool      getLFCrossTileBoundaryFlag() const                             { return m_bLFCrossTileBoundaryFlag;     } // const: pure read
+  void      setLFCrossSliceBoundaryFlag( bool b )                          { m_bLFCrossSliceBoundaryFlag = b;       }
+  bool      getLFCrossSliceBoundaryFlag() const                            { return m_bLFCrossSliceBoundaryFlag;    } // const: pure read
+  //====== Sub-picture and Slices ========
+  void      setSingleSlicePerSubPicFlagFlag( bool b )                { m_singleSlicePerSubPicFlag = b;    } // doubled "FlagFlag" kept: the name is the public interface
+  bool      getSingleSlicePerSubPicFlagFlag( ) const                 { return m_singleSlicePerSubPicFlag;    } // const: pure read, usable through const references
   void      setUseSAO                  (bool bVal)                   { m_bUseSAO = bVal; }
   bool      getUseSAO                  ()                            { return m_bUseSAO; }
   void  setTestSAODisableAtPictureLevel (bool bVal)                  { m_bTestSAODisableAtPictureLevel = bVal; }
@@ -1069,91 +1334,20 @@ public:
   void  setSaoCtuBoundary              (bool val)                    { m_saoCtuBoundary = val; }
   bool  getSaoCtuBoundary              ()                            { return m_saoCtuBoundary; }
 
-#if K0238_SAO_GREEDY_MERGE_ENCODING
   void  setSaoGreedyMergeEnc           (bool val)                    { m_saoGreedyMergeEnc = val; }
   bool  getSaoGreedyMergeEnc           ()                            { return m_saoGreedyMergeEnc; }
-#endif
-#if HEVC_TILES_WPP
-  void  setLFCrossTileBoundaryFlag               ( bool   val  )     { m_loopFilterAcrossTilesEnabledFlag = val; }
-  bool  getLFCrossTileBoundaryFlag               ()                  { return m_loopFilterAcrossTilesEnabledFlag;   }
-  void  setTileUniformSpacingFlag      ( bool b )                    { m_tileUniformSpacingFlag = b; }
-  bool  getTileUniformSpacingFlag      ()                            { return m_tileUniformSpacingFlag; }
-  void  setNumColumnsMinus1            ( int i )                     { m_iNumColumnsMinus1 = i; }
-  int   getNumColumnsMinus1            ()                            { return m_iNumColumnsMinus1; }
-  void  setColumnWidth ( const std::vector<int>& columnWidth )       { m_tileColumnWidth = columnWidth; }
-  uint32_t  getColumnWidth                 ( uint32_t columnIdx )            { return m_tileColumnWidth[columnIdx]; }
-  void  setNumRowsMinus1               ( int i )                     { m_iNumRowsMinus1 = i; }
-  int   getNumRowsMinus1               ()                            { return m_iNumRowsMinus1; }
-  void  setRowHeight ( const std::vector<int>& rowHeight)            { m_tileRowHeight = rowHeight; }
-  uint32_t  getRowHeight                   ( uint32_t rowIdx )               { return m_tileRowHeight[rowIdx]; }
-#endif
-  void  xCheckGSParameters();
-#if HEVC_TILES_WPP
   void  setEntropyCodingSyncEnabledFlag(bool b)                      { m_entropyCodingSyncEnabledFlag = b; }
   bool  getEntropyCodingSyncEnabledFlag() const                      { return m_entropyCodingSyncEnabledFlag; }
-#endif
   void  setDecodedPictureHashSEIType(HashType m)                     { m_decodedPictureHashSEIType = m; }
   HashType getDecodedPictureHashSEIType() const                      { return m_decodedPictureHashSEIType; }
   void  setBufferingPeriodSEIEnabled(bool b)                         { m_bufferingPeriodSEIEnabled = b; }
   bool  getBufferingPeriodSEIEnabled() const                         { return m_bufferingPeriodSEIEnabled; }
   void  setPictureTimingSEIEnabled(bool b)                           { m_pictureTimingSEIEnabled = b; }
   bool  getPictureTimingSEIEnabled() const                           { return m_pictureTimingSEIEnabled; }
-  void  setRecoveryPointSEIEnabled(bool b)                           { m_recoveryPointSEIEnabled = b; }
-  bool  getRecoveryPointSEIEnabled() const                           { return m_recoveryPointSEIEnabled; }
-  void  setToneMappingInfoSEIEnabled(bool b)                         { m_toneMappingInfoSEIEnabled = b;  }
-  bool  getToneMappingInfoSEIEnabled()                               { return m_toneMappingInfoSEIEnabled;  }
-  void  setTMISEIToneMapId(int b)                                    { m_toneMapId = b;  }
-  int   getTMISEIToneMapId()                                         { return m_toneMapId;  }
-  void  setTMISEIToneMapCancelFlag(bool b)                           { m_toneMapCancelFlag=b;  }
-  bool  getTMISEIToneMapCancelFlag()                                 { return m_toneMapCancelFlag;  }
-  void  setTMISEIToneMapPersistenceFlag(bool b)                      { m_toneMapPersistenceFlag = b;  }
-  bool   getTMISEIToneMapPersistenceFlag()                           { return m_toneMapPersistenceFlag;  }
-  void  setTMISEICodedDataBitDepth(int b)                            { m_codedDataBitDepth = b;  }
-  int   getTMISEICodedDataBitDepth()                                 { return m_codedDataBitDepth;  }
-  void  setTMISEITargetBitDepth(int b)                               { m_targetBitDepth = b;  }
-  int   getTMISEITargetBitDepth()                                    { return m_targetBitDepth;  }
-  void  setTMISEIModelID(int b)                                      { m_modelId = b;  }
-  int   getTMISEIModelID()                                           { return m_modelId;  }
-  void  setTMISEIMinValue(int b)                                     { m_minValue = b;  }
-  int   getTMISEIMinValue()                                          { return m_minValue;  }
-  void  setTMISEIMaxValue(int b)                                     { m_maxValue = b;  }
-  int   getTMISEIMaxValue()                                          { return m_maxValue;  }
-  void  setTMISEISigmoidMidpoint(int b)                              { m_sigmoidMidpoint = b;  }
-  int   getTMISEISigmoidMidpoint()                                   { return m_sigmoidMidpoint;  }
-  void  setTMISEISigmoidWidth(int b)                                 { m_sigmoidWidth = b;  }
-  int   getTMISEISigmoidWidth()                                      { return m_sigmoidWidth;  }
-  void  setTMISEIStartOfCodedInterva( int*  p )                      { m_startOfCodedInterval = p;  }
-  int*  getTMISEIStartOfCodedInterva()                               { return m_startOfCodedInterval;  }
-  void  setTMISEINumPivots(int b)                                    { m_numPivots = b;  }
-  int   getTMISEINumPivots()                                         { return m_numPivots;  }
-  void  setTMISEICodedPivotValue( int*  p )                          { m_codedPivotValue = p;  }
-  int*  getTMISEICodedPivotValue()                                   { return m_codedPivotValue;  }
-  void  setTMISEITargetPivotValue( int*  p )                         { m_targetPivotValue = p;  }
-  int*  getTMISEITargetPivotValue()                                  { return m_targetPivotValue;  }
-  void  setTMISEICameraIsoSpeedIdc(int b)                            { m_cameraIsoSpeedIdc = b;  }
-  int   getTMISEICameraIsoSpeedIdc()                                 { return m_cameraIsoSpeedIdc;  }
-  void  setTMISEICameraIsoSpeedValue(int b)                          { m_cameraIsoSpeedValue = b;  }
-  int   getTMISEICameraIsoSpeedValue()                               { return m_cameraIsoSpeedValue;  }
-  void  setTMISEIExposureIndexIdc(int b)                             { m_exposureIndexIdc = b;  }
-  int   getTMISEIExposurIndexIdc()                                   { return m_exposureIndexIdc;  }
-  void  setTMISEIExposureIndexValue(int b)                           { m_exposureIndexValue = b;  }
-  int   getTMISEIExposurIndexValue()                                 { return m_exposureIndexValue;  }
-  void  setTMISEIExposureCompensationValueSignFlag(bool b)           { m_exposureCompensationValueSignFlag = b;  }
-  bool  getTMISEIExposureCompensationValueSignFlag()                 { return m_exposureCompensationValueSignFlag;  }
-  void  setTMISEIExposureCompensationValueNumerator(int b)           { m_exposureCompensationValueNumerator = b;  }
-  int   getTMISEIExposureCompensationValueNumerator()                { return m_exposureCompensationValueNumerator;  }
-  void  setTMISEIExposureCompensationValueDenomIdc(int b)            { m_exposureCompensationValueDenomIdc =b;  }
-  int   getTMISEIExposureCompensationValueDenomIdc()                 { return m_exposureCompensationValueDenomIdc;  }
-  void  setTMISEIRefScreenLuminanceWhite(int b)                      { m_refScreenLuminanceWhite = b;  }
-  int   getTMISEIRefScreenLuminanceWhite()                           { return m_refScreenLuminanceWhite;  }
-  void  setTMISEIExtendedRangeWhiteLevel(int b)                      { m_extendedRangeWhiteLevel = b;  }
-  int   getTMISEIExtendedRangeWhiteLevel()                           { return m_extendedRangeWhiteLevel;  }
-  void  setTMISEINominalBlackLevelLumaCodeValue(int b)               { m_nominalBlackLevelLumaCodeValue = b;  }
-  int   getTMISEINominalBlackLevelLumaCodeValue()                    { return m_nominalBlackLevelLumaCodeValue;  }
-  void  setTMISEINominalWhiteLevelLumaCodeValue(int b)               { m_nominalWhiteLevelLumaCodeValue = b;  }
-  int   getTMISEINominalWhiteLevelLumaCodeValue()                    { return m_nominalWhiteLevelLumaCodeValue;  }
-  void  setTMISEIExtendedWhiteLevelLumaCodeValue(int b)              { m_extendedWhiteLevelLumaCodeValue =b;  }
-  int   getTMISEIExtendedWhiteLevelLumaCodeValue()                   { return m_extendedWhiteLevelLumaCodeValue;  }
+  void  setFrameFieldInfoSEIEnabled(bool b)                           { m_frameFieldInfoSEIEnabled = b; }
+  bool  getFrameFieldInfoSEIEnabled() const                           { return m_frameFieldInfoSEIEnabled; }
+  void  setDependentRAPIndicationSEIEnabled(bool b)                  { m_dependentRAPIndicationSEIEnabled = b; }
+  bool  getDependentRAPIndicationSEIEnabled() const                  { return m_dependentRAPIndicationSEIEnabled; } // bool to match the setter and the other *SEIEnabled getters
   void  setFramePackingArrangementSEIEnabled(bool b)                 { m_framePackingSEIEnabled = b; }
   bool  getFramePackingArrangementSEIEnabled() const                 { return m_framePackingSEIEnabled; }
   void  setFramePackingArrangementSEIType(int b)                     { m_framePackingSEIType = b; }
@@ -1164,63 +1358,159 @@ public:
   int   getFramePackingArrangementSEIQuincunx()                      { return m_framePackingSEIQuincunx; }
   void  setFramePackingArrangementSEIInterpretation(int b)           { m_framePackingSEIInterpretation = b; }
   int   getFramePackingArrangementSEIInterpretation()                { return m_framePackingSEIInterpretation; }
-  void  setSegmentedRectFramePackingArrangementSEIEnabled(bool b)    { m_segmentedRectFramePackingSEIEnabled = b; }
-  bool  getSegmentedRectFramePackingArrangementSEIEnabled() const    { return m_segmentedRectFramePackingSEIEnabled; }
-  void  setSegmentedRectFramePackingArrangementSEICancel(int b)      { m_segmentedRectFramePackingSEICancel = b; }
-  int   getSegmentedRectFramePackingArrangementSEICancel()           { return m_segmentedRectFramePackingSEICancel; }
-  void  setSegmentedRectFramePackingArrangementSEIType(int b)        { m_segmentedRectFramePackingSEIType = b; }
-  int   getSegmentedRectFramePackingArrangementSEIType()             { return m_segmentedRectFramePackingSEIType; }
-  void  setSegmentedRectFramePackingArrangementSEIPersistence(int b) { m_segmentedRectFramePackingSEIPersistence = b; }
-  int   getSegmentedRectFramePackingArrangementSEIPersistence()      { return m_segmentedRectFramePackingSEIPersistence; }
-  void  setDisplayOrientationSEIAngle(int b)                         { m_displayOrientationSEIAngle = b; }
-  int   getDisplayOrientationSEIAngle()                              { return m_displayOrientationSEIAngle; }
-  void  setTemporalLevel0IndexSEIEnabled(bool b)                     { m_temporalLevel0IndexSEIEnabled = b; }
-  bool  getTemporalLevel0IndexSEIEnabled() const                     { return m_temporalLevel0IndexSEIEnabled; }
-  void  setGradualDecodingRefreshInfoEnabled(bool b)                 { m_gradualDecodingRefreshInfoEnabled = b;    }
-  bool  getGradualDecodingRefreshInfoEnabled() const                 { return m_gradualDecodingRefreshInfoEnabled; }
-  void  setNoDisplaySEITLayer(int b)                                 { m_noDisplaySEITLayer = b;    }
-  int   getNoDisplaySEITLayer()                                      { return m_noDisplaySEITLayer; }
+  void  setBpDeltasGOPStructure(const bool b)                        { m_bpDeltasGOPStructure = b; }
+  bool  getBpDeltasGOPStructure() const                              { return m_bpDeltasGOPStructure; }
   void  setDecodingUnitInfoSEIEnabled(bool b)                        { m_decodingUnitInfoSEIEnabled = b;    }
   bool  getDecodingUnitInfoSEIEnabled() const                        { return m_decodingUnitInfoSEIEnabled; }
+#if HEVC_SEI
   void  setSOPDescriptionSEIEnabled(bool b)                          { m_SOPDescriptionSEIEnabled = b; }
   bool  getSOPDescriptionSEIEnabled() const                          { return m_SOPDescriptionSEIEnabled; }
   void  setScalableNestingSEIEnabled(bool b)                         { m_scalableNestingSEIEnabled = b; }
   bool  getScalableNestingSEIEnabled() const                         { return m_scalableNestingSEIEnabled; }
   void  setTMCTSSEIEnabled(bool b)                                   { m_tmctsSEIEnabled = b; }
   bool  getTMCTSSEIEnabled()                                         { return m_tmctsSEIEnabled; }
+#endif
+
+  void  setErpSEIEnabled(bool b)                                     { m_erpSEIEnabled = b; }
+  bool  getErpSEIEnabled() const                                     { return m_erpSEIEnabled; }
+  void  setErpSEICancelFlag(bool b)                                  { m_erpSEICancelFlag = b; }
+  bool  getErpSEICancelFlag() const                                  { return m_erpSEICancelFlag; }
+  void  setErpSEIPersistenceFlag(bool b)                             { m_erpSEIPersistenceFlag = b; }
+  bool  getErpSEIPersistenceFlag() const                             { return m_erpSEIPersistenceFlag; }
+  void  setErpSEIGuardBandFlag(bool b)                               { m_erpSEIGuardBandFlag = b; }
+  bool  getErpSEIGuardBandFlag() const                               { return m_erpSEIGuardBandFlag; }
+  void  setErpSEIGuardBandType(uint32_t b)                           { m_erpSEIGuardBandType = b; }
+  uint32_t  getErpSEIGuardBandType() const                           { return m_erpSEIGuardBandType; }
+  void  setErpSEILeftGuardBandWidth(uint32_t b)                      { m_erpSEILeftGuardBandWidth = b; }
+  uint32_t  getErpSEILeftGuardBandWidth() const                      { return m_erpSEILeftGuardBandWidth; }
+  void  setErpSEIRightGuardBandWidth(uint32_t b)                     { m_erpSEIRightGuardBandWidth = b; }
+  uint32_t  getErpSEIRightGuardBandWidth() const                     { return m_erpSEIRightGuardBandWidth; }
+  void  setSphereRotationSEIEnabled(bool b)                          { m_sphereRotationSEIEnabled = b; }
+  bool  getSphereRotationSEIEnabled() const                          { return m_sphereRotationSEIEnabled; }
+  void  setSphereRotationSEICancelFlag(bool b)                       { m_sphereRotationSEICancelFlag = b; }
+  bool  getSphereRotationSEICancelFlag() const                       { return m_sphereRotationSEICancelFlag; }
+  void  setSphereRotationSEIPersistenceFlag(bool b)                  { m_sphereRotationSEIPersistenceFlag = b; }
+  bool  getSphereRotationSEIPersistenceFlag() const                  { return m_sphereRotationSEIPersistenceFlag; }
+  void  setSphereRotationSEIYaw(int b)                               { m_sphereRotationSEIYaw = b; }
+  int   getSphereRotationSEIYaw() const                              { return m_sphereRotationSEIYaw; }
+  void  setSphereRotationSEIPitch(int b)                             { m_sphereRotationSEIPitch = b; }
+  int   getSphereRotationSEIPitch() const                            { return m_sphereRotationSEIPitch; }
+  void  setSphereRotationSEIRoll(int b)                              { m_sphereRotationSEIRoll = b; }
+  int   getSphereRotationSEIRoll() const                             { return m_sphereRotationSEIRoll; }
+  void  setOmniViewportSEIEnabled(bool b)                            { m_omniViewportSEIEnabled = b; }
+  bool  getOmniViewportSEIEnabled() const                            { return m_omniViewportSEIEnabled; }
+  void  setOmniViewportSEIId(uint32_t b)                             { m_omniViewportSEIId = b; }
+  uint32_t  getOmniViewportSEIId() const                             { return m_omniViewportSEIId; }
+  void  setOmniViewportSEICancelFlag(bool b)                         { m_omniViewportSEICancelFlag = b; }
+  bool  getOmniViewportSEICancelFlag() const                         { return m_omniViewportSEICancelFlag; }
+  void  setOmniViewportSEIPersistenceFlag(bool b)                    { m_omniViewportSEIPersistenceFlag = b; }
+  bool  getOmniViewportSEIPersistenceFlag() const                    { return m_omniViewportSEIPersistenceFlag; }
+  void  setOmniViewportSEICntMinus1(uint32_t b)                      { m_omniViewportSEICntMinus1 = b; }
+  uint32_t  getOmniViewportSEICntMinus1() const                      { return m_omniViewportSEICntMinus1; }
+  void  setOmniViewportSEIAzimuthCentre(const std::vector<int>& vi)  { m_omniViewportSEIAzimuthCentre = vi; }
+  int   getOmniViewportSEIAzimuthCentre(int idx) const               { return m_omniViewportSEIAzimuthCentre[idx]; }
+  void  setOmniViewportSEIElevationCentre(const std::vector<int>& vi){ m_omniViewportSEIElevationCentre = vi; }
+  int   getOmniViewportSEIElevationCentre(int idx) const             { return m_omniViewportSEIElevationCentre[idx]; }
+  void  setOmniViewportSEITiltCentre(const std::vector<int>& vi)     { m_omniViewportSEITiltCentre = vi; }
+  int   getOmniViewportSEITiltCentre(int idx) const                  { return m_omniViewportSEITiltCentre[idx]; }
+  void  setOmniViewportSEIHorRange(const std::vector<uint32_t>& vi)  { m_omniViewportSEIHorRange = vi; }
+  uint32_t  getOmniViewportSEIHorRange(int idx) const                { return m_omniViewportSEIHorRange[idx]; }
+  void  setOmniViewportSEIVerRange(const std::vector<uint32_t>& vi)  { m_omniViewportSEIVerRange = vi; }
+  uint32_t  getOmniViewportSEIVerRange(int idx) const                { return m_omniViewportSEIVerRange[idx]; }
+  void     setRwpSEIEnabled(bool b)                                                                         { m_rwpSEIEnabled = b; }
+  bool     getRwpSEIEnabled() const                                                                         { return m_rwpSEIEnabled; }
+  void     setRwpSEIRwpCancelFlag(bool b)                                                                   { m_rwpSEIRwpCancelFlag = b; }
+  bool     getRwpSEIRwpCancelFlag() const                                                                   { return m_rwpSEIRwpCancelFlag; }
+  void     setRwpSEIRwpPersistenceFlag(bool b)                                                              { m_rwpSEIRwpPersistenceFlag = b; }
+  bool     getRwpSEIRwpPersistenceFlag() const                                                              { return m_rwpSEIRwpPersistenceFlag; }
+  void     setRwpSEIConstituentPictureMatchingFlag(bool b)                                                  { m_rwpSEIConstituentPictureMatchingFlag = b; }
+  bool     getRwpSEIConstituentPictureMatchingFlag() const                                                  { return m_rwpSEIConstituentPictureMatchingFlag; }
+  void     setRwpSEINumPackedRegions(int value)                                                             { m_rwpSEINumPackedRegions = value; }
+  int      getRwpSEINumPackedRegions() const                                                                { return m_rwpSEINumPackedRegions; }
+  void     setRwpSEIProjPictureWidth(int value)                                                             { m_rwpSEIProjPictureWidth = value; }
+  int      getRwpSEIProjPictureWidth() const                                                                { return m_rwpSEIProjPictureWidth; }
+  void     setRwpSEIProjPictureHeight(int value)                                                            { m_rwpSEIProjPictureHeight = value; }
+  int      getRwpSEIProjPictureHeight() const                                                               { return m_rwpSEIProjPictureHeight; }
+  void     setRwpSEIPackedPictureWidth(int value)                                                           { m_rwpSEIPackedPictureWidth = value; }
+  int      getRwpSEIPackedPictureWidth() const                                                              { return m_rwpSEIPackedPictureWidth; }
+  void     setRwpSEIPackedPictureHeight(int value)                                                          { m_rwpSEIPackedPictureHeight = value; }
+  int      getRwpSEIPackedPictureHeight() const                                                             { return m_rwpSEIPackedPictureHeight; }
+  void     setRwpSEIRwpTransformType(const std::vector<uint8_t>& rwpTransformType)                          { m_rwpSEIRwpTransformType = rwpTransformType; }
+  uint8_t  getRwpSEIRwpTransformType(uint32_t idx) const                                                    { return m_rwpSEIRwpTransformType[idx]; }
+  void     setRwpSEIRwpGuardBandFlag(const std::vector<bool>& rwpGuardBandFlag)                             { m_rwpSEIRwpGuardBandFlag = rwpGuardBandFlag; }
+  bool     getRwpSEIRwpGuardBandFlag(uint32_t idx) const                                                    { return m_rwpSEIRwpGuardBandFlag[idx]; }
+  void     setRwpSEIProjRegionWidth(const std::vector<uint32_t>& projRegionWidth)                           { m_rwpSEIProjRegionWidth = projRegionWidth; }
+  uint32_t getRwpSEIProjRegionWidth(uint32_t idx) const                                                     { return m_rwpSEIProjRegionWidth[idx]; }
+  void     setRwpSEIProjRegionHeight(const std::vector<uint32_t>& projRegionHeight)                         { m_rwpSEIProjRegionHeight = projRegionHeight; }
+  uint32_t getRwpSEIProjRegionHeight(uint32_t idx) const                                                    { return m_rwpSEIProjRegionHeight[idx]; }
+  void     setRwpSEIRwpSEIProjRegionTop(const std::vector<uint32_t>& projRegionTop)                         { m_rwpSEIRwpSEIProjRegionTop = projRegionTop; }
+  uint32_t getRwpSEIRwpSEIProjRegionTop(uint32_t idx) const                                                 { return m_rwpSEIRwpSEIProjRegionTop[idx]; }
+  void     setRwpSEIProjRegionLeft(const std::vector<uint32_t>& projRegionLeft)                             { m_rwpSEIProjRegionLeft = projRegionLeft; }
+  uint32_t getRwpSEIProjRegionLeft(uint32_t idx) const                                                      { return m_rwpSEIProjRegionLeft[idx]; }
+  void     setRwpSEIPackedRegionWidth(const std::vector<uint16_t>& packedRegionWidth)                       { m_rwpSEIPackedRegionWidth = packedRegionWidth; }
+  uint16_t getRwpSEIPackedRegionWidth(uint32_t idx) const                                                   { return m_rwpSEIPackedRegionWidth[idx]; }
+  void     setRwpSEIPackedRegionHeight(const std::vector<uint16_t>& packedRegionHeight)                     { m_rwpSEIPackedRegionHeight = packedRegionHeight; }
+  uint16_t getRwpSEIPackedRegionHeight(uint32_t idx) const                                                  { return m_rwpSEIPackedRegionHeight[idx]; }
+  void     setRwpSEIPackedRegionTop(const std::vector<uint16_t>& packedRegionTop)                           { m_rwpSEIPackedRegionTop = packedRegionTop; }
+  uint16_t getRwpSEIPackedRegionTop(uint32_t idx) const                                                     { return m_rwpSEIPackedRegionTop[idx]; }
+  void     setRwpSEIPackedRegionLeft(const std::vector<uint16_t>& packedRegionLeft)                         { m_rwpSEIPackedRegionLeft = packedRegionLeft; }
+  uint16_t getRwpSEIPackedRegionLeft(uint32_t idx) const                                                    { return m_rwpSEIPackedRegionLeft[idx]; }
+  void     setRwpSEIRwpLeftGuardBandWidth(const std::vector<uint8_t>& rwpLeftGuardBandWidth)                { m_rwpSEIRwpLeftGuardBandWidth = rwpLeftGuardBandWidth; }
+  uint8_t  getRwpSEIRwpLeftGuardBandWidth(uint32_t idx) const                                               { return m_rwpSEIRwpLeftGuardBandWidth[idx]; }
+  void     setRwpSEIRwpRightGuardBandWidth(const std::vector<uint8_t>& rwpRightGuardBandWidth)              { m_rwpSEIRwpRightGuardBandWidth = rwpRightGuardBandWidth; }
+  uint8_t  getRwpSEIRwpRightGuardBandWidth(uint32_t idx) const                                              { return m_rwpSEIRwpRightGuardBandWidth[idx]; }
+  void     setRwpSEIRwpTopGuardBandHeight(const std::vector<uint8_t>& rwpTopGuardBandHeight)                { m_rwpSEIRwpTopGuardBandHeight = rwpTopGuardBandHeight; }
+  uint8_t  getRwpSEIRwpTopGuardBandHeight(uint32_t idx) const                                               { return m_rwpSEIRwpTopGuardBandHeight[idx]; }
+  void     setRwpSEIRwpBottomGuardBandHeight(const std::vector<uint8_t>& rwpBottomGuardBandHeight)          { m_rwpSEIRwpBottomGuardBandHeight = rwpBottomGuardBandHeight; }
+  uint8_t  getRwpSEIRwpBottomGuardBandHeight(uint32_t idx) const                                            { return m_rwpSEIRwpBottomGuardBandHeight[idx]; }
+  void     setRwpSEIRwpGuardBandNotUsedForPredFlag(const std::vector<bool>& rwpGuardBandNotUsedForPredFlag) { m_rwpSEIRwpGuardBandNotUsedForPredFlag = rwpGuardBandNotUsedForPredFlag; }
+  bool     getRwpSEIRwpGuardBandNotUsedForPredFlag(uint32_t idx) const                                      { return m_rwpSEIRwpGuardBandNotUsedForPredFlag[idx]; }
+  void     setRwpSEIRwpGuardBandType(const std::vector<uint8_t>& rwpGuardBandType)                          { m_rwpSEIRwpGuardBandType = rwpGuardBandType; }
+  uint8_t  getRwpSEIRwpGuardBandType(uint32_t idx) const                                                    { return m_rwpSEIRwpGuardBandType[idx]; }
+  void    setGcmpSEIEnabled(bool b)                                                                 { m_gcmpSEIEnabled = b; }
+  bool    getGcmpSEIEnabled() const                                                                 { return m_gcmpSEIEnabled; }
+  void    setGcmpSEICancelFlag(bool b)                                                              { m_gcmpSEICancelFlag = b; }
+  bool    getGcmpSEICancelFlag() const                                                              { return m_gcmpSEICancelFlag; }
+  void    setGcmpSEIPersistenceFlag(bool b)                                                         { m_gcmpSEIPersistenceFlag = b; }
+  bool    getGcmpSEIPersistenceFlag() const                                                         { return m_gcmpSEIPersistenceFlag; }
+  void    setGcmpSEIPackingType(uint8_t u)                                                          { m_gcmpSEIPackingType = u; }
+  uint8_t getGcmpSEIPackingType() const                                                             { return m_gcmpSEIPackingType; }
+  void    setGcmpSEIMappingFunctionType(uint8_t u)                                                  { m_gcmpSEIMappingFunctionType = u; }
+  uint8_t getGcmpSEIMappingFunctionType() const                                                     { return m_gcmpSEIMappingFunctionType; }
+  void    setGcmpSEIFaceIndex(const std::vector<uint8_t>& gcmpFaceIndex)                            { m_gcmpSEIFaceIndex = gcmpFaceIndex; }
+  uint8_t getGcmpSEIFaceIndex(int idx) const                                                        { return m_gcmpSEIFaceIndex[idx]; }
+  void    setGcmpSEIFaceRotation(const std::vector<uint8_t>& gcmpFaceRotation)                      { m_gcmpSEIFaceRotation = gcmpFaceRotation; }
+  uint8_t getGcmpSEIFaceRotation(int idx) const                                                     { return m_gcmpSEIFaceRotation[idx]; }
+  void    setGcmpSEIFunctionCoeffU(const std::vector<double>& gcmpFunctionCoeffU)                   { m_gcmpSEIFunctionCoeffU = gcmpFunctionCoeffU; }
+  double  getGcmpSEIFunctionCoeffU(int idx) const                                                   { return m_gcmpSEIFunctionCoeffU[idx]; }
+  void    setGcmpSEIFunctionUAffectedByVFlag(const std::vector<bool>& gcmpFunctionUAffectedByVFlag) { m_gcmpSEIFunctionUAffectedByVFlag = gcmpFunctionUAffectedByVFlag; }
+  bool    getGcmpSEIFunctionUAffectedByVFlag(int idx) const                                         { return m_gcmpSEIFunctionUAffectedByVFlag[idx]; }
+  void    setGcmpSEIFunctionCoeffV(const std::vector<double>& gcmpFunctionCoeffV)                   { m_gcmpSEIFunctionCoeffV = gcmpFunctionCoeffV; }
+  double  getGcmpSEIFunctionCoeffV(int idx) const                                                   { return m_gcmpSEIFunctionCoeffV[idx]; }
+  void    setGcmpSEIFunctionVAffectedByUFlag(const std::vector<bool>& gcmpFunctionVAffectedByUFlag) { m_gcmpSEIFunctionVAffectedByUFlag = gcmpFunctionVAffectedByUFlag; }
+  bool    getGcmpSEIFunctionVAffectedByUFlag(int idx) const                                         { return m_gcmpSEIFunctionVAffectedByUFlag[idx]; }
+  void    setGcmpSEIGuardBandFlag(bool b)                                                           { m_gcmpSEIGuardBandFlag = b; }
+  bool    getGcmpSEIGuardBandFlag() const                                                           { return m_gcmpSEIGuardBandFlag; }
+  void    setGcmpSEIGuardBandBoundaryType(bool b)                                                   { m_gcmpSEIGuardBandBoundaryType = b; }
+  bool    getGcmpSEIGuardBandBoundaryType() const                                                   { return m_gcmpSEIGuardBandBoundaryType; }
+  void    setGcmpSEIGuardBandSamplesMinus1(uint8_t u)                                               { m_gcmpSEIGuardBandSamplesMinus1 = u; }
+  uint8_t getGcmpSEIGuardBandSamplesMinus1() const                                                  { return m_gcmpSEIGuardBandSamplesMinus1; }
+  bool     getSubpicureLevelInfoSEIEnabled() const                                                          { return m_subpicureLevelInfoSEIEnabled; }
+  void     setSubpicureLevelInfoSEIEnabled(const bool val)                                                  { m_subpicureLevelInfoSEIEnabled = val; }
+  void     setSampleAspectRatioInfoSEIEnabled(const bool val)                                               { m_sampleAspectRatioInfoSEIEnabled = val; }
+  bool     getSampleAspectRatioInfoSEIEnabled() const                                                       { return m_sampleAspectRatioInfoSEIEnabled; }
+  void     setSariCancelFlag(const bool val)                                                                { m_sariCancelFlag = val; }
+  bool     getSariCancelFlag() const                                                                        { return m_sariCancelFlag; }
+  void     setSariPersistenceFlag(const bool val)                                                           { m_sariPersistenceFlag = val; }
+  bool     getSariPersistenceFlag() const                                                                   { return m_sariPersistenceFlag; }
+  void     setSariAspectRatioIdc(const int val)                                                             { m_sariAspectRatioIdc = val; }
+  int      getSariAspectRatioIdc() const                                                                    { return m_sariAspectRatioIdc; }
+  void     setSariSarWidth(const int val)                                                                   { m_sariSarWidth = val; }
+  int      getSariSarWidth() const                                                                          { return m_sariSarWidth; }
+  void     setSariSarHeight(const int val)                                                                  { m_sariSarHeight = val; }
+  int      getSariSarHeight() const                                                                         { return m_sariSarHeight; }
   void  setMCTSEncConstraint(bool b)                                 { m_MCTSEncConstraint = b; }
   bool  getMCTSEncConstraint()                                       { return m_MCTSEncConstraint; }
-  void  setTimeCodeSEIEnabled(bool b)                                { m_timeCodeSEIEnabled = b; }
-  bool  getTimeCodeSEIEnabled()                                      { return m_timeCodeSEIEnabled; }
-  void  setNumberOfTimeSets(int value)                               { m_timeCodeSEINumTs = value; }
-  int   getNumberOfTimesets()                                        { return m_timeCodeSEINumTs; }
-  void  setTimeSet(SEITimeSet element, int index)                    { m_timeSetArray[index] = element; }
-  SEITimeSet &getTimeSet(int index)                                  { return m_timeSetArray[index]; }
-  const SEITimeSet &getTimeSet(int index) const                      { return m_timeSetArray[index]; }
-  void  setKneeSEIEnabled(int b)                                     { m_kneeSEIEnabled = b; }
-  bool  getKneeSEIEnabled()                                          { return m_kneeSEIEnabled; }
-  void  setKneeSEIId(int b)                                          { m_kneeSEIId = b; }
-  int   getKneeSEIId()                                               { return m_kneeSEIId; }
-  void  setKneeSEICancelFlag(bool b)                                 { m_kneeSEICancelFlag=b; }
-  bool  getKneeSEICancelFlag()                                       { return m_kneeSEICancelFlag; }
-  void  setKneeSEIPersistenceFlag(bool b)                            { m_kneeSEIPersistenceFlag = b; }
-  bool  getKneeSEIPersistenceFlag()                                  { return m_kneeSEIPersistenceFlag; }
-  void  setKneeSEIInputDrange(int b)                                 { m_kneeSEIInputDrange = b; }
-  int   getKneeSEIInputDrange()                                      { return m_kneeSEIInputDrange; }
-  void  setKneeSEIInputDispLuminance(int b)                          { m_kneeSEIInputDispLuminance = b; }
-  int   getKneeSEIInputDispLuminance()                               { return m_kneeSEIInputDispLuminance; }
-  void  setKneeSEIOutputDrange(int b)                                { m_kneeSEIOutputDrange = b; }
-  int   getKneeSEIOutputDrange()                                     { return m_kneeSEIOutputDrange; }
-  void  setKneeSEIOutputDispLuminance(int b)                         { m_kneeSEIOutputDispLuminance = b; }
-  int   getKneeSEIOutputDispLuminance()                              { return m_kneeSEIOutputDispLuminance; }
-  void  setKneeSEINumKneePointsMinus1(int b)                         { m_kneeSEINumKneePointsMinus1 = b; }
-  int   getKneeSEINumKneePointsMinus1()                              { return m_kneeSEINumKneePointsMinus1; }
-  void  setKneeSEIInputKneePoint(int *p)                             { m_kneeSEIInputKneePoint = p; }
-  int*  getKneeSEIInputKneePoint()                                   { return m_kneeSEIInputKneePoint; }
-  void  setKneeSEIOutputKneePoint(int *p)                            { m_kneeSEIOutputKneePoint = p; }
-  int*  getKneeSEIOutputKneePoint()                                  { return m_kneeSEIOutputKneePoint; }
-  void  setColourRemapInfoSEIFileRoot( const std::string &s )        { m_colourRemapSEIFileRoot = s; }
-  const std::string &getColourRemapInfoSEIFileRoot() const           { return m_colourRemapSEIFileRoot; }
   void  setMasteringDisplaySEI(const SEIMasteringDisplay &src)       { m_masteringDisplay = src; }
 #if U0033_ALTERNATIVE_TRANSFER_CHARACTERISTICS_SEI
   void  setSEIAlternativeTransferCharacteristicsSEIEnable( bool b)   { m_alternativeTransferCharacteristicsSEIEnabled = b;    }
@@ -1228,40 +1518,115 @@ public:
   void  setSEIPreferredTransferCharacteristics(uint8_t v)              { m_preferredTransferCharacteristics = v;    }
   uint8_t getSEIPreferredTransferCharacteristics() const               { return m_preferredTransferCharacteristics; }
 #endif
-  void  setSEIGreenMetadataInfoSEIEnable( bool b)                    { m_greenMetadataInfoSEIEnabled = b;    }
-  bool  getSEIGreenMetadataInfoSEIEnable( ) const                    { return m_greenMetadataInfoSEIEnabled; }
-  void  setSEIGreenMetadataType(uint8_t v)                             { m_greenMetadataType = v;    }
-  uint8_t getSEIGreenMetadataType() const                              { return m_greenMetadataType; }
-  void  setSEIXSDMetricType(uint8_t v)                                 { m_xsdMetricType = v;    }
-  uint8_t getSEIXSDMetricType() const                                  { return m_xsdMetricType; }
-
   const SEIMasteringDisplay &getMasteringDisplaySEI() const          { return m_masteringDisplay; }
+  // film grain characteristics SEI
+  void  setFilmGrainCharactersticsSEIEnabled(bool b)                 { m_fgcSEIEnabled = b; }
+  bool  getFilmGrainCharactersticsSEIEnabled() const                 { return m_fgcSEIEnabled; }
+  void  setFilmGrainCharactersticsSEICancelFlag(bool b)              { m_fgcSEICancelFlag = b; }
+  bool  getFilmGrainCharactersticsSEICancelFlag() const              { return m_fgcSEICancelFlag; }
+  void  setFilmGrainCharactersticsSEIPersistenceFlag(bool b)         { m_fgcSEIPersistenceFlag = b; }
+  bool  getFilmGrainCharactersticsSEIPersistenceFlag() const         { return m_fgcSEIPersistenceFlag; }
+  void  setFilmGrainCharactersticsSEIModelID(uint8_t v)              { m_fgcSEIModelID = v; }
+  uint8_t getFilmGrainCharactersticsSEIModelID() const               { return m_fgcSEIModelID; }
+  void  setFilmGrainCharactersticsSEISepColourDescPresent(bool b)    { m_fgcSEISepColourDescPresentFlag = b; }
+  bool  getFilmGrainCharactersticsSEISepColourDescPresent() const    { return m_fgcSEISepColourDescPresentFlag; }
+  void  setFilmGrainCharactersticsSEIBlendingModeID(uint8_t v)       { m_fgcSEIBlendingModeID = v; }
+  uint8_t getFilmGrainCharactersticsSEIBlendingModeID() const        { return m_fgcSEIBlendingModeID; }
+  void  setFilmGrainCharactersticsSEILog2ScaleFactor(uint8_t v)      { m_fgcSEILog2ScaleFactor = v; }
+  uint8_t getFilmGrainCharactersticsSEILog2ScaleFactor() const       { return m_fgcSEILog2ScaleFactor; }
+  void  setFGCSEICompModelPresent(bool b, int index)                 { m_fgcSEICompModelPresent[index] = b; }
+  bool  getFGCSEICompModelPresent(int index) const                   { return m_fgcSEICompModelPresent[index]; }
+  // content light level (CLL) SEI
+  void  setCLLSEIEnabled(bool b)                                     { m_cllSEIEnabled = b; }
+  bool  getCLLSEIEnabled() const                                     { return m_cllSEIEnabled; }
+  void  setCLLSEIMaxContentLightLevel(uint16_t v)                    { m_cllSEIMaxContentLevel = v; }
+  uint16_t getCLLSEIMaxContentLightLevel() const                     { return m_cllSEIMaxContentLevel; }
+  void  setCLLSEIMaxPicAvgLightLevel(uint16_t v)                     { m_cllSEIMaxPicAvgLevel = v; }
+  uint16_t getCLLSEIMaxPicAvgLightLevel() const                      { return m_cllSEIMaxPicAvgLevel; }
+  // ambient viewing environment (AVE) SEI
+  void  setAmbientViewingEnvironmentSEIEnabled(bool b)               { m_aveSEIEnabled = b; }
+  bool  getAmbientViewingEnvironmentSEIEnabled() const               { return m_aveSEIEnabled; }
+  void  setAmbientViewingEnvironmentSEIIlluminance(uint32_t v)       { m_aveSEIAmbientIlluminance = v; }
+  uint32_t getAmbientViewingEnvironmentSEIIlluminance() const        { return m_aveSEIAmbientIlluminance; }
+  void  setAmbientViewingEnvironmentSEIAmbientLightX(uint16_t v)     { m_aveSEIAmbientLightX = v; }
+  uint16_t getAmbientViewingEnvironmentSEIAmbientLightX() const      { return m_aveSEIAmbientLightX; }
+  void  setAmbientViewingEnvironmentSEIAmbientLightY(uint16_t v)     { m_aveSEIAmbientLightY = v; }
+  uint16_t getAmbientViewingEnvironmentSEIAmbientLightY() const      { return m_aveSEIAmbientLightY; }
+  // ccv SEI
+  void     setCcvSEIEnabled(bool b)                                  { m_ccvSEIEnabled = b; }
+  bool     getCcvSEIEnabled() const                                  { return m_ccvSEIEnabled; }
+  void     setCcvSEICancelFlag(bool b)                               { m_ccvSEICancelFlag = b; }
+  bool     getCcvSEICancelFlag() const                               { return m_ccvSEICancelFlag; }
+  void     setCcvSEIPersistenceFlag(bool b)                          { m_ccvSEIPersistenceFlag = b; }
+  bool     getCcvSEIPersistenceFlag() const                          { return m_ccvSEIPersistenceFlag; }
+  void     setCcvSEIPrimariesPresentFlag(bool b)                     { m_ccvSEIPrimariesPresentFlag = b; }
+  bool     getCcvSEIPrimariesPresentFlag() const                     { return m_ccvSEIPrimariesPresentFlag; }
+  void     setCcvSEIMinLuminanceValuePresentFlag(bool b)             { m_ccvSEIMinLuminanceValuePresentFlag = b; }
+  bool     getCcvSEIMinLuminanceValuePresentFlag() const             { return m_ccvSEIMinLuminanceValuePresentFlag; }
+  void     setCcvSEIMaxLuminanceValuePresentFlag(bool b)             { m_ccvSEIMaxLuminanceValuePresentFlag = b; }
+  bool     getCcvSEIMaxLuminanceValuePresentFlag() const             { return m_ccvSEIMaxLuminanceValuePresentFlag; }
+  void     setCcvSEIAvgLuminanceValuePresentFlag(bool b)             { m_ccvSEIAvgLuminanceValuePresentFlag = b; }
+  bool     getCcvSEIAvgLuminanceValuePresentFlag() const             { return m_ccvSEIAvgLuminanceValuePresentFlag; }
+  void     setCcvSEIPrimariesX(double dValue, int index)             { m_ccvSEIPrimariesX[index] = dValue; }
+  double   getCcvSEIPrimariesX(int index) const                      { return m_ccvSEIPrimariesX[index]; }
+  void     setCcvSEIPrimariesY(double dValue, int index)             { m_ccvSEIPrimariesY[index] = dValue; }
+  double   getCcvSEIPrimariesY(int index) const                      { return m_ccvSEIPrimariesY[index]; }
+  void     setCcvSEIMinLuminanceValue(double dValue)                 { m_ccvSEIMinLuminanceValue = dValue; }
+  double   getCcvSEIMinLuminanceValue() const                        { return m_ccvSEIMinLuminanceValue; }
+  void     setCcvSEIMaxLuminanceValue(double dValue)                 { m_ccvSEIMaxLuminanceValue = dValue; }
+  double   getCcvSEIMaxLuminanceValue() const                        { return m_ccvSEIMaxLuminanceValue; }
+  void     setCcvSEIAvgLuminanceValue(double dValue)                 { m_ccvSEIAvgLuminanceValue = dValue; }
+  double   getCcvSEIAvgLuminanceValue() const                        { return m_ccvSEIAvgLuminanceValue; }
   void         setUseWP               ( bool b )                     { m_useWeightedPred   = b;    }
   void         setWPBiPred            ( bool b )                     { m_useWeightedBiPred = b;    }
   bool         getUseWP               ()                             { return m_useWeightedPred;   }
   bool         getWPBiPred            ()                             { return m_useWeightedBiPred; }
-  void         setLog2ParallelMergeLevelMinus2   ( uint32_t u )          { m_log2ParallelMergeLevelMinus2       = u;    }
-  uint32_t         getLog2ParallelMergeLevelMinus2   ()                  { return m_log2ParallelMergeLevelMinus2;       }
   void         setMaxNumMergeCand                ( uint32_t u )          { m_maxNumMergeCand = u;      }
   uint32_t         getMaxNumMergeCand                ()                  { return m_maxNumMergeCand;   }
   void         setMaxNumAffineMergeCand          ( uint32_t u )      { m_maxNumAffineMergeCand = u;    }
   uint32_t     getMaxNumAffineMergeCand          ()                  { return m_maxNumAffineMergeCand; }
-#if HEVC_USE_SCALING_LISTS
+  void         setMaxNumTriangleCand             ( uint32_t u )      { m_maxNumTriangleCand = u;    }
+  uint32_t     getMaxNumTriangleCand             ()                  { return m_maxNumTriangleCand; }
+  void         setMaxNumIBCMergeCand             ( uint32_t u )      { m_maxNumIBCMergeCand = u; }
+  uint32_t     getMaxNumIBCMergeCand             ()                  { return m_maxNumIBCMergeCand; }
   void         setUseScalingListId    ( ScalingListMode u )          { m_useScalingListId       = u;   }
   ScalingListMode getUseScalingListId    ()                          { return m_useScalingListId;      }
   void         setScalingListFileName       ( const std::string &s ) { m_scalingListFileName = s;      }
   const std::string& getScalingListFileName () const                 { return m_scalingListFileName;   }
-#endif
+  void         setSliceLevelRpl  ( bool b )                          { m_sliceLevelRpl = b;     }
+  bool         getSliceLevelRpl  ()                                  { return m_sliceLevelRpl;  }
+  void         setSliceLevelDblk ( bool b )                          { m_sliceLevelDblk = b;    }
+  bool         getSliceLevelDblk ()                                  { return m_sliceLevelDblk; }
+  void         setSliceLevelSao  ( bool b )                          { m_sliceLevelSao = b;     }
+  bool         getSliceLevelSao  ()                                  { return m_sliceLevelSao;  }
+  void         setSliceLevelAlf  ( bool b )                          { m_sliceLevelAlf = b;     }
+  bool         getSliceLevelAlf  ()                                  { return m_sliceLevelAlf;  }
+  void         setDisableScalingMatrixForLfnstBlks(bool u)          { m_disableScalingMatrixForLfnstBlks = u;   }
+  bool         getDisableScalingMatrixForLfnstBlks() const          { return m_disableScalingMatrixForLfnstBlks; }
   void         setTMVPModeId ( int  u )                              { m_TMVPModeId = u;    }
   int          getTMVPModeId ()                                      { return m_TMVPModeId; }
+  void         setConstantSliceHeaderParamsEnabledFlag ( bool u )    { m_constantSliceHeaderParamsEnabledFlag = u; }
+  bool         getConstantSliceHeaderParamsEnabledFlag ()            { return m_constantSliceHeaderParamsEnabledFlag; }
+  void         setPPSDepQuantEnabledIdc ( int u )                    { m_PPSDepQuantEnabledIdc = u; }
+  int          getPPSDepQuantEnabledIdc ()                           { return m_PPSDepQuantEnabledIdc; }
+  void         setPPSRefPicListSPSIdc0 ( int u )                     { m_PPSRefPicListSPSIdc0 = u; }
+  int          getPPSRefPicListSPSIdc0 ()                            { return m_PPSRefPicListSPSIdc0; }
+  void         setPPSRefPicListSPSIdc1 ( int u )                     { m_PPSRefPicListSPSIdc1 = u; }
+  int          getPPSRefPicListSPSIdc1 ()                            { return m_PPSRefPicListSPSIdc1; }
+  void         setPPSMvdL1ZeroIdc ( int u )                          { m_PPSMvdL1ZeroIdc = u; }
+  int          getPPSMvdL1ZeroIdc ()                                 { return m_PPSMvdL1ZeroIdc; }
+  void         setPPSCollocatedFromL0Idc ( int u )                   { m_PPSCollocatedFromL0Idc = u; }
+  int          getPPSCollocatedFromL0Idc ()                          { return m_PPSCollocatedFromL0Idc; }
+  void         setPPSSixMinusMaxNumMergeCandPlus1 ( uint32_t u )     { m_PPSSixMinusMaxNumMergeCandPlus1 = u; }
+  uint32_t     getPPSSixMinusMaxNumMergeCandPlus1 ()                 { return m_PPSSixMinusMaxNumMergeCandPlus1; }
+  void         setPPSMaxNumMergeCandMinusMaxNumTriangleCandPlus1 ( uint32_t u ) { m_PPSMaxNumMergeCandMinusMaxNumTriangleCandPlus1 = u; }
+  uint32_t     getPPSMaxNumMergeCandMinusMaxNumTriangleCandPlus1 ()  { return m_PPSMaxNumMergeCandMinusMaxNumTriangleCandPlus1; }
   WeightedPredictionMethod getWeightedPredictionMethod() const       { return m_weightedPredictionMethod; }
   void         setWeightedPredictionMethod( WeightedPredictionMethod m ) { m_weightedPredictionMethod = m; }
   void         setDepQuantEnabledFlag( bool b )                      { m_DepQuantEnabledFlag = b;    }
   bool         getDepQuantEnabledFlag()                              { return m_DepQuantEnabledFlag; }
-#if HEVC_USE_SIGN_HIDING
   void         setSignDataHidingEnabledFlag( bool b )                { m_SignDataHidingEnabledFlag = b;    }
   bool         getSignDataHidingEnabledFlag()                        { return m_SignDataHidingEnabledFlag; }
-#endif
   bool         getUseRateCtrl         () const                       { return m_RCEnableRateControl;   }
   void         setUseRateCtrl         ( bool b )                     { m_RCEnableRateControl = b;      }
   int          getTargetBitrate       ()                             { return m_RCTargetBitrate;       }
@@ -1284,33 +1649,31 @@ public:
   double       getInitialCpbFullness  ()                             { return m_RCInitialCpbFullness;  }
   void         setInitialCpbFullness  (double f)                     { m_RCInitialCpbFullness = f;     }
 #endif
-  bool         getTransquantBypassEnabledFlag()                      { return m_TransquantBypassEnabledFlag; }
-  void         setTransquantBypassEnabledFlag(bool flag)             { m_TransquantBypassEnabledFlag = flag; }
-  bool         getCUTransquantBypassFlagForceValue() const           { return m_CUTransquantBypassFlagForce; }
-  void         setCUTransquantBypassFlagForceValue(bool flag)        { m_CUTransquantBypassFlagForce = flag; }
   CostMode     getCostMode( ) const                                  { return m_costMode; }
   void         setCostMode(CostMode m )                              { m_costMode = m; }
 
-#if HEVC_VPS
   void         setVPS(VPS *p)                                        { m_cVPS = *p; }
   VPS *        getVPS()                                              { return &m_cVPS; }
-#endif
+  void         setDPS(DPS *p)                                        { m_dps = *p; }
+  DPS*         getDPS()                                              { return &m_dps; }
   void         setUseRecalculateQPAccordingToLambda (bool b)         { m_recalculateQPAccordingToLambda = b;    }
   bool         getUseRecalculateQPAccordingToLambda ()               { return m_recalculateQPAccordingToLambda; }
 
-#if HEVC_USE_INTRA_SMOOTHING_T32 || HEVC_USE_INTRA_SMOOTHING_T64
-  void         setUseStrongIntraSmoothing ( bool b )                 { m_useStrongIntraSmoothing = b;    }
-  bool         getUseStrongIntraSmoothing ()                         { return m_useStrongIntraSmoothing; }
-
-#endif
   void         setEfficientFieldIRAPEnabled( bool b )                { m_bEfficientFieldIRAPEnabled = b; }
   bool         getEfficientFieldIRAPEnabled( ) const                 { return m_bEfficientFieldIRAPEnabled; }
 
   void         setHarmonizeGopFirstFieldCoupleEnabled( bool b )      { m_bHarmonizeGopFirstFieldCoupleEnabled = b; }
   bool         getHarmonizeGopFirstFieldCoupleEnabled( ) const       { return m_bHarmonizeGopFirstFieldCoupleEnabled; }
 
+#if HEVC_SEI
   void         setActiveParameterSetsSEIEnabled ( int b )            { m_activeParameterSetsSEIEnabled = b; }
   int          getActiveParameterSetsSEIEnabled ()                   { return m_activeParameterSetsSEIEnabled; }
+#endif
+
+  bool         getDecodingParameterSetEnabled()                      { return m_decodingParameterSetEnabled; }
+  void         setDecodingParameterSetEnabled(bool i)                { m_decodingParameterSetEnabled = i; }
+  bool         getHrdParametersPresentFlag()                         { return m_hrdParametersPresentFlag; }
+  void         setHrdParametersPresentFlag(bool i)                   { m_hrdParametersPresentFlag = i; }
   bool         getVuiParametersPresentFlag()                         { return m_vuiParametersPresentFlag; }
   void         setVuiParametersPresentFlag(bool i)                   { m_vuiParametersPresentFlag = i; }
   bool         getAspectRatioInfoPresentFlag()                       { return m_aspectRatioInfoPresentFlag; }
@@ -1321,16 +1684,6 @@ public:
   void         setSarWidth(int i)                                    { m_sarWidth = i; }
   int          getSarHeight()                                        { return m_sarHeight; }
   void         setSarHeight(int i)                                   { m_sarHeight = i; }
-  bool         getOverscanInfoPresentFlag()                          { return m_overscanInfoPresentFlag; }
-  void         setOverscanInfoPresentFlag(bool i)                    { m_overscanInfoPresentFlag = i; }
-  bool         getOverscanAppropriateFlag()                          { return m_overscanAppropriateFlag; }
-  void         setOverscanAppropriateFlag(bool i)                    { m_overscanAppropriateFlag = i; }
-  bool         getVideoSignalTypePresentFlag()                       { return m_videoSignalTypePresentFlag; }
-  void         setVideoSignalTypePresentFlag(bool i)                 { m_videoSignalTypePresentFlag = i; }
-  int          getVideoFormat()                                      { return m_videoFormat; }
-  void         setVideoFormat(int i)                                 { m_videoFormat = i; }
-  bool         getVideoFullRangeFlag()                               { return m_videoFullRangeFlag; }
-  void         setVideoFullRangeFlag(bool i)                         { m_videoFullRangeFlag = i; }
   bool         getColourDescriptionPresentFlag()                     { return m_colourDescriptionPresentFlag; }
   void         setColourDescriptionPresentFlag(bool i)               { m_colourDescriptionPresentFlag = i; }
   int          getColourPrimaries()                                  { return m_colourPrimaries; }
@@ -1345,34 +1698,14 @@ public:
   void         setChromaSampleLocTypeTopField(int i)                 { m_chromaSampleLocTypeTopField = i; }
   int          getChromaSampleLocTypeBottomField()                   { return m_chromaSampleLocTypeBottomField; }
   void         setChromaSampleLocTypeBottomField(int i)              { m_chromaSampleLocTypeBottomField = i; }
-  bool         getNeutralChromaIndicationFlag()                      { return m_neutralChromaIndicationFlag; }
-  void         setNeutralChromaIndicationFlag(bool i)                { m_neutralChromaIndicationFlag = i; }
-  Window      &getDefaultDisplayWindow()                             { return m_defaultDisplayWindow; }
-  void         setDefaultDisplayWindow (int offsetLeft, int offsetRight, int offsetTop, int offsetBottom ) { m_defaultDisplayWindow.setWindow (offsetLeft, offsetRight, offsetTop, offsetBottom); }
-  bool         getFrameFieldInfoPresentFlag()                        { return m_frameFieldInfoPresentFlag; }
-  void         setFrameFieldInfoPresentFlag(bool i)                  { m_frameFieldInfoPresentFlag = i; }
-  bool         getPocProportionalToTimingFlag()                      { return m_pocProportionalToTimingFlag; }
-  void         setPocProportionalToTimingFlag(bool x)                { m_pocProportionalToTimingFlag = x;    }
-  int          getNumTicksPocDiffOneMinus1()                         { return m_numTicksPocDiffOneMinus1;    }
-  void         setNumTicksPocDiffOneMinus1(int x)                    { m_numTicksPocDiffOneMinus1 = x;       }
-  bool         getBitstreamRestrictionFlag()                         { return m_bitstreamRestrictionFlag; }
-  void         setBitstreamRestrictionFlag(bool i)                   { m_bitstreamRestrictionFlag = i; }
-#if HEVC_TILES_WPP
-  bool         getTilesFixedStructureFlag()                          { return m_tilesFixedStructureFlag; }
-  void         setTilesFixedStructureFlag(bool i)                    { m_tilesFixedStructureFlag = i; }
-#endif
-  bool         getMotionVectorsOverPicBoundariesFlag()               { return m_motionVectorsOverPicBoundariesFlag; }
-  void         setMotionVectorsOverPicBoundariesFlag(bool i)         { m_motionVectorsOverPicBoundariesFlag = i; }
-  int          getMinSpatialSegmentationIdc()                        { return m_minSpatialSegmentationIdc; }
-  void         setMinSpatialSegmentationIdc(int i)                   { m_minSpatialSegmentationIdc = i; }
-  int          getMaxBytesPerPicDenom()                              { return m_maxBytesPerPicDenom; }
-  void         setMaxBytesPerPicDenom(int i)                         { m_maxBytesPerPicDenom = i; }
-  int          getMaxBitsPerMinCuDenom()                             { return m_maxBitsPerMinCuDenom; }
-  void         setMaxBitsPerMinCuDenom(int i)                        { m_maxBitsPerMinCuDenom = i; }
-  int          getLog2MaxMvLengthHorizontal()                        { return m_log2MaxMvLengthHorizontal; }
-  void         setLog2MaxMvLengthHorizontal(int i)                   { m_log2MaxMvLengthHorizontal = i; }
-  int          getLog2MaxMvLengthVertical()                          { return m_log2MaxMvLengthVertical; }
-  void         setLog2MaxMvLengthVertical(int i)                     { m_log2MaxMvLengthVertical = i; }
+  int          getChromaSampleLocType()                              { return m_chromaSampleLocType; }
+  void         setChromaSampleLocType(int i)                         { m_chromaSampleLocType = i; }
+  bool         getOverscanInfoPresentFlag()                          { return m_overscanInfoPresentFlag; }
+  void         setOverscanInfoPresentFlag(bool i)                    { m_overscanInfoPresentFlag = i; }
+  bool         getOverscanAppropriateFlag()                          { return m_overscanAppropriateFlag; }
+  void         setOverscanAppropriateFlag(bool i)                    { m_overscanAppropriateFlag = i; }
+  bool         getVideoFullRangeFlag()                               { return m_videoFullRangeFlag; }
+  void         setVideoFullRangeFlag(bool i)                         { m_videoFullRangeFlag = i; }
 
   bool         getProgressiveSourceFlag() const                      { return m_progressiveSourceFlag; }
   void         setProgressiveSourceFlag(bool b)                      { m_progressiveSourceFlag = b; }
@@ -1386,27 +1719,19 @@ public:
   bool         getFrameOnlyConstraintFlag() const                    { return m_frameOnlyConstraintFlag; }
   void         setFrameOnlyConstraintFlag(bool b)                    { m_frameOnlyConstraintFlag = b; }
 
-  uint32_t         getBitDepthConstraintValue() const                    { return m_bitDepthConstraintValue; }
-  void         setBitDepthConstraintValue(uint32_t v)                    { m_bitDepthConstraintValue=v; }
-
-  ChromaFormat getChromaFormatConstraintValue() const                { return m_chromaFormatConstraintValue; }
-  void         setChromaFormatConstraintValue(ChromaFormat v)        { m_chromaFormatConstraintValue=v; }
 
   bool         getIntraConstraintFlag() const                        { return m_intraConstraintFlag; }
   void         setIntraConstraintFlag(bool b)                        { m_intraConstraintFlag=b; }
 
-  bool         getOnePictureOnlyConstraintFlag() const               { return m_onePictureOnlyConstraintFlag; }
-  void         setOnePictureOnlyConstraintFlag(bool b)               { m_onePictureOnlyConstraintFlag=b; }
-
-  bool         getLowerBitRateConstraintFlag() const                 { return m_lowerBitRateConstraintFlag; }
-  void         setLowerBitRateConstraintFlag(bool b)                 { m_lowerBitRateConstraintFlag=b; }
 
+#if HEVC_SEI
   bool         getChromaResamplingFilterHintEnabled()                { return m_chromaResamplingFilterHintEnabled;}
   void         setChromaResamplingFilterHintEnabled(bool i)          { m_chromaResamplingFilterHintEnabled = i;}
   int          getChromaResamplingHorFilterIdc()                     { return m_chromaResamplingHorFilterIdc;}
   void         setChromaResamplingHorFilterIdc(int i)                { m_chromaResamplingHorFilterIdc = i;}
   int          getChromaResamplingVerFilterIdc()                     { return m_chromaResamplingVerFilterIdc;}
   void         setChromaResamplingVerFilterIdc(int i)                { m_chromaResamplingVerFilterIdc = i;}
+#endif
 
   void         setSummaryOutFilename(const std::string &s)           { m_summaryOutFilename = s; }
   const std::string& getSummaryOutFilename() const                   { return m_summaryOutFilename; }
@@ -1443,16 +1768,45 @@ public:
   void         setForceSingleSplitThread( bool b )                   { m_forceSingleSplitThread = b; }
   int          getForceSingleSplitThread()                     const { return m_forceSingleSplitThread; }
 #endif
-#if ENABLE_WPP_PARALLELISM
-  void         setNumWppThreads( int n )                             { m_numWppThreads = n; }
-  int          getNumWppThreads()                              const { return m_numWppThreads; }
-  void         setNumWppExtraLines( int n )                          { m_numWppExtraLines = n; }
-  int          getNumWppExtraLines()                           const { return m_numWppExtraLines; }
-  void         setEnsureWppBitEqual( bool b)                         { m_ensureWppBitEqual = b; }
-  bool         getEnsureWppBitEqual()                          const { return m_ensureWppBitEqual; }
+  void         setUseALF( bool b ) { m_alf = b; }
+  bool         getUseALF()                                      const { return m_alf; }
+
+#if JVET_O0756_CALCULATE_HDRMETRICS
+  void        setWhitePointDeltaE( uint32_t index, double value )     { m_whitePointDeltaE[ index ] = value; }
+  double      getWhitePointDeltaE( uint32_t index )             const { return m_whitePointDeltaE[ index ]; }
+  void        setMaxSampleValue(double value)                         { m_maxSampleValue = value;}
+  double      getMaxSampleValue()                               const { return m_maxSampleValue;}
+  void        setSampleRange(int value)                               { m_sampleRange = static_cast<hdrtoolslib::SampleRange>(value);} // raw int is converted with static_cast; no range check is done here
+  hdrtoolslib::SampleRange getSampleRange()                     const { return m_sampleRange;}
+  void        setColorPrimaries(int value)                            { m_colorPrimaries = static_cast<hdrtoolslib::ColorPrimaries>(value);}
+  hdrtoolslib::ColorPrimaries getColorPrimaries()               const { return m_colorPrimaries;}
+  void        setEnableTFunctionLUT(bool value)                       { m_enableTFunctionLUT = value;}
+  bool        getEnableTFunctionLUT()                           const { return m_enableTFunctionLUT;}
+  void        setChromaLocation(uint32_t index, int value)            { m_chromaLocation[ index ] = static_cast<hdrtoolslib::ChromaLocation>(value);}
+  hdrtoolslib::ChromaLocation getChromaLocation(uint32_t index) const { return m_chromaLocation[index];}
+  void        setChromaUPFilter(int value)                            { m_chromaUPFilter = value;}
+  int         getChromaUPFilter()                               const { return m_chromaUPFilter;}
+  void        setCropOffsetLeft(int value)                            { m_cropOffsetLeft = value;}
+  int         getCropOffsetLeft()                               const { return m_cropOffsetLeft;}
+  void        setCropOffsetTop(int value)                             { m_cropOffsetTop = value;}
+  int         getCropOffsetTop()                                const { return m_cropOffsetTop;}
+  void        setCropOffsetRight(int value)                           { m_cropOffsetRight = value;}
+  int         getCropOffsetRight()                              const { return m_cropOffsetRight;}
+  void        setCropOffsetBottom(int value)                          { m_cropOffsetBottom = value;}
+  int         getCropOffsetBottom()                             const { return m_cropOffsetBottom;}
+  void        setCalculateHdrMetrics(bool value)                      { m_calculateHdrMetrics = value;}
+  bool        getCalcluateHdrMetrics()                          const { return m_calculateHdrMetrics;} // NOTE(review): "Calcluate" is misspelled (setter is setCalculateHdrMetrics); renaming would require updating every caller
 #endif
-  void        setUseALF( bool b ) { m_alf = b; }
-  bool        getUseALF()                                      const { return m_alf; }
+
+  void        setScalingRatio( double hor, double ver )              { m_scalingRatioHor = hor; m_scalingRatioVer = ver; } // stores the horizontal and vertical scaling ratios together
+  void        setRPREnabled( bool b )                                { m_rprEnabled = b;    }
+  bool        isRPREnabled()                                   const { return m_rprEnabled; }
+  void        setSwitchPocPeriod( int p )                            { m_switchPocPeriod = p;}
+  void        setUpscaledOutput( int b )                             { m_upscaledOutput = b; }
+  int         getUpscaledOutput()                              const { return m_upscaledOutput; }
+
+  void        setNumRefLayers( int* numRefLayers )                   { std::memcpy( m_numRefLayers, numRefLayers, sizeof( m_numRefLayers ) ); } // copies sizeof(m_numRefLayers) bytes, so the source buffer must be at least that large
+  int         getNumRefLayers( int layerIdx )                  const { return m_numRefLayers[layerIdx];  }
 };
 
 //! \}
diff --git a/source/Lib/EncoderLib/EncCu.cpp b/source/Lib/EncoderLib/EncCu.cpp
index 51f0e938634470972ecb2abdd9c31613eb26202e..c5775ca715aa414737912446f599ac95f7df8359 100644
--- a/source/Lib/EncoderLib/EncCu.cpp
+++ b/source/Lib/EncoderLib/EncCu.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -52,10 +52,6 @@
 #include <stdio.h>
 #include <cmath>
 #include <algorithm>
-#if ENABLE_WPP_PARALLELISM
-#include <mutex>
-extern std::recursive_mutex g_cache_mutex;
-#endif
 
 
 
@@ -63,9 +59,7 @@ extern std::recursive_mutex g_cache_mutex;
 //! \{
 
 // ====================================================================================================================
-// Constructor / destructor / create / destroy
-// ====================================================================================================================
-const TriangleMotionInfo  EncCu::m_triangleModeTest[TRIANGLE_MAX_NUM_CANDS] =
+EncCu::EncCu() : m_triangleModeTest
 {
   TriangleMotionInfo( 0, 1, 0 ), TriangleMotionInfo( 1, 0, 1 ), TriangleMotionInfo( 1, 0, 2 ), TriangleMotionInfo( 0, 0, 1 ), TriangleMotionInfo( 0, 2, 0 ),
   TriangleMotionInfo( 1, 0, 3 ), TriangleMotionInfo( 1, 0, 4 ), TriangleMotionInfo( 1, 1, 0 ), TriangleMotionInfo( 0, 3, 0 ), TriangleMotionInfo( 0, 4, 0 ),
@@ -75,7 +69,8 @@ const TriangleMotionInfo  EncCu::m_triangleModeTest[TRIANGLE_MAX_NUM_CANDS] =
   TriangleMotionInfo( 1, 3, 4 ), TriangleMotionInfo( 1, 4, 0 ), TriangleMotionInfo( 1, 3, 1 ), TriangleMotionInfo( 1, 2, 3 ), TriangleMotionInfo( 1, 4, 1 ),
   TriangleMotionInfo( 0, 4, 1 ), TriangleMotionInfo( 0, 2, 3 ), TriangleMotionInfo( 1, 4, 2 ), TriangleMotionInfo( 0, 3, 2 ), TriangleMotionInfo( 1, 4, 3 ),
   TriangleMotionInfo( 0, 3, 1 ), TriangleMotionInfo( 0, 2, 4 ), TriangleMotionInfo( 1, 2, 4 ), TriangleMotionInfo( 0, 4, 2 ), TriangleMotionInfo( 0, 3, 4 ),
-};
+}
+{}
 
 void EncCu::create( EncCfg* encCfg )
 {
@@ -87,11 +82,15 @@ void EncCu::create( EncCfg* encCfg )
   unsigned      numHeights    = gp_sizeIdxInfo->numHeights();
   m_pTempCS = new CodingStructure**  [numWidths];
   m_pBestCS = new CodingStructure**  [numWidths];
+  m_pTempCS2 = new CodingStructure** [numWidths];
+  m_pBestCS2 = new CodingStructure** [numWidths];
 
   for( unsigned w = 0; w < numWidths; w++ )
   {
     m_pTempCS[w] = new CodingStructure*  [numHeights];
     m_pBestCS[w] = new CodingStructure*  [numHeights];
+    m_pTempCS2[w] = new CodingStructure* [numHeights];
+    m_pBestCS2[w] = new CodingStructure* [numHeights];
 
     for( unsigned h = 0; h < numHeights; h++ )
     {
@@ -103,13 +102,21 @@ void EncCu::create( EncCfg* encCfg )
         m_pTempCS[w][h] = new CodingStructure( m_unitCache.cuCache, m_unitCache.puCache, m_unitCache.tuCache );
         m_pBestCS[w][h] = new CodingStructure( m_unitCache.cuCache, m_unitCache.puCache, m_unitCache.tuCache );
 
-        m_pTempCS[w][h]->create( chromaFormat, Area( 0, 0, width, height ), false );
-        m_pBestCS[w][h]->create( chromaFormat, Area( 0, 0, width, height ), false );
+        m_pTempCS[w][h]->create(chromaFormat, Area(0, 0, width, height), false, (bool)encCfg->getPLTMode());
+        m_pBestCS[w][h]->create(chromaFormat, Area(0, 0, width, height), false, (bool)encCfg->getPLTMode());
+
+        m_pTempCS2[w][h] = new CodingStructure( m_unitCache.cuCache, m_unitCache.puCache, m_unitCache.tuCache );
+        m_pBestCS2[w][h] = new CodingStructure( m_unitCache.cuCache, m_unitCache.puCache, m_unitCache.tuCache );
+
+        m_pTempCS2[w][h]->create(chromaFormat, Area(0, 0, width, height), false, (bool)encCfg->getPLTMode());
+        m_pBestCS2[w][h]->create(chromaFormat, Area(0, 0, width, height), false, (bool)encCfg->getPLTMode());
       }
       else
       {
         m_pTempCS[w][h] = nullptr;
         m_pBestCS[w][h] = nullptr;
+        m_pTempCS2[w][h] = nullptr;
+        m_pBestCS2[w][h] = nullptr;
       }
     }
   }
@@ -129,17 +136,38 @@ void EncCu::create( EncCfg* encCfg )
   for (unsigned ui = 0; ui < MRG_MAX_NUM_CANDS; ui++)
   {
     m_acRealMergeBuffer[ui].create(chromaFormat, Area(0, 0, uiMaxWidth, uiMaxHeight));
+    m_acMergeTmpBuffer[ui].create(chromaFormat, Area(0, 0, uiMaxWidth, uiMaxHeight));
   }
-  for( unsigned ui = 0; ui < TRIANGLE_MAX_NUM_UNI_CANDS; ui++ )
+  const unsigned maxNumTriangleCand = encCfg->getMaxNumTriangleCand();
+  for (unsigned i = 0; i < maxNumTriangleCand; i++)
   {
-    for( unsigned uj = 0; uj < TRIANGLE_MAX_NUM_UNI_CANDS; uj++ )
+    for (unsigned j = 0; j < maxNumTriangleCand; j++)
     {
-      if(ui == uj)
+      if (i == j)
         continue;
-      uint8_t idxBits0 = ui + (ui == TRIANGLE_MAX_NUM_UNI_CANDS - 1 ? 0 : 1);
-      uint8_t candIdx1Enc = uj - (uj > ui ? 1 : 0);
-      uint8_t idxBits1 = candIdx1Enc + (candIdx1Enc == TRIANGLE_MAX_NUM_UNI_CANDS - 2 ? 0 : 1);
-      m_triangleIdxBins[1][ui][uj] = m_triangleIdxBins[0][ui][uj] = 1 + idxBits0 + idxBits1;
+      uint8_t idxBits0 = i + (i == maxNumTriangleCand - 1 ? 0 : 1);
+      uint8_t candIdx1Enc = j - (j > i ? 1 : 0);
+      uint8_t idxBits1 = candIdx1Enc + (candIdx1Enc == maxNumTriangleCand - 2 ? 0 : 1);
+      m_triangleIdxBins[1][i][j] = m_triangleIdxBins[0][i][j] = 1 + idxBits0 + idxBits1;
+    }
+  }
+  if (maxNumTriangleCand != 5)
+  {
+    // update the table
+    int index = 0;
+    for (unsigned i = 0; i < maxNumTriangleCand; i++)
+    {
+      for (unsigned j = 0; j < maxNumTriangleCand; j++)
+      {
+        if (i == j)
+          continue;
+        for (unsigned dir = 0; dir < 2; dir++, index++)
+        {
+          m_triangleModeTest[index].m_splitDir = dir;
+          m_triangleModeTest[index].m_candIdx0 = i;
+          m_triangleModeTest[index].m_candIdx1 = j;
+        }
+      }
     }
   }
   for( unsigned ui = 0; ui < TRIANGLE_MAX_NUM_CANDS; ui++ )
@@ -166,14 +194,24 @@ void EncCu::destroy()
 
       delete m_pBestCS[w][h];
       delete m_pTempCS[w][h];
+
+      if( m_pBestCS2[w][h] ) m_pBestCS2[w][h]->destroy();
+      if( m_pTempCS2[w][h] ) m_pTempCS2[w][h]->destroy();
+
+      delete m_pBestCS2[w][h];
+      delete m_pTempCS2[w][h];
     }
 
     delete[] m_pTempCS[w];
     delete[] m_pBestCS[w];
+    delete[] m_pTempCS2[w];
+    delete[] m_pBestCS2[w];
   }
 
   delete[] m_pBestCS; m_pBestCS = nullptr;
   delete[] m_pTempCS; m_pTempCS = nullptr;
+  delete[] m_pBestCS2; m_pBestCS2 = nullptr;
+  delete[] m_pTempCS2; m_pTempCS2 = nullptr;
 
 #if REUSE_CU_RESULTS
   if (m_tmpStorageLCU)
@@ -197,6 +235,7 @@ void EncCu::destroy()
   for (unsigned ui = 0; ui < MRG_MAX_NUM_CANDS; ui++)
   {
     m_acRealMergeBuffer[ui].destroy();
+    m_acMergeTmpBuffer[ui].destroy();
   }
   for( unsigned ui = 0; ui < TRIANGLE_MAX_NUM_CANDS; ui++ )
   {
@@ -226,31 +265,20 @@ void EncCu::init( EncLib* pcEncLib, const SPS& sps PARL_PARAM( const int tId ) )
   m_CtxCache           = pcEncLib->getCtxCache( PARL_PARAM0( tId ) );
   m_pcRateCtrl         = pcEncLib->getRateCtrl();
   m_pcSliceEncoder     = pcEncLib->getSliceEncoder();
-#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM
+#if ENABLE_SPLIT_PARALLELISM
   m_pcEncLib           = pcEncLib;
   m_dataId             = tId;
 #endif
   m_pcLoopFilter       = pcEncLib->getLoopFilter();
-  m_shareState = NO_SHARE;
-  m_pcInterSearch->setShareState(0);
-  setShareStateDec(0);
-
-  m_shareBndPosX = -1;
-  m_shareBndPosY = -1;
-  m_shareBndSizeW = 0;
-  m_shareBndSizeH = 0;
 
   DecCu::init( m_pcTrQuant, m_pcIntraSearch, m_pcInterSearch );
 
   m_modeCtrl->init( m_pcEncCfg, m_pcRateCtrl, m_pcRdCost );
 
   m_pcInterSearch->setModeCtrl( m_modeCtrl );
+  m_modeCtrl->setInterSearch(m_pcInterSearch);
   m_pcIntraSearch->setModeCtrl( m_modeCtrl );
 
-  if ( ( m_pcEncCfg->getIBCHashSearch() && m_pcEncCfg->getIBCMode() ) || m_pcEncCfg->getAllowDisFracMMVD() )
-  {
-    m_ibcHashMap.init(m_pcEncCfg->getSourceWidth(), m_pcEncCfg->getSourceHeight());
-  }
 }
 
 // ====================================================================================================================
@@ -260,7 +288,9 @@ void EncCu::init( EncLib* pcEncLib, const SPS& sps PARL_PARAM( const int tId ) )
 void EncCu::compressCtu( CodingStructure& cs, const UnitArea& area, const unsigned ctuRsAddr, const int prevQP[], const int currQP[] )
 {
   m_modeCtrl->initCTUEncoding( *cs.slice );
+  cs.treeType = TREE_D;
 
+  cs.slice->m_mapPltCost.clear();
 #if ENABLE_SPLIT_PARALLELISM
   if( m_pcEncCfg->getNumSplitThreads() > 1 )
   {
@@ -295,8 +325,8 @@ void EncCu::compressCtu( CodingStructure& cs, const UnitArea& area, const unsign
   if( auto* cacheCtrl = dynamic_cast<CacheBlkInfoCtrl*>( m_modeCtrl ) ) { cacheCtrl->tick(); }
 #endif
   // init the partitioning manager
-  Partitioner *partitioner = PartitionerFactory::get( *cs.slice );
-  partitioner->initCtu( area, CH_L, *cs.slice );
+  QTBTPartitioner partitioner;
+  partitioner.initCtu(area, CH_L, *cs.slice);
   if (m_pcEncCfg->getIBCMode())
   {
     if (area.lx() == 0 && area.ly() == 0)
@@ -327,34 +357,36 @@ void EncCu::compressCtu( CodingStructure& cs, const UnitArea& area, const unsign
   CodingStructure *tempCS = m_pTempCS[gp_sizeIdxInfo->idxFrom( area.lumaSize().width )][gp_sizeIdxInfo->idxFrom( area.lumaSize().height )];
   CodingStructure *bestCS = m_pBestCS[gp_sizeIdxInfo->idxFrom( area.lumaSize().width )][gp_sizeIdxInfo->idxFrom( area.lumaSize().height )];
 
-  cs.initSubStructure( *tempCS, partitioner->chType, partitioner->currArea(), false );
-  cs.initSubStructure( *bestCS, partitioner->chType, partitioner->currArea(), false );
+  cs.initSubStructure(*tempCS, partitioner.chType, partitioner.currArea(), false);
+  cs.initSubStructure(*bestCS, partitioner.chType, partitioner.currArea(), false);
   tempCS->currQP[CH_L] = bestCS->currQP[CH_L] =
   tempCS->baseQP       = bestCS->baseQP       = currQP[CH_L];
   tempCS->prevQP[CH_L] = bestCS->prevQP[CH_L] = prevQP[CH_L];
 
-  xCompressCU( tempCS, bestCS, *partitioner );
-
+  xCompressCU(tempCS, bestCS, partitioner);
+  cs.slice->m_mapPltCost.clear();
   // all signals were already copied during compression if the CTU was split - at this point only the structures are copied to the top level CS
   const bool copyUnsplitCTUSignals = bestCS->cus.size() == 1;
-  cs.useSubStructure( *bestCS, partitioner->chType, CS::getArea( *bestCS, area, partitioner->chType ), copyUnsplitCTUSignals, false, false, copyUnsplitCTUSignals );
+  cs.useSubStructure(*bestCS, partitioner.chType, CS::getArea(*bestCS, area, partitioner.chType), copyUnsplitCTUSignals,
+                     false, false, copyUnsplitCTUSignals);
 
   if (CS::isDualITree (cs) && isChromaEnabled (cs.pcv->chrFormat))
   {
     m_CABACEstimator->getCtx() = m_CurrCtx->start;
 
-    partitioner->initCtu( area, CH_C, *cs.slice );
+    partitioner.initCtu(area, CH_C, *cs.slice);
 
-    cs.initSubStructure( *tempCS, partitioner->chType, partitioner->currArea(), false );
-    cs.initSubStructure( *bestCS, partitioner->chType, partitioner->currArea(), false );
+    cs.initSubStructure(*tempCS, partitioner.chType, partitioner.currArea(), false);
+    cs.initSubStructure(*bestCS, partitioner.chType, partitioner.currArea(), false);
     tempCS->currQP[CH_C] = bestCS->currQP[CH_C] =
     tempCS->baseQP       = bestCS->baseQP       = currQP[CH_C];
     tempCS->prevQP[CH_C] = bestCS->prevQP[CH_C] = prevQP[CH_C];
 
-    xCompressCU( tempCS, bestCS, *partitioner );
+    xCompressCU(tempCS, bestCS, partitioner);
 
     const bool copyUnsplitCTUSignals = bestCS->cus.size() == 1;
-    cs.useSubStructure( *bestCS, partitioner->chType, CS::getArea( *bestCS, area, partitioner->chType ), copyUnsplitCTUSignals, false, false, copyUnsplitCTUSignals );
+    cs.useSubStructure(*bestCS, partitioner.chType, CS::getArea(*bestCS, area, partitioner.chType),
+                       copyUnsplitCTUSignals, false, false, copyUnsplitCTUSignals);
   }
 
   if (m_pcEncCfg->getUseRateCtrl())
@@ -364,14 +396,7 @@ void EncCu::compressCtu( CodingStructure& cs, const UnitArea& area, const unsign
   // reset context states and uninit context pointer
   m_CABACEstimator->getCtx() = m_CurrCtx->start;
   m_CurrCtx                  = 0;
-  delete partitioner;
 
-#if ENABLE_SPLIT_PARALLELISM && ENABLE_WPP_PARALLELISM
-  if( m_pcEncCfg->getNumSplitThreads() > 1 && m_pcEncCfg->getNumWppThreads() > 1 )
-  {
-    cs.picture->finishCtuPart( area );
-  }
-#endif
 
   // Ensure that a coding was found
   // Selected mode's RD-cost must be not MAX_DOUBLE.
@@ -517,16 +542,6 @@ bool EncCu::xCheckBestMode( CodingStructure *&tempCS, CodingStructure *&bestCS,
 
     if( m_modeCtrl->useModeResult( encTestMode, tempCS, partitioner ) )
     {
-      if( tempCS->cus.size() == 1 )
-      {
-        // if tempCS is not a split-mode
-        CodingUnit &cu = *tempCS->cus.front();
-
-        if( CU::isLosslessCoded( cu ) && !cu.ipcm )
-        {
-          xFillPCMBuffer( cu );
-        }
-      }
 
       std::swap( tempCS, bestCS );
       // store temp best CI for next CU coding
@@ -542,17 +557,9 @@ bool EncCu::xCheckBestMode( CodingStructure *&tempCS, CodingStructure *&bestCS,
 
 }
 
-void EncCu::xCompressCU( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner )
+void EncCu::xCompressCU( CodingStructure*& tempCS, CodingStructure*& bestCS, Partitioner& partitioner, double maxCostAllowed )
 {
-  if (m_shareState == NO_SHARE)
-  {
-    tempCS->sharedBndPos = tempCS->area.Y().lumaPos();
-    tempCS->sharedBndSize.width = tempCS->area.lwidth();
-    tempCS->sharedBndSize.height = tempCS->area.lheight();
-    bestCS->sharedBndPos = bestCS->area.Y().lumaPos();
-    bestCS->sharedBndSize.width = bestCS->area.lwidth();
-    bestCS->sharedBndSize.height = bestCS->area.lheight();
-  }
+  CHECK(maxCostAllowed < 0, "Wrong value of maxCostAllowed!");
 #if ENABLE_SPLIT_PARALLELISM
   CHECK( m_dataId != tempCS->picture->scheduler.getDataId(), "Working in the wrong dataId!" );
 
@@ -567,6 +574,40 @@ void EncCu::xCompressCU( CodingStructure *&tempCS, CodingStructure *&bestCS, Par
   }
 
 #endif
+  uint32_t compBegin;
+  uint32_t numComp;
+  bool jointPLT = false;
+  if (partitioner.isSepTree( *tempCS ))
+  {
+    if (isLuma(partitioner.chType))
+    {
+      compBegin = COMPONENT_Y;
+      numComp = 1;
+    }
+    else
+    {
+      compBegin = COMPONENT_Cb;
+      numComp = 2;
+    }
+  }
+  else
+  {
+    compBegin = COMPONENT_Y;
+    numComp = 3;
+    jointPLT = true;
+  }
+  SplitSeries splitmode = -1;
+  uint8_t   bestLastPLTSize[MAX_NUM_CHANNEL_TYPE];
+  Pel       bestLastPLT[MAX_NUM_COMPONENT][MAXPLTPREDSIZE]; // store LastPLT of the best split (partitioned) result
+  uint8_t   curLastPLTSize[MAX_NUM_CHANNEL_TYPE];
+  Pel       curLastPLT[MAX_NUM_COMPONENT][MAXPLTPREDSIZE]; // store LastPLT if no partition
+  for (int i = compBegin; i < (compBegin + numComp); i++)
+  {
+    ComponentID comID = jointPLT ? (ComponentID)compBegin : ((i > 0) ? COMPONENT_Cb : COMPONENT_Y);
+    bestLastPLTSize[comID] = 0;
+    curLastPLTSize[comID] = tempCS->prevPLT.curPLTSize[comID];
+    memcpy(curLastPLT[i], tempCS->prevPLT.curPLT[i], tempCS->prevPLT.curPLTSize[comID] * sizeof(Pel));
+  }
 
   Slice&   slice      = *tempCS->slice;
   const PPS &pps      = *tempCS->pps;
@@ -574,15 +615,16 @@ void EncCu::xCompressCU( CodingStructure *&tempCS, CodingStructure *&bestCS, Par
   const uint32_t uiLPelX  = tempCS->area.Y().lumaPos().x;
   const uint32_t uiTPelY  = tempCS->area.Y().lumaPos().y;
 
+  const ModeType modeTypeParent  = partitioner.modeType;
+  const TreeType treeTypeParent  = partitioner.treeType;
+  const ChannelType chTypeParent = partitioner.chType;
   const UnitArea currCsArea = clipArea( CS::getArea( *bestCS, bestCS->area, partitioner.chType ), *tempCS->picture );
 
-  tempCS->chType = partitioner.chType;
-  bestCS->chType = partitioner.chType;
   m_modeCtrl->initCULevel( partitioner, *tempCS );
   if( partitioner.currQtDepth == 0 && partitioner.currMtDepth == 0 && !tempCS->slice->isIntra() && ( sps.getUseSBT() || sps.getUseInterMTS() ) )
   {
     auto slsSbt = dynamic_cast<SaveLoadEncInfoSbt*>( m_modeCtrl );
-    int maxSLSize = sps.getUseSBT() ? tempCS->slice->getSPS()->getMaxSbtSize() : MTS_INTER_MAX_CU_SIZE;
+    int maxSLSize = sps.getUseSBT() ? tempCS->slice->getSPS()->getMaxTbSize() : MTS_INTER_MAX_CU_SIZE;
     slsSbt->resetSaveloadSbt( maxSLSize );
 #if ENABLE_SPLIT_PARALLELISM
     CHECK( tempCS->picture->scheduler.getSplitJobId() != 0, "The SBT search reset need to happen in sequential region." );
@@ -606,8 +648,8 @@ void EncCu::xCompressCU( CodingStructure *&tempCS, CodingStructure *&bestCS, Par
   {
     // TODO M0133 : double check encoder decisions with respect to chroma QG detection and actual encode
     int lgMinCuSize = sps.getLog2MinCodingBlockSize() +
-      std::max<int>( 0, sps.getLog2DiffMaxMinCodingBlockSize() - int( pps.getPpsRangeExtension().getCuChromaQpOffsetSubdiv()/2 ) );
-    m_cuChromaQpOffsetIdxPlus1 = ( ( uiLPelX >> lgMinCuSize ) + ( uiTPelY >> lgMinCuSize ) ) % ( pps.getPpsRangeExtension().getChromaQpOffsetListLen() + 1 );
+      std::max<int>( 0, sps.getLog2DiffMaxMinCodingBlockSize() - int( slice.getCuChromaQpOffsetSubdiv()/2 ) );
+    m_cuChromaQpOffsetIdxPlus1 = ( ( uiLPelX >> lgMinCuSize ) + ( uiTPelY >> lgMinCuSize ) ) % ( pps.getChromaQpOffsetListLen() + 1 );
   }
 
   if( !m_modeCtrl->anyMode() )
@@ -623,14 +665,45 @@ void EncCu::xCompressCU( CodingStructure *&tempCS, CodingStructure *&bestCS, Par
   DTRACE( g_trace_ctx, D_COMMON, "@(%4d,%4d) [%2dx%2d]\n", tempCS->area.lx(), tempCS->area.ly(), tempCS->area.lwidth(), tempCS->area.lheight() );
 
 
-  int startShareThisLevel = 0;
   m_pcInterSearch->resetSavedAffineMotion();
 
+  double bestIntPelCost = MAX_DOUBLE;
+
+  if (tempCS->slice->getSPS()->getUseColorTrans())
+  {
+    tempCS->tmpColorSpaceCost = MAX_DOUBLE;
+    bestCS->tmpColorSpaceCost = MAX_DOUBLE;
+    tempCS->firstColorSpaceSelected = true;
+    bestCS->firstColorSpaceSelected = true;
+  }
+
+  if (tempCS->slice->getSPS()->getUseColorTrans() && !CS::isDualITree(*tempCS))
+  {
+    tempCS->firstColorSpaceTestOnly = false;
+    bestCS->firstColorSpaceTestOnly = false;
+    tempCS->tmpColorSpaceIntraCost[0] = MAX_DOUBLE;
+    tempCS->tmpColorSpaceIntraCost[1] = MAX_DOUBLE;
+    bestCS->tmpColorSpaceIntraCost[0] = MAX_DOUBLE;
+    bestCS->tmpColorSpaceIntraCost[1] = MAX_DOUBLE;
+
+    if (tempCS->bestParent && tempCS->bestParent->firstColorSpaceTestOnly)
+    {
+      tempCS->firstColorSpaceTestOnly = bestCS->firstColorSpaceTestOnly = true;
+    }
+  }
+
   do
   {
+    for (int i = compBegin; i < (compBegin + numComp); i++)
+    {
+      ComponentID comID = jointPLT ? (ComponentID)compBegin : ((i > 0) ? COMPONENT_Cb : COMPONENT_Y);
+      tempCS->prevPLT.curPLTSize[comID] = curLastPLTSize[comID];
+      memcpy(tempCS->prevPLT.curPLT[i], curLastPLT[i], curLastPLTSize[comID] * sizeof(Pel));
+    }
     EncTestMode currTestMode = m_modeCtrl->currTestMode();
+    currTestMode.maxCostAllowed = maxCostAllowed;
 
-    if (pps.getUseDQP() && CS::isDualITree(*tempCS) && isChroma(partitioner.chType))
+    if (pps.getUseDQP() && partitioner.isSepTree(*tempCS) && isChroma( partitioner.chType ))
     {
       const Position chromaCentral(tempCS->area.Cb().chromaPos().offset(tempCS->area.Cb().chromaSize().width >> 1, tempCS->area.Cb().chromaSize().height >> 1));
       const Position lumaRefPos(chromaCentral.x << getComponentScaleX(COMPONENT_Cb, tempCS->area.chromaFormat), chromaCentral.y << getComponentScaleY(COMPONENT_Cb, tempCS->area.chromaFormat));
@@ -660,7 +733,11 @@ void EncCu::xCompressCU( CodingStructure *&tempCS, CodingStructure *&bestCS, Par
 #endif
       if (currTestMode.qp >= 0)
       {
-        updateLambda (&slice, currTestMode.qp, CS::isDualITree (*tempCS) || (partitioner.currDepth == 0));
+        updateLambda (&slice, currTestMode.qp,
+ #if WCG_EXT && ER_CHROMA_QP_WCG_PPS
+                      m_pcEncCfg->getWCGChromaQPControl().isEnabled(),
+ #endif
+                      CS::isDualITree (*tempCS) || (partitioner.currDepth == 0));
       }
     }
 #endif
@@ -669,9 +746,13 @@ void EncCu::xCompressCU( CodingStructure *&tempCS, CodingStructure *&bestCS, Par
     {
       if( ( currTestMode.opts & ETO_IMV ) != 0 )
       {
-        tempCS->bestCS = bestCS;
-        xCheckRDCostInterIMV( tempCS, bestCS, partitioner, currTestMode );
-        tempCS->bestCS = nullptr;
+        const bool skipAltHpelIF = ( int( ( currTestMode.opts & ETO_IMV ) >> ETO_IMV_SHIFT ) == 4 ) && ( bestIntPelCost > 1.25 * bestCS->cost );
+        if (!skipAltHpelIF)
+        {
+          tempCS->bestCS = bestCS;
+          xCheckRDCostInterIMV(tempCS, bestCS, partitioner, currTestMode, bestIntPelCost);
+          tempCS->bestCS = nullptr;
+        }
       }
       else
       {
@@ -708,11 +789,40 @@ void EncCu::xCompressCU( CodingStructure *&tempCS, CodingStructure *&bestCS, Par
     }
     else if( currTestMode.type == ETM_INTRA )
     {
-      xCheckRDCostIntra( tempCS, bestCS, partitioner, currTestMode );
+      if (slice.getSPS()->getUseColorTrans() && !CS::isDualITree(*tempCS))
+      {
+        bool skipSecColorSpace = false;
+        skipSecColorSpace = xCheckRDCostIntra(tempCS, bestCS, partitioner, currTestMode, (m_pcEncCfg->getRGBFormatFlag() ? true : false));
+        
+        if (!skipSecColorSpace && !tempCS->firstColorSpaceTestOnly)
+        {
+          xCheckRDCostIntra(tempCS, bestCS, partitioner, currTestMode, (m_pcEncCfg->getRGBFormatFlag() ? false : true));
+        }
+
+        if (!tempCS->firstColorSpaceTestOnly)
+        {
+          if (tempCS->tmpColorSpaceIntraCost[0] != MAX_DOUBLE && tempCS->tmpColorSpaceIntraCost[1] != MAX_DOUBLE)
+          {
+            double skipCostRatio = m_pcEncCfg->getRGBFormatFlag() ? 1.1 : 1.0;
+            if (tempCS->tmpColorSpaceIntraCost[1] > (skipCostRatio*tempCS->tmpColorSpaceIntraCost[0]))
+            {
+              tempCS->firstColorSpaceTestOnly = bestCS->firstColorSpaceTestOnly = true;
+            }
+          }
+        }
+        else
+        {
+          CHECK(tempCS->tmpColorSpaceIntraCost[1] != MAX_DOUBLE, "the RD test of the second color space should be skipped");
+        }
+      }
+      else
+      {
+        xCheckRDCostIntra(tempCS, bestCS, partitioner, currTestMode, false);
+      }
     }
-    else if( currTestMode.type == ETM_IPCM )
+    else if (currTestMode.type == ETM_PALETTE)
     {
-      xCheckIntraPCM( tempCS, bestCS, partitioner, currTestMode );
+      xCheckPLT( tempCS, bestCS, partitioner, currTestMode );
     }
     else if (currTestMode.type == ETM_IBC)
     {
@@ -724,8 +834,78 @@ void EncCu::xCompressCU( CodingStructure *&tempCS, CodingStructure *&bestCS, Par
     }
     else if( isModeSplit( currTestMode ) )
     {
+      if (bestCS->cus.size() != 0)
+      {
+        splitmode = bestCS->cus[0]->splitSeries;
+      }
+      assert( partitioner.modeType == tempCS->modeType );
+      int signalModeConsVal = tempCS->signalModeCons( getPartSplit( currTestMode ), partitioner, modeTypeParent );
+      int numRoundRdo = signalModeConsVal == LDT_MODE_TYPE_SIGNAL ? 2 : 1;
+      bool skipInterPass = false;
+      for( int i = 0; i < numRoundRdo; i++ )
+      {
+        // select the mode constraint (modeType) to be tested in this RDO round
+        if( signalModeConsVal == LDT_MODE_TYPE_SIGNAL )
+        {
+          CHECK( numRoundRdo != 2, "numRoundRdo shall be 2 - [LDT_MODE_TYPE_SIGNAL]" );
+          tempCS->modeType = partitioner.modeType = (i == 0) ? MODE_TYPE_INTER : MODE_TYPE_INTRA;
+        }
+        else if( signalModeConsVal == LDT_MODE_TYPE_INFER )
+        {
+          CHECK( numRoundRdo != 1, "numRoundRdo shall be 1 - [LDT_MODE_TYPE_INFER]" );
+          tempCS->modeType = partitioner.modeType = MODE_TYPE_INTRA;
+        }
+        else if( signalModeConsVal == LDT_MODE_TYPE_INHERIT )
+        {
+          CHECK( numRoundRdo != 1, "numRoundRdo shall be 1 - [LDT_MODE_TYPE_INHERIT]" );
+          tempCS->modeType = partitioner.modeType = modeTypeParent;
+        }
 
-      xCheckModeSplit( tempCS, bestCS, partitioner, currTestMode );
+        // for the lite intra encoding fast algorithm: set whether CU costs are saved (enabled only for the inter-only pass)
+        if( modeTypeParent == MODE_TYPE_ALL && tempCS->modeType == MODE_TYPE_INTER )
+        {
+          m_pcIntraSearch->setSaveCuCostInSCIPU( true );
+          m_pcIntraSearch->setNumCuInSCIPU( 0 );
+        }
+        else if( modeTypeParent == MODE_TYPE_ALL && tempCS->modeType != MODE_TYPE_INTER )
+        {
+          m_pcIntraSearch->setSaveCuCostInSCIPU( false );
+          if( tempCS->modeType == MODE_TYPE_ALL )
+          {
+            m_pcIntraSearch->setNumCuInSCIPU( 0 );
+          }
+        }
+
+        xCheckModeSplit( tempCS, bestCS, partitioner, currTestMode, modeTypeParent, skipInterPass );
+        // restore the parent's mode type, tree type and channel type
+        tempCS->modeType = partitioner.modeType = modeTypeParent;
+        tempCS->treeType = partitioner.treeType = treeTypeParent;
+        partitioner.chType = chTypeParent;
+        if( modeTypeParent == MODE_TYPE_ALL )
+        {
+          m_pcIntraSearch->setSaveCuCostInSCIPU( false );
+          if( numRoundRdo == 2 && tempCS->modeType == MODE_TYPE_INTRA )
+          {
+            m_pcIntraSearch->initCuAreaCostInSCIPU();
+          }
+        }
+        if( skipInterPass )
+        {
+          break;
+        }
+      }
+      if (splitmode != bestCS->cus[0]->splitSeries)
+      {
+        splitmode = bestCS->cus[0]->splitSeries;
+        const CodingUnit&     cu = *bestCS->cus.front();
+        cu.cs->prevPLT = bestCS->prevPLT;
+        for (int i = compBegin; i < (compBegin + numComp); i++)
+        {
+          ComponentID comID = jointPLT ? (ComponentID)compBegin : ((i > 0) ? COMPONENT_Cb : COMPONENT_Y);
+          bestLastPLTSize[comID] = bestCS->cus[0]->cs->prevPLT.curPLTSize[comID];
+          memcpy(bestLastPLT[i], bestCS->cus[0]->cs->prevPLT.curPLT[i], bestCS->cus[0]->cs->prevPLT.curPLTSize[comID] * sizeof(Pel));
+        }
+      }
     }
     else
     {
@@ -733,12 +913,6 @@ void EncCu::xCompressCU( CodingStructure *&tempCS, CodingStructure *&bestCS, Par
     }
   } while( m_modeCtrl->nextMode( *tempCS, partitioner ) );
 
-  if(startShareThisLevel == 1)
-  {
-    m_shareState = NO_SHARE;
-    m_pcInterSearch->setShareState(m_shareState);
-    setShareStateDec(m_shareState);
-  }
 
   //////////////////////////////////////////////////////////////////////////
   // Finishing CU
@@ -753,30 +927,47 @@ void EncCu::xCompressCU( CodingStructure *&tempCS, CodingStructure *&bestCS, Par
   }
 
 #endif
+  if( tempCS->cost == MAX_DOUBLE && bestCS->cost == MAX_DOUBLE )
+  {
+    // Although some coding modes were scheduled for RDO, none of them finished encoding because of early termination,
+    // so tempCS->cost and bestCS->cost are both MAX_DOUBLE; in that case skip the regular CU finishing steps below.
+    m_modeCtrl->finishCULevel( partitioner );
+    return;
+  }
+
   // set context states
   m_CABACEstimator->getCtx() = m_CurrCtx->best;
 
   // QP from last processed CU for further processing
+  //copy the qp of the last non-chroma CU
+  int numCUInThisNode = (int)bestCS->cus.size();
+  if( numCUInThisNode > 1 && bestCS->cus.back()->chType == CHANNEL_TYPE_CHROMA && !CS::isDualITree( *bestCS ) )
+  {
+    CHECK( bestCS->cus[numCUInThisNode-2]->chType != CHANNEL_TYPE_LUMA, "wrong chType" );
+    bestCS->prevQP[partitioner.chType] = bestCS->cus[numCUInThisNode-2]->qp;
+  }
+  else
+  {
   bestCS->prevQP[partitioner.chType] = bestCS->cus.back()->qp;
+  }
   if ((!slice.isIntra() || slice.getSPS()->getIBCFlag())
-    && bestCS->chType == CHANNEL_TYPE_LUMA
+    && partitioner.chType == CHANNEL_TYPE_LUMA
     && bestCS->cus.size() == 1 && (bestCS->cus.back()->predMode == MODE_INTER || bestCS->cus.back()->predMode == MODE_IBC)
     && bestCS->area.Y() == (*bestCS->cus.back()).Y()
     )
   {
     const CodingUnit&     cu = *bestCS->cus.front();
-    const PredictionUnit& pu = *cu.firstPU;
 
-    if (!cu.affine && !cu.triangle)
-    {
-      MotionInfo mi = pu.getMotionInfo();
-      mi.GBiIdx = (mi.interDir == 3) ? cu.GBiIdx : GBI_DEFAULT;
-      cu.cs->addMiToLut(CU::isIBC(cu) ? cu.cs->motionLut.lutIbc : cu.cs->motionLut.lut, mi);
-    }
+    bool isIbcSmallBlk = CU::isIBC(cu) && (cu.lwidth() * cu.lheight() <= 16);
+    CU::saveMotionInHMVP( cu, isIbcSmallBlk );
   }
   bestCS->picture->getPredBuf(currCsArea).copyFrom(bestCS->getPredBuf(currCsArea));
   bestCS->picture->getRecoBuf( currCsArea ).copyFrom( bestCS->getRecoBuf( currCsArea ) );
   m_modeCtrl->finishCULevel( partitioner );
+  if( m_pcIntraSearch->getSaveCuCostInSCIPU() && bestCS->cus.size() == 1 )
+  {
+    m_pcIntraSearch->saveCuAreaCostInSCIPU( Area( partitioner.currArea().lumaPos(), partitioner.currArea().lumaSize() ), bestCS->cost );
+  }
 
 #if ENABLE_SPLIT_PARALLELISM
   if( tempCS->picture->scheduler.getSplitJobId() == 0 && m_pcEncCfg->getNumSplitThreads() != 1 )
@@ -785,94 +976,77 @@ void EncCu::xCompressCU( CodingStructure *&tempCS, CodingStructure *&bestCS, Par
   }
 
 #endif
-  // Assert if Best prediction mode is NONE
-  // Selected mode's RD-cost must be not MAX_DOUBLE.
-  CHECK( bestCS->cus.empty()                                   , "No possible encoding found" );
-  CHECK( bestCS->cus[0]->predMode == NUMBER_OF_PREDICTION_MODES, "No possible encoding found" );
-  CHECK( bestCS->cost             == MAX_DOUBLE                , "No possible encoding found" );
-}
-
-#if SHARP_LUMA_DELTA_QP || ENABLE_QPA_SUB_CTU
-void EncCu::updateLambda (Slice* slice, const int dQP, const bool updateRdCostLambda)
-{
-#if WCG_EXT && !ENABLE_QPA_SUB_CTU
-  int    NumberBFrames = ( m_pcEncCfg->getGOPSize() - 1 );
-  int    SHIFT_QP = 12;
-  double dLambda_scale = 1.0 - Clip3( 0.0, 0.5, 0.05*(double)(slice->getPic()->fieldPic ? NumberBFrames/2 : NumberBFrames) );
-
-  int bitdepth_luma_qp_scale = 6
-                               * (slice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA) - 8
-                                  - DISTORTION_PRECISION_ADJUSTMENT(slice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA)));
-  double qp_temp = (double) dQP + bitdepth_luma_qp_scale - SHIFT_QP;
-
-  double dQPFactor = m_pcEncCfg->getGOPEntry( m_pcSliceEncoder->getGopId() ).m_QPFactor;
-
-  if( slice->getSliceType() == I_SLICE )
+  if (bestCS->cus.size() == 1) // no partition
   {
-    if( m_pcEncCfg->getIntraQpFactor() >= 0.0 /*&& m_pcEncCfg->getGOPEntry( m_pcSliceEncoder->getGopId() ).m_sliceType != I_SLICE*/ )
+    CHECK(bestCS->cus[0]->tileIdx != bestCS->pps->getTileIdx(bestCS->area.lumaPos()), "Wrong tile index!");
+    if (bestCS->cus[0]->predMode == MODE_PLT)
     {
-      dQPFactor = m_pcEncCfg->getIntraQpFactor();
+      for (int i = compBegin; i < (compBegin + numComp); i++)
+      {
+        ComponentID comID = jointPLT ? (ComponentID)compBegin : ((i > 0) ? COMPONENT_Cb : COMPONENT_Y);
+        bestCS->prevPLT.curPLTSize[comID] = curLastPLTSize[comID];
+        memcpy(bestCS->prevPLT.curPLT[i], curLastPLT[i], curLastPLTSize[comID] * sizeof(Pel));
+      }
+      bestCS->reorderPrevPLT(bestCS->prevPLT, bestCS->cus[0]->curPLTSize, bestCS->cus[0]->curPLT, bestCS->cus[0]->reuseflag, compBegin, numComp, jointPLT);
     }
     else
     {
-      if( m_pcEncCfg->getLambdaFromQPEnable() )
-      {
-        dQPFactor = 0.57;
-      }
-      else
+      for (int i = compBegin; i<(compBegin + numComp); i++)
       {
-        dQPFactor = 0.57*dLambda_scale;
+        ComponentID comID = jointPLT ? (ComponentID)compBegin : ((i > 0) ? COMPONENT_Cb : COMPONENT_Y);
+        bestCS->prevPLT.curPLTSize[comID] = curLastPLTSize[comID];
+        memcpy(bestCS->prevPLT.curPLT[i], curLastPLT[i], bestCS->prevPLT.curPLTSize[comID] * sizeof(Pel));
       }
     }
   }
-  else if( m_pcEncCfg->getLambdaFromQPEnable() )
+  else
   {
-    dQPFactor = 0.57*dQPFactor;
+    for (int i = compBegin; i<(compBegin + numComp); i++)
+    {
+      ComponentID comID = jointPLT ? (ComponentID)compBegin : ((i > 0) ? COMPONENT_Cb : COMPONENT_Y);
+      bestCS->prevPLT.curPLTSize[comID] = bestLastPLTSize[comID];
+      memcpy(bestCS->prevPLT.curPLT[i], bestLastPLT[i], bestCS->prevPLT.curPLTSize[comID] * sizeof(Pel));
+    }
   }
+  const CodingUnit&     cu = *bestCS->cus.front();
+  cu.cs->prevPLT = bestCS->prevPLT;
+  // Assert if Best prediction mode is NONE
+  // Selected mode's RD-cost must be not MAX_DOUBLE.
+  CHECK( bestCS->cus.empty()                                   , "No possible encoding found" );
+  CHECK( bestCS->cus[0]->predMode == NUMBER_OF_PREDICTION_MODES, "No possible encoding found" );
+  CHECK( bestCS->cost             == MAX_DOUBLE                , "No possible encoding found" );
+}
 
-  double dLambda = dQPFactor*pow( 2.0, qp_temp/3.0 );
-  int depth = slice->getDepth();
-
-  if( !m_pcEncCfg->getLambdaFromQPEnable() && depth>0 )
-  {
-    int qp_temp_slice = slice->getSliceQp() + bitdepth_luma_qp_scale - SHIFT_QP; // avoid lambda  over adjustment,  use slice_qp here
-    dLambda *= Clip3( 2.00, 4.00, (qp_temp_slice / 6.0) ); // (j == B_SLICE && p_cur_frm->layer != 0 )
-  }
-  if( !m_pcEncCfg->getUseHADME() && slice->getSliceType( ) != I_SLICE )
+#if SHARP_LUMA_DELTA_QP || ENABLE_QPA_SUB_CTU
+void EncCu::updateLambda (Slice* slice, const int dQP,
+ #if WCG_EXT && ER_CHROMA_QP_WCG_PPS
+                          const bool useWCGChromaControl,
+ #endif
+                          const bool updateRdCostLambda)
+{
+#if WCG_EXT && ER_CHROMA_QP_WCG_PPS
+  if (useWCGChromaControl)
   {
-    dLambda *= 0.95;
-  }
+    const double lambda = m_pcSliceEncoder->initializeLambda (slice, m_pcSliceEncoder->getGopId(), slice->getSliceQp(), (double)dQP);
+    const int clippedQP = Clip3 (-slice->getSPS()->getQpBDOffset (CHANNEL_TYPE_LUMA), MAX_QP, dQP);
 
-  const int temporalId = m_pcEncCfg->getGOPEntry( m_pcSliceEncoder->getGopId() ).m_temporalId;
-  const std::vector<double> &intraLambdaModifiers = m_pcEncCfg->getIntraLambdaModifier();
-  double lambdaModifier;
-  if( slice->getSliceType( ) != I_SLICE || intraLambdaModifiers.empty())
-  {
-    lambdaModifier = m_pcEncCfg->getLambdaModifier(temporalId);
-  }
-  else
-  {
-    lambdaModifier = intraLambdaModifiers[(temporalId < intraLambdaModifiers.size()) ? temporalId : (intraLambdaModifiers.size() - 1)];
+    m_pcSliceEncoder->setUpLambda (slice, lambda, clippedQP);
+    return;
   }
-  dLambda *= lambdaModifier;
-
-  int qpBDoffset = slice->getSPS()->getQpBDOffset(CHANNEL_TYPE_LUMA);
-  int iQP = Clip3(-qpBDoffset, MAX_QP, (int)floor((double)dQP + 0.5));
-  m_pcSliceEncoder->setUpLambda(slice, dLambda, iQP);
-
-#else
+#endif
   int iQP = dQP;
   const double oldQP     = (double)slice->getSliceQpBase();
 #if ENABLE_QPA_SUB_CTU
   const double oldLambda = (m_pcEncCfg->getUsePerceptQPA() && !m_pcEncCfg->getUseRateCtrl() && slice->getPPS()->getUseDQP()) ? slice->getLambdas()[0] :
-                           m_pcSliceEncoder->calculateLambda (slice, m_pcSliceEncoder->getGopId(), slice->getDepth(), oldQP, oldQP, iQP);
+                           m_pcSliceEncoder->calculateLambda (slice, m_pcSliceEncoder->getGopId(), oldQP, oldQP, iQP);
 #else
-  const double oldLambda = m_pcSliceEncoder->calculateLambda (slice, m_pcSliceEncoder->getGopId(), slice->getDepth(), oldQP, oldQP, iQP);
+  const double oldLambda = m_pcSliceEncoder->calculateLambda (slice, m_pcSliceEncoder->getGopId(), oldQP, oldQP, iQP);
 #endif
   const double newLambda = oldLambda * pow (2.0, ((double)dQP - oldQP) / 3.0);
 #if RDOQ_CHROMA_LAMBDA
-  const double chromaLambda = newLambda / m_pcRdCost->getChromaWeight();
-  const double lambdaArray[MAX_NUM_COMPONENT] = {newLambda, chromaLambda, chromaLambda};
+  const double lambdaArray[MAX_NUM_COMPONENT] = {newLambda / m_pcRdCost->getDistortionWeight (COMPONENT_Y),
+                                                 newLambda / m_pcRdCost->getDistortionWeight (COMPONENT_Cb),
+                                                 newLambda / m_pcRdCost->getDistortionWeight (COMPONENT_Cr)};
   m_pcTrQuant->setLambdas (lambdaArray);
 #else
   m_pcTrQuant->setLambda (newLambda);
@@ -880,10 +1054,12 @@ void EncCu::updateLambda (Slice* slice, const int dQP, const bool updateRdCostLa
   if (updateRdCostLambda)
   {
     m_pcRdCost->setLambda (newLambda, slice->getSPS()->getBitDepths());
-  }
+#if WCG_EXT
+    m_pcRdCost->saveUnadjustedLambda();
 #endif
+  }
 }
-#endif
+#endif // SHARP_LUMA_DELTA_QP || ENABLE_QPA_SUB_CTU
 
 #if ENABLE_SPLIT_PARALLELISM
 //#undef DEBUG_PARALLEL_TIMINGS
@@ -901,35 +1077,25 @@ void EncCu::xCompressCUParallel( CodingStructure *&tempCS, CodingStructure *&bes
   std::fill( jobUsed, jobUsed + NUM_RESERVERD_SPLIT_JOBS, false );
 
   const UnitArea currArea = CS::getArea( *tempCS, partitioner.currArea(), partitioner.chType );
-#if ENABLE_WPP_PARALLELISM
-  const int      wppTId   = picture->scheduler.getWppThreadId();
-#endif
   const bool doParallel   = !m_pcEncCfg->getForceSingleSplitThread();
-#if _MSC_VER && ENABLE_WPP_PARALLELISM
-#pragma omp parallel for schedule(dynamic,1) num_threads(NUM_SPLIT_THREADS_IF_MSVC) if(doParallel)
-#else
   omp_set_num_threads( m_pcEncCfg->getNumSplitThreads() );
 
 #pragma omp parallel for schedule(dynamic,1) if(doParallel)
-#endif
   for( int jId = 1; jId <= numJobs; jId++ )
   {
     // thread start
-#if ENABLE_WPP_PARALLELISM
-    picture->scheduler.setWppThreadId( wppTId );
-#endif
     picture->scheduler.setSplitThreadId();
     picture->scheduler.setSplitJobId( jId );
 
-    Partitioner* jobPartitioner = PartitionerFactory::get( *tempCS->slice );
+    QTBTPartitioner jobPartitioner;
     EncCu*       jobCuEnc       = m_pcEncLib->getCuEncoder( picture->scheduler.getSplitDataId( jId ) );
     auto*        jobBlkCache    = dynamic_cast<CacheBlkInfoCtrl*>( jobCuEnc->m_modeCtrl );
 #if REUSE_CU_RESULTS
     auto*        jobBestCache   = dynamic_cast<BestEncInfoCache*>( jobCuEnc->m_modeCtrl );
 #endif
 
-    jobPartitioner->copyState( partitioner );
-    jobCuEnc      ->copyState( this, *jobPartitioner, currArea, true );
+    jobPartitioner.copyState( partitioner );
+    jobCuEnc      ->copyState( this, jobPartitioner, currArea, true );
 
     if( jobBlkCache  ) { jobBlkCache ->tick(); }
 #if REUSE_CU_RESULTS
@@ -941,9 +1107,7 @@ void EncCu::xCompressCUParallel( CodingStructure *&tempCS, CodingStructure *&bes
 
     jobUsed[jId] = true;
 
-    jobCuEnc->xCompressCU( jobTemp, jobBest, *jobPartitioner );
-
-    delete jobPartitioner;
+    jobCuEnc->xCompressCU( jobTemp, jobBest, jobPartitioner );
 
     picture->scheduler.setSplitJobId( 0 );
     // thread stop
@@ -1044,41 +1208,35 @@ void EncCu::copyState( EncCu* other, Partitioner& partitioner, const UnitArea& c
   m_modeCtrl     ->copyState( *other->m_modeCtrl, partitioner.currArea() );
   m_pcRdCost     ->copyState( *other->m_pcRdCost );
   m_pcTrQuant    ->copyState( *other->m_pcTrQuant );
-  if( m_pcEncCfg->getReshaper() )
+  if( m_pcEncCfg->getLmcs() )
   {
     EncReshape *encReshapeThis  = dynamic_cast<EncReshape*>(       m_pcReshape);
     EncReshape *encReshapeOther = dynamic_cast<EncReshape*>(other->m_pcReshape);
     encReshapeThis->copyState( *encReshapeOther );
   }
-  m_shareState    = other->m_shareState;
-  m_shareBndPosX  = other->m_shareBndPosX;
-  m_shareBndPosY  = other->m_shareBndPosY;
-  m_shareBndSizeW = other->m_shareBndSizeW;
-  m_shareBndSizeH = other->m_shareBndSizeH;
-  setShareStateDec( other->getShareStateDec() );
-  m_pcInterSearch->setShareState( other->m_pcInterSearch->getShareState() );
 
   m_CABACEstimator->getCtx() = other->m_CABACEstimator->getCtx();
 }
 #endif
 
-void EncCu::xCheckModeSplit(CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode )
+void EncCu::xCheckModeSplit(CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode, const ModeType modeTypeParent, bool &skipInterPass )
 {
   const int qp                = encTestMode.qp;
   const Slice &slice          = *tempCS->slice;
-  const bool bIsLosslessMode  = false; // False at this level. Next level down may set it to true.
   const int oldPrevQp         = tempCS->prevQP[partitioner.chType];
   const auto oldMotionLut     = tempCS->motionLut;
 #if ENABLE_QPA_SUB_CTU
   const PPS &pps              = *tempCS->pps;
   const uint32_t currDepth    = partitioner.currDepth;
 #endif
+  const auto oldPLT           = tempCS->prevPLT;
 
   const PartSplit split = getPartSplit( encTestMode );
+  const ModeType modeTypeChild = partitioner.modeType;
 
   CHECK( split == CU_DONT_SPLIT, "No proper split provided!" );
 
-  tempCS->initStructData( qp, bIsLosslessMode );
+  tempCS->initStructData( qp );
 
   m_CABACEstimator->getCtx() = m_CurrCtx->start;
 
@@ -1086,10 +1244,11 @@ void EncCu::xCheckModeSplit(CodingStructure *&tempCS, CodingStructure *&bestCS,
   const TempCtx ctxStartQt( m_CtxCache, SubCtx( Ctx::SplitQtFlag, m_CABACEstimator->getCtx() ) );
   const TempCtx ctxStartHv( m_CtxCache, SubCtx( Ctx::SplitHvFlag, m_CABACEstimator->getCtx() ) );
   const TempCtx ctxStart12( m_CtxCache, SubCtx( Ctx::Split12Flag, m_CABACEstimator->getCtx() ) );
-
+  const TempCtx ctxStartMC( m_CtxCache, SubCtx( Ctx::ModeConsFlag, m_CABACEstimator->getCtx() ) );
   m_CABACEstimator->resetBits();
 
   m_CABACEstimator->split_cu_mode( split, *tempCS, partitioner );
+  m_CABACEstimator->mode_constraint( split, *tempCS, partitioner, modeTypeChild );
 
   const double factor = ( tempCS->currQP[partitioner.chType] > 30 ? 1.1 : 1.075 );
   tempCS->useDbCost = m_pcEncCfg->getUseEncDbOpt();
@@ -1101,10 +1260,10 @@ void EncCu::xCheckModeSplit(CodingStructure *&tempCS, CodingStructure *&bestCS,
   m_CABACEstimator->getCtx() = SubCtx( Ctx::SplitQtFlag, ctxStartQt );
   m_CABACEstimator->getCtx() = SubCtx( Ctx::SplitHvFlag, ctxStartHv );
   m_CABACEstimator->getCtx() = SubCtx( Ctx::Split12Flag, ctxStart12 );
-
+  m_CABACEstimator->getCtx() = SubCtx( Ctx::ModeConsFlag, ctxStartMC );
   if (cost > bestCS->cost + bestCS->costDbOffset
 #if ENABLE_QPA_SUB_CTU
-    || (m_pcEncCfg->getUsePerceptQPA() && !m_pcEncCfg->getUseRateCtrl() && pps.getUseDQP() && (pps.getCuQpDeltaSubdiv() > 0) && (split == CU_HORZ_SPLIT || split == CU_VERT_SPLIT) &&
+    || (m_pcEncCfg->getUsePerceptQPA() && !m_pcEncCfg->getUseRateCtrl() && pps.getUseDQP() && (slice.getCuQpDeltaSubdiv() > 0) && (split == CU_HORZ_SPLIT || split == CU_VERT_SPLIT) &&
         (currDepth == 0)) // force quad-split or no split at CTU level
 #endif
     )
@@ -1113,45 +1272,25 @@ void EncCu::xCheckModeSplit(CodingStructure *&tempCS, CodingStructure *&bestCS,
     return;
   }
 
-  int startShareThisLevel = 0;
-  const uint32_t uiLPelX = tempCS->area.Y().lumaPos().x;
-  const uint32_t uiTPelY = tempCS->area.Y().lumaPos().y;
-
-  int splitRatio = 1;
-  CHECK(!(split == CU_QUAD_SPLIT || split == CU_HORZ_SPLIT || split == CU_VERT_SPLIT
-    || split == CU_TRIH_SPLIT || split == CU_TRIV_SPLIT), "invalid split type");
-  splitRatio = (split == CU_HORZ_SPLIT || split == CU_VERT_SPLIT) ? 1 : 2;
-
-  bool isOneChildSmall = ((tempCS->area.lwidth())*(tempCS->area.lheight()) >> splitRatio) < MRG_SHARELIST_SHARSIZE;
-
-  if ((((tempCS->area.lwidth())*(tempCS->area.lheight())) > (MRG_SHARELIST_SHARSIZE * 1)))
+  const bool chromaNotSplit = modeTypeParent == MODE_TYPE_ALL && modeTypeChild == MODE_TYPE_INTRA ? true : false;
+  if( partitioner.treeType != TREE_D )
   {
-    m_shareState = NO_SHARE;
+    tempCS->treeType = TREE_L;
   }
-
-  if (m_shareState == NO_SHARE)//init state
+  else
   {
-    if (isOneChildSmall)
+    if( chromaNotSplit )
     {
-      m_shareState = GEN_ON_SHARED_BOUND;//share start state
-      startShareThisLevel = 1;
+      CHECK( partitioner.chType != CHANNEL_TYPE_LUMA, "chType must be luma" );
+      tempCS->treeType = partitioner.treeType = TREE_L;
+    }
+    else
+    {
+      tempCS->treeType = partitioner.treeType = TREE_D;
     }
-  }
-  if ((m_shareState == GEN_ON_SHARED_BOUND) && (!slice.isIntra() || slice.getSPS()->getIBCFlag()))
-  {
-    tempCS->motionLut.lutShare = tempCS->motionLut.lut;
-    tempCS->motionLut.lutShareIbc = tempCS->motionLut.lutIbc;
-    m_shareBndPosX = uiLPelX;
-    m_shareBndPosY = uiTPelY;
-    m_shareBndSizeW = tempCS->area.lwidth();
-    m_shareBndSizeH = tempCS->area.lheight();
-    m_shareState = SHARING;
   }
 
 
-  m_pcInterSearch->setShareState(m_shareState);
-  setShareStateDec(m_shareState);
-
   partitioner.splitCurrArea( split, *tempCS );
   bool qgEnableChildren = partitioner.currQgEnable(); // QG possible at children level
 
@@ -1163,6 +1302,12 @@ void EncCu::xCheckModeSplit(CodingStructure *&tempCS, CodingStructure *&bestCS,
   AffineMVInfo tmpMVInfo;
   bool isAffMVInfoSaved;
   m_pcInterSearch->savePrevAffMVInfo(0, tmpMVInfo, isAffMVInfoSaved);
+  BlkUniMvInfo tmpUniMvInfo;
+  bool         isUniMvInfoSaved = false;
+  if (!tempCS->slice->isIntra())
+  {
+    m_pcInterSearch->savePrevUniMvInfo(tempCS->area.Y(), tmpUniMvInfo, isUniMvInfoSaved);
+  }
 
   do
   {
@@ -1178,15 +1323,11 @@ void EncCu::xCheckModeSplit(CodingStructure *&tempCS, CodingStructure *&bestCS,
 
       tempCS->initSubStructure( *tempSubCS, partitioner.chType, subCUArea, false );
       tempCS->initSubStructure( *bestSubCS, partitioner.chType, subCUArea, false );
-      tempSubCS->sharedBndPos.x = (m_shareState == SHARING) ? m_shareBndPosX : tempSubCS->area.Y().lumaPos().x;
-      tempSubCS->sharedBndPos.y = (m_shareState == SHARING) ? m_shareBndPosY : tempSubCS->area.Y().lumaPos().y;
-      tempSubCS->sharedBndSize.width = (m_shareState == SHARING) ? m_shareBndSizeW : tempSubCS->area.lwidth();
-      tempSubCS->sharedBndSize.height = (m_shareState == SHARING) ? m_shareBndSizeH : tempSubCS->area.lheight();
-      bestSubCS->sharedBndPos.x = (m_shareState == SHARING) ? m_shareBndPosX : tempSubCS->area.Y().lumaPos().x;
-      bestSubCS->sharedBndPos.y = (m_shareState == SHARING) ? m_shareBndPosY : tempSubCS->area.Y().lumaPos().y;
-      bestSubCS->sharedBndSize.width = (m_shareState == SHARING) ? m_shareBndSizeW : tempSubCS->area.lwidth();
-      bestSubCS->sharedBndSize.height = (m_shareState == SHARING) ? m_shareBndSizeH : tempSubCS->area.lheight();
-      xCompressCU( tempSubCS, bestSubCS, partitioner );
+      tempSubCS->bestParent = bestSubCS->bestParent = bestCS;
+      double newMaxCostAllowed = isLuma(partitioner.chType) ? std::min(encTestMode.maxCostAllowed, bestCS->cost - m_pcRdCost->calcRdCost(tempCS->fracBits, tempCS->dist)) : MAX_DOUBLE;
+      newMaxCostAllowed = std::max(0.0, newMaxCostAllowed);
+      xCompressCU(tempSubCS, bestSubCS, partitioner, newMaxCostAllowed);
+      tempSubCS->bestParent = bestSubCS->bestParent = nullptr;
 
       if( bestSubCS->cost == MAX_DOUBLE )
       {
@@ -1197,6 +1338,10 @@ void EncCu::xCheckModeSplit(CodingStructure *&tempCS, CodingStructure *&bestCS,
         m_CurrCtx--;
         partitioner.exitCurrSplit();
         xCheckBestMode( tempCS, bestCS, partitioner, encTestMode );
+        if( partitioner.chType == CHANNEL_TYPE_LUMA )
+        {
+          tempCS->motionLut = oldMotionLut;
+        }
         return;
       }
 
@@ -1207,35 +1352,161 @@ void EncCu::xCheckModeSplit(CodingStructure *&tempCS, CodingStructure *&bestCS,
       {
         tempCS->prevQP[partitioner.chType] = bestSubCS->prevQP[partitioner.chType];
       }
+      if( partitioner.isConsInter() )
+      {
+        for( int i = 0; i < bestSubCS->cus.size(); i++ )
+        {
+          CHECK( bestSubCS->cus[i]->predMode != MODE_INTER, "all CUs must be inter mode in an Inter coding region (SCIPU)" );
+        }
+      }
+      else if( partitioner.isConsIntra() )
+      {
+        for( int i = 0; i < bestSubCS->cus.size(); i++ )
+        {
+          CHECK( bestSubCS->cus[i]->predMode == MODE_INTER, "all CUs must not be inter mode in an Intra coding region (SCIPU)" );
+        }
+      }
 
       tempSubCS->releaseIntermediateData();
       bestSubCS->releaseIntermediateData();
+      if( !tempCS->slice->isIntra() && partitioner.isConsIntra() )
+      {
+        tempCS->cost = m_pcRdCost->calcRdCost( tempCS->fracBits, tempCS->dist );
+        if( tempCS->cost > bestCS->cost )
+        {
+          tempCS->cost = MAX_DOUBLE;
+          tempCS->costDbOffset = 0;
+          tempCS->useDbCost = m_pcEncCfg->getUseEncDbOpt();
+          m_CurrCtx--;
+          partitioner.exitCurrSplit();
+          if( partitioner.chType == CHANNEL_TYPE_LUMA )
+          {
+            tempCS->motionLut = oldMotionLut;
+          }
+          return;
+        }
+      }
     }
   } while( partitioner.nextPart( *tempCS ) );
 
   partitioner.exitCurrSplit();
 
-  if (startShareThisLevel == 1)
-  {
-    m_shareState = NO_SHARE;
-    m_pcInterSearch->setShareState(m_shareState);
-    setShareStateDec(m_shareState);
-  }
 
   m_CurrCtx--;
 
+  if( chromaNotSplit )
+  {
+    //Note: In local dual tree region, the chroma CU refers to the central luma CU's QP.
+    //If the luma CU QP shall be predQP (no residual in it and before it in the QG), it must be revised to predQP before encoding the chroma CU
+    //Otherwise, the chroma CU uses predQP+deltaQP in encoding but is decoded as using predQP, thus causing an encoder-decoder mismatch on the chroma QP.
+    if( tempCS->pps->getUseDQP() )
+    {
+      //find the parent CS that includes all coded CUs in the QG before this node
+      CodingStructure* qgCS = tempCS;
+      bool deltaQpCodedBeforeThisNode = false;
+      if( partitioner.currArea().lumaPos() != partitioner.currQgPos )
+      {
+        int numParentNodeToQgCS = 0;
+        while( qgCS->area.lumaPos() != partitioner.currQgPos )
+        {
+          CHECK( qgCS->parent == nullptr, "parent of qgCS shall exsit" );
+          qgCS = qgCS->parent;
+          numParentNodeToQgCS++;
+        }
+
+        //check whether deltaQP has been coded (in luma CU or luma&chroma CU) before this node
+        CodingStructure* parentCS = tempCS->parent;
+        for( int i = 0; i < numParentNodeToQgCS; i++ )
+        {
+          //checking each parent
+          CHECK( parentCS == nullptr, "parentCS shall exsit" );
+          for( const auto &cu : parentCS->cus )
+          {
+            if( cu->rootCbf && !isChroma( cu->chType ) )
+            {
+              deltaQpCodedBeforeThisNode = true;
+              break;
+            }
+          }
+          parentCS = parentCS->parent;
+        }
+      }
+
+      //revise the QP of every luma CU preceding the first luma CU with residual in the SCIPU to predQP
+      if( !deltaQpCodedBeforeThisNode )
+      {
+        //get pred QP of the QG
+        const CodingUnit* cuFirst = qgCS->getCU( CHANNEL_TYPE_LUMA );
+        CHECK( cuFirst->lumaPos() != partitioner.currQgPos, "First cu of the Qg is wrong" );
+        int predQp = CU::predictQP( *cuFirst, qgCS->prevQP[CHANNEL_TYPE_LUMA] );
+
+        //revise to predQP
+        int firstCuHasResidual = (int)tempCS->cus.size();
+        for( int i = 0; i < tempCS->cus.size(); i++ )
+        {
+          if( tempCS->cus[i]->rootCbf )
+          {
+            firstCuHasResidual = i;
+            break;
+          }
+        }
+
+        for( int i = 0; i < firstCuHasResidual; i++ )
+        {
+          tempCS->cus[i]->qp = predQp;
+        }
+      }
+    }
+    assert( tempCS->treeType == TREE_L );
+    uint32_t numCuPuTu[6];
+    tempCS->picture->cs->getNumCuPuTuOffset( numCuPuTu );
+    tempCS->picture->cs->useSubStructure( *tempCS, partitioner.chType, CS::getArea( *tempCS, partitioner.currArea(), partitioner.chType ), false, true, false, false );
+
+    partitioner.chType = CHANNEL_TYPE_CHROMA;
+    tempCS->treeType = partitioner.treeType = TREE_C;
+
+    m_CurrCtx++;
+
+    const unsigned wIdx = gp_sizeIdxInfo->idxFrom( partitioner.currArea().lwidth() );
+    const unsigned hIdx = gp_sizeIdxInfo->idxFrom( partitioner.currArea().lheight() );
+    CodingStructure *tempCSChroma = m_pTempCS2[wIdx][hIdx];
+    CodingStructure *bestCSChroma = m_pBestCS2[wIdx][hIdx];
+    tempCS->initSubStructure( *tempCSChroma, partitioner.chType, partitioner.currArea(), false );
+    tempCS->initSubStructure( *bestCSChroma, partitioner.chType, partitioner.currArea(), false );
+    tempCS->treeType = TREE_D;
+    xCompressCU( tempCSChroma, bestCSChroma, partitioner );
+
+    //attach chromaCS to luma CS and update cost
+    bool keepResi = KEEP_PRED_AND_RESI_SIGNALS;
+    //bestCSChroma->treeType = tempCSChroma->treeType = TREE_C;
+    CHECK( bestCSChroma->treeType != TREE_C || tempCSChroma->treeType != TREE_C, "wrong treeType for chroma CS" );
+    tempCS->useSubStructure( *bestCSChroma, partitioner.chType, CS::getArea( *bestCSChroma, partitioner.currArea(), partitioner.chType ), KEEP_PRED_AND_RESI_SIGNALS, true, keepResi, true );
+
+    //release tmp resource
+    tempCSChroma->releaseIntermediateData();
+    bestCSChroma->releaseIntermediateData();
+    //tempCS->picture->cs->releaseIntermediateData();
+    tempCS->picture->cs->clearCuPuTuIdxMap( partitioner.currArea(), numCuPuTu[0], numCuPuTu[1], numCuPuTu[2], numCuPuTu + 3 );
+
+    m_CurrCtx--;
+
+    //recover luma tree status
+    partitioner.chType = CHANNEL_TYPE_LUMA;
+    partitioner.treeType = TREE_D;
+    partitioner.modeType = MODE_TYPE_ALL;
+  }
+
   // Finally, generate split-signaling bits for RD-cost check
   const PartSplit implicitSplit = partitioner.getImplicitSplit( *tempCS );
 
   {
     bool enforceQT = implicitSplit == CU_QUAD_SPLIT;
-#if HM_QTBT_REPRODUCE_FAST_LCTU_BUG
 
     // LARGE CTU bug
     if( m_pcEncCfg->getUseFastLCTU() )
     {
       unsigned minDepth = 0;
-      unsigned maxDepth = g_aucLog2[tempCS->sps->getCTUSize()] - g_aucLog2[tempCS->sps->getMinQTSize(slice.getSliceType(), partitioner.chType)];
+      unsigned maxDepth = floorLog2(tempCS->sps->getCTUSize()) - floorLog2(tempCS->sps->getMinQTSize(slice.getSliceType(), partitioner.chType));
 
       if( auto ad = dynamic_cast<AdaptiveDepthPartitioner*>( &partitioner ) )
       {
@@ -1248,14 +1519,14 @@ void EncCu::xCheckModeSplit(CodingStructure *&tempCS, CodingStructure *&bestCS,
         enforceQT = true;
       }
     }
-#endif
 
     if( !enforceQT )
     {
       m_CABACEstimator->resetBits();
 
       m_CABACEstimator->split_cu_mode( split, *tempCS, partitioner );
-
+      partitioner.modeType = modeTypeParent;
+      m_CABACEstimator->mode_constraint( split, *tempCS, partitioner, modeTypeChild );
       tempCS->fracBits += m_CABACEstimator->getEstFracBits(); // split bits
     }
   }
@@ -1273,60 +1544,52 @@ void EncCu::xCheckModeSplit(CodingStructure *&tempCS, CodingStructure *&bestCS,
   // The exception is each slice / slice-segment must have at least one CTU.
   if (bestCS->cost != MAX_DOUBLE)
   {
-#if HEVC_TILES_WPP
-    const TileMap& tileMap = *tempCS->picture->tileMap;
-#endif
-#if HEVC_TILES_WPP || HEVC_DEPENDENT_SLICES
-    const uint32_t CtuAddr             = CU::getCtuAddr( *bestCS->getCU( partitioner.chType ) );
-#endif
-    const bool isEndOfSlice        =    slice.getSliceMode() == FIXED_NUMBER_OF_BYTES
-                                      && ((slice.getSliceBits() + CS::getEstBits(*bestCS)) > slice.getSliceArgument() << 3)
-#if HEVC_TILES_WPP
-                                      && CtuAddr != tileMap.getCtuTsToRsAddrMap(slice.getSliceCurStartCtuTsAddr())
-#endif
-#if HEVC_DEPENDENT_SLICES
-                                      && CtuAddr != tileMap.getCtuTsToRsAddrMap(slice.getSliceSegmentCurStartCtuTsAddr());
-#else
-                                      ;
-#endif
-
-#if HEVC_DEPENDENT_SLICES
-    const bool isEndOfSliceSegment =    slice.getSliceSegmentMode() == FIXED_NUMBER_OF_BYTES
-                                      && ((slice.getSliceSegmentBits() + CS::getEstBits(*bestCS)) > slice.getSliceSegmentArgument() << 3)
-                                      && CtuAddr != tileMap.getCtuTsToRsAddrMap(slice.getSliceSegmentCurStartCtuTsAddr());
-                                          // Do not need to check slice condition for slice-segment since a slice-segment is a subset of a slice.
-    if (isEndOfSlice || isEndOfSliceSegment)
-#else
-    if(isEndOfSlice)
-#endif
-    {
-      bestCS->cost = MAX_DOUBLE;
-      bestCS->costDbOffset = 0;
-    }
   }
   else
   {
     bestCS->costDbOffset = 0;
   }
   tempCS->useDbCost = m_pcEncCfg->getUseEncDbOpt();
+  if( tempCS->cus.size() > 0 && modeTypeParent == MODE_TYPE_ALL && modeTypeChild == MODE_TYPE_INTER )
+  {
+    int areaSizeNoResiCu = 0;
+    for( int k = 0; k < tempCS->cus.size(); k++ )
+    {
+      areaSizeNoResiCu += (tempCS->cus[k]->rootCbf == false) ? tempCS->cus[k]->lumaSize().area() : 0;
+    }
+    if( areaSizeNoResiCu >= (tempCS->area.lumaSize().area() >> 1) )
+    {
+      skipInterPass = true;
+    }
+  }
 
   // RD check for sub partitioned coding structure.
   xCheckBestMode( tempCS, bestCS, partitioner, encTestMode );
 
   if (isAffMVInfoSaved)
     m_pcInterSearch->addAffMVInfo(tmpMVInfo);
+  if (!tempCS->slice->isIntra() && isUniMvInfoSaved)
+  {
+    m_pcInterSearch->addUniMvInfo(tmpUniMvInfo);
+  }
 
   tempCS->motionLut = oldMotionLut;
 
+  tempCS->prevPLT   = oldPLT;
+
   tempCS->releaseIntermediateData();
 
   tempCS->prevQP[partitioner.chType] = oldPrevQp;
 }
 
-
-void EncCu::xCheckRDCostIntra( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode )
+bool EncCu::xCheckRDCostIntra(CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode, bool adaptiveColorTrans)
 {
-  const PPS &pps      = *tempCS->pps;
+  double          bestInterCost             = m_modeCtrl->getBestInterCost();
+  double          costSize2Nx2NmtsFirstPass = m_modeCtrl->getMtsSize2Nx2NFirstPassCost();
+  bool            skipSecondMtsPass         = m_modeCtrl->getSkipSecondMTSPass();
+  const SPS&      sps                       = *tempCS->sps;
+  const int       maxSizeMTS                = MTS_INTRA_MAX_CU_SIZE;
+  uint8_t         considerMtsSecondPass     = ( sps.getUseIntraMTS() && isLuma( partitioner.chType ) && partitioner.currArea().lwidth() <= maxSizeMTS && partitioner.currArea().lheight() <= maxSizeMTS ) ? 1 : 0;
 
   bool   useIntraSubPartitions   = false;
   double maxCostAllowedForChroma = MAX_DOUBLE;
@@ -1334,196 +1597,428 @@ void EncCu::xCheckRDCostIntra( CodingStructure *&tempCS, CodingStructure *&bestC
   Distortion interHad = m_modeCtrl->getInterHad();
 
 
+  double dct2Cost                =   MAX_DOUBLE;
+  double bestNonDCT2Cost         = MAX_DOUBLE;
+  double trGrpBestCost     [ 4 ] = { MAX_DOUBLE, MAX_DOUBLE, MAX_DOUBLE, MAX_DOUBLE };
+  double globalBestCost          =   MAX_DOUBLE;
+  bool   bestSelFlag       [ 4 ] = { false, false, false, false };
+  bool   trGrpCheck        [ 4 ] = { true, true, true, true };
+  int    startMTSIdx       [ 4 ] = { 0, 1, 2, 3 };
+  int    endMTSIdx         [ 4 ] = { 0, 1, 2, 3 };
+  double trGrpStopThreshold[ 3 ] = { 1.001, 1.001, 1.001 };
+  int    bestMtsFlag             =   0;
+  int    bestLfnstIdx            =   0;
+
+  const int  maxLfnstIdx         = ( partitioner.isSepTree( *tempCS ) && partitioner.chType == CHANNEL_TYPE_CHROMA && ( partitioner.currArea().lwidth() < 8 || partitioner.currArea().lheight() < 8 ) )
+                                   || ( partitioner.currArea().lwidth() > sps.getMaxTbSize() || partitioner.currArea().lheight() > sps.getMaxTbSize() ) ? 0 : 2;
+  bool       skipOtherLfnst      = false;
+  int        startLfnstIdx       = 0;
+  int        endLfnstIdx         = sps.getUseLFNST() ? maxLfnstIdx : 0;
+
+  int grpNumMax = sps.getUseLFNST() ? m_pcEncCfg->getMTSIntraMaxCand() : 1;
+  m_modeCtrl->setISPWasTested(false);
+  m_pcIntraSearch->invalidateBestModeCost();
+  if (sps.getUseColorTrans() && !CS::isDualITree(*tempCS))
+  {
+    if ((m_pcEncCfg->getRGBFormatFlag() && adaptiveColorTrans) || (!m_pcEncCfg->getRGBFormatFlag() && !adaptiveColorTrans))
+    {
+      m_pcIntraSearch->invalidateBestRdModeFirstColorSpace();
+    }
+  }
+
+  bool foundZeroRootCbf = false;
+  if (sps.getUseColorTrans())
   {
+    CHECK(tempCS->treeType != TREE_D || partitioner.treeType != TREE_D, "localtree should not be applied when adaptive color transform is enabled");
+    CHECK(tempCS->modeType != MODE_TYPE_ALL || partitioner.modeType != MODE_TYPE_ALL, "localtree should not be applied when adaptive color transform is enabled");
+    CHECK(adaptiveColorTrans && (CS::isDualITree(*tempCS) || partitioner.chType != CHANNEL_TYPE_LUMA), "adaptive color transform cannot be applied to dual-tree");
+  }
 
-    tempCS->initStructData( encTestMode.qp, encTestMode.lossless );
+  for( int trGrpIdx = 0; trGrpIdx < grpNumMax; trGrpIdx++ )
+  {
+    const uint8_t startMtsFlag = trGrpIdx > 0;
+    const uint8_t endMtsFlag   = sps.getUseLFNST() ? considerMtsSecondPass : 0;
 
-    CodingUnit &cu      = tempCS->addCU( CS::getArea( *tempCS, tempCS->area, partitioner.chType ), partitioner.chType );
+    if( ( trGrpIdx == 0 || ( !skipSecondMtsPass && considerMtsSecondPass ) ) && trGrpCheck[ trGrpIdx ] )
+    {
+      for( int lfnstIdx = startLfnstIdx; lfnstIdx <= endLfnstIdx; lfnstIdx++ )
+      {
+        for( uint8_t mtsFlag = startMtsFlag; mtsFlag <= endMtsFlag; mtsFlag++ )
+        {
+          if (sps.getUseColorTrans() && !CS::isDualITree(*tempCS))
+          {
+            m_pcIntraSearch->setSavedRdModeIdx(trGrpIdx*(NUM_LFNST_NUM_PER_SET * 2) + lfnstIdx * 2 + mtsFlag);
+          }
+          if (mtsFlag > 0 && lfnstIdx > 0)
+          {
+            continue;
+          }
+          //3) if interHad is 0, only try further modes if some intra mode was already better than inter
+          if( sps.getUseLFNST() && m_pcEncCfg->getUsePbIntraFast() && !tempCS->slice->isIntra() && bestCU && CU::isInter( *bestCS->getCU( partitioner.chType ) ) && interHad == 0 )
+          {
+            continue;
+          }
 
-    partitioner.setCUData( cu );
-    cu.slice            = tempCS->slice;
-#if HEVC_TILES_WPP
-    cu.tileIdx          = tempCS->picture->tileMap->getTileIdxMap( tempCS->area.lumaPos() );
-#endif
-    cu.skip             = false;
-    cu.mmvdSkip = false;
-    cu.predMode         = MODE_INTRA;
-    cu.transQuantBypass = encTestMode.lossless;
-    cu.chromaQpAdj      = cu.transQuantBypass ? 0 : m_cuChromaQpOffsetIdxPlus1;
-    cu.qp               = encTestMode.qp;
-  //cu.ipcm             = false;
-    cu.ispMode          = NOT_INTRA_SUBPARTITIONS;
+          tempCS->initStructData( encTestMode.qp );
 
-    CU::addPUs( cu );
+          CodingUnit &cu      = tempCS->addCU( CS::getArea( *tempCS, tempCS->area, partitioner.chType ), partitioner.chType );
 
-    tempCS->interHad    = interHad;
+          partitioner.setCUData( cu );
+          cu.slice            = tempCS->slice;
+          cu.tileIdx          = tempCS->pps->getTileIdx( tempCS->area.lumaPos() );
+          cu.skip             = false;
+          cu.mmvdSkip = false;
+          cu.predMode         = MODE_INTRA;
+          cu.chromaQpAdj      = m_cuChromaQpOffsetIdxPlus1;
+          cu.qp               = encTestMode.qp;
+          cu.lfnstIdx         = lfnstIdx;
+          cu.mtsFlag          = mtsFlag;
+          cu.ispMode          = NOT_INTRA_SUBPARTITIONS;
+          cu.colorTransform = adaptiveColorTrans;
 
-    m_bestModeUpdated = tempCS->useDbCost = bestCS->useDbCost = false;
+          CU::addPUs( cu );
 
-    if( isLuma( partitioner.chType ) )
-    {
-      //the Intra SubPartitions mode uses the value of the best cost so far (luma if it is the fast version) to avoid test non-necessary lines
-      const double bestCostSoFar = CS::isDualITree( *tempCS ) ? m_modeCtrl->getBestCostWithoutSplitFlags() : bestCU && bestCU->predMode == MODE_INTRA ? bestCS->lumaCost : bestCS->cost;
-      m_pcIntraSearch->estIntraPredLumaQT( cu, partitioner, bestCostSoFar );
+          tempCS->interHad    = interHad;
 
-      useIntraSubPartitions = cu.ispMode != NOT_INTRA_SUBPARTITIONS;
-      if( !CS::isDualITree( *tempCS ) )
-      {
-        tempCS->lumaCost = m_pcRdCost->calcRdCost( tempCS->fracBits, tempCS->dist );
-        if( useIntraSubPartitions )
-        {
-          //the difference between the best cost so far and the current luma cost is stored to avoid testing the Cr component if the cost of luma + Cb is larger than the best cost
-          maxCostAllowedForChroma = bestCS->cost < MAX_DOUBLE ? bestCS->cost - tempCS->lumaCost : MAX_DOUBLE;
-        }
-      }
+          m_bestModeUpdated = tempCS->useDbCost = bestCS->useDbCost = false;
 
-      if (m_pcEncCfg->getUsePbIntraFast() && tempCS->dist == std::numeric_limits<Distortion>::max()
-          && tempCS->interHad == 0)
-      {
-        interHad = 0;
-        // JEM assumes only perfect reconstructions can from now on beat the inter mode
-        m_modeCtrl->enforceInterHad( 0 );
-        return;
-      }
+          bool validCandRet = false;
+          if( isLuma( partitioner.chType ) )
+          {
+            //ISP uses the value of the best cost so far (luma if it is the fast version) to avoid testing unnecessary subpartitions
+            double bestCostSoFar = partitioner.isSepTree(*tempCS) ? m_modeCtrl->getBestCostWithoutSplitFlags() : bestCU && bestCU->predMode == MODE_INTRA ? bestCS->lumaCost : bestCS->cost;
+            if (partitioner.isSepTree(*tempCS) && encTestMode.maxCostAllowed < bestCostSoFar)
+            {
+              bestCostSoFar = encTestMode.maxCostAllowed;
+            }
+            validCandRet = m_pcIntraSearch->estIntraPredLumaQT(cu, partitioner, bestCostSoFar, mtsFlag, startMTSIdx[trGrpIdx], endMTSIdx[trGrpIdx], (trGrpIdx > 0), !cu.colorTransform ? bestCS : nullptr);
+            if ((!validCandRet || (cu.ispMode && cu.firstTU->cbf[COMPONENT_Y] == 0)))
+            {
+              continue;
+            }
+            if (m_pcEncCfg->getUseFastISP() && validCandRet && !mtsFlag && !lfnstIdx && !cu.colorTransform)
+            {
+              m_modeCtrl->setISPMode(cu.ispMode);
+              m_modeCtrl->setISPLfnstIdx(cu.lfnstIdx);
+              m_modeCtrl->setMIPFlagISPPass(cu.mipFlag);
+              m_modeCtrl->setBestISPIntraModeRelCU(cu.ispMode ? PU::getFinalIntraMode(*cu.firstPU, CHANNEL_TYPE_LUMA) : UINT8_MAX);
+              m_modeCtrl->setBestDCT2NonISPCostRelCU(m_modeCtrl->getMtsFirstPassNoIspCost());
+            }
 
-      if( !CS::isDualITree( *tempCS ) )
-      {
-        cu.cs->picture->getRecoBuf( cu.Y() ).copyFrom( cu.cs->getRecoBuf( COMPONENT_Y ) );
-        cu.cs->picture->getPredBuf(cu.Y()).copyFrom(cu.cs->getPredBuf(COMPONENT_Y));
-      }
-    }
+            if (sps.getUseColorTrans() && m_pcEncCfg->getRGBFormatFlag() && !CS::isDualITree(*tempCS) && !cu.colorTransform)
+            {
+              double curLumaCost = m_pcRdCost->calcRdCost(tempCS->fracBits, tempCS->dist);
+              if (curLumaCost > bestCS->cost)
+              {
+                continue;
+              }
+            }
 
-    if( tempCS->area.chromaFormat != CHROMA_400 && ( partitioner.chType == CHANNEL_TYPE_CHROMA || !CS::isDualITree( *tempCS ) ) )
-    {
-      TUIntraSubPartitioner subTuPartitioner( partitioner );
-      m_pcIntraSearch->estIntraPredChromaQT( cu, ( !useIntraSubPartitions || ( CS::isDualITree( *cu.cs ) && !isLuma( CHANNEL_TYPE_CHROMA ) ) ) ? partitioner : subTuPartitioner, maxCostAllowedForChroma );
-      if( useIntraSubPartitions && !cu.ispMode )
-      {
-        //At this point the temp cost is larger than the best cost. Therefore, we can already skip the remaining calculations
-        return;
-      }
-    }
+            useIntraSubPartitions = cu.ispMode != NOT_INTRA_SUBPARTITIONS;
+            if( !partitioner.isSepTree( *tempCS ) )
+            {
+              tempCS->lumaCost = m_pcRdCost->calcRdCost( tempCS->fracBits, tempCS->dist );
+              if( useIntraSubPartitions )
+              {
+                //the difference between the best cost so far and the current luma cost is stored to avoid testing the Cr component if the cost of luma + Cb is larger than the best cost
+                maxCostAllowedForChroma = bestCS->cost < MAX_DOUBLE ? bestCS->cost - tempCS->lumaCost : MAX_DOUBLE;
+              }
+            }
+
+            if (m_pcEncCfg->getUsePbIntraFast() && tempCS->dist == std::numeric_limits<Distortion>::max()
+                && tempCS->interHad == 0)
+            {
+              interHad = 0;
+              // JEM assumes only perfect reconstructions can from now on beat the inter mode
+              m_modeCtrl->enforceInterHad( 0 );
+              continue;
+            }
 
-    cu.rootCbf = false;
+            if( !partitioner.isSepTree( *tempCS ) )
+            {
+              if (!cu.colorTransform)
+              {
+                cu.cs->picture->getRecoBuf(cu.Y()).copyFrom(cu.cs->getRecoBuf(COMPONENT_Y));
+                cu.cs->picture->getPredBuf(cu.Y()).copyFrom(cu.cs->getPredBuf(COMPONENT_Y));
+              }
+              else
+              {
+                cu.cs->picture->getRecoBuf(cu).copyFrom(cu.cs->getRecoBuf(cu));
+                cu.cs->picture->getPredBuf(cu).copyFrom(cu.cs->getPredBuf(cu));
+              }
+            }
+          }
 
-    for( uint32_t t = 0; t < getNumberValidTBlocks( *cu.cs->pcv ); t++ )
-    {
-      cu.rootCbf |= cu.firstTU->cbf[t] != 0;
-    }
+          if( tempCS->area.chromaFormat != CHROMA_400 && ( partitioner.chType == CHANNEL_TYPE_CHROMA || !cu.isSepTree() ) && !cu.colorTransform )
+          {
+            TUIntraSubPartitioner subTuPartitioner( partitioner );
+            m_pcIntraSearch->estIntraPredChromaQT( cu, ( !useIntraSubPartitions || ( cu.isSepTree() && !isLuma( CHANNEL_TYPE_CHROMA ) ) ) ? partitioner : subTuPartitioner, maxCostAllowedForChroma );
+            if( useIntraSubPartitions && !cu.ispMode )
+            {
+              //At this point the temp cost is larger than the best cost. Therefore, we can already skip the remaining calculations
+              continue;
+            }
+          }
 
-    // Get total bits for current mode: encode CU
-    m_CABACEstimator->resetBits();
+          cu.rootCbf = false;
 
-    if( pps.getTransquantBypassEnabledFlag() )
-    {
-      m_CABACEstimator->cu_transquant_bypass_flag( cu );
-    }
+          for( uint32_t t = 0; t < getNumberValidTBlocks( *cu.cs->pcv ); t++ )
+          {
+            cu.rootCbf |= cu.firstTU->cbf[t] != 0;
+          }
 
-    if ((!cu.cs->slice->isIntra() || cu.cs->slice->getSPS()->getIBCFlag())
-      && cu.Y().valid()
-      )
-    {
-      m_CABACEstimator->cu_skip_flag ( cu );
-    }
-    m_CABACEstimator->pred_mode      ( cu );
-    m_CABACEstimator->pcm_data       ( cu, partitioner );
-    m_CABACEstimator->extend_ref_line( cu );
-    m_CABACEstimator->isp_mode       ( cu );
-    m_CABACEstimator->cu_pred_data   ( cu );
+          if (!cu.rootCbf)
+          {
+            cu.colorTransform = false;
+            foundZeroRootCbf = true;
+          }
 
-    // Encode Coefficients
-    CUCtx cuCtx;
-    cuCtx.isDQPCoded = true;
-    cuCtx.isChromaQpAdjCoded = true;
-    m_CABACEstimator->cu_residual( cu, partitioner, cuCtx );
+          // Get total bits for current mode: encode CU
+          m_CABACEstimator->resetBits();
 
-    tempCS->fracBits = m_CABACEstimator->getEstFracBits();
-    tempCS->cost     = m_pcRdCost->calcRdCost(tempCS->fracBits, tempCS->dist);
+          if ((!cu.cs->slice->isIntra() || cu.cs->slice->getSPS()->getIBCFlag())
+            && cu.Y().valid()
+            )
+          {
+            m_CABACEstimator->cu_skip_flag ( cu );
+          }
+          m_CABACEstimator->pred_mode      ( cu );
+          m_CABACEstimator->adaptive_color_transform(cu);
+          m_CABACEstimator->cu_pred_data   ( cu );
+          m_CABACEstimator->bdpcm_mode     ( cu, ComponentID(partitioner.chType) );
+          if (!CS::isDualITree(*cu.cs) && isLuma(partitioner.chType))
+              m_CABACEstimator->bdpcm_mode(cu, ComponentID(CHANNEL_TYPE_CHROMA));
 
-    const double tmpCostWithoutSplitFlags = tempCS->cost;
-    xEncodeDontSplit( *tempCS, partitioner );
+          // Encode Coefficients
+          CUCtx cuCtx;
+          cuCtx.isDQPCoded = true;
+          cuCtx.isChromaQpAdjCoded = true;
+          m_CABACEstimator->cu_residual( cu, partitioner, cuCtx );
 
-    xCheckDQP( *tempCS, partitioner );
+          tempCS->fracBits = m_CABACEstimator->getEstFracBits();
+          tempCS->cost     = m_pcRdCost->calcRdCost(tempCS->fracBits, tempCS->dist);
 
-    if( tempCS->cost < bestCS->cost )
-    {
-      m_modeCtrl->setBestCostWithoutSplitFlags( tmpCostWithoutSplitFlags );
-    }
 
-    xCalDebCost( *tempCS, partitioner );
-    tempCS->useDbCost = m_pcEncCfg->getUseEncDbOpt();
+          const double tmpCostWithoutSplitFlags = tempCS->cost;
+          xEncodeDontSplit( *tempCS, partitioner );
+
+          xCheckDQP( *tempCS, partitioner );
+
+          // Check if low frequency non-separable transform (LFNST) is too expensive
+          if( lfnstIdx && !cuCtx.lfnstLastScanPos && !cu.ispMode )
+          {
+            bool cbfAtZeroDepth = cu.isSepTree() ? cu.rootCbf : std::min( cu.firstTU->blocks[ 1 ].width, cu.firstTU->blocks[ 1 ].height ) < 4 ? TU::getCbfAtDepth( *cu.firstTU, COMPONENT_Y, 0 ) : cu.rootCbf;
+            if( cbfAtZeroDepth )
+            {
+              tempCS->cost = MAX_DOUBLE;
+            }
+          }
+
+          if( mtsFlag == 0 && lfnstIdx == 0 )
+          {
+            dct2Cost = tempCS->cost;
+          }
+          else if (tmpCostWithoutSplitFlags < bestNonDCT2Cost)
+          {
+            bestNonDCT2Cost = tmpCostWithoutSplitFlags;
+          }
+
+          if( tempCS->cost < bestCS->cost )
+          {
+            m_modeCtrl->setBestCostWithoutSplitFlags( tmpCostWithoutSplitFlags );
+          }
+
+          if( !mtsFlag ) static_cast< double& >( costSize2Nx2NmtsFirstPass ) = tempCS->cost;
+
+          if( sps.getUseLFNST() && !tempCS->cus.empty() )
+          {
+            skipOtherLfnst = m_modeCtrl->checkSkipOtherLfnst( encTestMode, tempCS, partitioner );
+          }
+
+          xCalDebCost( *tempCS, partitioner );
+          tempCS->useDbCost = m_pcEncCfg->getUseEncDbOpt();
 
 
 #if WCG_EXT
-    DTRACE_MODE_COST( *tempCS, m_pcRdCost->getLambda( true ) );
+          DTRACE_MODE_COST( *tempCS, m_pcRdCost->getLambda( true ) );
 #else
-    DTRACE_MODE_COST( *tempCS, m_pcRdCost->getLambda() );
+          DTRACE_MODE_COST( *tempCS, m_pcRdCost->getLambda() );
 #endif
-    xCheckBestMode( tempCS, bestCS, partitioner, encTestMode );
+          if (sps.getUseColorTrans() && !CS::isDualITree(*tempCS))
+          {
+            int colorSpaceIdx = ((m_pcEncCfg->getRGBFormatFlag() && adaptiveColorTrans) || (!m_pcEncCfg->getRGBFormatFlag() && !adaptiveColorTrans)) ? 0 : 1;
+            if (tempCS->cost < tempCS->tmpColorSpaceIntraCost[colorSpaceIdx])
+            {
+              tempCS->tmpColorSpaceIntraCost[colorSpaceIdx] = tempCS->cost;
+              bestCS->tmpColorSpaceIntraCost[colorSpaceIdx] = tempCS->cost;
+            }
+          }
+          if( !sps.getUseLFNST() )
+          {
+            xCheckBestMode( tempCS, bestCS, partitioner, encTestMode );
+          }
+          else
+          {
+            if( xCheckBestMode( tempCS, bestCS, partitioner, encTestMode ) )
+            {
+              trGrpBestCost[ trGrpIdx ] = globalBestCost = bestCS->cost;
+              bestSelFlag  [ trGrpIdx ] = true;
+              bestMtsFlag               = mtsFlag;
+              bestLfnstIdx              = lfnstIdx;
+              if( bestCS->cus.size() == 1 )
+              {
+                CodingUnit &cu = *bestCS->cus.front();
+                if (cu.firstTU->mtsIdx[COMPONENT_Y] == MTS_SKIP)
+                {
+                  if( ( floorLog2( cu.firstTU->blocks[ COMPONENT_Y ].width ) + floorLog2( cu.firstTU->blocks[ COMPONENT_Y ].height ) ) >= 6 )
+                  {
+                    endLfnstIdx = 0;
+                  }
+                }
+              }
+            }
 
-  } //for emtCuFlag
-}
+            // Decide whether to skip the non-DCT-II transforms and LFNST based on the ISP results
+            if ((endMtsFlag > 0 || endLfnstIdx > 0) && (cu.ispMode || (bestCS && bestCS->cus[0]->ispMode)) && tempCS->slice->isIntra() && m_pcEncCfg->getUseFastISP())
+            {
+              double bestCostDct2NoIsp = m_modeCtrl->getMtsFirstPassNoIspCost();
+              double bestIspCost       = m_modeCtrl->getIspCost();
+              CHECKD( bestCostDct2NoIsp <= bestIspCost, "wrong cost!" );
+              double threshold = 1.4;
 
-void EncCu::xCheckIntraPCM(CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode )
-{
-  tempCS->initStructData( encTestMode.qp, encTestMode.lossless );
+              double lfnstThreshold = 1.01 * threshold;
+              if( m_modeCtrl->getStopNonDCT2Transforms() || bestCostDct2NoIsp > bestIspCost*lfnstThreshold )
+              {
+                endLfnstIdx = lfnstIdx;
+              }
 
-  CodingUnit &cu      = tempCS->addCU( CS::getArea( *tempCS, tempCS->area, partitioner.chType ), partitioner.chType );
+              if ( m_modeCtrl->getStopNonDCT2Transforms() || bestCostDct2NoIsp > bestIspCost*threshold )
+              {
+                skipSecondMtsPass = true;
+                m_modeCtrl->setSkipSecondMTSPass( true );
+                break;
+              }
+            }
+            //now we check whether the second pass of SIZE_2Nx2N and the whole Intra SIZE_NxN should be skipped or not
+            if( !mtsFlag && !tempCS->slice->isIntra() && bestCU && bestCU->predMode != MODE_INTRA )
+            {
+              const double thEmtInterFastSkipIntra = 1.4; // Skip checking Intra if "2Nx2N using DCT2" is worse than best Inter mode
+              if( costSize2Nx2NmtsFirstPass > thEmtInterFastSkipIntra * bestInterCost )
+              {
+                skipSecondMtsPass = true;
+                m_modeCtrl->setSkipSecondMTSPass( true );
+                break;
+              }
+            }
+          }
 
-  partitioner.setCUData( cu );
-  cu.slice            = tempCS->slice;
-#if HEVC_TILES_WPP
-  cu.tileIdx          = tempCS->picture->tileMap->getTileIdxMap( tempCS->area.lumaPos() );
-#endif
-  cu.skip             = false;
+        } //for emtCuFlag
+        if( skipOtherLfnst )
+        {
+          startLfnstIdx = lfnstIdx;
+          endLfnstIdx   = lfnstIdx;
+          break;
+        }
+      } //for lfnstIdx
+    } //if (!skipSecondMtsPass && considerMtsSecondPass && trGrpCheck[iGrpIdx])
+
+    if( sps.getUseLFNST() && trGrpIdx < 3 )
+    {
+      trGrpCheck[ trGrpIdx + 1 ] = false;
+
+      if( bestSelFlag[ trGrpIdx ] && considerMtsSecondPass )
+      {
+        double dCostRatio = dct2Cost / trGrpBestCost[ trGrpIdx ];
+        trGrpCheck[ trGrpIdx + 1 ] = ( bestMtsFlag != 0 || bestLfnstIdx != 0 ) && dCostRatio < trGrpStopThreshold[ trGrpIdx ];
+      }
+    }
+  } //trGrpIdx
+  if(!adaptiveColorTrans)
+  m_modeCtrl->setBestNonDCT2Cost(bestNonDCT2Cost);
+  return foundZeroRootCbf;
+}
+
+
+void EncCu::xCheckPLT(CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode)
+{
+  tempCS->initStructData(encTestMode.qp);
+  CodingUnit &cu = tempCS->addCU(CS::getArea(*tempCS, tempCS->area, partitioner.chType), partitioner.chType);
+  partitioner.setCUData(cu);
+  cu.slice = tempCS->slice;
+  cu.tileIdx = tempCS->pps->getTileIdx(tempCS->area.lumaPos());
+  cu.skip = false;
   cu.mmvdSkip = false;
-  cu.predMode         = MODE_INTRA;
-  cu.transQuantBypass = encTestMode.lossless;
-  cu.chromaQpAdj      = cu.transQuantBypass ? 0 : m_cuChromaQpOffsetIdxPlus1;
-  cu.qp               = encTestMode.qp;
-  cu.ipcm             = true;
+  cu.predMode = MODE_PLT;
 
-  tempCS->addPU( CS::getArea( *tempCS, tempCS->area, partitioner.chType ), partitioner.chType );
+  cu.chromaQpAdj = m_cuChromaQpOffsetIdxPlus1;
+  cu.qp = encTestMode.qp;
+  cu.bdpcmMode = 0;
 
-  tempCS->addTU( CS::getArea( *tempCS, tempCS->area, partitioner.chType ), partitioner.chType );
+  tempCS->addPU(CS::getArea(*tempCS, tempCS->area, partitioner.chType), partitioner.chType);
+  tempCS->addTU(CS::getArea(*tempCS, tempCS->area, partitioner.chType), partitioner.chType);
+  // Search
+  tempCS->dist = 0;
+  if (cu.isSepTree())
+  {
+    if (isLuma(partitioner.chType))
+    {
+      m_pcIntraSearch->PLTSearch(*tempCS, partitioner, COMPONENT_Y, 1);
+    }
+    if (tempCS->area.chromaFormat != CHROMA_400 && (partitioner.chType == CHANNEL_TYPE_CHROMA))
+    {
+      m_pcIntraSearch->PLTSearch(*tempCS, partitioner, COMPONENT_Cb, 2);
+    }
+  }
+  else
+  {
+    m_pcIntraSearch->PLTSearch(*tempCS, partitioner, COMPONENT_Y, 3);
+  }
 
-  m_pcIntraSearch->IPCMSearch(*tempCS, partitioner);
 
   m_CABACEstimator->getCtx() = m_CurrCtx->start;
-
   m_CABACEstimator->resetBits();
-
-  if( tempCS->pps->getTransquantBypassEnabledFlag() )
+  if ((!cu.cs->slice->isIntra() || cu.cs->slice->getSPS()->getIBCFlag())
+    && cu.Y().valid())
   {
-    m_CABACEstimator->cu_transquant_bypass_flag( cu );
+    m_CABACEstimator->cu_skip_flag(cu);
   }
+  m_CABACEstimator->pred_mode(cu);
 
-  if ((!cu.cs->slice->isIntra() || cu.cs->slice->getSPS()->getIBCFlag())
-    && cu.Y().valid()
-    )
+  // signaling
+  CUCtx cuCtx;
+  cuCtx.isDQPCoded = true;
+  cuCtx.isChromaQpAdjCoded = true;
+  if (cu.isSepTree())
   {
-    m_CABACEstimator->cu_skip_flag ( cu );
+    if (isLuma(partitioner.chType))
+    {
+      m_CABACEstimator->cu_palette_info(cu, COMPONENT_Y, 1, cuCtx);
+    }
+    if (tempCS->area.chromaFormat != CHROMA_400 && (partitioner.chType == CHANNEL_TYPE_CHROMA))
+    {
+      m_CABACEstimator->cu_palette_info(cu, COMPONENT_Cb, 2, cuCtx);
+    }
+  }
+  else
+  {
+    m_CABACEstimator->cu_palette_info(cu, COMPONENT_Y, 3, cuCtx);
   }
-  m_CABACEstimator->pred_mode      ( cu );
-  m_CABACEstimator->pcm_data       ( cu, partitioner );
-
-
   tempCS->fracBits = m_CABACEstimator->getEstFracBits();
-  tempCS->cost     = m_pcRdCost->calcRdCost(tempCS->fracBits, tempCS->dist);
-
-  xEncodeDontSplit( *tempCS, partitioner );
+  tempCS->cost = m_pcRdCost->calcRdCost(tempCS->fracBits, tempCS->dist);
 
-  xCheckDQP( *tempCS, partitioner );
-
-  xCalDebCost( *tempCS, partitioner );
+  xEncodeDontSplit(*tempCS, partitioner);
+  xCheckDQP(*tempCS, partitioner);
+  xCalDebCost(*tempCS, partitioner);
   tempCS->useDbCost = m_pcEncCfg->getUseEncDbOpt();
 
+  const Area currCuArea = cu.block(getFirstComponentOfChannel(partitioner.chType));
+  cu.slice->m_mapPltCost[currCuArea.pos()][currCuArea.size()] = tempCS->cost;
 #if WCG_EXT
-  DTRACE_MODE_COST( *tempCS, m_pcRdCost->getLambda( true ) );
+  DTRACE_MODE_COST(*tempCS, m_pcRdCost->getLambda(true));
 #else
-  DTRACE_MODE_COST( *tempCS, m_pcRdCost->getLambda() );
+  DTRACE_MODE_COST(*tempCS, m_pcRdCost->getLambda());
 #endif
-  xCheckBestMode( tempCS, bestCS, partitioner, encTestMode );
+  xCheckBestMode(tempCS, bestCS, partitioner, encTestMode);
 }
 
 void EncCu::xCheckDQP( CodingStructure& cs, Partitioner& partitioner, bool bKeepCtx )
@@ -1536,7 +2031,7 @@ void EncCu::xCheckDQP( CodingStructure& cs, Partitioner& partitioner, bool bKeep
     return;
   }
 
-  if (CS::isDualITree(cs) && isChroma(partitioner.chType))
+  if (partitioner.isSepTree(cs) && isChroma(partitioner.chType))
   {
     return;
   }
@@ -1554,7 +2049,8 @@ void EncCu::xCheckDQP( CodingStructure& cs, Partitioner& partitioner, bool bKeep
   bool hasResidual = false;
   for( const auto &cu : cs.cus )
   {
-    if( cu->rootCbf )
+    // Do not include chroma CUs: the chroma CU QP is decided based on the corresponding luma QP, and deltaQP is not signaled at the chroma CU
+    if( cu->rootCbf && !isChroma( cu->chType ))
     {
       hasResidual = true;
       break;
@@ -1580,7 +2076,8 @@ void EncCu::xCheckDQP( CodingStructure& cs, Partitioner& partitioner, bool bKeep
     // NOTE: reset QPs for CUs without residuals up to first coded CU
     for( const auto &cu : cs.cus )
     {
-      if( cu->rootCbf )
+      // Do not include chroma CUs: the chroma CU QP is decided based on the corresponding luma QP, and deltaQP is not signaled at the chroma CU
+      if( cu->rootCbf && !isChroma( cu->chType ))
       {
         break;
       }
@@ -1612,8 +2109,7 @@ void EncCu::xFillPCMBuffer( CodingUnit &cu )
 
       const CPelBuf source      = tu.cs->getOrgBuf( compArea );
              PelBuf destination = tu.getPcmbuf( compID );
-
-      if (tu.cs->slice->getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag() && compID == COMPONENT_Y)
+      if (tu.cs->picHeader->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag() && compID == COMPONENT_Y)
       {
         CompArea    tmpArea(COMPONENT_Y, compArea.chromaFormat, Position(0, 0), compArea.size());
         PelBuf tempOrgBuf = m_tmpStorageLCU->getBuf(tmpArea);
@@ -1630,17 +2126,17 @@ void EncCu::xCheckRDCostHashInter( CodingStructure *&tempCS, CodingStructure *&b
 {
   bool isPerfectMatch = false;
 
-  tempCS->initStructData(encTestMode.qp, encTestMode.lossless);
+  tempCS->initStructData(encTestMode.qp);
   m_pcInterSearch->resetBufferedUniMotions();
   m_pcInterSearch->setAffineModeSelected(false);
   CodingUnit &cu = tempCS->addCU(tempCS->area, partitioner.chType);
 
   partitioner.setCUData(cu);
   cu.slice = tempCS->slice;
+  cu.tileIdx = tempCS->pps->getTileIdx(tempCS->area.lumaPos());
   cu.skip = false;
   cu.predMode = MODE_INTER;
-  cu.transQuantBypass = encTestMode.lossless;
-  cu.chromaQpAdj = cu.transQuantBypass ? 0 : m_cuChromaQpOffsetIdxPlus1;
+  cu.chromaQpAdj = m_cuChromaQpOffsetIdxPlus1;
   cu.qp = encTestMode.qp;
   CU::addPUs(cu);
   cu.mmvdSkip = false;
@@ -1648,13 +2144,13 @@ void EncCu::xCheckRDCostHashInter( CodingStructure *&tempCS, CodingStructure *&b
 
   if (m_pcInterSearch->predInterHashSearch(cu, partitioner, isPerfectMatch))
   {
-    double equGBiCost = MAX_DOUBLE;
+    double equBcwCost = MAX_DOUBLE;
 
     m_bestModeUpdated = tempCS->useDbCost = bestCS->useDbCost = false;
 
     xEncodeInterResidual(tempCS, bestCS, partitioner, encTestMode, 0
       , 0
-      , &equGBiCost
+      , &equBcwCost
     );
 
     if ( m_bestModeUpdated && bestCS->cost != MAX_DOUBLE )
@@ -1662,9 +2158,9 @@ void EncCu::xCheckRDCostHashInter( CodingStructure *&tempCS, CodingStructure *&b
       xCalDebCost( *bestCS, partitioner );
     }
   }
-  tempCS->initStructData(encTestMode.qp, encTestMode.lossless);
-
-  if (cu.lwidth() != 64)
+  tempCS->initStructData(encTestMode.qp);
+  int minSize = min(cu.lwidth(), cu.lheight());
+  if (minSize < 64)
   {
     isPerfectMatch = false;
   }
@@ -1677,7 +2173,7 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *&
 
   CHECK( slice.getSliceType() == I_SLICE, "Merge modes not available for I-slices" );
 
-  tempCS->initStructData( encTestMode.qp, encTestMode.lossless );
+  tempCS->initStructData( encTestMode.qp );
 
   MergeCtx mergeCtx;
   const SPS &sps = *tempCS->sps;
@@ -1697,19 +2193,16 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *&
     cu.cs       = tempCS;
     cu.predMode = MODE_INTER;
     cu.slice    = tempCS->slice;
-#if HEVC_TILES_WPP
-    cu.tileIdx  = tempCS->picture->tileMap->getTileIdxMap(tempCS->area.lumaPos());
-#endif
+    cu.tileIdx  = tempCS->pps->getTileIdx( tempCS->area.lumaPos() );
 
     PredictionUnit pu( tempCS->area );
     pu.cu = &cu;
     pu.cs = tempCS;
-    pu.shareParentPos = tempCS->sharedBndPos;
-    pu.shareParentSize = tempCS->sharedBndSize;
     PU::getInterMergeCandidates(pu, mergeCtx
       , 0
     );
     PU::getInterMMVDMergeCandidates(pu, mergeCtx);
+    pu.regularMergeFlag = true;
   }
   bool candHasNoResidual[MRG_MAX_NUM_CANDS + MMVD_ADD_NUM];
   for (uint32_t ui = 0; ui < MRG_MAX_NUM_CANDS + MMVD_ADD_NUM; ui++)
@@ -1720,18 +2213,38 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *&
   bool                                        bestIsSkip = false;
   bool                                        bestIsMMVDSkip = true;
   PelUnitBuf                                  acMergeBuffer[MRG_MAX_NUM_CANDS];
+  PelUnitBuf                                  acMergeTmpBuffer[MRG_MAX_NUM_CANDS];
   PelUnitBuf                                  acMergeRealBuffer[MMVD_MRG_MAX_RD_BUF_NUM];
   PelUnitBuf *                                acMergeTempBuffer[MMVD_MRG_MAX_RD_NUM];
   PelUnitBuf *                                singleMergeTempBuffer;
   int                                         insertPos;
   unsigned                                    uiNumMrgSATDCand = mergeCtx.numValidMergeCand + MMVD_ADD_NUM;
 
-  static_vector<unsigned, MRG_MAX_NUM_CANDS + MMVD_ADD_NUM>  RdModeList;
+  struct ModeInfo
+  {
+    uint32_t mergeCand;
+    bool     isRegularMerge;
+    bool     isMMVD;
+    bool     isCIIP;
+    ModeInfo() : mergeCand(0), isRegularMerge(false), isMMVD(false), isCIIP(false) {}
+    ModeInfo(const uint32_t mergeCand, const bool isRegularMerge, const bool isMMVD, const bool isCIIP) :
+      mergeCand(mergeCand), isRegularMerge(isRegularMerge), isMMVD(isMMVD), isCIIP(isCIIP) {}
+  };
+
+  static_vector<ModeInfo, MRG_MAX_NUM_CANDS + MMVD_ADD_NUM>  RdModeList;
   bool                                        mrgTempBufSet = false;
+  const int candNum = MRG_MAX_NUM_CANDS + (tempCS->sps->getUseMMVD() ? MMVD_ADD_NUM : 0);
 
-  for (unsigned i = 0; i < MRG_MAX_NUM_CANDS + MMVD_ADD_NUM; i++)
+  for (int i = 0; i < candNum; i++)
   {
-    RdModeList.push_back(i);
+    if (i < mergeCtx.numValidMergeCand)
+    {
+      RdModeList.push_back(ModeInfo(i, true, false, false));
+    }
+    else
+    {
+      RdModeList.push_back(ModeInfo(std::min(MMVD_ADD_NUM, i - mergeCtx.numValidMergeCand), false, true, false));
+    }
   }
 
   const UnitArea localUnitArea(tempCS->area.chromaFormat, Area(0, 0, tempCS->area.Y().width, tempCS->area.Y().height));
@@ -1748,9 +2261,7 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *&
     }
   }
 
-  static_vector<unsigned, MRG_MAX_NUM_CANDS + MMVD_ADD_NUM>  RdModeList2; // store the Intra mode for Intrainter
-  RdModeList2.clear();
-  bool isIntrainterEnabled = sps.getUseMHIntra();
+  bool isIntrainterEnabled = sps.getUseCiip();
   if (bestCS->area.lwidth() * bestCS->area.lheight() < 64 || bestCS->area.lwidth() >= MAX_CU_SIZE || bestCS->area.lheight() >= MAX_CU_SIZE)
   {
     isIntrainterEnabled = false;
@@ -1793,31 +2304,27 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *&
     {
       RdModeList.clear();
       mrgTempBufSet       = true;
-      const double sqrtLambdaForFirstPass = m_pcRdCost->getMotionLambda( encTestMode.lossless );
+      const TempCtx ctxStart(m_CtxCache, m_CABACEstimator->getCtx());
 
       CodingUnit &cu      = tempCS->addCU( tempCS->area, partitioner.chType );
-      const double sqrtLambdaForFirstPassIntra = m_pcRdCost->getMotionLambda(cu.transQuantBypass) / double(1 << SCALE_BITS);
-
+      const double sqrtLambdaForFirstPassIntra = m_pcRdCost->getMotionLambda( ) * FRAC_BITS_SCALE;
       partitioner.setCUData( cu );
       cu.slice            = tempCS->slice;
-#if HEVC_TILES_WPP
-      cu.tileIdx          = tempCS->picture->tileMap->getTileIdxMap( tempCS->area.lumaPos() );
-#endif
+      cu.tileIdx          = tempCS->pps->getTileIdx( tempCS->area.lumaPos() );
       cu.skip             = false;
       cu.mmvdSkip = false;
       cu.triangle         = false;
     //cu.affine
       cu.predMode         = MODE_INTER;
     //cu.LICFlag
-      cu.transQuantBypass = encTestMode.lossless;
-      cu.chromaQpAdj      = cu.transQuantBypass ? 0 : m_cuChromaQpOffsetIdxPlus1;
+      cu.chromaQpAdj      = m_cuChromaQpOffsetIdxPlus1;
       cu.qp               = encTestMode.qp;
     //cu.emtFlag  is set below
 
       PredictionUnit &pu  = tempCS->addPU( cu, partitioner.chType );
 
       DistParam distParam;
-      const bool bUseHadamard= !encTestMode.lossless;
+      const bool bUseHadamard = !tempCS->slice->getDisableSATDForRD();
       m_pcRdCost->setDistParam (distParam, tempCS->getOrgBuf().Y(), m_acMergeBuffer[0].Y(), sps.getBitDepth (CHANNEL_TYPE_LUMA), COMPONENT_Y, bUseHadamard);
 
       const UnitArea localUnitArea( tempCS->area.chromaFormat, Area( 0, 0, tempCS->area.Y().width, tempCS->area.Y().height) );
@@ -1828,7 +2335,8 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *&
         PU::spanMotionInfo( pu, mergeCtx );
         pu.mvRefine = true;
         distParam.cur = singleMergeTempBuffer->Y();
-        m_pcInterSearch->motionCompensation(pu, *singleMergeTempBuffer);
+        acMergeTmpBuffer[uiMergeCand] = m_acMergeTmpBuffer[uiMergeCand].getBuf(localUnitArea);
+        m_pcInterSearch->motionCompensation(pu, *singleMergeTempBuffer, REF_PIC_LIST_X, true, true, &(acMergeTmpBuffer[uiMergeCand]));
         acMergeBuffer[uiMergeCand] = m_acRealMergeBuffer[uiMergeCand].getBuf(localUnitArea);
         acMergeBuffer[uiMergeCand].copyFrom(*singleMergeTempBuffer);
         pu.mvRefine = false;
@@ -1855,17 +2363,11 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *&
         }
 
         Distortion uiSad = distParam.distFunc(distParam);
-        uint32_t uiBitsCand = uiMergeCand + 1;
-        if( uiMergeCand == tempCS->slice->getMaxNumMergeCand() - 1 )
-        {
-          uiBitsCand--;
-        }
-#if !JVET_MMVD_OFF_MACRO
-        uiBitsCand++; // for mmvd_flag
-#endif
-        double cost     = (double)uiSad + (double)uiBitsCand * sqrtLambdaForFirstPass;
+        m_CABACEstimator->getCtx() = ctxStart;
+        uint64_t fracBits = m_pcInterSearch->xCalcPuMeBits(pu);
+        double cost = (double)uiSad + (double)fracBits * sqrtLambdaForFirstPassIntra;
         insertPos = -1;
-        updateDoubleCandList(uiMergeCand, cost, RdModeList, candCostList, RdModeList2, (uint32_t)NUM_LUMA_MODE, uiNumMrgSATDCand, &insertPos);
+        updateCandList(ModeInfo(uiMergeCand, true, false, false), cost, RdModeList, candCostList, uiNumMrgSATDCand, &insertPos);
         if (insertPos != -1)
         {
           if (insertPos == RdModeList.size() - 1)
@@ -1886,161 +2388,107 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *&
 
       if (isIntrainterEnabled)
       {
-        int numTestIntraMode = 4;
         // prepare for Intra bits calculation
-        const TempCtx ctxStart(m_CtxCache, m_CABACEstimator->getCtx());
-        const TempCtx ctxStartIntraMode(m_CtxCache, SubCtx(Ctx::MHIntraPredMode, m_CABACEstimator->getCtx()));
-
-        // for Intrainter fast, recored the best intra mode during the first round for mrege 0
-        int bestMHIntraMode = -1;
-        double bestMHIntraCost = MAX_DOUBLE;
-
-        pu.mhIntraFlag = true;
+        pu.ciipFlag = true;
 
         // save the to-be-tested merge candidates
-        uint32_t MHIntraMergeCand[NUM_MRG_SATD_CAND];
+        uint32_t CiipMergeCand[NUM_MRG_SATD_CAND];
         for (uint32_t mergeCnt = 0; mergeCnt < std::min(NUM_MRG_SATD_CAND, (const int)mergeCtx.numValidMergeCand); mergeCnt++)
         {
-          MHIntraMergeCand[mergeCnt] = RdModeList[mergeCnt];
+          CiipMergeCand[mergeCnt] = RdModeList[mergeCnt].mergeCand;
         }
         for (uint32_t mergeCnt = 0; mergeCnt < std::min(std::min(NUM_MRG_SATD_CAND, (const int)mergeCtx.numValidMergeCand), 4); mergeCnt++)
         {
-          uint32_t mergeCand = MHIntraMergeCand[mergeCnt];
-          acMergeBuffer[mergeCand] = m_acRealMergeBuffer[mergeCand].getBuf(localUnitArea);
+          uint32_t mergeCand = CiipMergeCand[mergeCnt];
+          acMergeTmpBuffer[mergeCand] = m_acMergeTmpBuffer[mergeCand].getBuf(localUnitArea);
 
           // estimate merge bits
-          uint32_t bitsCand = mergeCand + 1;
-          if (mergeCand == pu.cs->slice->getMaxNumMergeCand() - 1)
-          {
-            bitsCand--;
-          }
+          mergeCtx.setMergeInfo(pu, mergeCand);
 
           // first round
-          for (uint32_t intraCnt = 0; intraCnt < numTestIntraMode; intraCnt++)
+          pu.intraDir[0] = PLANAR_IDX;
+          uint32_t intraCnt = 0;
+          // generate intrainter Y prediction
+          if (mergeCnt == 0)
           {
-            pu.intraDir[0] = (intraCnt < 2) ? intraCnt : ((intraCnt == 2) ? HOR_IDX : VER_IDX);
-
-            // fast 2
-            if (mergeCnt > 0 && bestMHIntraMode != pu.intraDir[0])
-            {
-              continue;
-            }
-            int narrowCase = PU::getNarrowShape(pu.lwidth(), pu.lheight());
-            if (narrowCase == 1 && pu.intraDir[0] == HOR_IDX)
-            {
-              continue;
-            }
-            if (narrowCase == 2 && pu.intraDir[0] == VER_IDX)
-            {
-              continue;
-            }
-            // generate intrainter Y prediction
-            if (mergeCnt == 0)
-            {
-              bool isUseFilter = IntraPrediction::useFilteredIntraRefSamples(COMPONENT_Y, pu, true, pu);
-              m_pcIntraSearch->initIntraPatternChType(*pu.cu, pu.Y(), isUseFilter);
-              m_pcIntraSearch->predIntraAng(COMPONENT_Y, pu.cs->getPredBuf(pu).Y(), pu, isUseFilter);
-              m_pcIntraSearch->switchBuffer(pu, COMPONENT_Y, pu.cs->getPredBuf(pu).Y(), m_pcIntraSearch->getPredictorPtr2(COMPONENT_Y, intraCnt));
-            }
-            pu.cs->getPredBuf(pu).copyFrom(acMergeBuffer[mergeCand]);
-            if (pu.cs->slice->getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag())
-            {
-              pu.cs->getPredBuf(pu).Y().rspSignal(m_pcReshape->getFwdLUT());
-            }
-            m_pcIntraSearch->geneWeightedPred(COMPONENT_Y, pu.cs->getPredBuf(pu).Y(), pu, m_pcIntraSearch->getPredictorPtr2(COMPONENT_Y, intraCnt));
+            m_pcIntraSearch->initIntraPatternChType(*pu.cu, pu.Y());
+            m_pcIntraSearch->predIntraAng(COMPONENT_Y, pu.cs->getPredBuf(pu).Y(), pu);
+            m_pcIntraSearch->switchBuffer(pu, COMPONENT_Y, pu.cs->getPredBuf(pu).Y(), m_pcIntraSearch->getPredictorPtr2(COMPONENT_Y, intraCnt));
+          }
+          pu.cs->getPredBuf(pu).copyFrom(acMergeTmpBuffer[mergeCand]);
+          if (pu.cs->picHeader->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag())
+          {
+            pu.cs->getPredBuf(pu).Y().rspSignal(m_pcReshape->getFwdLUT());
+          }
+          m_pcIntraSearch->geneWeightedPred(COMPONENT_Y, pu.cs->getPredBuf(pu).Y(), pu, m_pcIntraSearch->getPredictorPtr2(COMPONENT_Y, intraCnt));
 
-            // calculate cost
-            if (pu.cs->slice->getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag())
-            {
-               pu.cs->getPredBuf(pu).Y().rspSignal(m_pcReshape->getInvLUT());
-            }
-            distParam.cur = pu.cs->getPredBuf(pu).Y();
-            Distortion sadValue = distParam.distFunc(distParam);
-            if (pu.cs->slice->getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag())
-            {
-              pu.cs->getPredBuf(pu).Y().rspSignal(m_pcReshape->getFwdLUT());
-            }
-            m_CABACEstimator->getCtx() = SubCtx(Ctx::MHIntraPredMode, ctxStartIntraMode);
-            uint64_t fracModeBits = m_pcIntraSearch->xFracModeBitsIntra(pu, pu.intraDir[0], CHANNEL_TYPE_LUMA);
-            double cost = (double)sadValue + (double)(bitsCand + 1) * sqrtLambdaForFirstPass + (double)fracModeBits * sqrtLambdaForFirstPassIntra;
-            insertPos = -1;
-            updateDoubleCandList(mergeCand + MRG_MAX_NUM_CANDS + MMVD_ADD_NUM, cost, RdModeList, candCostList, RdModeList2, pu.intraDir[0], uiNumMrgSATDCand, &insertPos);
-            if (insertPos != -1)
-            {
-              for (int i = int(RdModeList.size()) - 1; i > insertPos; i--)
-              {
-                swap(acMergeTempBuffer[i - 1], acMergeTempBuffer[i]);
-              }
-              swap(singleMergeTempBuffer, acMergeTempBuffer[insertPos]);
-            }
-            // fast 2
-            if (mergeCnt == 0 && cost < bestMHIntraCost)
+          // calculate cost
+          if (pu.cs->picHeader->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag())
+          {
+            pu.cs->getPredBuf(pu).Y().rspSignal(m_pcReshape->getInvLUT());
+          }
+          distParam.cur = pu.cs->getPredBuf(pu).Y();
+          Distortion sadValue = distParam.distFunc(distParam);
+          if (pu.cs->picHeader->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag())
+          {
+            pu.cs->getPredBuf(pu).Y().rspSignal(m_pcReshape->getFwdLUT());
+          }
+          m_CABACEstimator->getCtx() = ctxStart;
+          pu.regularMergeFlag = false;
+          uint64_t fracBits = m_pcInterSearch->xCalcPuMeBits(pu);
+          double cost = (double)sadValue + (double)fracBits * sqrtLambdaForFirstPassIntra;
+          insertPos = -1;
+          updateCandList(ModeInfo(mergeCand, false, false, true), cost, RdModeList, candCostList, uiNumMrgSATDCand, &insertPos);
+          if (insertPos != -1)
+          {
+            for (int i = int(RdModeList.size()) - 1; i > insertPos; i--)
             {
-              bestMHIntraMode = pu.intraDir[0];
-              bestMHIntraCost = cost;
+              swap(acMergeTempBuffer[i - 1], acMergeTempBuffer[i]);
             }
+            swap(singleMergeTempBuffer, acMergeTempBuffer[insertPos]);
           }
         }
-        pu.mhIntraFlag = false;
-        m_CABACEstimator->getCtx() = ctxStart;
+        pu.ciipFlag = false;
       }
-#if !JVET_MMVD_OFF_MACRO
-      cu.mmvdSkip = true;
-      int tempNum = 0;
-      tempNum = MMVD_ADD_NUM;
-      for (uint32_t mergeCand = mergeCtx.numValidMergeCand; mergeCand < mergeCtx.numValidMergeCand + tempNum; mergeCand++)
+      if ( pu.cs->sps->getUseMMVD() )
       {
-        const int mmvdMergeCand = mergeCand - mergeCtx.numValidMergeCand;
-        int bitsBaseIdx = 0;
-        int bitsRefineStep = 0;
-        int bitsDirection = 2;
-        int bitsCand = 0;
-        int baseIdx;
-        int refineStep;
-        baseIdx = mmvdMergeCand / MMVD_MAX_REFINE_NUM;
-        refineStep = (mmvdMergeCand - (baseIdx * MMVD_MAX_REFINE_NUM)) / 4;
-        bitsBaseIdx = baseIdx + 1;
-        if (baseIdx == MMVD_BASE_MV_NUM - 1)
-        {
-          bitsBaseIdx--;
-        }
-
-        bitsRefineStep = refineStep + 1;
-        if (refineStep == MMVD_REFINE_STEP - 1)
-        {
-          bitsRefineStep--;
-        }
-
-        bitsCand = bitsBaseIdx + bitsRefineStep + bitsDirection;
-        bitsCand++; // for mmvd_flag
-
-        mergeCtx.setMmvdMergeCandiInfo(pu, mmvdMergeCand);
-
-        PU::spanMotionInfo(pu, mergeCtx);
-        pu.mvRefine = true;
-        distParam.cur = singleMergeTempBuffer->Y();
-        pu.mmvdEncOptMode = (refineStep > 2 ? 2 : 1);
-        CHECK(!pu.mmvdMergeFlag, "MMVD merge should be set");
-        // Don't do chroma MC here
-        m_pcInterSearch->motionCompensation(pu, *singleMergeTempBuffer, REF_PIC_LIST_X, true, false);
-        pu.mmvdEncOptMode = 0;
-        pu.mvRefine = false;
-        Distortion uiSad = distParam.distFunc(distParam);
-
-
-        double cost = (double)uiSad + (double)bitsCand * sqrtLambdaForFirstPass;
-        insertPos = -1;
-        updateDoubleCandList(mergeCand, cost, RdModeList, candCostList, RdModeList2, (uint32_t)NUM_LUMA_MODE, uiNumMrgSATDCand, &insertPos);
-        if (insertPos != -1)
+        cu.mmvdSkip = true;
+        pu.regularMergeFlag = true;
+        const int tempNum = (mergeCtx.numValidMergeCand > 1) ? MMVD_ADD_NUM : MMVD_ADD_NUM >> 1;
+        for (int mmvdMergeCand = 0; mmvdMergeCand < tempNum; mmvdMergeCand++)
         {
-          for (int i = int(RdModeList.size()) - 1; i > insertPos; i--)
+          int baseIdx = mmvdMergeCand / MMVD_MAX_REFINE_NUM;
+          int refineStep = (mmvdMergeCand - (baseIdx * MMVD_MAX_REFINE_NUM)) / 4;
+          if (refineStep >= m_pcEncCfg->getMmvdDisNum())
+            continue;
+          mergeCtx.setMmvdMergeCandiInfo(pu, mmvdMergeCand);
+
+          PU::spanMotionInfo(pu, mergeCtx);
+          pu.mvRefine = true;
+          distParam.cur = singleMergeTempBuffer->Y();
+          pu.mmvdEncOptMode = (refineStep > 2 ? 2 : 1);
+          CHECK(!pu.mmvdMergeFlag, "MMVD merge should be set");
+          // Don't do chroma MC here
+          m_pcInterSearch->motionCompensation(pu, *singleMergeTempBuffer, REF_PIC_LIST_X, true, false);
+          pu.mmvdEncOptMode = 0;
+          pu.mvRefine = false;
+          Distortion uiSad = distParam.distFunc(distParam);
+
+          m_CABACEstimator->getCtx() = ctxStart;
+          uint64_t fracBits = m_pcInterSearch->xCalcPuMeBits(pu);
+          double cost = (double)uiSad + (double)fracBits * sqrtLambdaForFirstPassIntra;
+          insertPos = -1;
+          updateCandList(ModeInfo(mmvdMergeCand, false, true, false), cost, RdModeList, candCostList, uiNumMrgSATDCand, &insertPos);
+          if (insertPos != -1)
           {
-            swap(acMergeTempBuffer[i - 1], acMergeTempBuffer[i]);
+            for (int i = int(RdModeList.size()) - 1; i > insertPos; i--)
+            {
+              swap(acMergeTempBuffer[i - 1], acMergeTempBuffer[i]);
+            }
+            swap(singleMergeTempBuffer, acMergeTempBuffer[insertPos]);
           }
-          swap(singleMergeTempBuffer, acMergeTempBuffer[insertPos]);
         }
       }
-#endif
       // Try to limit number of candidates using SATD-costs
       for( uint32_t i = 1; i < uiNumMrgSATDCand; i++ )
       {
@@ -2055,34 +2503,36 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *&
 
       if (isIntrainterEnabled)
       {
-        pu.mhIntraFlag = true;
+        pu.ciipFlag = true;
         for (uint32_t mergeCnt = 0; mergeCnt < uiNumMrgSATDCand; mergeCnt++)
         {
-          if (RdModeList[mergeCnt] >= (MRG_MAX_NUM_CANDS + MMVD_ADD_NUM))
+          if (RdModeList[mergeCnt].isCIIP)
           {
-            pu.intraDir[0] = RdModeList2[mergeCnt];
+            pu.intraDir[0] = PLANAR_IDX;
             pu.intraDir[1] = DM_CHROMA_IDX;
-            uint32_t bufIdx = (pu.intraDir[0] > 1) ? (pu.intraDir[0] == HOR_IDX ? 2 : 3) : pu.intraDir[0];
-            bool isUseFilter = IntraPrediction::useFilteredIntraRefSamples(COMPONENT_Cb, pu, true, pu);
-            m_pcIntraSearch->initIntraPatternChType(*pu.cu, pu.Cb(), isUseFilter);
-            m_pcIntraSearch->predIntraAng(COMPONENT_Cb, pu.cs->getPredBuf(pu).Cb(), pu, isUseFilter);
+            if (pu.chromaSize().width == 2)
+              continue;
+            uint32_t bufIdx = 0;
+            m_pcIntraSearch->initIntraPatternChType(*pu.cu, pu.Cb());
+            m_pcIntraSearch->predIntraAng(COMPONENT_Cb, pu.cs->getPredBuf(pu).Cb(), pu);
             m_pcIntraSearch->switchBuffer(pu, COMPONENT_Cb, pu.cs->getPredBuf(pu).Cb(), m_pcIntraSearch->getPredictorPtr2(COMPONENT_Cb, bufIdx));
-            isUseFilter = IntraPrediction::useFilteredIntraRefSamples(COMPONENT_Cr, pu, true, pu);
-            m_pcIntraSearch->initIntraPatternChType(*pu.cu, pu.Cr(), isUseFilter);
-            m_pcIntraSearch->predIntraAng(COMPONENT_Cr, pu.cs->getPredBuf(pu).Cr(), pu, isUseFilter);
+
+            m_pcIntraSearch->initIntraPatternChType(*pu.cu, pu.Cr());
+            m_pcIntraSearch->predIntraAng(COMPONENT_Cr, pu.cs->getPredBuf(pu).Cr(), pu);
             m_pcIntraSearch->switchBuffer(pu, COMPONENT_Cr, pu.cs->getPredBuf(pu).Cr(), m_pcIntraSearch->getPredictorPtr2(COMPONENT_Cr, bufIdx));
           }
         }
-        pu.mhIntraFlag = false;
+        pu.ciipFlag = false;
       }
 
-      tempCS->initStructData( encTestMode.qp, encTestMode.lossless );
+      tempCS->initStructData( encTestMode.qp );
+      m_CABACEstimator->getCtx() = ctxStart;
     }
     else
     {
       if (bestIsMMVDSkip)
       {
-        uiNumMrgSATDCand = mergeCtx.numValidMergeCand + MMVD_ADD_NUM;
+        uiNumMrgSATDCand = mergeCtx.numValidMergeCand + ((mergeCtx.numValidMergeCand > 1) ? MMVD_ADD_NUM : MMVD_ADD_NUM >> 1);
       }
       else
       {
@@ -2092,26 +2542,16 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *&
   }
   m_bestModeUpdated = tempCS->useDbCost = bestCS->useDbCost = false;
   uint32_t iteration;
-  uint32_t iterationBegin = m_modeCtrl->getIsHashPerfectMatch() ? 1 : 0;
-  if (encTestMode.lossless)
-  {
-    iteration = 1;
-    iterationBegin = 0;
-  }
-  else
-  {
-    iteration = 2;
-  }
+  uint32_t iterationBegin = 0;
+  iteration = 2;
   for (uint32_t uiNoResidualPass = iterationBegin; uiNoResidualPass < iteration; ++uiNoResidualPass)
   {
     for( uint32_t uiMrgHADIdx = 0; uiMrgHADIdx < uiNumMrgSATDCand; uiMrgHADIdx++ )
     {
-      uint32_t uiMergeCand = RdModeList[uiMrgHADIdx];
+      uint32_t uiMergeCand = RdModeList[uiMrgHADIdx].mergeCand;
 
-
-      if (uiNoResidualPass != 0 && uiMergeCand >= (MRG_MAX_NUM_CANDS + MMVD_ADD_NUM)) // intrainter does not support skip mode
+      if (uiNoResidualPass != 0 && RdModeList[uiMrgHADIdx].isCIIP) // intrainter does not support skip mode
       {
-        uiMergeCand -= (MRG_MAX_NUM_CANDS + MMVD_ADD_NUM); // for skip, map back to normal merge candidate idx and try RDO
         if (isTestSkipMerge[uiMergeCand])
         {
           continue;
@@ -2129,39 +2569,37 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *&
 
       partitioner.setCUData( cu );
       cu.slice            = tempCS->slice;
-#if HEVC_TILES_WPP
-      cu.tileIdx          = tempCS->picture->tileMap->getTileIdxMap( tempCS->area.lumaPos() );
-#endif
+      cu.tileIdx          = tempCS->pps->getTileIdx( tempCS->area.lumaPos() );
       cu.skip             = false;
       cu.mmvdSkip = false;
       cu.triangle         = false;
     //cu.affine
       cu.predMode         = MODE_INTER;
     //cu.LICFlag
-      cu.transQuantBypass = encTestMode.lossless;
-      cu.chromaQpAdj      = cu.transQuantBypass ? 0 : m_cuChromaQpOffsetIdxPlus1;
+      cu.chromaQpAdj      = m_cuChromaQpOffsetIdxPlus1;
       cu.qp               = encTestMode.qp;
       PredictionUnit &pu  = tempCS->addPU( cu, partitioner.chType );
 
-      if (uiNoResidualPass == 0 && uiMergeCand >= (MRG_MAX_NUM_CANDS + MMVD_ADD_NUM))
+      if (uiNoResidualPass == 0 && RdModeList[uiMrgHADIdx].isCIIP)
       {
-        uiMergeCand -= (MRG_MAX_NUM_CANDS + MMVD_ADD_NUM);
         cu.mmvdSkip = false;
         mergeCtx.setMergeInfo(pu, uiMergeCand);
-        pu.mhIntraFlag = true;
-        pu.intraDir[0] = RdModeList2[uiMrgHADIdx];
+        pu.ciipFlag = true;
+        pu.regularMergeFlag = false;
+        pu.intraDir[0] = PLANAR_IDX;
         CHECK(pu.intraDir[0]<0 || pu.intraDir[0]>(NUM_LUMA_MODE - 1), "out of intra mode");
         pu.intraDir[1] = DM_CHROMA_IDX;
       }
-
-      else if (uiMergeCand >= mergeCtx.numValidMergeCand && uiMergeCand < MRG_MAX_NUM_CANDS + MMVD_ADD_NUM)
+      else if (RdModeList[uiMrgHADIdx].isMMVD)
       {
         cu.mmvdSkip = true;
-        mergeCtx.setMmvdMergeCandiInfo(pu, uiMergeCand - mergeCtx.numValidMergeCand);
+        pu.regularMergeFlag = true;
+        mergeCtx.setMmvdMergeCandiInfo(pu, uiMergeCand);
       }
       else
       {
         cu.mmvdSkip = false;
+        pu.regularMergeFlag = true;
         mergeCtx.setMergeInfo(pu, uiMergeCand);
       }
       PU::spanMotionInfo( pu, mergeCtx );
@@ -2172,7 +2610,7 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *&
         if( ( isDMVR && MCTSHelper::isRefBlockAtRestrictedTileBoundary( pu ) ) || ( !isDMVR && !( MCTSHelper::checkMvBufferForMCTSConstraint( pu ) ) ) )
         {
           // Do not use this mode
-          tempCS->initStructData( encTestMode.qp, encTestMode.lossless );
+          tempCS->initStructData( encTestMode.qp );
           continue;
         }
       }
@@ -2194,31 +2632,41 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *&
             }
           }
         }
-        if (pu.mhIntraFlag)
+        if (pu.ciipFlag)
         {
-          uint32_t bufIdx = (pu.intraDir[0] > 1) ? (pu.intraDir[0] == HOR_IDX ? 2 : 3) : pu.intraDir[0];
+          uint32_t bufIdx = 0;
           PelBuf tmpBuf = tempCS->getPredBuf(pu).Y();
-          tmpBuf.copyFrom(acMergeBuffer[uiMergeCand].Y());
-          if (pu.cs->slice->getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag())
+          tmpBuf.copyFrom(acMergeTmpBuffer[uiMergeCand].Y());
+          if (pu.cs->picHeader->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag())
           {
             tmpBuf.rspSignal(m_pcReshape->getFwdLUT());
           }
           m_pcIntraSearch->geneWeightedPred(COMPONENT_Y, tmpBuf, pu, m_pcIntraSearch->getPredictorPtr2(COMPONENT_Y, bufIdx));
+          if (pu.chromaSize().width > 2)
+          {
           tmpBuf = tempCS->getPredBuf(pu).Cb();
-          tmpBuf.copyFrom(acMergeBuffer[uiMergeCand].Cb());
+          tmpBuf.copyFrom(acMergeTmpBuffer[uiMergeCand].Cb());
           m_pcIntraSearch->geneWeightedPred(COMPONENT_Cb, tmpBuf, pu, m_pcIntraSearch->getPredictorPtr2(COMPONENT_Cb, bufIdx));
           tmpBuf = tempCS->getPredBuf(pu).Cr();
-          tmpBuf.copyFrom(acMergeBuffer[uiMergeCand].Cr());
+          tmpBuf.copyFrom(acMergeTmpBuffer[uiMergeCand].Cr());
           m_pcIntraSearch->geneWeightedPred(COMPONENT_Cr, tmpBuf, pu, m_pcIntraSearch->getPredictorPtr2(COMPONENT_Cr, bufIdx));
+          }
+          else
+          {
+            tmpBuf = tempCS->getPredBuf(pu).Cb();
+            tmpBuf.copyFrom(acMergeTmpBuffer[uiMergeCand].Cb());
+            tmpBuf = tempCS->getPredBuf(pu).Cr();
+            tmpBuf.copyFrom(acMergeTmpBuffer[uiMergeCand].Cr());
+          }
         }
         else
         {
-          if (uiMergeCand >= mergeCtx.numValidMergeCand && uiMergeCand < MRG_MAX_NUM_CANDS + MMVD_ADD_NUM) {
+          if (RdModeList[uiMrgHADIdx].isMMVD)
+          {
             pu.mmvdEncOptMode = 0;
             m_pcInterSearch->motionCompensation(pu);
           }
-          else
-          if (uiNoResidualPass != 0 && uiMergeCand < mergeCtx.numValidMergeCand && RdModeList[uiMrgHADIdx] >= (MRG_MAX_NUM_CANDS + MMVD_ADD_NUM))
+          else if (uiNoResidualPass != 0 && RdModeList[uiMrgHADIdx].isCIIP)
           {
             tempCS->getPredBuf().copyFrom(acMergeBuffer[uiMergeCand]);
           }
@@ -2234,7 +2682,7 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *&
         m_pcInterSearch->motionCompensation( pu );
         pu.mvRefine = false;
       }
-      if (!cu.mmvdSkip && !pu.mhIntraFlag && uiNoResidualPass != 0)
+      if (!cu.mmvdSkip && !pu.ciipFlag && uiNoResidualPass != 0)
       {
         CHECK(uiMergeCand >= mergeCtx.numValidMergeCand, "out of normal merge");
         isTestSkipMerge[uiMergeCand] = true;
@@ -2242,11 +2690,11 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *&
 
       xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, uiNoResidualPass, uiNoResidualPass == 0 ? &candHasNoResidual[uiMrgHADIdx] : NULL );
 
-      if( m_pcEncCfg->getUseFastDecisionForMerge() && !bestIsSkip && !pu.mhIntraFlag)
+      if( m_pcEncCfg->getUseFastDecisionForMerge() && !bestIsSkip && !pu.ciipFlag)
       {
         bestIsSkip = !bestCS->cus.empty() && bestCS->getCU( partitioner.chType )->rootCbf == 0;
       }
-      tempCS->initStructData( encTestMode.qp, encTestMode.lossless );
+      tempCS->initStructData( encTestMode.qp );
     }// end loop uiMrgHADIdx
 
     if( uiNoResidualPass == 0 && m_pcEncCfg->getUseEarlySkipDetection() )
@@ -2291,9 +2739,12 @@ void EncCu::xCheckRDCostMergeTriangle2Nx2N( CodingStructure *&tempCS, CodingStru
   const Slice &slice = *tempCS->slice;
   const SPS &sps = *tempCS->sps;
 
+  if (slice.getPicHeader()->getMaxNumTriangleCand() < 2)
+    return;
+
   CHECK( slice.getSliceType() != B_SLICE, "Triangle mode is only applied to B-slices" );
 
-  tempCS->initStructData( encTestMode.qp, encTestMode.lossless );
+  tempCS->initStructData( encTestMode.qp );
 
   bool trianglecandHasNoResidual[TRIANGLE_MAX_NUM_CANDS];
   for( int mergeCand = 0; mergeCand < TRIANGLE_MAX_NUM_CANDS; mergeCand++ )
@@ -2301,31 +2752,23 @@ void EncCu::xCheckRDCostMergeTriangle2Nx2N( CodingStructure *&tempCS, CodingStru
     trianglecandHasNoResidual[mergeCand] = false;
   }
 
-  bool bestIsSkip;
-  CodingUnit* cuTemp = bestCS->getCU(partitioner.chType);
-  if (cuTemp)
-    bestIsSkip = m_pcEncCfg->getUseFastDecisionForMerge() ? bestCS->getCU(partitioner.chType)->rootCbf == 0 : false;
-  else
-    bestIsSkip = false;
+  bool bestIsSkip = false;
   uint8_t                                         numTriangleCandidate   = TRIANGLE_MAX_NUM_CANDS;
   uint8_t                                         triangleNumMrgSATDCand = TRIANGLE_MAX_NUM_SATD_CANDS;
   PelUnitBuf                                      triangleBuffer[TRIANGLE_MAX_NUM_UNI_CANDS];
   PelUnitBuf                                      triangleWeightedBuffer[TRIANGLE_MAX_NUM_CANDS];
   static_vector<uint8_t, TRIANGLE_MAX_NUM_CANDS> triangleRdModeList;
   static_vector<double,  TRIANGLE_MAX_NUM_CANDS> tianglecandCostList;
+  uint8_t                                         numTriangleCandComb = slice.getPicHeader()->getMaxNumTriangleCand() * (slice.getPicHeader()->getMaxNumTriangleCand() - 1) * 2;
 
-  if( auto blkCache = dynamic_cast< CacheBlkInfoCtrl* >( m_modeCtrl ) )
-  {
-    bestIsSkip |= blkCache->isSkip( tempCS->area );
-  }
 
   DistParam distParam;
-  const bool useHadamard = !encTestMode.lossless;
+  const bool useHadamard = !tempCS->slice->getDisableSATDForRD();
   m_pcRdCost->setDistParam( distParam, tempCS->getOrgBuf().Y(), m_acMergeBuffer[0].Y(), sps.getBitDepth( CHANNEL_TYPE_LUMA ), COMPONENT_Y, useHadamard );
 
   const UnitArea localUnitArea( tempCS->area.chromaFormat, Area( 0, 0, tempCS->area.Y().width, tempCS->area.Y().height) );
 
-  const double sqrtLambdaForFirstPass = m_pcRdCost->getMotionLambda(encTestMode.lossless);
+  const double sqrtLambdaForFirstPass = m_pcRdCost->getMotionLambda( );
 
   MergeCtx triangleMrgCtx;
   {
@@ -2333,20 +2776,19 @@ void EncCu::xCheckRDCostMergeTriangle2Nx2N( CodingStructure *&tempCS, CodingStru
     cu.cs       = tempCS;
     cu.predMode = MODE_INTER;
     cu.slice    = tempCS->slice;
-#if HEVC_TILES_WPP
-    cu.tileIdx          = tempCS->picture->tileMap->getTileIdxMap( tempCS->area.lumaPos() );
-#endif
+    cu.tileIdx          = tempCS->pps->getTileIdx( tempCS->area.lumaPos() );
     cu.triangle = true;
     cu.mmvdSkip = false;
-    cu.GBiIdx   = GBI_DEFAULT;
+    cu.BcwIdx   = BCW_DEFAULT;
 
     PredictionUnit pu( tempCS->area );
     pu.cu = &cu;
     pu.cs = tempCS;
-
+    pu.regularMergeFlag = false;
 
     PU::getTriangleMergeCandidates( pu, triangleMrgCtx );
-    for( uint8_t mergeCand = 0; mergeCand < TRIANGLE_MAX_NUM_UNI_CANDS; mergeCand++ )
+    const uint8_t maxNumTriangleCand = pu.cs->picHeader->getMaxNumTriangleCand();
+    for (uint8_t mergeCand = 0; mergeCand < maxNumTriangleCand; mergeCand++)
     {
       triangleBuffer[mergeCand] = m_acMergeBuffer[mergeCand].getBuf(localUnitArea);
       triangleMrgCtx.setMergeInfo( pu, mergeCand );
@@ -2355,43 +2797,31 @@ void EncCu::xCheckRDCostMergeTriangle2Nx2N( CodingStructure *&tempCS, CodingStru
       if( m_pcEncCfg->getMCTSEncConstraint() && ( !( MCTSHelper::checkMvBufferForMCTSConstraint( pu ) ) ) )
       {
         // Do not use this mode
-        tempCS->initStructData( encTestMode.qp, encTestMode.lossless );
+        tempCS->initStructData( encTestMode.qp );
         return;
       }
       m_pcInterSearch->motionCompensation( pu, triangleBuffer[mergeCand] );
     }
   }
 
-  bool tempBufSet = bestIsSkip ? false : true;
-  triangleNumMrgSATDCand = bestIsSkip ? TRIANGLE_MAX_NUM_CANDS : TRIANGLE_MAX_NUM_SATD_CANDS;
-  if( bestIsSkip )
-  {
-    for( uint8_t i = 0; i < TRIANGLE_MAX_NUM_CANDS; i++ )
-    {
-      triangleRdModeList.push_back(i);
-    }
-  }
-  else
+  triangleNumMrgSATDCand = min(triangleNumMrgSATDCand, numTriangleCandComb);
   {
     CodingUnit &cu      = tempCS->addCU( tempCS->area, partitioner.chType );
 
     partitioner.setCUData( cu );
     cu.slice            = tempCS->slice;
-#if HEVC_TILES_WPP
-    cu.tileIdx          = tempCS->picture->tileMap->getTileIdxMap( tempCS->area.lumaPos() );
-#endif
+    cu.tileIdx          = tempCS->pps->getTileIdx( tempCS->area.lumaPos() );
     cu.skip             = false;
     cu.predMode         = MODE_INTER;
-    cu.transQuantBypass = encTestMode.lossless;
-    cu.chromaQpAdj      = cu.transQuantBypass ? 0 : m_cuChromaQpOffsetIdxPlus1;
+    cu.chromaQpAdj      = m_cuChromaQpOffsetIdxPlus1;
     cu.qp               = encTestMode.qp;
     cu.triangle         = true;
     cu.mmvdSkip         = false;
-    cu.GBiIdx           = GBI_DEFAULT;
+    cu.BcwIdx           = BCW_DEFAULT;
 
     PredictionUnit &pu  = tempCS->addPU( cu, partitioner.chType );
 
-    if( abs(g_aucLog2[cu.lwidth()] - g_aucLog2[cu.lheight()]) >= 2 )
+    if( abs(floorLog2(cu.lwidth()) - floorLog2(cu.lheight())) >= 2 )
     {
       numTriangleCandidate = 30;
     }
@@ -2400,6 +2830,8 @@ void EncCu::xCheckRDCostMergeTriangle2Nx2N( CodingStructure *&tempCS, CodingStru
       numTriangleCandidate = TRIANGLE_MAX_NUM_CANDS;
     }
 
+    numTriangleCandidate = min(numTriangleCandidate, numTriangleCandComb);
+
     for( uint8_t mergeCand = 0; mergeCand < numTriangleCandidate; mergeCand++ )
     {
       bool    splitDir = m_triangleModeTest[mergeCand].m_splitDir;
@@ -2410,6 +2842,7 @@ void EncCu::xCheckRDCostMergeTriangle2Nx2N( CodingStructure *&tempCS, CodingStru
       pu.triangleMergeIdx0 = candIdx0;
       pu.triangleMergeIdx1 = candIdx1;
       pu.mergeFlag = true;
+      pu.regularMergeFlag = false;
       triangleWeightedBuffer[mergeCand] = m_acTriangleWeightedBuffer[mergeCand].getBuf( localUnitArea );
       triangleBuffer[candIdx0] = m_acMergeBuffer[candIdx0].getBuf( localUnitArea );
       triangleBuffer[candIdx1] = m_acMergeBuffer[candIdx1].getBuf( localUnitArea );
@@ -2423,9 +2856,7 @@ void EncCu::xCheckRDCostMergeTriangle2Nx2N( CodingStructure *&tempCS, CodingStru
 
       double cost = (double)uiSad + (double)uiBitsCand * sqrtLambdaForFirstPass;
 
-      static_vector<int, TRIANGLE_MAX_NUM_CANDS> * nullList = nullptr;
       updateCandList( mergeCand, cost, triangleRdModeList, tianglecandCostList
-        , *nullList, -1
         , triangleNumMrgSATDCand );
     }
 
@@ -2451,25 +2882,20 @@ void EncCu::xCheckRDCostMergeTriangle2Nx2N( CodingStructure *&tempCS, CodingStru
       pu.triangleMergeIdx0 = candIdx0;
       pu.triangleMergeIdx1 = candIdx1;
       pu.mergeFlag = true;
-
+      pu.regularMergeFlag = false;
       m_pcInterSearch->weightedTriangleBlk( pu, splitDir, CHANNEL_TYPE_CHROMA, triangleWeightedBuffer[mergeCand], triangleBuffer[candIdx0], triangleBuffer[candIdx1] );
     }
 
-    tempCS->initStructData( encTestMode.qp, encTestMode.lossless );
+    tempCS->initStructData( encTestMode.qp );
   }
+
+  triangleNumMrgSATDCand = min(triangleNumMrgSATDCand, (uint8_t)triangleRdModeList.size());
+
   m_bestModeUpdated = tempCS->useDbCost = bestCS->useDbCost = false;
   {
     uint8_t iteration;
-    uint8_t iterationBegin = m_modeCtrl->getIsHashPerfectMatch() ? 1 : 0;
-    if (encTestMode.lossless)
-    {
-      iteration = 1;
-      iterationBegin = 0;
-    }
-    else
-    {
-      iteration = 2;
-    }
+    uint8_t iterationBegin = 0;
+    iteration = 2;
     for (uint8_t noResidualPass = iterationBegin; noResidualPass < iteration; ++noResidualPass)
     {
       for( uint8_t mrgHADIdx = 0; mrgHADIdx < triangleNumMrgSATDCand; mrgHADIdx++ )
@@ -2490,51 +2916,37 @@ void EncCu::xCheckRDCostMergeTriangle2Nx2N( CodingStructure *&tempCS, CodingStru
 
         partitioner.setCUData(cu);
         cu.slice = tempCS->slice;
-#if HEVC_TILES_WPP
-        cu.tileIdx          = tempCS->picture->tileMap->getTileIdxMap( tempCS->area.lumaPos() );
-#endif
+        cu.tileIdx          = tempCS->pps->getTileIdx( tempCS->area.lumaPos() );
         cu.skip = false;
         cu.predMode = MODE_INTER;
-        cu.transQuantBypass = encTestMode.lossless;
-        cu.chromaQpAdj = cu.transQuantBypass ? 0 : m_cuChromaQpOffsetIdxPlus1;
+        cu.chromaQpAdj = m_cuChromaQpOffsetIdxPlus1;
         cu.qp = encTestMode.qp;
         cu.triangle = true;
         cu.mmvdSkip = false;
-        cu.GBiIdx   = GBI_DEFAULT;
+        cu.BcwIdx   = BCW_DEFAULT;
         PredictionUnit &pu = tempCS->addPU(cu, partitioner.chType);
 
         pu.triangleSplitDir = splitDir;
         pu.triangleMergeIdx0 = candIdx0;
         pu.triangleMergeIdx1 = candIdx1;
         pu.mergeFlag = true;
-
+        pu.regularMergeFlag = false;
         PU::spanTriangleMotionInfo(pu, triangleMrgCtx, splitDir, candIdx0, candIdx1 );
 
         if( m_pcEncCfg->getMCTSEncConstraint() && ( !( MCTSHelper::checkMvBufferForMCTSConstraint( *cu.firstPU ) ) ) )
         {
           // Do not use this mode
-          tempCS->initStructData( encTestMode.qp, encTestMode.lossless );
+          tempCS->initStructData( encTestMode.qp );
           return;
         }
-        if( tempBufSet )
-        {
-          tempCS->getPredBuf().copyFrom( triangleWeightedBuffer[mergeCand] );
-        }
-        else
-        {
-          triangleBuffer[candIdx0] = m_acMergeBuffer[candIdx0].getBuf( localUnitArea );
-          triangleBuffer[candIdx1] = m_acMergeBuffer[candIdx1].getBuf( localUnitArea );
-          PelUnitBuf predBuf         = tempCS->getPredBuf();
-          m_pcInterSearch->weightedTriangleBlk( pu, splitDir, MAX_NUM_CHANNEL_TYPE, predBuf, triangleBuffer[candIdx0], triangleBuffer[candIdx1] );
-        }
-
+        tempCS->getPredBuf().copyFrom( triangleWeightedBuffer[mergeCand] );
         xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, noResidualPass, ( noResidualPass == 0 ? &trianglecandHasNoResidual[mergeCand] : NULL ) );
 
         if (m_pcEncCfg->getUseFastDecisionForMerge() && !bestIsSkip)
         {
           bestIsSkip = bestCS->getCU(partitioner.chType)->rootCbf == 0;
         }
-        tempCS->initStructData(encTestMode.qp, encTestMode.lossless);
+        tempCS->initStructData(encTestMode.qp);
       }// end loop mrgHADIdx
     }
   }
@@ -2560,7 +2972,7 @@ void EncCu::xCheckRDCostAffineMerge2Nx2N( CodingStructure *&tempCS, CodingStruct
 
   CHECK( slice.getSliceType() == I_SLICE, "Affine Merge modes not available for I-slices" );
 
-  tempCS->initStructData( encTestMode.qp, encTestMode.lossless );
+  tempCS->initStructData( encTestMode.qp );
 
   AffineMergeCtx affineMergeCtx;
   const SPS &sps = *tempCS->sps;
@@ -2579,15 +2991,13 @@ void EncCu::xCheckRDCostAffineMerge2Nx2N( CodingStructure *&tempCS, CodingStruct
     cu.cs = tempCS;
     cu.predMode = MODE_INTER;
     cu.slice = tempCS->slice;
-#if HEVC_TILES_WPP
-    cu.tileIdx = tempCS->picture->tileMap->getTileIdxMap( tempCS->area.lumaPos() );
-#endif
+    cu.tileIdx          = tempCS->pps->getTileIdx( tempCS->area.lumaPos() );
     cu.mmvdSkip = false;
 
     PredictionUnit pu( tempCS->area );
     pu.cu = &cu;
     pu.cs = tempCS;
-
+    pu.regularMergeFlag = false;
     PU::getAffineMergeCand( pu, affineMergeCtx );
 
     if ( affineMergeCtx.numValidMergeCand <= 0 )
@@ -2630,26 +3040,23 @@ void EncCu::xCheckRDCostAffineMerge2Nx2N( CodingStructure *&tempCS, CodingStruct
     {
       RdModeList.clear();
       mrgTempBufSet = true;
-      const double sqrtLambdaForFirstPass = m_pcRdCost->getMotionLambda( encTestMode.lossless );
+      const double sqrtLambdaForFirstPass = m_pcRdCost->getMotionLambda( );
 
       CodingUnit &cu = tempCS->addCU( tempCS->area, partitioner.chType );
 
       partitioner.setCUData( cu );
       cu.slice = tempCS->slice;
-#if HEVC_TILES_WPP
-      cu.tileIdx = tempCS->picture->tileMap->getTileIdxMap( tempCS->area.lumaPos() );
-#endif
+      cu.tileIdx          = tempCS->pps->getTileIdx( tempCS->area.lumaPos() );
       cu.skip = false;
       cu.affine = true;
       cu.predMode = MODE_INTER;
-      cu.transQuantBypass = encTestMode.lossless;
-      cu.chromaQpAdj = cu.transQuantBypass ? 0 : m_cuChromaQpOffsetIdxPlus1;
+      cu.chromaQpAdj = m_cuChromaQpOffsetIdxPlus1;
       cu.qp = encTestMode.qp;
 
       PredictionUnit &pu = tempCS->addPU( cu, partitioner.chType );
 
       DistParam distParam;
-      const bool bUseHadamard = !encTestMode.lossless;
+      const bool bUseHadamard = !tempCS->slice->getDisableSATDForRD();
       m_pcRdCost->setDistParam( distParam, tempCS->getOrgBuf().Y(), m_acMergeBuffer[0].Y(), sps.getBitDepth( CHANNEL_TYPE_LUMA ), COMPONENT_Y, bUseHadamard );
 
       const UnitArea localUnitArea( tempCS->area.chromaFormat, Area( 0, 0, tempCS->area.Y().width, tempCS->area.Y().height ) );
@@ -2661,9 +3068,10 @@ void EncCu::xCheckRDCostAffineMerge2Nx2N( CodingStructure *&tempCS, CodingStruct
         // set merge information
         pu.interDir = affineMergeCtx.interDirNeighbours[uiMergeCand];
         pu.mergeFlag = true;
+        pu.regularMergeFlag = false;
         pu.mergeIdx = uiMergeCand;
         cu.affineType = affineMergeCtx.affineType[uiMergeCand];
-        cu.GBiIdx = affineMergeCtx.GBiIdx[uiMergeCand];
+        cu.BcwIdx = affineMergeCtx.BcwIdx[uiMergeCand];
 
         pu.mergeType = affineMergeCtx.mergeType[uiMergeCand];
         if ( pu.mergeType == MRG_TYPE_SUBPU_ATMVP )
@@ -2682,18 +3090,16 @@ void EncCu::xCheckRDCostAffineMerge2Nx2N( CodingStructure *&tempCS, CodingStruct
 
         distParam.cur = acMergeBuffer[uiMergeCand].Y();
 
-        m_pcInterSearch->motionCompensation( pu, acMergeBuffer[uiMergeCand] );
+        m_pcInterSearch->motionCompensation( pu, acMergeBuffer[uiMergeCand], REF_PIC_LIST_X, true, false );
 
         Distortion uiSad = distParam.distFunc( distParam );
         uint32_t   uiBitsCand = uiMergeCand + 1;
-        if ( uiMergeCand == tempCS->slice->getMaxNumAffineMergeCand() - 1 )
+        if ( uiMergeCand == tempCS->picHeader->getMaxNumAffineMergeCand() - 1 )
         {
           uiBitsCand--;
         }
         double cost = (double)uiSad + (double)uiBitsCand * sqrtLambdaForFirstPass;
-        static_vector<int, AFFINE_MRG_MAX_NUM_CANDS> emptyList;
         updateCandList( uiMergeCand, cost, RdModeList, candCostList
-          , emptyList, -1
           , uiNumMrgSATDCand );
 
         CHECK( std::min( uiMergeCand + 1, uiNumMrgSATDCand ) != RdModeList.size(), "" );
@@ -2709,7 +3115,7 @@ void EncCu::xCheckRDCostAffineMerge2Nx2N( CodingStructure *&tempCS, CodingStruct
         }
       }
 
-      tempCS->initStructData( encTestMode.qp, encTestMode.lossless );
+      tempCS->initStructData( encTestMode.qp );
     }
     else
     {
@@ -2718,16 +3124,8 @@ void EncCu::xCheckRDCostAffineMerge2Nx2N( CodingStructure *&tempCS, CodingStruct
   }
 
   uint32_t iteration;
-  uint32_t iterationBegin = m_modeCtrl->getIsHashPerfectMatch() ? 1 : 0;
-  if (encTestMode.lossless)
-  {
-    iteration = 1;
-    iterationBegin = 0;
-  }
-  else
-  {
-    iteration = 2;
-  }
+  uint32_t iterationBegin = 0;
+  iteration = 2;
   for (uint32_t uiNoResidualPass = iterationBegin; uiNoResidualPass < iteration; ++uiNoResidualPass)
   {
     for ( uint32_t uiMrgHADIdx = 0; uiMrgHADIdx < uiNumMrgSATDCand; uiMrgHADIdx++ )
@@ -2745,14 +3143,11 @@ void EncCu::xCheckRDCostAffineMerge2Nx2N( CodingStructure *&tempCS, CodingStruct
 
       partitioner.setCUData( cu );
       cu.slice = tempCS->slice;
-#if HEVC_TILES_WPP
-      cu.tileIdx = tempCS->picture->tileMap->getTileIdxMap( tempCS->area.lumaPos() );
-#endif
+      cu.tileIdx          = tempCS->pps->getTileIdx( tempCS->area.lumaPos() );
       cu.skip = false;
       cu.affine = true;
       cu.predMode = MODE_INTER;
-      cu.transQuantBypass = encTestMode.lossless;
-      cu.chromaQpAdj = cu.transQuantBypass ? 0 : m_cuChromaQpOffsetIdxPlus1;
+      cu.chromaQpAdj = m_cuChromaQpOffsetIdxPlus1;
       cu.qp = encTestMode.qp;
       PredictionUnit &pu = tempCS->addPU( cu, partitioner.chType );
 
@@ -2761,7 +3156,7 @@ void EncCu::xCheckRDCostAffineMerge2Nx2N( CodingStructure *&tempCS, CodingStruct
       pu.mergeIdx = uiMergeCand;
       pu.interDir = affineMergeCtx.interDirNeighbours[uiMergeCand];
       cu.affineType = affineMergeCtx.affineType[uiMergeCand];
-      cu.GBiIdx = affineMergeCtx.GBiIdx[uiMergeCand];
+      cu.BcwIdx = affineMergeCtx.BcwIdx[uiMergeCand];
 
       pu.mergeType = affineMergeCtx.mergeType[uiMergeCand];
       if ( pu.mergeType == MRG_TYPE_SUBPU_ATMVP )
@@ -2781,12 +3176,13 @@ void EncCu::xCheckRDCostAffineMerge2Nx2N( CodingStructure *&tempCS, CodingStruct
       if( m_pcEncCfg->getMCTSEncConstraint() && ( !( MCTSHelper::checkMvBufferForMCTSConstraint( *cu.firstPU ) ) ) )
       {
         // Do not use this mode
-        tempCS->initStructData( encTestMode.qp, encTestMode.lossless );
+        tempCS->initStructData( encTestMode.qp );
         return;
       }
       if ( mrgTempBufSet )
       {
-        tempCS->getPredBuf().copyFrom( acMergeBuffer[uiMergeCand] );
+        tempCS->getPredBuf().copyFrom(acMergeBuffer[uiMergeCand], true, false);   // Copy Luma Only
+        m_pcInterSearch->motionCompensation(pu, REF_PIC_LIST_X, false, true);
       }
       else
       {
@@ -2799,7 +3195,7 @@ void EncCu::xCheckRDCostAffineMerge2Nx2N( CodingStructure *&tempCS, CodingStruct
       {
         bestIsSkip = bestCS->getCU( partitioner.chType )->rootCbf == 0;
       }
-      tempCS->initStructData( encTestMode.qp, encTestMode.lossless );
+      tempCS->initStructData( encTestMode.qp );
     }// end loop uiMrgHADIdx
 
     if ( uiNoResidualPass == 0 && m_pcEncCfg->getUseEarlySkipDetection() )
@@ -2842,15 +3238,14 @@ void EncCu::xCheckRDCostAffineMerge2Nx2N( CodingStructure *&tempCS, CodingStruct
 // ibc merge/skip mode check
 void EncCu::xCheckRDCostIBCModeMerge2Nx2N(CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode)
 {
-  assert(tempCS->chType != CHANNEL_TYPE_CHROMA); // chroma IBC is derived
-
-  if (tempCS->area.lwidth() > IBC_MAX_CAND_SIZE || tempCS->area.lheight() > IBC_MAX_CAND_SIZE) // currently only check 32x32 and below block for ibc merge/skip
+  assert(partitioner.chType != CHANNEL_TYPE_CHROMA); // chroma IBC is derived
+  if (tempCS->area.lwidth() == 128 || tempCS->area.lheight() == 128) // disable IBC mode larger than 64x64
   {
     return;
   }
   const SPS &sps = *tempCS->sps;
 
-  tempCS->initStructData(encTestMode.qp, encTestMode.lossless);
+  tempCS->initStructData(encTestMode.qp);
   MergeCtx mergeCtx;
 
 
@@ -2866,17 +3261,14 @@ void EncCu::xCheckRDCostIBCModeMerge2Nx2N(CodingStructure *&tempCS, CodingStruct
     cu.cs = tempCS;
     cu.predMode = MODE_IBC;
     cu.slice = tempCS->slice;
-#if HEVC_TILES_WPP
-    cu.tileIdx = tempCS->picture->tileMap->getTileIdxMap(tempCS->area.lumaPos());
-#endif
+    cu.tileIdx          = tempCS->pps->getTileIdx( tempCS->area.lumaPos() );
     PredictionUnit pu(tempCS->area);
     pu.cu = &cu;
     pu.cs = tempCS;
     cu.mmvdSkip = false;
     pu.mmvdMergeFlag = false;
+    pu.regularMergeFlag = false;
     cu.triangle = false;
-    pu.shareParentPos = tempCS->sharedBndPos;
-    pu.shareParentSize = tempCS->sharedBndSize;
     PU::getIBCMergeCandidates(pu, mergeCtx);
   }
 
@@ -2898,30 +3290,28 @@ void EncCu::xCheckRDCostIBCModeMerge2Nx2N(CodingStructure *&tempCS, CodingStruct
     static_vector<double, MRG_MAX_NUM_CANDS>  candCostList(MRG_MAX_NUM_CANDS, MAX_DOUBLE);
     // 1. Pass: get SATD-cost for selected candidates and reduce their count
     {
-      const double sqrtLambdaForFirstPass = m_pcRdCost->getMotionLambda(encTestMode.lossless);
+      const double sqrtLambdaForFirstPass = m_pcRdCost->getMotionLambda( );
 
       CodingUnit &cu = tempCS->addCU(CS::getArea(*tempCS, tempCS->area, (const ChannelType)partitioner.chType), (const ChannelType)partitioner.chType);
 
       partitioner.setCUData(cu);
       cu.slice = tempCS->slice;
-#if HEVC_TILES_WPP
-      cu.tileIdx = tempCS->picture->tileMap->getTileIdxMap(tempCS->area.lumaPos());
-#endif
+      cu.tileIdx = tempCS->pps->getTileIdx( tempCS->area.lumaPos() );
       cu.skip = false;
       cu.predMode = MODE_IBC;
-      cu.transQuantBypass = encTestMode.lossless;
-      cu.chromaQpAdj = cu.transQuantBypass ? 0 : m_cuChromaQpOffsetIdxPlus1;
+      cu.chromaQpAdj = m_cuChromaQpOffsetIdxPlus1;
       cu.qp = encTestMode.qp;
       cu.mmvdSkip = false;
       cu.triangle = false;
       DistParam distParam;
-      const bool bUseHadamard = !encTestMode.lossless;
+      const bool bUseHadamard = !cu.slice->getDisableSATDForRD();
       PredictionUnit &pu = tempCS->addPU(cu, partitioner.chType); //tempCS->addPU(cu);
       pu.mmvdMergeFlag = false;
+      pu.regularMergeFlag = false;
       Picture* refPic = pu.cu->slice->getPic();
       const CPelBuf refBuf = refPic->getRecoBuf(pu.blocks[COMPONENT_Y]);
       const Pel*        piRefSrch = refBuf.buf;
-      if (tempCS->slice->getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag())
+      if (tempCS->picHeader->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag() )
       {
         const CompArea &area = cu.blocks[COMPONENT_Y];
         CompArea    tmpArea(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size());
@@ -2942,13 +3332,13 @@ void EncCu::xCheckRDCostIBCModeMerge2Nx2N(CodingStructure *&tempCS, CodingStruct
         const int cuPelY = pu.Y().y;
         int roiWidth = pu.lwidth();
         int roiHeight = pu.lheight();
-        const int picWidth = pu.cs->slice->getSPS()->getPicWidthInLumaSamples();
-        const int picHeight = pu.cs->slice->getSPS()->getPicHeightInLumaSamples();
+        const int picWidth = pu.cs->slice->getPPS()->getPicWidthInLumaSamples();
+        const int picHeight = pu.cs->slice->getPPS()->getPicHeightInLumaSamples();
         const unsigned int  lcuWidth = pu.cs->slice->getSPS()->getMaxCUWidth();
         int xPred = pu.bv.getHor();
         int yPred = pu.bv.getVer();
 
-        if (!PU::isBlockVectorValid(pu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, 0, 0, xPred, yPred, lcuWidth)) // not valid bv derived
+        if (!m_pcInterSearch->searchBv(pu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, xPred, yPred, lcuWidth)) // not valid bv derived
         {
           numValidBv--;
           continue;
@@ -2959,15 +3349,13 @@ void EncCu::xCheckRDCostIBCModeMerge2Nx2N(CodingStructure *&tempCS, CodingStruct
 
         Distortion sad = distParam.distFunc(distParam);
         unsigned int bitsCand = mergeCand + 1;
-        if (mergeCand == tempCS->slice->getMaxNumMergeCand() - 1)
+        if (mergeCand == tempCS->picHeader->getMaxNumMergeCand() - 1)
         {
           bitsCand--;
         }
         double cost = (double)sad + (double)bitsCand * sqrtLambdaForFirstPass;
-        static_vector<int, MRG_MAX_NUM_CANDS> * nullList = nullptr;
 
         updateCandList(mergeCand, cost, RdModeList, candCostList
-          , *nullList, -1
          , numMrgSATDCand);
       }
 
@@ -2990,16 +3378,16 @@ void EncCu::xCheckRDCostIBCModeMerge2Nx2N(CodingStructure *&tempCS, CodingStruct
         tempCS->fracBits = 0;
         tempCS->cost = MAX_DOUBLE;
         tempCS->costDbOffset = 0;
-        tempCS->initStructData(encTestMode.qp, encTestMode.lossless);
+        tempCS->initStructData(encTestMode.qp);
         return;
       }
 
-      tempCS->initStructData(encTestMode.qp, encTestMode.lossless);
+      tempCS->initStructData(encTestMode.qp);
     }
   //}
 
 
-  const unsigned int iteration = encTestMode.lossless ? 1 : 2;
+  const unsigned int iteration = 2;
   m_bestModeUpdated = tempCS->useDbCost = bestCS->useDbCost = false;
   // 2. Pass: check candidates using full RD test
   for (unsigned int numResidualPass = 0; numResidualPass < iteration; numResidualPass++)
@@ -3018,13 +3406,10 @@ void EncCu::xCheckRDCostIBCModeMerge2Nx2N(CodingStructure *&tempCS, CodingStruct
 
             partitioner.setCUData(cu);
             cu.slice = tempCS->slice;
-#if HEVC_TILES_WPP
-            cu.tileIdx = tempCS->picture->tileMap->getTileIdxMap(tempCS->area.lumaPos());
-#endif
+            cu.tileIdx = tempCS->pps->getTileIdx( tempCS->area.lumaPos() );
             cu.skip = false;
             cu.predMode = MODE_IBC;
-            cu.transQuantBypass = encTestMode.lossless;
-            cu.chromaQpAdj = cu.transQuantBypass ? 0 : m_cuChromaQpOffsetIdxPlus1;
+            cu.chromaQpAdj = m_cuChromaQpOffsetIdxPlus1;
             cu.qp = encTestMode.qp;
             cu.sbtInfo = 0;
 
@@ -3033,18 +3418,24 @@ void EncCu::xCheckRDCostIBCModeMerge2Nx2N(CodingStructure *&tempCS, CodingStruct
             pu.intraDir[1] = PLANAR_IDX; // set intra pred for ibc block
             cu.mmvdSkip = false;
             pu.mmvdMergeFlag = false;
+            pu.regularMergeFlag = false;
             cu.triangle = false;
             mergeCtx.setMergeInfo(pu, mergeCand);
             PU::spanMotionInfo(pu, mergeCtx);
 
             assert(mergeCtx.mrgTypeNeighbours[mergeCand] == MRG_TYPE_IBC); //  should be IBC candidate at this round
-            const bool chroma = !(CS::isDualITree(*tempCS));
+            const bool chroma = !pu.cu->isSepTree();
 
             //  MC
             m_pcInterSearch->motionCompensation(pu,REF_PIC_LIST_0, true, chroma);
             m_CABACEstimator->getCtx() = m_CurrCtx->start;
 
             m_pcInterSearch->encodeResAndCalcRdInterCU(*tempCS, partitioner, (numResidualPass != 0), true, chroma);
+            if (tempCS->slice->getSPS()->getUseColorTrans())
+            {
+              bestCS->tmpColorSpaceCost = tempCS->tmpColorSpaceCost;
+              bestCS->firstColorSpaceSelected = tempCS->firstColorSpaceSelected;
+            }
             xEncodeDontSplit(*tempCS, partitioner);
 
 #if ENABLE_QPA_SUB_CTU
@@ -3061,7 +3452,7 @@ void EncCu::xCheckRDCostIBCModeMerge2Nx2N(CodingStructure *&tempCS, CodingStruct
             DTRACE_MODE_COST(*tempCS, m_pcRdCost->getLambda());
             xCheckBestMode(tempCS, bestCS, partitioner, encTestMode);
 
-            tempCS->initStructData(encTestMode.qp, encTestMode.lossless);
+            tempCS->initStructData(encTestMode.qp);
           }
 
             if (m_pcEncCfg->getUseFastDecisionForMerge() && !bestIsSkip)
@@ -3083,12 +3474,12 @@ void EncCu::xCheckRDCostIBCModeMerge2Nx2N(CodingStructure *&tempCS, CodingStruct
 
 void EncCu::xCheckRDCostIBCMode(CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode)
 {
-  if (tempCS->area.lwidth() > IBC_MAX_CAND_SIZE || tempCS->area.lheight() > IBC_MAX_CAND_SIZE) // currently only check 32x32 and below block for ibc merge/skip
+  if (tempCS->area.lwidth() == 128 || tempCS->area.lheight() == 128) // disable IBC mode larger than 64x64
   {
     return;
   }
 
-    tempCS->initStructData(encTestMode.qp, encTestMode.lossless);
+    tempCS->initStructData(encTestMode.qp);
 
     m_bestModeUpdated = tempCS->useDbCost = bestCS->useDbCost = false;
 
@@ -3096,13 +3487,10 @@ void EncCu::xCheckRDCostIBCMode(CodingStructure *&tempCS, CodingStructure *&best
 
     partitioner.setCUData(cu);
     cu.slice = tempCS->slice;
-#if HEVC_TILES_WPP
-    cu.tileIdx = tempCS->picture->tileMap->getTileIdxMap(tempCS->area.lumaPos());
-#endif
+    cu.tileIdx = tempCS->pps->getTileIdx( tempCS->area.lumaPos() );
     cu.skip = false;
     cu.predMode = MODE_IBC;
-    cu.transQuantBypass = encTestMode.lossless;
-    cu.chromaQpAdj = cu.transQuantBypass ? 0 : m_cuChromaQpOffsetIdxPlus1;
+    cu.chromaQpAdj = m_cuChromaQpOffsetIdxPlus1;
     cu.qp = encTestMode.qp;
     cu.imv = 0;
     cu.sbtInfo = 0;
@@ -3114,27 +3502,30 @@ void EncCu::xCheckRDCostIBCMode(CodingStructure *&tempCS, CodingStructure *&best
     PredictionUnit& pu = *cu.firstPU;
     cu.mmvdSkip = false;
     pu.mmvdMergeFlag = false;
+    pu.regularMergeFlag = false;
 
     pu.intraDir[0] = DC_IDX; // set intra pred for ibc block
     pu.intraDir[1] = PLANAR_IDX; // set intra pred for ibc block
 
     pu.interDir = 1; // use list 0 for IBC mode
     pu.refIdx[REF_PIC_LIST_0] = MAX_NUM_REF; // last idx in the list
-
-    if (partitioner.chType == CHANNEL_TYPE_LUMA)
-    {
       bool bValid = m_pcInterSearch->predIBCSearch(cu, partitioner, m_ctuIbcSearchRangeX, m_ctuIbcSearchRangeY, m_ibcHashMap);
 
       if (bValid)
       {
         PU::spanMotionInfo(pu);
-        const bool chroma = !(CS::isDualITree(*tempCS));
+        const bool chroma = !pu.cu->isSepTree();
         //  MC
         m_pcInterSearch->motionCompensation(pu, REF_PIC_LIST_0, true, chroma);
 
         {
 
           m_pcInterSearch->encodeResAndCalcRdInterCU(*tempCS, partitioner, false, true, chroma);
+          if (tempCS->slice->getSPS()->getUseColorTrans())
+          {
+            bestCS->tmpColorSpaceCost = tempCS->tmpColorSpaceCost;
+            bestCS->firstColorSpaceSelected = tempCS->firstColorSpaceSelected;
+          }
 
           xEncodeDontSplit(*tempCS, partitioner);
 
@@ -3167,116 +3558,49 @@ void EncCu::xCheckRDCostIBCMode(CodingStructure *&tempCS, CodingStructure *&best
         tempCS->cost = MAX_DOUBLE;
         tempCS->costDbOffset = 0;
       }
-    }
- // chroma CU ibc comp
-    else
-    {
-      bool success = true;
-      // chroma tree, reuse luma bv at minimal block level
-      // enabled search only when each chroma sub-block has a BV from its luma sub-block
-      assert(tempCS->getIbcLumaCoverage(pu.Cb()) == IBC_LUMA_COVERAGE_FULL);
-      // check if each BV for the chroma sub-block is valid
-      //static const UInt unitArea = MIN_PU_SIZE * MIN_PU_SIZE;
-      const CompArea lumaArea = CompArea(COMPONENT_Y, pu.chromaFormat, pu.Cb().lumaPos(), recalcSize(pu.chromaFormat, CHANNEL_TYPE_CHROMA, CHANNEL_TYPE_LUMA, pu.Cb().size()));
-      PredictionUnit subPu;
-      subPu.cs = pu.cs;
-      subPu.cu = pu.cu;
-      const ComponentID compID = COMPONENT_Cb; // use Cb to represent both Cb and CR, as their structures are the same
-      int shiftHor = ::getComponentScaleX(compID, pu.chromaFormat);
-      int shiftVer = ::getComponentScaleY(compID, pu.chromaFormat);
-      //const ChromaFormat  chFmt = pu.chromaFormat;
-
-      for (int y = lumaArea.y; y < lumaArea.y + lumaArea.height; y += MIN_PU_SIZE)
-      {
-        for (int x = lumaArea.x; x < lumaArea.x + lumaArea.width; x += MIN_PU_SIZE)
-        {
-          const MotionInfo &curMi = pu.cs->picture->cs->getMotionInfo(Position{ x, y });
-
-          subPu.UnitArea::operator=(UnitArea(pu.chromaFormat, Area(x, y, MIN_PU_SIZE, MIN_PU_SIZE)));
-          Position offsetRef = subPu.blocks[compID].pos().offset((curMi.bv.getHor() >> shiftHor), (curMi.bv.getVer() >> shiftVer));
-          Position refEndPos(offsetRef.x + subPu.blocks[compID].size().width - 1, offsetRef.y + subPu.blocks[compID].size().height - 1 );
-
-          if (!subPu.cs->isDecomp(refEndPos, toChannelType(compID)) || !subPu.cs->isDecomp(offsetRef, toChannelType(compID))) // ref block is not yet available for this chroma sub-block
-          {
-            success = false;
-            break;
-          }
-        }
-        if (!success)
-          break;
-      }
-      ////////////////////////////////////////////////////////////////////////////
-
-      if (success)
-      {
-        //pu.mergeType = MRG_TYPE_IBC;
-        m_pcInterSearch->motionCompensation(pu, REF_PIC_LIST_0, false, true); // luma=0, chroma=1
-        m_pcInterSearch->encodeResAndCalcRdInterCU(*tempCS, partitioner, false, false, true);
-
-        xEncodeDontSplit(*tempCS, partitioner);
-
-        xCheckDQP(*tempCS, partitioner);
-        tempCS->useDbCost = m_pcEncCfg->getUseEncDbOpt();
-        if ( m_bestModeUpdated )
-        {
-          xCalDebCost( *tempCS, partitioner );
-        }
-
-        DTRACE_MODE_COST(*tempCS, m_pcRdCost->getLambda());
-
-        xCheckBestMode(tempCS, bestCS, partitioner, encTestMode);
-      }
-      else
-      {
-        tempCS->dist = 0;
-        tempCS->fracBits = 0;
-        tempCS->cost = MAX_DOUBLE;
-        tempCS->costDbOffset = 0;
-      }
-    }
-  }
+}
   // check ibc mode in encoder RD
   //////////////////////////////////////////////////////////////////////////////////////////////
 
 void EncCu::xCheckRDCostInter( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode )
 {
-  tempCS->initStructData( encTestMode.qp, encTestMode.lossless );
+  tempCS->initStructData( encTestMode.qp );
 
 
   m_pcInterSearch->setAffineModeSelected(false);
 
   if( tempCS->slice->getCheckLDC() )
   {
-    m_bestGbiCost[0] = m_bestGbiCost[1] = std::numeric_limits<double>::max();
-    m_bestGbiIdx[0] = m_bestGbiIdx[1] = -1;
+    m_bestBcwCost[0] = m_bestBcwCost[1] = std::numeric_limits<double>::max();
+    m_bestBcwIdx[0] = m_bestBcwIdx[1] = -1;
   }
 
   m_pcInterSearch->resetBufferedUniMotions();
-  int gbiLoopNum = (tempCS->slice->isInterB() ? GBI_NUM : 1);
-  gbiLoopNum = (tempCS->sps->getUseGBi() ? gbiLoopNum : 1);
+  int bcwLoopNum = (tempCS->slice->isInterB() ? BCW_NUM : 1);
+  bcwLoopNum = (tempCS->sps->getUseBcw() ? bcwLoopNum : 1);
 
-  if( tempCS->area.lwidth() * tempCS->area.lheight() < GBI_SIZE_CONSTRAINT )
+  if( tempCS->area.lwidth() * tempCS->area.lheight() < BCW_SIZE_CONSTRAINT )
   {
-    gbiLoopNum = 1;
+    bcwLoopNum = 1;
   }
 
   double curBestCost = bestCS->cost;
-  double equGBiCost = MAX_DOUBLE;
+  double equBcwCost = MAX_DOUBLE;
 
   m_bestModeUpdated = tempCS->useDbCost = bestCS->useDbCost = false;
 
-  for( int gbiLoopIdx = 0; gbiLoopIdx < gbiLoopNum; gbiLoopIdx++ )
+  for( int bcwLoopIdx = 0; bcwLoopIdx < bcwLoopNum; bcwLoopIdx++ )
   {
-    if( m_pcEncCfg->getUseGBiFast() )
+    if( m_pcEncCfg->getUseBcwFast() )
     {
       auto blkCache = dynamic_cast< CacheBlkInfoCtrl* >(m_modeCtrl);
 
       if( blkCache )
       {
         bool isBestInter = blkCache->getInter(bestCS->area);
-        uint8_t bestGBiIdx = blkCache->getGbiIdx(bestCS->area);
+        uint8_t bestBcwIdx = blkCache->getBcwIdx(bestCS->area);
 
-        if( isBestInter && g_GbiSearchOrder[gbiLoopIdx] != GBI_DEFAULT && g_GbiSearchOrder[gbiLoopIdx] != bestGBiIdx )
+        if( isBestInter && g_BcwSearchOrder[bcwLoopIdx] != BCW_DEFAULT && g_BcwSearchOrder[bcwLoopIdx] != bestBcwIdx )
         {
           continue;
         }
@@ -3284,7 +3608,7 @@ void EncCu::xCheckRDCostInter( CodingStructure *&tempCS, CodingStructure *&bestC
     }
     if( !tempCS->slice->getCheckLDC() )
     {
-      if( gbiLoopIdx != 0 && gbiLoopIdx != 3 && gbiLoopIdx != 4 )
+      if( bcwLoopIdx != 0 && bcwLoopIdx != 3 && bcwLoopIdx != 4 )
       {
         continue;
       }
@@ -3294,36 +3618,33 @@ void EncCu::xCheckRDCostInter( CodingStructure *&tempCS, CodingStructure *&bestC
 
   partitioner.setCUData( cu );
   cu.slice            = tempCS->slice;
-#if HEVC_TILES_WPP
-  cu.tileIdx          = tempCS->picture->tileMap->getTileIdxMap( tempCS->area.lumaPos() );
-#endif
+  cu.tileIdx          = tempCS->pps->getTileIdx( tempCS->area.lumaPos() );
   cu.skip             = false;
   cu.mmvdSkip = false;
 //cu.affine
   cu.predMode         = MODE_INTER;
-  cu.transQuantBypass = encTestMode.lossless;
-  cu.chromaQpAdj      = cu.transQuantBypass ? 0 : m_cuChromaQpOffsetIdxPlus1;
+  cu.chromaQpAdj      = m_cuChromaQpOffsetIdxPlus1;
   cu.qp               = encTestMode.qp;
   CU::addPUs( cu );
 
-  cu.GBiIdx = g_GbiSearchOrder[gbiLoopIdx];
-  uint8_t gbiIdx = cu.GBiIdx;
-  bool  testGbi = (gbiIdx != GBI_DEFAULT);
+  cu.BcwIdx = g_BcwSearchOrder[bcwLoopIdx];
+  uint8_t bcwIdx = cu.BcwIdx;
+  bool  testBcw = (bcwIdx != BCW_DEFAULT);
 
   m_pcInterSearch->predInterSearch( cu, partitioner );
 
-  gbiIdx = CU::getValidGbiIdx(cu);
-  if( testGbi && gbiIdx == GBI_DEFAULT ) // Enabled GBi but the search results is uni.
+  bcwIdx = CU::getValidBcwIdx(cu);
+  if( testBcw && bcwIdx == BCW_DEFAULT ) // Enabled Bcw but the search results is uni.
   {
-    tempCS->initStructData(encTestMode.qp, encTestMode.lossless);
+    tempCS->initStructData(encTestMode.qp);
     continue;
   }
-  CHECK(!(testGbi || (!testGbi && gbiIdx == GBI_DEFAULT)), " !( bTestGbi || (!bTestGbi && gbiIdx == GBI_DEFAULT ) )");
+  CHECK(!(testBcw || (!testBcw && bcwIdx == BCW_DEFAULT)), " !( bTestBcw || (!bTestBcw && bcwIdx == BCW_DEFAULT ) )");
 
   bool isEqualUni = false;
-  if( m_pcEncCfg->getUseGBiFast() )
+  if( m_pcEncCfg->getUseBcwFast() )
   {
-    if( cu.firstPU->interDir != 3 && testGbi == 0 )
+    if( cu.firstPU->interDir != 3 && testBcw == 0 )
     {
       isEqualUni = true;
     }
@@ -3331,33 +3652,33 @@ void EncCu::xCheckRDCostInter( CodingStructure *&tempCS, CodingStructure *&bestC
 
   xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, 0
                         , 0
-                        , &equGBiCost
+                        , &equBcwCost
   );
 
-  if( g_GbiSearchOrder[gbiLoopIdx] == GBI_DEFAULT )
+  if( g_BcwSearchOrder[bcwLoopIdx] == BCW_DEFAULT )
     m_pcInterSearch->setAffineModeSelected((bestCS->cus.front()->affine && !(bestCS->cus.front()->firstPU->mergeFlag)));
 
-  tempCS->initStructData(encTestMode.qp, encTestMode.lossless);
+  tempCS->initStructData(encTestMode.qp);
 
   double skipTH = MAX_DOUBLE;
-  skipTH = (m_pcEncCfg->getUseGBiFast() ? 1.05 : MAX_DOUBLE);
-  if( equGBiCost > curBestCost * skipTH )
+  skipTH = (m_pcEncCfg->getUseBcwFast() ? 1.05 : MAX_DOUBLE);
+  if( equBcwCost > curBestCost * skipTH )
   {
     break;
   }
 
-  if( m_pcEncCfg->getUseGBiFast() )
+  if( m_pcEncCfg->getUseBcwFast() )
   {
     if( isEqualUni == true && m_pcEncCfg->getIntraPeriod() == -1 )
     {
       break;
     }
   }
-  if( g_GbiSearchOrder[gbiLoopIdx] == GBI_DEFAULT && xIsGBiSkip(cu) && m_pcEncCfg->getUseGBiFast() )
+  if( g_BcwSearchOrder[bcwLoopIdx] == BCW_DEFAULT && xIsBcwSkip(cu) && m_pcEncCfg->getUseBcwFast() )
   {
     break;
   }
- }  // for( UChar gbiLoopIdx = 0; gbiLoopIdx < gbiLoopNum; gbiLoopIdx++ )
+ }  // for( UChar bcwLoopIdx = 0; bcwLoopIdx < bcwLoopNum; bcwLoopIdx++ )
   if ( m_bestModeUpdated && bestCS->cost != MAX_DOUBLE )
   {
     xCalDebCost( *bestCS, partitioner );
@@ -3367,13 +3688,13 @@ void EncCu::xCheckRDCostInter( CodingStructure *&tempCS, CodingStructure *&bestC
 
 
 
-
-bool EncCu::xCheckRDCostInterIMV( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode )
+bool EncCu::xCheckRDCostInterIMV(CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode, double &bestIntPelCost)
 {
   int iIMV = int( ( encTestMode.opts & ETO_IMV ) >> ETO_IMV_SHIFT );
   m_pcInterSearch->setAffineModeSelected(false);
-  // Only int-Pel, 4-Pel and fast 4-Pel allowed
-  CHECK( iIMV != 1 && iIMV != 2 && iIMV != 3, "Unsupported IMV Mode" );
+  // Only Half-Pel, int-Pel, 4-Pel and fast 4-Pel allowed
+  CHECK(iIMV < 1 || iIMV > 4, "Unsupported IMV Mode");
+  const bool testAltHpelFilter = iIMV == 4;
   // Fast 4-Pel Mode
 
   m_bestModeUpdated = tempCS->useDbCost = bestCS->useDbCost = false;
@@ -3381,33 +3702,33 @@ bool EncCu::xCheckRDCostInterIMV( CodingStructure *&tempCS, CodingStructure *&be
   EncTestMode encTestModeBase = encTestMode;                                        // copy for clearing non-IMV options
   encTestModeBase.opts        = EncTestModeOpts( encTestModeBase.opts & ETO_IMV );  // clear non-IMV options (is that intended?)
 
-  tempCS->initStructData( encTestMode.qp, encTestMode.lossless );
+  tempCS->initStructData( encTestMode.qp );
 
   m_pcInterSearch->resetBufferedUniMotions();
-  int gbiLoopNum = (tempCS->slice->isInterB() ? GBI_NUM : 1);
-  gbiLoopNum = (tempCS->slice->getSPS()->getUseGBi() ? gbiLoopNum : 1);
+  int bcwLoopNum = (tempCS->slice->isInterB() ? BCW_NUM : 1);
+  bcwLoopNum = (tempCS->slice->getSPS()->getUseBcw() ? bcwLoopNum : 1);
 
-  if( tempCS->area.lwidth() * tempCS->area.lheight() < GBI_SIZE_CONSTRAINT )
+  if( tempCS->area.lwidth() * tempCS->area.lheight() < BCW_SIZE_CONSTRAINT )
   {
-    gbiLoopNum = 1;
+    bcwLoopNum = 1;
   }
 
   bool validMode = false;
   double curBestCost = bestCS->cost;
-  double equGBiCost = MAX_DOUBLE;
+  double equBcwCost = MAX_DOUBLE;
 
-  for( int gbiLoopIdx = 0; gbiLoopIdx < gbiLoopNum; gbiLoopIdx++ )
+  for( int bcwLoopIdx = 0; bcwLoopIdx < bcwLoopNum; bcwLoopIdx++ )
   {
-    if( m_pcEncCfg->getUseGBiFast() )
+    if( m_pcEncCfg->getUseBcwFast() )
     {
       auto blkCache = dynamic_cast< CacheBlkInfoCtrl* >(m_modeCtrl);
 
       if( blkCache )
       {
         bool isBestInter = blkCache->getInter(bestCS->area);
-        uint8_t bestGBiIdx = blkCache->getGbiIdx(bestCS->area);
+        uint8_t bestBcwIdx = blkCache->getBcwIdx(bestCS->area);
 
-        if( isBestInter && g_GbiSearchOrder[gbiLoopIdx] != GBI_DEFAULT && g_GbiSearchOrder[gbiLoopIdx] != bestGBiIdx )
+        if( isBestInter && g_BcwSearchOrder[bcwLoopIdx] != BCW_DEFAULT && g_BcwSearchOrder[bcwLoopIdx] != bestBcwIdx )
         {
           continue;
         }
@@ -3416,15 +3737,15 @@ bool EncCu::xCheckRDCostInterIMV( CodingStructure *&tempCS, CodingStructure *&be
 
     if( !tempCS->slice->getCheckLDC() )
     {
-      if( gbiLoopIdx != 0 && gbiLoopIdx != 3 && gbiLoopIdx != 4 )
+      if( bcwLoopIdx != 0 && bcwLoopIdx != 3 && bcwLoopIdx != 4 )
       {
         continue;
       }
     }
 
-    if( m_pcEncCfg->getUseGBiFast() && tempCS->slice->getCheckLDC() && g_GbiSearchOrder[gbiLoopIdx] != GBI_DEFAULT
-      && (m_bestGbiIdx[0] >= 0 && g_GbiSearchOrder[gbiLoopIdx] != m_bestGbiIdx[0])
-      && (m_bestGbiIdx[1] >= 0 && g_GbiSearchOrder[gbiLoopIdx] != m_bestGbiIdx[1]))
+    if( m_pcEncCfg->getUseBcwFast() && tempCS->slice->getCheckLDC() && g_BcwSearchOrder[bcwLoopIdx] != BCW_DEFAULT
+      && (m_bestBcwIdx[0] >= 0 && g_BcwSearchOrder[bcwLoopIdx] != m_bestBcwIdx[0])
+      && (m_bestBcwIdx[1] >= 0 && g_BcwSearchOrder[bcwLoopIdx] != m_bestBcwIdx[1]))
     {
       continue;
     }
@@ -3433,28 +3754,32 @@ bool EncCu::xCheckRDCostInterIMV( CodingStructure *&tempCS, CodingStructure *&be
 
   partitioner.setCUData( cu );
   cu.slice            = tempCS->slice;
-#if HEVC_TILES_WPP
-  cu.tileIdx          = tempCS->picture->tileMap->getTileIdxMap( tempCS->area.lumaPos() );
-#endif
+  cu.tileIdx          = tempCS->pps->getTileIdx( tempCS->area.lumaPos() );
   cu.skip             = false;
   cu.mmvdSkip = false;
 //cu.affine
   cu.predMode         = MODE_INTER;
-  cu.transQuantBypass = encTestMode.lossless;
-  cu.chromaQpAdj      = cu.transQuantBypass ? 0 : m_cuChromaQpOffsetIdxPlus1;
+  cu.chromaQpAdj      = m_cuChromaQpOffsetIdxPlus1;
   cu.qp               = encTestMode.qp;
 
   CU::addPUs( cu );
 
-  cu.imv      = iIMV > 1 ? 2 : 1;
+  if (testAltHpelFilter)
+  {
+    cu.imv = IMV_HPEL;
+  }
+  else
+  {
+    cu.imv = iIMV == 1 ? IMV_FPEL : IMV_4PEL;
+  }
 
-  bool testGbi;
-  uint8_t gbiIdx;
-  bool affineAmvrEanbledFlag = cu.slice->getSPS()->getAffineAmvrEnabledFlag();
+  bool testBcw;
+  uint8_t bcwIdx;
+  bool affineAmvrEanbledFlag = !testAltHpelFilter && cu.slice->getSPS()->getAffineAmvrEnabledFlag();
 
-  cu.GBiIdx = g_GbiSearchOrder[gbiLoopIdx];
-  gbiIdx = cu.GBiIdx;
-  testGbi = (gbiIdx != GBI_DEFAULT);
+  cu.BcwIdx = g_BcwSearchOrder[bcwLoopIdx];
+  bcwIdx = cu.BcwIdx;
+  testBcw = (bcwIdx != BCW_DEFAULT);
 
   cu.firstPU->interDir = 10;
 
@@ -3462,7 +3787,7 @@ bool EncCu::xCheckRDCostInterIMV( CodingStructure *&tempCS, CodingStructure *&be
 
   if ( cu.firstPU->interDir <= 3 )
   {
-    gbiIdx = CU::getValidGbiIdx(cu);
+    bcwIdx = CU::getValidBcwIdx(cu);
   }
   else
   {
@@ -3472,20 +3797,20 @@ bool EncCu::xCheckRDCostInterIMV( CodingStructure *&tempCS, CodingStructure *&be
   if( m_pcEncCfg->getMCTSEncConstraint() && ( ( cu.firstPU->refIdx[L0] < 0 && cu.firstPU->refIdx[L1] < 0 ) || ( !( MCTSHelper::checkMvBufferForMCTSConstraint( *cu.firstPU ) ) ) ) )
   {
     // Do not use this mode
-    tempCS->initStructData( encTestMode.qp, encTestMode.lossless );
+    tempCS->initStructData( encTestMode.qp );
     continue;
   }
-  if( testGbi && gbiIdx == GBI_DEFAULT ) // Enabled GBi but the search results is uni.
+  if( testBcw && bcwIdx == BCW_DEFAULT ) // Enabled Bcw but the search results is uni.
   {
-    tempCS->initStructData(encTestMode.qp, encTestMode.lossless);
+    tempCS->initStructData(encTestMode.qp);
     continue;
   }
-  CHECK(!(testGbi || (!testGbi && gbiIdx == GBI_DEFAULT)), " !( bTestGbi || (!bTestGbi && gbiIdx == GBI_DEFAULT ) )");
+  CHECK(!(testBcw || (!testBcw && bcwIdx == BCW_DEFAULT)), " !( bTestBcw || (!bTestBcw && bcwIdx == BCW_DEFAULT ) )");
 
   bool isEqualUni = false;
-  if( m_pcEncCfg->getUseGBiFast() )
+  if( m_pcEncCfg->getUseBcwFast() )
   {
-    if( cu.firstPU->interDir != 3 && testGbi == 0 )
+    if( cu.firstPU->interDir != 3 && testBcw == 0 )
     {
       isEqualUni = true;
     }
@@ -3501,7 +3826,7 @@ bool EncCu::xCheckRDCostInterIMV( CodingStructure *&tempCS, CodingStructure *&be
     }
     if ( affineAmvrEanbledFlag )
     {
-      tempCS->initStructData( encTestMode.qp, encTestMode.lossless );
+      tempCS->initStructData( encTestMode.qp );
       continue;
     }
     else
@@ -3512,31 +3837,35 @@ bool EncCu::xCheckRDCostInterIMV( CodingStructure *&tempCS, CodingStructure *&be
 
   xEncodeInterResidual( tempCS, bestCS, partitioner, encTestModeBase, 0
                         , 0
-                        , &equGBiCost
+                        , &equBcwCost
   );
 
-  tempCS->initStructData(encTestMode.qp, encTestMode.lossless);
+  if( cu.imv == IMV_FPEL && tempCS->cost < bestIntPelCost )
+  {
+    bestIntPelCost = tempCS->cost;
+  }
+  tempCS->initStructData(encTestMode.qp);
 
   double skipTH = MAX_DOUBLE;
-  skipTH = (m_pcEncCfg->getUseGBiFast() ? 1.05 : MAX_DOUBLE);
-  if( equGBiCost > curBestCost * skipTH )
+  skipTH = (m_pcEncCfg->getUseBcwFast() ? 1.05 : MAX_DOUBLE);
+  if( equBcwCost > curBestCost * skipTH )
   {
     break;
   }
 
-  if( m_pcEncCfg->getUseGBiFast() )
+  if( m_pcEncCfg->getUseBcwFast() )
   {
     if( isEqualUni == true && m_pcEncCfg->getIntraPeriod() == -1 )
     {
       break;
     }
   }
-  if( g_GbiSearchOrder[gbiLoopIdx] == GBI_DEFAULT && xIsGBiSkip(cu) && m_pcEncCfg->getUseGBiFast() )
+  if( g_BcwSearchOrder[bcwLoopIdx] == BCW_DEFAULT && xIsBcwSkip(cu) && m_pcEncCfg->getUseBcwFast() )
   {
     break;
   }
   validMode = true;
- } // for( UChar gbiLoopIdx = 0; gbiLoopIdx < gbiLoopNum; gbiLoopIdx++ )
+ } // for( UChar bcwLoopIdx = 0; bcwLoopIdx < bcwLoopNum; bcwLoopIdx++ )
 
   if ( m_bestModeUpdated && bestCS->cost != MAX_DOUBLE )
   {
@@ -3562,16 +3891,16 @@ void EncCu::xCalDebCost( CodingStructure &cs, Partitioner &partitioner, bool cal
   const ChromaFormat format = cs.area.chromaFormat;
   CodingUnit*                cu = cs.getCU(partitioner.chType);
   const Position lumaPos = cu->Y().valid() ? cu->Y().pos() : recalcPosition( format, cu->chType, CHANNEL_TYPE_LUMA, cu->blocks[cu->chType].pos() );
-  bool topEdgeAvai  = lumaPos.y > 0 && ( ( lumaPos.y % 8 ) == 0 );
-  bool leftEdgeAvai = lumaPos.x > 0 && ( ( lumaPos.x % 8 ) == 0 );
+  bool topEdgeAvai = lumaPos.y > 0 && ((lumaPos.y % 4) == 0);
+  bool leftEdgeAvai = lumaPos.x > 0 && ((lumaPos.x % 4) == 0);
   bool anyEdgeAvai = topEdgeAvai || leftEdgeAvai;
   cs.costDbOffset = 0;
 
   if ( calDist )
   {
     const UnitArea currCsArea = clipArea( CS::getArea( cs, cs.area, partitioner.chType ), *cs.picture );
-    ComponentID compStr = ( CS::isDualITree( cs ) && !isLuma( partitioner.chType ) ) ? COMPONENT_Cb : COMPONENT_Y;
-    ComponentID compEnd = ( CS::isDualITree( cs ) && isLuma( partitioner.chType ) ) ? COMPONENT_Y : COMPONENT_Cr;
+    ComponentID compStr = ( cu->isSepTree() && !isLuma( partitioner.chType ) ) ? COMPONENT_Cb : COMPONENT_Y;
+    ComponentID compEnd = ( cu->isSepTree() && isLuma( partitioner.chType ) ) ? COMPONENT_Y : COMPONENT_Cr;
     Distortion finalDistortion = 0;
     for ( int comp = compStr; comp <= compEnd; comp++ )
     {
@@ -3586,8 +3915,8 @@ void EncCu::xCalDebCost( CodingStructure &cs, Partitioner &partitioner, bool cal
 
   if ( anyEdgeAvai && m_pcEncCfg->getUseEncDbOpt() )
   {
-    ComponentID compStr = ( CS::isDualITree( cs ) && !isLuma( partitioner.chType ) ) ? COMPONENT_Cb : COMPONENT_Y;
-    ComponentID compEnd = ( CS::isDualITree( cs ) &&  isLuma( partitioner.chType ) ) ? COMPONENT_Y : COMPONENT_Cr;
+    ComponentID compStr = ( cu->isSepTree() && !isLuma( partitioner.chType ) ) ? COMPONENT_Cb : COMPONENT_Y;
+    ComponentID compEnd = ( cu->isSepTree() &&  isLuma( partitioner.chType ) ) ? COMPONENT_Y : COMPONENT_Cr;
 
     const UnitArea currCsArea = clipArea( CS::getArea( cs, cs.area, partitioner.chType ), *cs.picture );
 
@@ -3607,7 +3936,7 @@ void EncCu::xCalDebCost( CodingStructure &cs, Partitioner &partitioner, bool cal
       //Copy current CU's reco to Deblock Pic Buffer
       const CompArea&  curCompArea = currCsArea.block( compId );
       picDbBuf.getBuf( curCompArea ).copyFrom( cs.getRecoBuf( curCompArea ) );
-      if ( cs.slice->getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getSliceReshaperInfo().getUseSliceReshaper() && isLuma( compId ) )
+      if (cs.picHeader->getLmcsEnabledFlag() && m_pcReshape->getSliceReshaperInfo().getUseSliceReshaper() && isLuma(compId))
       {
         picDbBuf.getBuf( curCompArea ).rspSignal( m_pcReshape->getInvLUT() );
       }
@@ -3617,7 +3946,7 @@ void EncCu::xCalDebCost( CodingStructure &cs, Partitioner &partitioner, bool cal
       {
         const CompArea&  compArea = areaLeft.block(compId);
         picDbBuf.getBuf( compArea ).copyFrom( cs.picture->getRecoBuf( compArea ) );
-        if ( cs.slice->getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getSliceReshaperInfo().getUseSliceReshaper() && isLuma( compId ) )
+        if (cs.picHeader->getLmcsEnabledFlag() && m_pcReshape->getSliceReshaperInfo().getUseSliceReshaper() && isLuma(compId))
         {
           picDbBuf.getBuf( compArea ).rspSignal( m_pcReshape->getInvLUT() );
         }
@@ -3627,7 +3956,7 @@ void EncCu::xCalDebCost( CodingStructure &cs, Partitioner &partitioner, bool cal
       {
         const CompArea&  compArea = areaTop.block( compId );
         picDbBuf.getBuf( compArea ).copyFrom( cs.picture->getRecoBuf( compArea ) );
-        if ( cs.slice->getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getSliceReshaperInfo().getUseSliceReshaper() && isLuma( compId ) )
+        if (cs.picHeader->getLmcsEnabledFlag() && m_pcReshape->getSliceReshaperInfo().getUseSliceReshaper() && isLuma(compId))
         {
           picDbBuf.getBuf( compArea ).rspSignal( m_pcReshape->getInvLUT() );
         }
@@ -3694,9 +4023,10 @@ Distortion EncCu::getDistortionDb( CodingStructure &cs, CPelBuf org, CPelBuf rec
 {
   Distortion dist = 0;
 #if WCG_EXT
+  m_pcRdCost->setChromaFormat(cs.sps->getChromaFormatIdc());
   CPelBuf orgLuma = cs.picture->getOrigBuf( cs.area.blocks[COMPONENT_Y] );
   if ( m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() || (
-    m_pcEncCfg->getReshaper() && ( cs.slice->getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag() ) ) )
+    m_pcEncCfg->getLmcs() && (cs.picHeader->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag())))
   {
     if ( compID == COMPONENT_Y && !afterDb && !m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled())
     {
@@ -3711,7 +4041,7 @@ Distortion EncCu::getDistortionDb( CodingStructure &cs, CPelBuf org, CPelBuf rec
       dist += m_pcRdCost->getDistPart( org, reco, cs.sps->getBitDepth( toChannelType( compID ) ), compID, DF_SSE_WTD, &orgLuma );
     }
   }
-  else if ( m_pcEncCfg->getReshaper() && cs.slice->getReshapeInfo().getUseSliceReshaper() && cs.slice->isIntra() ) //intra slice
+  else if (m_pcEncCfg->getLmcs() && cs.picHeader->getLmcsEnabledFlag() && cs.slice->isIntra()) //intra slice
   {
     if ( compID == COMPONENT_Y && afterDb )
     {
@@ -3740,13 +4070,9 @@ void EncCu::xEncodeInterResidual(   CodingStructure *&tempCS
                                   , const EncTestMode& encTestMode
                                   , int residualPass
                                   , bool* bestHasNonResi
-                                  , double* equGBiCost
+                                  , double* equBcwCost
   )
 {
-  if( residualPass == 1 && encTestMode.lossless )
-  {
-    return;
-  }
 
   CodingUnit*            cu        = tempCS->getCU( partitioner.chType );
   double   bestCostInternal        = MAX_DOUBLE;
@@ -3757,18 +4083,61 @@ void EncCu::xEncodeInterResidual(   CodingStructure *&tempCS
   bool              swapped        = false; // avoid unwanted data copy
   bool             reloadCU        = false;
 
-  // Not allow very big |MVd| to avoid CABAC crash caused by too large MVd. Normally no impact on coding performance.
-  const int maxMvd = 1 << 15;
   const PredictionUnit& pu = *cu->firstPU;
-  if (!cu->affine)
+
+  // clang-format off
+  const int affineShiftTab[3] =   // right-shift applied to affine MVDs, indexed by cu->imv
+  {
+    MV_PRECISION_INTERNAL - MV_PRECISION_QUARTER,
+    MV_PRECISION_INTERNAL - MV_PRECISION_SIXTEENTH,
+    MV_PRECISION_INTERNAL - MV_PRECISION_INT
+  };
+
+  const int normalShiftTab[NUM_IMV_MODES] =   // right-shift applied to non-affine MVDs, indexed by cu->imv
+  {
+    MV_PRECISION_INTERNAL - MV_PRECISION_QUARTER,
+    MV_PRECISION_INTERNAL - MV_PRECISION_INT,
+    MV_PRECISION_INTERNAL - MV_PRECISION_4PEL,
+    MV_PRECISION_INTERNAL - MV_PRECISION_HALF,
+  };
+  // clang-format on
+  // Early-out: return when any MVD, after being right-shifted by the table entry for cu->imv, lies outside [MVD_MIN, MVD_MAX].
+  int mvShift;   // assigned in both branches below before it is read
+
+  for (int refList = 0; refList < NUM_REF_PIC_LIST_01; refList++)
+  {
+    if (pu.refIdx[refList] >= 0)   // lists with a negative reference index are not checked
+    {
+      if (!cu->affine)
+      {
+        mvShift = normalShiftTab[cu->imv];
+        Mv signaledmvd(pu.mvd[refList].getHor() >> mvShift, pu.mvd[refList].getVer() >> mvShift);
+        if (!((signaledmvd.getHor() >= MVD_MIN) && (signaledmvd.getHor() <= MVD_MAX)) || !((signaledmvd.getVer() >= MVD_MIN) && (signaledmvd.getVer() <= MVD_MAX)))
+          return;   // shifted MVD out of range: leave before any residual coding is attempted
+      }
+      else
+      {
+        for (int ctrlP = 1 + (cu->affineType == AFFINEMODEL_6PARAM); ctrlP >= 0; ctrlP--)   // control points 2..0 for the 6-parameter model, 1..0 otherwise
+        {
+          mvShift = affineShiftTab[cu->imv];   // NOTE(review): table has 3 entries - confirm cu->imv is always < 3 for affine CUs
+          Mv signaledmvd(pu.mvdAffi[refList][ctrlP].getHor() >> mvShift, pu.mvdAffi[refList][ctrlP].getVer() >> mvShift);
+          if (!((signaledmvd.getHor() >= MVD_MIN) && (signaledmvd.getHor() <= MVD_MAX)) || !((signaledmvd.getVer() >= MVD_MIN) && (signaledmvd.getVer() <= MVD_MAX)))
+            return;   // same early-out as the non-affine branch, per control point
+        }
+      }
+    }
+  }
+  // avoid MV exceeding 18-bit dynamic range
+  const int maxMv = 1 << 17;
+  if (!cu->affine && !pu.mergeFlag)
   {
-    if ((pu.refIdx[0] >= 0 && (pu.mvd[0].getAbsHor() >= maxMvd || pu.mvd[0].getAbsVer() >= maxMvd))
-      || (pu.refIdx[1] >= 0 && (pu.mvd[1].getAbsHor() >= maxMvd || pu.mvd[1].getAbsVer() >= maxMvd)))
+    if ( (pu.refIdx[0] >= 0 && (pu.mv[0].getAbsHor() >= maxMv || pu.mv[0].getAbsVer() >= maxMv))
+      || (pu.refIdx[1] >= 0 && (pu.mv[1].getAbsHor() >= maxMv || pu.mv[1].getAbsVer() >= maxMv)))
     {
       return;
     }
   }
-  else
+  if (cu->affine && !pu.mergeFlag)
   {
     for (int refList = 0; refList < NUM_REF_PIC_LIST_01; refList++)
     {
@@ -3776,7 +4145,7 @@ void EncCu::xEncodeInterResidual(   CodingStructure *&tempCS
       {
         for (int ctrlP = 1 + (cu->affineType == AFFINEMODEL_6PARAM); ctrlP >= 0; ctrlP--)
         {
-          if (pu.mvdAffi[refList][ctrlP].getAbsHor() >= maxMvd || pu.mvdAffi[refList][ctrlP].getAbsVer() >= maxMvd)
+          if (pu.mvAffi[refList][ctrlP].getAbsHor() >= maxMv || pu.mvAffi[refList][ctrlP].getAbsVer() >= maxMv)
           {
             return;
           }
@@ -3786,6 +4155,11 @@ void EncCu::xEncodeInterResidual(   CodingStructure *&tempCS
   }
   const bool mtsAllowed = tempCS->sps->getUseInterMTS() && CU::isInter( *cu ) && partitioner.currArea().lwidth() <= MTS_INTER_MAX_CU_SIZE && partitioner.currArea().lheight() <= MTS_INTER_MAX_CU_SIZE;
   uint8_t sbtAllowed = cu->checkAllowedSbt();
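+  // SBT resolution-dependent fast algorithm: for low-resolution sequences (picture width below the SBTFast64WidthTh threshold, currently meaning below HD), skip size-64 SBT in RDO, i.e. disable SBT for CUs wider or taller than 32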
+  if( tempCS->pps->getPicWidthInLumaSamples() < (uint32_t)m_pcEncCfg->getSBTFast64WidthTh() )
+  {
+    sbtAllowed = ((cu->lwidth() > 32 || cu->lheight() > 32)) ? 0 : sbtAllowed;
+  }
   uint8_t numRDOTried = 0;
   Distortion sbtOffDist = 0;
   bool    sbtOffRootCbf = 0;
@@ -3837,7 +4211,7 @@ void EncCu::xEncodeInterResidual(   CodingStructure *&tempCS
       }
       else if( false == swapped )
       {
-        tempCS->initStructData( encTestMode.qp, encTestMode.lossless );
+        tempCS->initStructData( encTestMode.qp );
         tempCS->copyStructure( *bestCS, partitioner.chType );
         tempCS->getPredBuf().copyFrom( bestCS->getPredBuf() );
         bestCost = bestCS->cost;
@@ -3866,6 +4240,11 @@ void EncCu::xEncodeInterResidual(   CodingStructure *&tempCS
     if( skipResidual || histBestSbt == MAX_UCHAR || !CU::isSbtMode( histBestSbt ) )
     {
     m_pcInterSearch->encodeResAndCalcRdInterCU( *tempCS, partitioner, skipResidual );
+    if (tempCS->slice->getSPS()->getUseColorTrans())
+    {
+      bestCS->tmpColorSpaceCost = tempCS->tmpColorSpaceCost;
+      bestCS->firstColorSpaceSelected = tempCS->firstColorSpaceSelected;
+    }
     numRDOTried += mtsAllowed ? 2 : 1;
     xEncodeDontSplit( *tempCS, partitioner );
 
@@ -3875,13 +4254,13 @@ void EncCu::xEncodeInterResidual(   CodingStructure *&tempCS
     if( NULL != bestHasNonResi && (bestCostInternal > tempCS->cost) )
     {
       bestCostInternal = tempCS->cost;
-      if (!(tempCS->getPU(partitioner.chType)->mhIntraFlag))
+      if (!(tempCS->getPU(partitioner.chType)->ciipFlag))
       *bestHasNonResi  = !cu->rootCbf;
     }
 
     if (cu->rootCbf == false)
     {
-      if (tempCS->getPU(partitioner.chType)->mhIntraFlag)
+      if (tempCS->getPU(partitioner.chType)->ciipFlag)
       {
         tempCS->cost = MAX_DOUBLE;
         tempCS->costDbOffset = 0;
@@ -3892,12 +4271,8 @@ void EncCu::xEncodeInterResidual(   CodingStructure *&tempCS
     sbtOffCost = tempCS->cost;
     sbtOffDist = tempCS->dist;
     sbtOffRootCbf = cu->rootCbf;
-    currBestSbt = CU::getSbtInfo( cu->firstTU->mtsIdx > 1 ? SBT_OFF_MTS : SBT_OFF_DCT, 0 );
-    currBestTrs = cu->firstTU->mtsIdx;
-    if( cu->lwidth() <= MAX_TB_SIZEY && cu->lheight() <= MAX_TB_SIZEY )
-    {
-      CHECK( tempCS->tus.size() != 1, "tu must be only one" );
-    }
+    currBestSbt = CU::getSbtInfo(cu->firstTU->mtsIdx[COMPONENT_Y] > MTS_SKIP ? SBT_OFF_MTS : SBT_OFF_DCT, 0);
+    currBestTrs = cu->firstTU->mtsIdx[COMPONENT_Y];
 
 #if WCG_EXT
     DTRACE_MODE_COST( *tempCS, m_pcRdCost->getLambda( true ) );
@@ -3986,7 +4361,7 @@ void EncCu::xEncodeInterResidual(   CodingStructure *&tempCS
       }
       else if( false == swapped )
       {
-        tempCS->initStructData( encTestMode.qp, encTestMode.lossless );
+        tempCS->initStructData( encTestMode.qp );
         tempCS->copyStructure( *bestCS, partitioner.chType );
         tempCS->getPredBuf().copyFrom( bestCS->getPredBuf() );
         bestCost = bestCS->cost;
@@ -4012,6 +4387,11 @@ void EncCu::xEncodeInterResidual(   CodingStructure *&tempCS
 
       //try residual coding
       m_pcInterSearch->encodeResAndCalcRdInterCU( *tempCS, partitioner, skipResidual );
+      if (tempCS->slice->getSPS()->getUseColorTrans())
+      {
+        bestCS->tmpColorSpaceCost = tempCS->tmpColorSpaceCost;
+        bestCS->firstColorSpaceSelected = tempCS->firstColorSpaceSelected;
+      }
       numRDOTried++;
 
       xEncodeDontSplit( *tempCS, partitioner );
@@ -4021,14 +4401,14 @@ void EncCu::xEncodeInterResidual(   CodingStructure *&tempCS
       if( NULL != bestHasNonResi && ( bestCostInternal > tempCS->cost ) )
       {
         bestCostInternal = tempCS->cost;
-        if( !( tempCS->getPU( partitioner.chType )->mhIntraFlag ) )
+        if( !( tempCS->getPU( partitioner.chType )->ciipFlag ) )
           *bestHasNonResi = !cu->rootCbf;
       }
 
       if( tempCS->cost < currBestCost )
       {
         currBestSbt = cu->sbtInfo;
-        currBestTrs = tempCS->tus[cu->sbtInfo ? cu->getSbtPos() : 0]->mtsIdx;
+        currBestTrs = tempCS->tus[cu->sbtInfo ? cu->getSbtPos() : 0]->mtsIdx[COMPONENT_Y];
         assert( currBestTrs == 0 || currBestTrs == 1 );
         currBestCost = tempCS->cost;
       }
@@ -4055,30 +4435,30 @@ void EncCu::xEncodeInterResidual(   CodingStructure *&tempCS
   tempCS->cost = currBestCost;
   if( ETM_INTER_ME == encTestMode.type )
   {
-    if( equGBiCost != NULL )
+    if( equBcwCost != NULL )
     {
-      if( tempCS->cost < ( *equGBiCost ) && cu->GBiIdx == GBI_DEFAULT )
+      if( tempCS->cost < ( *equBcwCost ) && cu->BcwIdx == BCW_DEFAULT )
       {
-        ( *equGBiCost ) = tempCS->cost;
+        ( *equBcwCost ) = tempCS->cost;
       }
     }
     else
     {
-      CHECK( equGBiCost == NULL, "equGBiCost == NULL" );
+      CHECK( equBcwCost == NULL, "equBcwCost == NULL" );
     }
-    if( tempCS->slice->getCheckLDC() && !cu->imv && cu->GBiIdx != GBI_DEFAULT && tempCS->cost < m_bestGbiCost[1] )
+    if( tempCS->slice->getCheckLDC() && !cu->imv && cu->BcwIdx != BCW_DEFAULT && tempCS->cost < m_bestBcwCost[1] )
     {
-      if( tempCS->cost < m_bestGbiCost[0] )
+      if( tempCS->cost < m_bestBcwCost[0] )
       {
-        m_bestGbiCost[1] = m_bestGbiCost[0];
-        m_bestGbiCost[0] = tempCS->cost;
-        m_bestGbiIdx[1] = m_bestGbiIdx[0];
-        m_bestGbiIdx[0] = cu->GBiIdx;
+        m_bestBcwCost[1] = m_bestBcwCost[0];
+        m_bestBcwCost[0] = tempCS->cost;
+        m_bestBcwIdx[1] = m_bestBcwIdx[0];
+        m_bestBcwIdx[0] = cu->BcwIdx;
       }
       else
       {
-        m_bestGbiCost[1] = tempCS->cost;
-        m_bestGbiIdx[1] = cu->GBiIdx;
+        m_bestBcwCost[1] = tempCS->cost;
+        m_bestBcwIdx[1] = cu->BcwIdx;
       }
     }
   }
@@ -4090,6 +4470,8 @@ void EncCu::xEncodeDontSplit( CodingStructure &cs, Partitioner &partitioner )
   m_CABACEstimator->resetBits();
 
   m_CABACEstimator->split_cu_mode( CU_DONT_SPLIT, cs, partitioner );
+  if( partitioner.treeType == TREE_C )
+    CHECK( m_CABACEstimator->getEstFracBits() != 0, "must be 0 bit" );
 
   cs.fracBits += m_CABACEstimator->getEstFracBits(); // split bits
   cs.cost      = m_pcRdCost->calcRdCost( cs.fracBits, cs.dist );
@@ -4099,6 +4481,7 @@ void EncCu::xEncodeDontSplit( CodingStructure &cs, Partitioner &partitioner )
 #if REUSE_CU_RESULTS
 void EncCu::xReuseCachedResult( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner )
 {
+  m_pcRdCost->setChromaFormat(tempCS->sps->getChromaFormatIdc());
   BestEncInfoCache* bestEncCache = dynamic_cast<BestEncInfoCache*>( m_modeCtrl );
   CHECK( !bestEncCache, "If this mode is chosen, mode controller has to implement the mode caching capabilities" );
   EncTestMode cachedMode;
@@ -4106,11 +4489,11 @@ void EncCu::xReuseCachedResult( CodingStructure *&tempCS, CodingStructure *&best
   if( bestEncCache->setCsFrom( *tempCS, cachedMode, partitioner ) )
   {
     CodingUnit& cu = *tempCS->cus.front();
-    cu.shareParentPos = tempCS->sharedBndPos;
-    cu.shareParentSize = tempCS->sharedBndSize;
     partitioner.setCUData( cu );
 
-    if( CU::isIntra( cu ) )
+    if( CU::isIntra( cu )
+    || CU::isPLT(cu)
+    )
     {
       xReconIntraQT( cu );
     }
@@ -4136,7 +4519,7 @@ void EncCu::xReuseCachedResult( CodingStructure *&tempCS, CodingStructure *&best
     {
       const ComponentID compID = ComponentID( comp );
 
-      if( CS::isDualITree( *tempCS ) && toChannelType( compID ) != partitioner.chType )
+      if( partitioner.isSepTree( *tempCS ) && toChannelType( compID ) != partitioner.chType )
       {
         continue;
       }
@@ -4146,7 +4529,7 @@ void EncCu::xReuseCachedResult( CodingStructure *&tempCS, CodingStructure *&best
 
 #if WCG_EXT
       if (m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() || (
-        m_pcEncCfg->getReshaper() && (tempCS->slice->getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag())))
+        m_pcEncCfg->getLmcs() && (tempCS->picHeader->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag())))
       {
         const CPelBuf orgLuma = tempCS->getOrgBuf(tempCS->area.blocks[COMPONENT_Y]);
         if (compID == COMPONENT_Y && !(m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled()))
diff --git a/source/Lib/EncoderLib/EncCu.h b/source/Lib/EncoderLib/EncCu.h
index 270137a9c39d678262b51686370beced278c8648..3a9fbf7d42d1612d121218d633f4e18bc8fe1af7 100644
--- a/source/Lib/EncoderLib/EncCu.h
+++ b/source/Lib/EncoderLib/EncCu.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -74,6 +74,7 @@ struct TriangleMotionInfo
   uint8_t   m_candIdx1;
 
   TriangleMotionInfo ( uint8_t splitDir, uint8_t candIdx0, uint8_t candIdx1 ): m_splitDir(splitDir), m_candIdx0(candIdx0), m_candIdx1(candIdx1) { }
+  TriangleMotionInfo() { m_splitDir = m_candIdx0 = m_candIdx1 = 0; }
 };
 class EncCu
   : DecCu
@@ -90,7 +91,7 @@ private:
   CtxPair*              m_CurrCtx;
   CtxCache*             m_CtxCache;
 
-#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM
+#if ENABLE_SPLIT_PARALLELISM
   int                   m_dataId;
 #endif
 
@@ -101,6 +102,8 @@ private:
 
   CodingStructure    ***m_pTempCS;
   CodingStructure    ***m_pBestCS;
+  CodingStructure    ***m_pTempCS2;
+  CodingStructure    ***m_pBestCS2;
   //  Access channel
   EncCfg*               m_pcEncCfg;
   IntraSearch*          m_pcIntraSearch;
@@ -114,35 +117,35 @@ private:
   RateCtrl*             m_pcRateCtrl;
   IbcHashMap            m_ibcHashMap;
   EncModeCtrl          *m_modeCtrl;
-  int                  m_shareState;
-  uint32_t             m_shareBndPosX;
-  uint32_t             m_shareBndPosY;
-  SizeType             m_shareBndSizeW;
-  SizeType             m_shareBndSizeH;
 
   PelStorage            m_acMergeBuffer[MMVD_MRG_MAX_RD_BUF_NUM];
   PelStorage            m_acRealMergeBuffer[MRG_MAX_NUM_CANDS];
+  PelStorage            m_acMergeTmpBuffer[MRG_MAX_NUM_CANDS];
   PelStorage            m_acTriangleWeightedBuffer[TRIANGLE_MAX_NUM_CANDS]; // to store weighted prediction pixles
   double                m_mergeBestSATDCost;
   MotionInfo            m_SubPuMiBuf      [( MAX_CU_SIZE * MAX_CU_SIZE ) >> ( MIN_CU_LOG2 << 1 )];
 
   int                   m_ctuIbcSearchRangeX;
   int                   m_ctuIbcSearchRangeY;
-#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM
+#if ENABLE_SPLIT_PARALLELISM
   EncLib*               m_pcEncLib;
 #endif
-  int                   m_bestGbiIdx[2];
-  double                m_bestGbiCost[2];
-  static const TriangleMotionInfo  m_triangleModeTest[TRIANGLE_MAX_NUM_CANDS];
-  uint8_t                          m_triangleIdxBins[2][TRIANGLE_MAX_NUM_UNI_CANDS][TRIANGLE_MAX_NUM_UNI_CANDS];
+  int                   m_bestBcwIdx[2];
+  double                m_bestBcwCost[2];
+  TriangleMotionInfo    m_triangleModeTest[TRIANGLE_MAX_NUM_CANDS];
+  uint8_t               m_triangleIdxBins[2][TRIANGLE_MAX_NUM_UNI_CANDS][TRIANGLE_MAX_NUM_UNI_CANDS];
 #if SHARP_LUMA_DELTA_QP || ENABLE_QPA_SUB_CTU
-  void    updateLambda      ( Slice* slice, const int dQP, const bool updateRdCostLambda );
+  void    updateLambda      ( Slice* slice, const int dQP,
+ #if WCG_EXT && ER_CHROMA_QP_WCG_PPS
+                              const bool useWCGChromaControl,
+ #endif
+                              const bool updateRdCostLambda );
 #endif
   double                m_sbtCostSave[2];
-
 public:
   /// copy parameters from encoder class
   void  init                ( EncLib* pcEncLib, const SPS& sps PARL_PARAM( const int jId = 0 ) );
+
   void setDecCuReshaperInEncCU(EncReshape* pcReshape, ChromaFormat chromaFormatIDC) { initDecCuReshaper((Reshape*) pcReshape, chromaFormatIDC); }
   /// create internal buffers
   void  create              ( EncCfg* encCfg );
@@ -163,6 +166,7 @@ public:
   IbcHashMap& getIbcHashMap()              { return m_ibcHashMap;        }
   EncCfg*     getEncCfg()            const { return m_pcEncCfg;          }
 
+  EncCu();
   ~EncCu();
 
 protected:
@@ -170,7 +174,7 @@ protected:
   void xCalDebCost            ( CodingStructure &cs, Partitioner &partitioner, bool calDist = false );
   Distortion getDistortionDb  ( CodingStructure &cs, CPelBuf org, CPelBuf reco, ComponentID compID, const CompArea& compArea, bool afterDb );
 
-  void xCompressCU            ( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &pm );
+  void xCompressCU            ( CodingStructure*& tempCS, CodingStructure*& bestCS, Partitioner& pm, double maxCostAllowed = MAX_DOUBLE );
 #if ENABLE_SPLIT_PARALLELISM
   void xCompressCUParallel    ( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &pm );
   void copyState              ( EncCu* other, Partitioner& pm, const UnitArea& currArea, const bool isDist );
@@ -179,10 +183,9 @@ protected:
   bool
     xCheckBestMode         ( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &pm, const EncTestMode& encTestmode );
 
-  void xCheckModeSplit        ( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &pm, const EncTestMode& encTestMode );
+  void xCheckModeSplit        ( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &pm, const EncTestMode& encTestMode, const ModeType modeTypeParent, bool &skipInterPass );
 
-  void xCheckRDCostIntra      ( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &pm, const EncTestMode& encTestMode );
-  void xCheckIntraPCM         ( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &pm, const EncTestMode& encTestMode );
+  bool xCheckRDCostIntra(CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &pm, const EncTestMode& encTestMode, bool adaptiveColorTrans);
 
   void xCheckDQP              ( CodingStructure& cs, Partitioner& partitioner, bool bKeepCtx = false);
   void xFillPCMBuffer         ( CodingUnit &cu);
@@ -191,7 +194,7 @@ protected:
   void xCheckRDCostAffineMerge2Nx2N
                               ( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode );
   void xCheckRDCostInter      ( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &pm, const EncTestMode& encTestMode );
-  bool xCheckRDCostInterIMV   ( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &pm, const EncTestMode& encTestMode );
+  bool xCheckRDCostInterIMV(CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &pm, const EncTestMode& encTestMode, double &bestIntPelCost);
   void xEncodeDontSplit       ( CodingStructure &cs, Partitioner &partitioner);
 
   void xCheckRDCostMerge2Nx2N ( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &pm, const EncTestMode& encTestMode );
@@ -204,12 +207,12 @@ protected:
                              , const EncTestMode& encTestMode
                              , int residualPass       = 0
                              , bool* bestHasNonResi   = NULL
-                             , double* equGBiCost     = NULL
+                             , double* equBcwCost     = NULL
                            );
 #if REUSE_CU_RESULTS
   void xReuseCachedResult     ( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &Partitioner );
 #endif
-  bool xIsGBiSkip(const CodingUnit& cu)
+  bool xIsBcwSkip(const CodingUnit& cu)
   {
     if (cu.slice->getSliceType() != B_SLICE)
     {
@@ -222,6 +225,8 @@ protected:
   }
   void xCheckRDCostIBCMode    ( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &pm, const EncTestMode& encTestMode );
   void xCheckRDCostIBCModeMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode );
+
+  void xCheckPLT              ( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode );
 };
 
 //! \}
diff --git a/source/Lib/EncoderLib/EncGOP.cpp b/source/Lib/EncoderLib/EncGOP.cpp
index 9618b0b34520bf2cdcf3db64dc1c2e6c33c9c668..0bc3ea911b653695611ded021506232dcd0fb76b 100644
--- a/source/Lib/EncoderLib/EncGOP.cpp
+++ b/source/Lib/EncoderLib/EncGOP.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -80,7 +80,6 @@ int getLSB(int poc, int maxLSB)
   }
 }
 
-
 EncGOP::EncGOP()
 {
   m_iLastIDR            = 0;
@@ -88,6 +87,7 @@ EncGOP::EncGOP()
   m_iNumPicCoded        = 0; //Niko
   m_bFirst              = true;
   m_iLastRecoveryPicPOC = 0;
+  m_latestDRAPPOC       = MAX_INT;
   m_lastRasPoc          = MAX_INT;
 
   m_pcCfg               = NULL;
@@ -101,7 +101,8 @@ EncGOP::EncGOP()
   m_numLongTermRefPicSPS = 0;
   ::memset(m_ltRefPicPocLsbSps, 0, sizeof(m_ltRefPicPocLsbSps));
   ::memset(m_ltRefPicUsedByCurrPicFlag, 0, sizeof(m_ltRefPicUsedByCurrPicFlag));
-  m_lastBPSEI           = 0;
+  ::memset(m_lastBPSEI, 0, sizeof(m_lastBPSEI));
+  m_rapWithLeading      = false;
   m_bufferingPeriodSEIPresentInAU = false;
   m_associatedIRAPType  = NAL_UNIT_CODED_SLICE_IDR_N_LP;
   m_associatedIRAPPOC   = 0;
@@ -109,6 +110,22 @@ EncGOP::EncGOP()
   m_pcDeblockingTempPicYuv = NULL;
 #endif
 
+#if JVET_O0756_CALCULATE_HDRMETRICS
+  m_ppcFrameOrg             = nullptr;
+  m_ppcFrameRec             = nullptr;
+
+  m_pcConvertFormat         = nullptr;
+  m_pcConvertIQuantize      = nullptr;
+  m_pcColorTransform        = nullptr;
+  m_pcDistortionDeltaE      = nullptr;
+  m_pcTransferFct           = nullptr;
+
+  m_pcColorTransformParams  = nullptr;
+  m_pcFrameFormat           = nullptr;
+
+  m_metricTime = std::chrono::milliseconds(0);
+#endif
+
   m_bInitAMaxBT         = true;
   m_bgPOC = -1;
   m_picBg = NULL;
@@ -126,6 +143,28 @@ EncGOP::~EncGOP()
     // reset potential decoder resources
     tryDecodePicture( NULL, 0, std::string("") );
   }
+#if JVET_O0756_CALCULATE_HDRMETRICS
+  delete [] m_ppcFrameOrg;   // NOTE(review): frees only the array of pointers; confirm the Frame objects allocated in init() are deleted elsewhere
+  delete [] m_ppcFrameRec;   // NOTE(review): same concern as for m_ppcFrameOrg
+
+  m_ppcFrameOrg = m_ppcFrameRec = nullptr;
+
+  delete m_pcConvertFormat;   // single objects from here on; each is nullptr when init() did not allocate it, and deleting nullptr is a no-op
+  delete m_pcConvertIQuantize;
+  delete m_pcColorTransform;
+  delete m_pcDistortionDeltaE;
+  delete m_pcTransferFct;
+  delete m_pcColorTransformParams;
+  delete m_pcFrameFormat;
+
+  m_pcConvertFormat         = nullptr;   // reset every pointer after deletion
+  m_pcConvertIQuantize      = nullptr;
+  m_pcColorTransform        = nullptr;
+  m_pcDistortionDeltaE      = nullptr;
+  m_pcTransferFct           = nullptr;
+  m_pcColorTransformParams  = nullptr;
+  m_pcFrameFormat           = nullptr;
+#endif
 }
 
 /** Create list to contain pointers to CTU start addresses of slice.
@@ -170,15 +209,16 @@ void EncGOP::init ( EncLib* pcEncLib )
   m_HLSWriter            = pcEncLib->getHLSWriter();
   m_pcLoopFilter         = pcEncLib->getLoopFilter();
   m_pcSAO                = pcEncLib->getSAO();
-  m_pcALF = pcEncLib->getALF();
+  m_pcALF                = pcEncLib->getALF();
   m_pcRateCtrl           = pcEncLib->getRateCtrl();
-  m_lastBPSEI          = 0;
-  m_totalCoded         = 0;
+  ::memset(m_lastBPSEI, 0, sizeof(m_lastBPSEI));
+  ::memset(m_totalCoded, 0, sizeof(m_totalCoded));
+  m_HRD                = pcEncLib->getHRD();
 
   m_AUWriterIf = pcEncLib->getAUWriterIf();
 
 #if WCG_EXT
-  if (m_pcCfg->getReshaper())
+  if (m_pcCfg->getLmcs())
   {
     pcEncLib->getRdCost()->setReshapeInfo(m_pcCfg->getReshapeSignalType(), m_pcCfg->getBitDepth(CHANNEL_TYPE_LUMA));
     pcEncLib->getRdCost()->initLumaLevelToWeightTableReshape();
@@ -190,82 +230,197 @@ void EncGOP::init ( EncLib* pcEncLib )
   }
   pcEncLib->getALF()->getLumaLevelWeightTable() = pcEncLib->getRdCost()->getLumaLevelWeightTable();
   int alfWSSD = 0;
-  if (m_pcCfg->getReshaper() && m_pcCfg->getReshapeSignalType() == RESHAPE_SIGNAL_PQ )
+  if (m_pcCfg->getLmcs() && m_pcCfg->getReshapeSignalType() == RESHAPE_SIGNAL_PQ )
   {
     alfWSSD = 1;
   }
   pcEncLib->getALF()->setAlfWSSD(alfWSSD);
 #endif
   m_pcReshaper = pcEncLib->getReshaper();
+
+#if JVET_O0756_CALCULATE_HDRMETRICS
+  const bool calculateHdrMetrics = m_pcEncLib->getCalcluateHdrMetrics();
+  if(calculateHdrMetrics)
+  {
+    //allocate frame buffers and initialize class members
+    int chainNumber = 5;   // slots 0..4 of each frame array are filled below, one per conversion stage
+
+    m_ppcFrameOrg = new hdrtoolslib::Frame* [chainNumber];
+    m_ppcFrameRec = new hdrtoolslib::Frame* [chainNumber];
+
+    double* whitePointDeltaE = new double[hdrtoolslib::NB_REF_WHITE];   // NOTE(review): never freed in this function - confirm DistortionMetricDeltaE takes ownership or copies
+    for (int i=0; i<hdrtoolslib::NB_REF_WHITE; i++)
+    {
+      whitePointDeltaE[i] = m_pcCfg->getWhitePointDeltaE(i);
+    }
+
+    double maxSampleValue                       = m_pcCfg->getMaxSampleValue();
+    hdrtoolslib::SampleRange sampleRange        = m_pcCfg->getSampleRange();
+    hdrtoolslib::ChromaFormat chFmt             = hdrtoolslib::ChromaFormat(m_pcCfg->getChromaFormatIdc());
+    int bitDepth = m_pcCfg->getBitDepth(CHANNEL_TYPE_LUMA);
+    hdrtoolslib::ColorPrimaries colorPrimaries  = m_pcCfg->getColorPrimaries();
+    bool enableTFunctionLUT                     = m_pcCfg->getEnableTFunctionLUT();
+    hdrtoolslib::ChromaLocation* chromaLocation = new hdrtoolslib::ChromaLocation[2];   // NOTE(review): never freed in this function - confirm ConvertColorFormat::create takes ownership or copies
+    for (int i=0; i<2; i++)
+    {
+      chromaLocation[i] = m_pcCfg->getChromaLocation(i);
+    }
+    int chromaUpFilter  = m_pcCfg->getChromaUPFilter();
+    int cropOffsetLeft   = m_pcCfg->getCropOffsetLeft();
+    int cropOffsetTop    = m_pcCfg->getCropOffsetTop();
+    int cropOffsetRight  = m_pcCfg->getCropOffsetRight();
+    int cropOffsetBottom = m_pcCfg->getCropOffsetBottom();
+
+    int width  = m_pcCfg->getSourceWidth() - cropOffsetLeft + cropOffsetRight;   // NOTE(review): left is subtracted but right is added - presumably right/bottom offsets are signed (negative shrinks); confirm
+    int height = m_pcCfg->getSourceHeight() - cropOffsetTop  + cropOffsetBottom;
+
+    m_ppcFrameOrg[0] = new hdrtoolslib::Frame(width, height, false, hdrtoolslib::CM_YCbCr, colorPrimaries, chFmt, sampleRange, bitDepth, false, hdrtoolslib::TF_PQ, 0);
+    m_ppcFrameRec[0] = new hdrtoolslib::Frame(width, height, false, hdrtoolslib::CM_YCbCr, colorPrimaries, chFmt, sampleRange, bitDepth, false, hdrtoolslib::TF_PQ, 0);                                // stage 0: cropped size, configured chroma format (chFmt) and luma bit depth
+
+    m_ppcFrameOrg[1] = new hdrtoolslib::Frame(m_ppcFrameOrg[0]->m_width[hdrtoolslib::Y_COMP], m_ppcFrameOrg[0]->m_height[hdrtoolslib::Y_COMP], false, hdrtoolslib::CM_YCbCr, colorPrimaries, hdrtoolslib::CF_444, sampleRange, bitDepth, false, hdrtoolslib::TF_PQ, 0);
+    m_ppcFrameRec[1] = new hdrtoolslib::Frame(m_ppcFrameRec[0]->m_width[hdrtoolslib::Y_COMP], m_ppcFrameRec[0]->m_height[hdrtoolslib::Y_COMP], false, hdrtoolslib::CM_YCbCr, colorPrimaries, hdrtoolslib::CF_444, sampleRange, bitDepth, false, hdrtoolslib::TF_PQ, 0);                                // 420 to 444 conversion
+
+    m_ppcFrameOrg[2] =  new hdrtoolslib::Frame(m_ppcFrameOrg[0]->m_width[hdrtoolslib::Y_COMP], m_ppcFrameOrg[0]->m_height[hdrtoolslib::Y_COMP], true, hdrtoolslib::CM_YCbCr, colorPrimaries, hdrtoolslib::CF_444, hdrtoolslib::SR_UNKNOWN, 32, false, hdrtoolslib::TF_PQ, 0);
+    m_ppcFrameRec[2] =  new hdrtoolslib::Frame(m_ppcFrameRec[0]->m_width[hdrtoolslib::Y_COMP], m_ppcFrameRec[0]->m_height[hdrtoolslib::Y_COMP], true, hdrtoolslib::CM_YCbCr, colorPrimaries, hdrtoolslib::CF_444, hdrtoolslib::SR_UNKNOWN, 32, false, hdrtoolslib::TF_PQ, 0);                                // 444 to Float conversion
+
+    m_ppcFrameOrg[3] = new hdrtoolslib::Frame(m_ppcFrameOrg[0]->m_width[hdrtoolslib::Y_COMP], m_ppcFrameOrg[0]->m_height[hdrtoolslib::Y_COMP], true, hdrtoolslib::CM_RGB, hdrtoolslib::CP_2020, hdrtoolslib::CF_444, hdrtoolslib::SR_UNKNOWN, 32, false, hdrtoolslib::TF_PQ, 0);
+    m_ppcFrameRec[3] = new hdrtoolslib::Frame(m_ppcFrameRec[0]->m_width[hdrtoolslib::Y_COMP], m_ppcFrameRec[0]->m_height[hdrtoolslib::Y_COMP], true, hdrtoolslib::CM_RGB, hdrtoolslib::CP_2020, hdrtoolslib::CF_444, hdrtoolslib::SR_UNKNOWN, 32, false, hdrtoolslib::TF_PQ, 0);                                // YCbCr to RGB conversion
+
+    m_ppcFrameOrg[4] = new hdrtoolslib::Frame(m_ppcFrameOrg[0]->m_width[hdrtoolslib::Y_COMP], m_ppcFrameOrg[0]->m_height[hdrtoolslib::Y_COMP], true, hdrtoolslib::CM_RGB, hdrtoolslib::CP_2020, hdrtoolslib::CF_444, hdrtoolslib::SR_UNKNOWN, 32, false, hdrtoolslib::TF_NULL, 0);
+    m_ppcFrameRec[4] = new hdrtoolslib::Frame(m_ppcFrameRec[0]->m_width[hdrtoolslib::Y_COMP], m_ppcFrameRec[0]->m_height[hdrtoolslib::Y_COMP], true, hdrtoolslib::CM_RGB, hdrtoolslib::CP_2020, hdrtoolslib::CF_444, hdrtoolslib::SR_UNKNOWN, 32, false, hdrtoolslib::TF_NULL, 0);                                // Inverse Transfer Function
+
+    m_pcFrameFormat                   = new hdrtoolslib::FrameFormat();   // format descriptor handed to the DeltaE metric constructed below
+    m_pcFrameFormat->m_isFloat        = true;
+    m_pcFrameFormat->m_chromaFormat   = hdrtoolslib::CF_UNKNOWN;
+    m_pcFrameFormat->m_colorSpace     = hdrtoolslib::CM_RGB;
+    m_pcFrameFormat->m_colorPrimaries = hdrtoolslib::CP_2020;
+    m_pcFrameFormat->m_sampleRange    = hdrtoolslib::SR_UNKNOWN;
+
+    m_pcConvertFormat     = hdrtoolslib::ConvertColorFormat::create(width, height, chFmt, hdrtoolslib::CF_444, chromaUpFilter, chromaLocation, chromaLocation);
+    m_pcConvertIQuantize  = hdrtoolslib::Convert::create(&m_ppcFrameOrg[1]->m_format, &m_ppcFrameOrg[2]->m_format);
+    m_pcColorTransform    = hdrtoolslib::ColorTransform::create(m_ppcFrameOrg[2]->m_colorSpace, m_ppcFrameOrg[2]->m_colorPrimaries, m_ppcFrameOrg[3]->m_colorSpace, m_ppcFrameOrg[3]->m_colorPrimaries, true, 1);
+    m_pcDistortionDeltaE  = new hdrtoolslib::DistortionMetricDeltaE(m_pcFrameFormat, false, maxSampleValue, whitePointDeltaE, 1);
+    m_pcTransferFct       = hdrtoolslib::TransferFunction::create(hdrtoolslib::TF_PQ, true, (float) maxSampleValue, 0, 0.0, 1.0, enableTFunctionLUT);
+  }
+#endif
 }
 
-#if HEVC_VPS
 int EncGOP::xWriteVPS (AccessUnit &accessUnit, const VPS *vps)
 {
   OutputNALUnit nalu(NAL_UNIT_VPS);
   m_HLSWriter->setBitstream( &nalu.m_Bitstream );
+  CHECK( nalu.m_temporalId, "The value of TemporalId of VPS NAL units shall be equal to 0" );   // m_temporalId is never assigned in this function, so this validates the value OutputNALUnit was constructed with
   m_HLSWriter->codeVPS( vps );
   accessUnit.push_back(new NALUnitEBSP(nalu));
   return (int)(accessUnit.back()->m_nalUnitData.str().size()) * 8;
 }
-#endif
 
-int EncGOP::xWriteSPS (AccessUnit &accessUnit, const SPS *sps)
+int EncGOP::xWriteDPS (AccessUnit &accessUnit, const DPS *dps)   // appends a DPS NAL unit to accessUnit and returns its data size times 8 (bits); returns 0 when nothing is written
+{
+  if (dps->getDecodingParameterSetId() !=0)   // a DPS whose id is 0 is never written
+  {
+    OutputNALUnit nalu(NAL_UNIT_DPS);
+    m_HLSWriter->setBitstream( &nalu.m_Bitstream );   // subsequent codeDPS() output goes into this NAL unit's bitstream
+    CHECK( nalu.m_temporalId, "The value of TemporalId of DPS NAL units shall be equal to 0" );   // m_temporalId is not assigned here; this validates the constructed value
+    m_HLSWriter->codeDPS( dps );
+    accessUnit.push_back(new NALUnitEBSP(nalu));   // raw pointer handed to accessUnit (presumably AccessUnit owns it - confirm)
+    return (int)(accessUnit.back()->m_nalUnitData.str().size()) * 8;   // size of the NAL unit just appended, str().size() times 8
+  }
+  else
+  {
+    return 0;   // nothing appended to accessUnit
+  }
+}
+
+int EncGOP::xWriteSPS( AccessUnit &accessUnit, const SPS *sps, const int layerId )   // appends an SPS NAL unit to accessUnit; returns the appended unit's data size times 8 (bits)
 {
   OutputNALUnit nalu(NAL_UNIT_SPS);
   m_HLSWriter->setBitstream( &nalu.m_Bitstream );
+  nalu.m_nuhLayerId = layerId;   // layer id supplied by the caller is stamped on the NAL unit
+  CHECK( nalu.m_temporalId, "The value of TemporalId of SPS NAL units shall be equal to 0" );   // m_temporalId is not assigned here; this validates the constructed value
   m_HLSWriter->codeSPS( sps );
   accessUnit.push_back(new NALUnitEBSP(nalu));
   return (int)(accessUnit.back()->m_nalUnitData.str().size()) * 8;
 
 }
 
-int EncGOP::xWritePPS (AccessUnit &accessUnit, const PPS *pps)
+int EncGOP::xWritePPS( AccessUnit &accessUnit, const PPS *pps, const SPS *sps, const int layerId )   // appends a PPS NAL unit to accessUnit; sps is forwarded to codePPS(); returns the unit's data size times 8 (bits)
 {
   OutputNALUnit nalu(NAL_UNIT_PPS);
   m_HLSWriter->setBitstream( &nalu.m_Bitstream );
-  m_HLSWriter->codePPS( pps );
+  nalu.m_nuhLayerId = layerId;   // layer id supplied by the caller is stamped on the NAL unit
+  CHECK( nalu.m_temporalId < accessUnit.temporalId, "TemporalId shall be greater than or equal to the TemporalId of the layer access unit containing the NAL unit" );   // m_temporalId is not assigned in this function, so the constructed value is what gets compared
+  m_HLSWriter->codePPS( pps, sps );
   accessUnit.push_back(new NALUnitEBSP(nalu));
   return (int)(accessUnit.back()->m_nalUnitData.str().size()) * 8;
 }
 
-int EncGOP::xWriteAPS(AccessUnit &accessUnit, APS *aps)
+int EncGOP::xWriteAPS( AccessUnit &accessUnit, APS *aps, const int layerId, const bool isPrefixNUT )   // appends an APS NAL unit to accessUnit; returns the unit's data size times 8 (bits)
 {
-  OutputNALUnit nalu(NAL_UNIT_APS);
+  OutputNALUnit nalu( isPrefixNUT ? NAL_UNIT_PREFIX_APS : NAL_UNIT_SUFFIX_APS );   // isPrefixNUT selects the prefix or the suffix APS NAL unit type
   m_HLSWriter->setBitstream(&nalu.m_Bitstream);
+  nalu.m_nuhLayerId = layerId;
+  nalu.m_temporalId = aps->getTemporalId();   // unlike the SPS/PPS writers, the temporal id is taken from the APS itself
+  aps->setLayerId( layerId );   // side effect: layerId is also recorded on the caller's APS object
+  CHECK( nalu.m_temporalId < accessUnit.temporalId, "TemporalId shall be greater than or equal to the TemporalId of the layer access unit containing the NAL unit" );
   m_HLSWriter->codeAPS(aps);
   accessUnit.push_back(new NALUnitEBSP(nalu));
   return (int)(accessUnit.back()->m_nalUnitData.str().size()) * 8;
 }
 
-int EncGOP::xWriteParameterSets (AccessUnit &accessUnit, Slice *slice, const bool bSeqFirst)
+int EncGOP::xWriteParameterSets( AccessUnit &accessUnit, Slice *slice, const bool bSeqFirst )   // returns the summed bit counts reported by every xWrite* call made here
 {
   int actualTotalBits = 0;
 
-#if HEVC_VPS
-  if (bSeqFirst)
+  if( bSeqFirst )   // VPS, DPS and SPS are only considered when bSeqFirst is set
   {
-    actualTotalBits += xWriteVPS(accessUnit, m_pcEncLib->getVPS());
-  }
-#endif
-  if (m_pcEncLib->SPSNeedsWriting(slice->getSPS()->getSPSId())) // Note this assumes that all changes to the SPS are made at the EncLib level prior to picture creation (EncLib::xGetNewPicBuffer).
-  {
-    CHECK(!(bSeqFirst), "Unspecified error"); // Implementations that use more than 1 SPS need to be aware of activation issues.
-    actualTotalBits += xWriteSPS(accessUnit, slice->getSPS());
+    if (slice->getSPS()->getVPSId() != 0)   // no VPS is written when the SPS refers to VPS id 0
+    {
+      actualTotalBits += xWriteVPS(accessUnit, m_pcEncLib->getVPS());
+    }
+    actualTotalBits += xWriteDPS( accessUnit, m_pcEncLib->getDPS() );   // contributes 0 bits when the DPS id is 0
+
+    if( m_pcEncLib->SPSNeedsWriting( slice->getSPS()->getSPSId() ) ) // Note this assumes that all changes to the SPS are made at the EncLib level prior to picture creation (EncLib::xGetNewPicBuffer).
+    {
+      CHECK( !( bSeqFirst ), "Unspecified error" ); // Implementations that use more than 1 SPS need to be aware of activation issues. NOTE(review): now nested inside the bSeqFirst branch, so this check can no longer fire.
+      actualTotalBits += xWriteSPS( accessUnit, slice->getSPS(), m_pcEncLib->getLayerId() );
+    }
   }
-  if (m_pcEncLib->PPSNeedsWriting(slice->getPPS()->getPPSId())) // Note this assumes that all changes to the PPS are made at the EncLib level prior to picture creation (EncLib::xGetNewPicBuffer).
+
+  if( m_pcEncLib->PPSNeedsWriting( slice->getPPS()->getPPSId() ) ) // Note this assumes that all changes to the PPS are made at the EncLib level prior to picture creation (EncLib::xGetNewPicBuffer).
   {
-    actualTotalBits += xWritePPS(accessUnit, slice->getPPS());
+    actualTotalBits += xWritePPS( accessUnit, slice->getPPS(), slice->getSPS(), m_pcEncLib->getLayerId() );   // the PPS check is independent of bSeqFirst
   }
 
   return actualTotalBits;
 }
 
+int EncGOP::xWritePicHeader( AccessUnit &accessUnit, PicHeader *picHeader )   // appends a picture-header NAL unit to accessUnit; returns the unit's data size times 8 (bits)
+{
+  OutputNALUnit nalu(NAL_UNIT_PH);
+  m_HLSWriter->setBitstream( &nalu.m_Bitstream );   // subsequent codePictureHeader() output goes into this NAL unit's bitstream
+  nalu.m_temporalId = accessUnit.temporalId;   // temporal id is copied from the access unit being built
+  nalu.m_nuhLayerId = m_pcEncLib->getLayerId();   // layer id comes from the encoder library, not from a parameter
+  m_HLSWriter->codePictureHeader( picHeader );
+  accessUnit.push_back(new NALUnitEBSP(nalu));   // raw pointer handed to accessUnit (presumably AccessUnit owns it - confirm)
+  return (int)(accessUnit.back()->m_nalUnitData.str().size()) * 8;
+}
+
 void EncGOP::xWriteAccessUnitDelimiter (AccessUnit &accessUnit, Slice *slice)
 {
   AUDWriter audWriter;
   OutputNALUnit nalu(NAL_UNIT_ACCESS_UNIT_DELIMITER);
-
+  nalu.m_temporalId = slice->getTLayer();
+  int vpsId = slice->getSPS()->getVPSId();
+  if (vpsId == 0)
+  {
+    nalu.m_nuhLayerId = 0;
+  }
+  else
+  {
+    nalu.m_nuhLayerId = slice->getVPS()->getLayerId(0);
+  }
+  CHECK( nalu.m_temporalId != accessUnit.temporalId, "TemporalId shall be equal to the TemporalId of the AU containing the NAL unit" );
   int picType = slice->isIntra() ? 0 : (slice->isInterP() ? 1 : 2);
 
   audWriter.codeAUD(nalu.m_Bitstream, picType);
@@ -280,8 +435,8 @@ void EncGOP::xWriteSEI (NalUnitType naluType, SEIMessages& seiMessages, AccessUn
   {
     return;
   }
-  OutputNALUnit nalu(naluType, temporalId);
-  m_seiWriter.writeSEImessages(nalu.m_Bitstream, seiMessages, sps, false);
+  OutputNALUnit nalu( naluType, m_pcEncLib->getLayerId(), temporalId );
+  m_seiWriter.writeSEImessages(nalu.m_Bitstream, seiMessages, sps, *m_HRD, false, temporalId);
   auPos = accessUnit.insert(auPos, new NALUnitEBSP(nalu));
   auPos++;
 }
@@ -297,8 +452,8 @@ void EncGOP::xWriteSEISeparately (NalUnitType naluType, SEIMessages& seiMessages
   {
     SEIMessages tmpMessages;
     tmpMessages.push_back(*sei);
-    OutputNALUnit nalu(naluType, temporalId);
-    m_seiWriter.writeSEImessages(nalu.m_Bitstream, tmpMessages, sps, false);
+    OutputNALUnit nalu( naluType, m_pcEncLib->getLayerId(), temporalId );
+    m_seiWriter.writeSEImessages(nalu.m_Bitstream, tmpMessages, sps, *m_HRD, false, temporalId);
     auPos = accessUnit.insert(auPos, new NALUnitEBSP(nalu));
     auPos++;
   }
@@ -323,9 +478,8 @@ void EncGOP::xWriteLeadingSEIOrdered (SEIMessages& seiMessages, SEIMessages& duI
 
   while ( (itNalu!=accessUnit.end())&&
     ( (*itNalu)->m_nalUnitType==NAL_UNIT_ACCESS_UNIT_DELIMITER
-#if HEVC_VPS
     || (*itNalu)->m_nalUnitType==NAL_UNIT_VPS
-#endif
+    || (*itNalu)->m_nalUnitType==NAL_UNIT_DPS
     || (*itNalu)->m_nalUnitType==NAL_UNIT_SPS
     || (*itNalu)->m_nalUnitType==NAL_UNIT_PPS
     ))
@@ -341,11 +495,13 @@ void EncGOP::xWriteLeadingSEIOrdered (SEIMessages& seiMessages, SEIMessages& duI
 #endif
   // The case that a specific SEI is not present is handled in xWriteSEI (empty list)
 
+#if HEVC_SEI
   // Active parameter sets SEI must always be the first SEI
   currentMessages = extractSeisByType(localMessages, SEI::ACTIVE_PARAMETER_SETS);
   CHECK(!(currentMessages.size() <= 1), "Unspecified error");
   xWriteSEI(NAL_UNIT_PREFIX_SEI, currentMessages, accessUnit, itNalu, temporalId, sps);
   xClearSEIs(currentMessages, !testWrite);
+#endif
 
   // Buffering period SEI must always be following active parameter sets
   currentMessages = extractSeisByType(localMessages, SEI::BUFFERING_PERIOD);
@@ -371,10 +527,12 @@ void EncGOP::xWriteLeadingSEIOrdered (SEIMessages& seiMessages, SEIMessages& duI
     xClearSEIs(currentMessages, !testWrite);
   }
 
+#if HEVC_SEI
   // Scalable nesting SEI must always be the following DU info
   currentMessages = extractSeisByType(localMessages, SEI::SCALABLE_NESTING);
   xWriteSEISeparately(NAL_UNIT_PREFIX_SEI, currentMessages, accessUnit, itNalu, temporalId, sps);
   xClearSEIs(currentMessages, !testWrite);
+#endif
 
   // And finally everything else one by one
   xWriteSEISeparately(NAL_UNIT_PREFIX_SEI, localMessages, accessUnit, itNalu, temporalId, sps);
@@ -399,7 +557,7 @@ void EncGOP::xWriteLeadingSEIMessages (SEIMessages& seiMessages, SEIMessages& du
   // update Timing and DU info SEI
   xUpdateDuData(testAU, duData);
   xUpdateTimingSEI(picTiming, duData, sps);
-  xUpdateDuInfoSEI(duInfoSeiMessages, picTiming);
+  xUpdateDuInfoSEI(duInfoSeiMessages, picTiming, sps->getMaxTLayers());
   // actual writing
   xWriteLeadingSEIOrdered(seiMessages, duInfoSeiMessages, accessUnit, temporalId, sps, false);
 
@@ -416,9 +574,7 @@ void EncGOP::xWriteTrailingSEIMessages (SEIMessages& seiMessages, AccessUnit &ac
 
 void EncGOP::xWriteDuSEIMessages (SEIMessages& duInfoSeiMessages, AccessUnit &accessUnit, int temporalId, const SPS *sps, std::deque<DUData> &duData)
 {
-  const HRD *hrd = sps->getVuiParameters()->getHrdParameters();
-
-  if( m_pcCfg->getDecodingUnitInfoSEIEnabled() && hrd->getSubPicCpbParamsPresentFlag() )
+  if( m_pcCfg->getDecodingUnitInfoSEIEnabled() && m_HRD->getBufferingPeriodSEI()->m_decodingUnitCpbParamsInPicTimingSeiFlag )
   {
     int naluIdx = 0;
     AccessUnit::iterator nalu = accessUnit.begin();
@@ -462,111 +618,135 @@ void EncGOP::xCreateIRAPLeadingSEIMessages (SEIMessages& seiMessages, const SPS
 {
   OutputNALUnit nalu(NAL_UNIT_PREFIX_SEI);
 
-  if(m_pcCfg->getActiveParameterSetsSEIEnabled())
-  {
-    SEIActiveParameterSets *sei = new SEIActiveParameterSets;
-#if HEVC_VPS
-    m_seiEncoder.initSEIActiveParameterSets (sei, m_pcCfg->getVPS(), sps);
-#else
-    m_seiEncoder.initSEIActiveParameterSets(sei, sps);
-#endif
-    seiMessages.push_back(sei);
-  }
-
   if(m_pcCfg->getFramePackingArrangementSEIEnabled())
   {
     SEIFramePacking *sei = new SEIFramePacking;
     m_seiEncoder.initSEIFramePacking (sei, m_iNumPicCoded);
     seiMessages.push_back(sei);
   }
-
-  if(m_pcCfg->getSegmentedRectFramePackingArrangementSEIEnabled())
+#if U0033_ALTERNATIVE_TRANSFER_CHARACTERISTICS_SEI
+  if(m_pcCfg->getSEIAlternativeTransferCharacteristicsSEIEnable())
+  {
+    SEIAlternativeTransferCharacteristics *seiAlternativeTransferCharacteristics = new SEIAlternativeTransferCharacteristics;
+    m_seiEncoder.initSEIAlternativeTransferCharacteristics(seiAlternativeTransferCharacteristics);
+    seiMessages.push_back(seiAlternativeTransferCharacteristics);
+  }
+#endif
+  if (m_pcCfg->getErpSEIEnabled())
   {
-    SEISegmentedRectFramePacking *sei = new SEISegmentedRectFramePacking;
-    m_seiEncoder.initSEISegmentedRectFramePacking(sei);
+    SEIEquirectangularProjection *sei = new SEIEquirectangularProjection;
+    m_seiEncoder.initSEIErp(sei);
     seiMessages.push_back(sei);
   }
 
-  if (m_pcCfg->getDisplayOrientationSEIAngle())
+  if (m_pcCfg->getSphereRotationSEIEnabled())
   {
-    SEIDisplayOrientation *sei = new SEIDisplayOrientation;
-    m_seiEncoder.initSEIDisplayOrientation(sei);
+    SEISphereRotation *sei = new SEISphereRotation;
+    m_seiEncoder.initSEISphereRotation(sei);
     seiMessages.push_back(sei);
   }
 
-  if(m_pcCfg->getToneMappingInfoSEIEnabled())
+  if (m_pcCfg->getOmniViewportSEIEnabled())
   {
-    SEIToneMappingInfo *sei = new SEIToneMappingInfo;
-    m_seiEncoder.initSEIToneMappingInfo (sei);
+    SEIOmniViewport *sei = new SEIOmniViewport;
+    m_seiEncoder.initSEIOmniViewport(sei);
     seiMessages.push_back(sei);
   }
-
-#if HEVC_TILES_WPP
-  if(m_pcCfg->getTMCTSSEIEnabled())
+  if (m_pcCfg->getRwpSEIEnabled())
+  {
+    SEIRegionWisePacking *seiRegionWisePacking = new SEIRegionWisePacking;
+    m_seiEncoder.initSEIRegionWisePacking(seiRegionWisePacking);
+    seiMessages.push_back(seiRegionWisePacking);
+  }
+  if (m_pcCfg->getGcmpSEIEnabled())
   {
-    SEITempMotionConstrainedTileSets *sei = new SEITempMotionConstrainedTileSets;
-    m_seiEncoder.initSEITempMotionConstrainedTileSets(sei, pps);
+    SEIGeneralizedCubemapProjection *sei = new SEIGeneralizedCubemapProjection;
+    m_seiEncoder.initSEIGcmp(sei);
     seiMessages.push_back(sei);
   }
-#endif
-
-  if(m_pcCfg->getTimeCodeSEIEnabled())
+  if (m_pcCfg->getSubpicureLevelInfoSEIEnabled())
   {
-    SEITimeCode *seiTimeCode = new SEITimeCode;
-    m_seiEncoder.initSEITimeCode(seiTimeCode);
-    seiMessages.push_back(seiTimeCode);
+    SEISubpicureLevelInfo *seiSubpicureLevelInfo = new SEISubpicureLevelInfo;
+    m_seiEncoder.initSEISubpictureLevelInfo(seiSubpicureLevelInfo, sps);
+    seiMessages.push_back(seiSubpicureLevelInfo);
   }
-
-  if(m_pcCfg->getKneeSEIEnabled())
+  if (m_pcCfg->getSampleAspectRatioInfoSEIEnabled())
+  {
+    SEISampleAspectRatioInfo *seiSampleAspectRatioInfo = new SEISampleAspectRatioInfo;
+    m_seiEncoder.initSEISampleAspectRatioInfo(seiSampleAspectRatioInfo);
+    seiMessages.push_back(seiSampleAspectRatioInfo);
+  }
+  // film grain
+  if (m_pcCfg->getFilmGrainCharactersticsSEIEnabled())
   {
-    SEIKneeFunctionInfo *sei = new SEIKneeFunctionInfo;
-    m_seiEncoder.initSEIKneeFunctionInfo(sei);
+    SEIFilmGrainCharacteristics *sei = new SEIFilmGrainCharacteristics;
+    m_seiEncoder.initSEIFilmGrainCharacteristics(sei);
     seiMessages.push_back(sei);
   }
 
-  if(m_pcCfg->getMasteringDisplaySEI().colourVolumeSEIEnabled)
+  // mastering display colour volume
+  if (m_pcCfg->getMasteringDisplaySEI().colourVolumeSEIEnabled)
   {
-    const SEIMasteringDisplay &seiCfg=m_pcCfg->getMasteringDisplaySEI();
     SEIMasteringDisplayColourVolume *sei = new SEIMasteringDisplayColourVolume;
-    sei->values = seiCfg;
+    m_seiEncoder.initSEIMasteringDisplayColourVolume(sei);
     seiMessages.push_back(sei);
   }
-  if(m_pcCfg->getChromaResamplingFilterHintEnabled())
+
+  // content light level
+  if (m_pcCfg->getCLLSEIEnabled())
   {
-    SEIChromaResamplingFilterHint *seiChromaResamplingFilterHint = new SEIChromaResamplingFilterHint;
-    m_seiEncoder.initSEIChromaResamplingFilterHint(seiChromaResamplingFilterHint, m_pcCfg->getChromaResamplingHorFilterIdc(), m_pcCfg->getChromaResamplingVerFilterIdc());
-    seiMessages.push_back(seiChromaResamplingFilterHint);
+    SEIContentLightLevelInfo *seiCLL = new SEIContentLightLevelInfo;
+    m_seiEncoder.initSEIContentLightLevel(seiCLL);
+    seiMessages.push_back(seiCLL);
   }
-#if U0033_ALTERNATIVE_TRANSFER_CHARACTERISTICS_SEI
-  if(m_pcCfg->getSEIAlternativeTransferCharacteristicsSEIEnable())
+
+  // ambient viewing environment
+  if (m_pcCfg->getAmbientViewingEnvironmentSEIEnabled())
   {
-    SEIAlternativeTransferCharacteristics *seiAlternativeTransferCharacteristics = new SEIAlternativeTransferCharacteristics;
-    m_seiEncoder.initSEIAlternativeTransferCharacteristics(seiAlternativeTransferCharacteristics);
-    seiMessages.push_back(seiAlternativeTransferCharacteristics);
+    SEIAmbientViewingEnvironment *seiAVE = new SEIAmbientViewingEnvironment;
+    m_seiEncoder.initSEIAmbientViewingEnvironment(seiAVE);
+    seiMessages.push_back(seiAVE);
+  }
+
+  // content colour volume
+  if (m_pcCfg->getCcvSEIEnabled())
+  {
+    SEIContentColourVolume *seiContentColourVolume = new SEIContentColourVolume;
+    m_seiEncoder.initSEIContentColourVolume(seiContentColourVolume);
+    seiMessages.push_back(seiContentColourVolume);
   }
-#endif
 }
 
 void EncGOP::xCreatePerPictureSEIMessages (int picInGOP, SEIMessages& seiMessages, SEIMessages& nestedSeiMessages, Slice *slice)
 {
-  if( ( m_pcCfg->getBufferingPeriodSEIEnabled() ) && ( slice->getSliceType() == I_SLICE ) &&
-    ( slice->getSPS()->getVuiParametersPresentFlag() ) &&
-    ( ( slice->getSPS()->getVuiParameters()->getHrdParameters()->getNalHrdParametersPresentFlag() )
-    || ( slice->getSPS()->getVuiParameters()->getHrdParameters()->getVclHrdParametersPresentFlag() ) ) )
+  if ((m_pcCfg->getBufferingPeriodSEIEnabled()) && (slice->isIRAP() || slice->getNalUnitType() == NAL_UNIT_CODED_SLICE_GDR) &&
+    ( slice->getSPS()->getHrdParametersPresentFlag() ) )
   {
     SEIBufferingPeriod *bufferingPeriodSEI = new SEIBufferingPeriod();
-    m_seiEncoder.initSEIBufferingPeriod(bufferingPeriodSEI, slice);
+    bool noLeadingPictures = ( (slice->getNalUnitType()!= NAL_UNIT_CODED_SLICE_IDR_W_RADL) && (slice->getNalUnitType()!= NAL_UNIT_CODED_SLICE_CRA) )?(true):(false);
+    m_seiEncoder.initSEIBufferingPeriod(bufferingPeriodSEI,noLeadingPictures);
+    m_HRD->setBufferingPeriodSEI(bufferingPeriodSEI);
     seiMessages.push_back(bufferingPeriodSEI);
     m_bufferingPeriodSEIPresentInAU = true;
 
+#if HEVC_SEI
     if (m_pcCfg->getScalableNestingSEIEnabled())
     {
       SEIBufferingPeriod *bufferingPeriodSEIcopy = new SEIBufferingPeriod();
       bufferingPeriodSEI->copyTo(*bufferingPeriodSEIcopy);
       nestedSeiMessages.push_back(bufferingPeriodSEIcopy);
     }
+#endif
   }
 
+  if (m_pcEncLib->getDependentRAPIndicationSEIEnabled() && slice->isDRAP())
+  {
+    SEIDependentRAPIndication *dependentRAPIndicationSEI = new SEIDependentRAPIndication();
+    m_seiEncoder.initSEIDependentRAPIndication(dependentRAPIndicationSEI);
+    seiMessages.push_back(dependentRAPIndicationSEI);
+  }
+
+#if HEVC_SEI
   if (picInGOP ==0 && m_pcCfg->getSOPDescriptionSEIEnabled() ) // write SOP description SEI (if enabled) at the beginning of GOP
   {
     SEISOPDescription* sopDescriptionSEI = new SEISOPDescription();
@@ -617,8 +797,10 @@ void EncGOP::xCreatePerPictureSEIMessages (int picInGOP, SEIMessages& seiMessage
       delete seiColourRemappingInfo;
     }
   }
+#endif
 }
 
+#if HEVC_SEI
 void EncGOP::xCreateScalableNestingSEI (SEIMessages& seiMessages, SEIMessages& nestedSeiMessages)
 {
   SEIMessages tmpMessages;
@@ -633,31 +815,222 @@ void EncGOP::xCreateScalableNestingSEI (SEIMessages& seiMessages, SEIMessages& n
     tmpMessages.clear();
   }
 }
+#endif
+
+void EncGOP::xCreateFrameFieldInfoSEI  (SEIMessages& seiMessages, Slice *slice, bool isField) // Appends a frame-field info SEI to seiMessages when that SEI is enabled in the configuration; otherwise does nothing.
+{
+  if (m_pcCfg->getFrameFieldInfoSEIEnabled())
+  {
+    SEIFrameFieldInfo *frameFieldInfoSEI = new SEIFrameFieldInfo();
+
+    // Encode only very basic information. If more features are supported, this should be moved to SEIEncoder.
+    frameFieldInfoSEI->m_fieldPicFlag = isField;
+    if (isField) // m_bottomFieldFlag is only assigned here for field pictures
+    {
+      frameFieldInfoSEI->m_bottomFieldFlag = !slice->getPic()->topField;
+    }
+    seiMessages.push_back(frameFieldInfoSEI); // the raw pointer is stored in seiMessages; it is not deleted here
+  }
+}
+
 
 void EncGOP::xCreatePictureTimingSEI  (int IRAPGOPid, SEIMessages& seiMessages, SEIMessages& nestedSeiMessages, SEIMessages& duInfoSeiMessages, Slice *slice, bool isField, std::deque<DUData> &duData)
 {
-  const VUI *vui = slice->getSPS()->getVuiParameters();
-  const HRD *hrd = vui->getHrdParameters();
+  // Picture timing depends on buffering period. When either of those is disabled,
+  // initialization would fail. Needs more cleanup after DU timing is integrated.
+  if (!(m_pcCfg->getPictureTimingSEIEnabled() && m_pcCfg->getBufferingPeriodSEIEnabled()))
+  {
+    return;
+  }
+
+  const HRDParameters *hrd = slice->getSPS()->getHrdParameters();
 
   // update decoding unit parameters
-  if( ( m_pcCfg->getPictureTimingSEIEnabled() || m_pcCfg->getDecodingUnitInfoSEIEnabled() ) &&
-    ( slice->getSPS()->getVuiParametersPresentFlag() ) &&
-    (  hrd->getNalHrdParametersPresentFlag() || hrd->getVclHrdParametersPresentFlag() ) )
+  if( ( m_pcCfg->getPictureTimingSEIEnabled() || m_pcCfg->getDecodingUnitInfoSEIEnabled() ) )
   {
     int picSptDpbOutputDuDelay = 0;
     SEIPictureTiming *pictureTimingSEI = new SEIPictureTiming();
 
     // DU parameters
-    if( hrd->getSubPicCpbParamsPresentFlag() )
+    if( hrd->getGeneralDecodingUnitHrdParamsPresentFlag() )
     {
       uint32_t numDU = (uint32_t) duData.size();
       pictureTimingSEI->m_numDecodingUnitsMinus1     = ( numDU - 1 );
       pictureTimingSEI->m_duCommonCpbRemovalDelayFlag = false;
       pictureTimingSEI->m_numNalusInDuMinus1.resize( numDU );
-      pictureTimingSEI->m_duCpbRemovalDelayMinus1.resize( numDU );
+      const uint32_t maxNumSubLayers = slice->getSPS()->getMaxTLayers();
+      pictureTimingSEI->m_duCpbRemovalDelayMinus1.resize( numDU * maxNumSubLayers );
+    }
+    const uint32_t cpbRemovalDelayLegth = m_HRD->getBufferingPeriodSEI()->m_cpbRemovalDelayLength;
+    const uint32_t maxNumSubLayers = slice->getSPS()->getMaxTLayers();
+    pictureTimingSEI->m_auCpbRemovalDelay[maxNumSubLayers-1] = std::min<int>(std::max<int>(1, m_totalCoded[maxNumSubLayers-1] - m_lastBPSEI[maxNumSubLayers-1]), static_cast<int>(pow(2, static_cast<double>(cpbRemovalDelayLegth)))); // Syntax element signalled as minus, hence the .
+    CHECK( (m_totalCoded[maxNumSubLayers-1] - m_lastBPSEI[maxNumSubLayers-1]) > pow(2, static_cast<double>(cpbRemovalDelayLegth)), " cpbRemovalDelayLegth too small for m_auCpbRemovalDelay[pt_max_sub_layers_minus1] at picture timing SEI " );
+    const uint32_t temporalId = slice->getTLayer();
+    for( int i = temporalId ; i < maxNumSubLayers - 1 ; i ++ )
+    {
+      int indexWithinGOP = (m_totalCoded[maxNumSubLayers - 1] - m_lastBPSEI[maxNumSubLayers - 1]) % m_pcCfg->getGOPSize();
+      pictureTimingSEI->m_ptSubLayerDelaysPresentFlag[i] = true;
+      if( ((m_rapWithLeading == true) && (indexWithinGOP == 0)) || (m_totalCoded[maxNumSubLayers - 1] == 0) || m_bufferingPeriodSEIPresentInAU)
+      {
+        pictureTimingSEI->m_cpbRemovalDelayDeltaEnabledFlag[i] = false;
+      }
+      else
+      {
+        pictureTimingSEI->m_cpbRemovalDelayDeltaEnabledFlag[i] = m_HRD->getBufferingPeriodSEI()->m_cpbRemovalDelayDeltasPresentFlag;
+      }
+      if( pictureTimingSEI->m_cpbRemovalDelayDeltaEnabledFlag[i] )
+      {
+        if( m_rapWithLeading == false )
+        {
+          switch (m_pcCfg->getGOPSize())
+          {
+            case 8:
+            {
+              if((indexWithinGOP == 1 && i == 2))
+              {
+                pictureTimingSEI->m_cpbRemovalDelayDeltaIdx[i] = 0;
+              }
+              else if((indexWithinGOP == 2 && i == 2) || (indexWithinGOP == 6 && i == 2))
+              {
+                pictureTimingSEI->m_cpbRemovalDelayDeltaIdx[i] = 1;
+              }
+              else if((indexWithinGOP == 1 && i == 1) || (indexWithinGOP == 3 && i == 2))
+              {
+                pictureTimingSEI->m_cpbRemovalDelayDeltaIdx[i] = 2;
+              }
+              else if(indexWithinGOP == 2 && i == 1)
+              {
+                pictureTimingSEI->m_cpbRemovalDelayDeltaIdx[i] = 3;
+              }
+              else if(indexWithinGOP == 1 && i == 0)
+              {
+                pictureTimingSEI->m_cpbRemovalDelayDeltaIdx[i] = 4;
+              }
+              else
+              {
+                THROW("m_cpbRemovalDelayDeltaIdx not applicable for the sub-layer and GOP size");
+              }
+            }
+              break;
+            case 16:
+            {
+              if((indexWithinGOP == 1 && i == 3))
+              {
+                pictureTimingSEI->m_cpbRemovalDelayDeltaIdx[i] = 0;
+              }
+              else if((indexWithinGOP == 2 && i == 3) || (indexWithinGOP == 10 && i == 3) || (indexWithinGOP == 14 && i == 3))
+              {
+                pictureTimingSEI->m_cpbRemovalDelayDeltaIdx[i] = 1;
+              }
+              else if((indexWithinGOP == 1 && i == 2) || (indexWithinGOP == 3 && i == 3) || (indexWithinGOP == 7 && i == 3) || (indexWithinGOP == 11 && i == 3))
+              {
+                pictureTimingSEI->m_cpbRemovalDelayDeltaIdx[i] = 2;
+              }
+              else if(indexWithinGOP == 4 && i == 3)
+              {
+                pictureTimingSEI->m_cpbRemovalDelayDeltaIdx[i] = 3;
+              }
+              else if((indexWithinGOP == 2 && i == 2) || (indexWithinGOP == 10 && i == 2))
+              {
+                pictureTimingSEI->m_cpbRemovalDelayDeltaIdx[i] = 4;
+              }
+              else if(indexWithinGOP == 1 && i == 1)
+              {
+                pictureTimingSEI->m_cpbRemovalDelayDeltaIdx[i] = 5;
+              }
+              else if(indexWithinGOP == 3 && i == 2)
+              {
+                pictureTimingSEI->m_cpbRemovalDelayDeltaIdx[i] = 6;
+              }
+              else if(indexWithinGOP == 2 && i == 1)
+              {
+                pictureTimingSEI->m_cpbRemovalDelayDeltaIdx[i] = 7;
+              }
+              else if(indexWithinGOP == 1 && i == 0)
+              {
+                pictureTimingSEI->m_cpbRemovalDelayDeltaIdx[i] = 8;
+              }
+              else
+              {
+                THROW("m_cpbRemovalDelayDeltaIdx not applicable for the sub-layer and GOP size");
+              }
+            }
+              break;
+            default:
+            {
+              THROW("m_cpbRemovalDelayDeltaIdx not supported for the current GOP size");
+            }
+              break;
+          }
+        }
+        else
+        {
+          switch (m_pcCfg->getGOPSize())
+          {
+            case 8:
+            {
+              if((indexWithinGOP == 1 && i == 2) || (indexWithinGOP == 5 && i == 2))
+              {
+                pictureTimingSEI->m_cpbRemovalDelayDeltaIdx[i] = 0;
+              }
+              else if(indexWithinGOP == 2 && i == 2)
+              {
+                pictureTimingSEI->m_cpbRemovalDelayDeltaIdx[i] = 1;
+              }
+              else if(indexWithinGOP == 1 && i == 1)
+              {
+                pictureTimingSEI->m_cpbRemovalDelayDeltaIdx[i] = 2;
+              }
+              else
+              {
+                THROW("m_cpbRemovalDelayDeltaIdx not applicable for the sub-layer and GOP size");
+              }
+            }
+              break;
+            case 16:
+            {
+              if((indexWithinGOP == 1 && i == 3) || (indexWithinGOP == 9 && i == 3) || (indexWithinGOP == 13 && i == 3))
+              {
+                pictureTimingSEI->m_cpbRemovalDelayDeltaIdx[i] = 0;
+              }
+              else if((indexWithinGOP == 2 && i == 3) || (indexWithinGOP == 6 && i == 3) || (indexWithinGOP == 10 && i == 3))
+              {
+                pictureTimingSEI->m_cpbRemovalDelayDeltaIdx[i] = 1;
+              }
+              else if((indexWithinGOP == 1 && i == 2) || (indexWithinGOP == 9 && i == 2) || (indexWithinGOP == 3 && i == 3))
+              {
+                pictureTimingSEI->m_cpbRemovalDelayDeltaIdx[i] = 2;
+              }
+              else if(indexWithinGOP == 2 && i == 2)
+              {
+                pictureTimingSEI->m_cpbRemovalDelayDeltaIdx[i] = 3;
+              }
+              else if(indexWithinGOP == 1 && i == 1)
+              {
+                pictureTimingSEI->m_cpbRemovalDelayDeltaIdx[i] = 4;
+              }
+              else
+              {
+                THROW("m_cpbRemovalDelayDeltaIdx not applicable for the sub-layer and GOP size");
+              }
+            }
+              break;
+            default:
+            {
+              THROW("m_cpbRemovalDelayDeltaIdx not applicable for the sub-layer and GOP size");
+            }
+              break;
+          }
+        }
+      }
+      else
+      {
+        int scaledDistToBuffPeriod = (m_totalCoded[i] - m_lastBPSEI[i]) * static_cast<int>(pow(2, static_cast<double>(maxNumSubLayers - 1 - i)));
+        pictureTimingSEI->m_auCpbRemovalDelay[i] = std::min<int>(std::max<int>(1, scaledDistToBuffPeriod), static_cast<int>(pow(2, static_cast<double>(cpbRemovalDelayLegth)))); // Syntax element signalled as minus, hence the .
+        CHECK( (scaledDistToBuffPeriod) > pow(2, static_cast<double>(cpbRemovalDelayLegth)), " cpbRemovalDelayLegth too small for m_auCpbRemovalDelay[i] at picture timing SEI " );
+      }
     }
-    pictureTimingSEI->m_auCpbRemovalDelay = std::min<int>(std::max<int>(1, m_totalCoded - m_lastBPSEI), static_cast<int>(pow(2, static_cast<double>(hrd->getCpbRemovalDelayLengthMinus1()+1)))); // Syntax element signalled as minus, hence the .
-    pictureTimingSEI->m_picDpbOutputDelay = slice->getSPS()->getNumReorderPics(slice->getSPS()->getMaxTLayers()-1) + slice->getPOC() - m_totalCoded;
+    pictureTimingSEI->m_picDpbOutputDelay = slice->getSPS()->getNumReorderPics(slice->getSPS()->getMaxTLayers()-1) + slice->getPOC() - m_totalCoded[maxNumSubLayers-1];
     if(m_pcCfg->getEfficientFieldIRAPEnabled() && IRAPGOPid > 0 && IRAPGOPid < m_iGopSize)
     {
       // if pictures have been swapped there is likely one more picture delay on their tid. Very rough approximation
@@ -671,93 +1044,39 @@ void EncGOP::xCreatePictureTimingSEI  (int IRAPGOPid, SEIMessages& seiMessages,
     }
     if (m_bufferingPeriodSEIPresentInAU)
     {
-      m_lastBPSEI = m_totalCoded;
-    }
-
-    if( hrd->getSubPicCpbParamsPresentFlag() )
-    {
-      int i;
-      uint64_t ui64Tmp;
-      uint32_t uiPrev = 0;
-      uint32_t numDU = ( pictureTimingSEI->m_numDecodingUnitsMinus1 + 1 );
-      std::vector<uint32_t> &rDuCpbRemovalDelayMinus1 = pictureTimingSEI->m_duCpbRemovalDelayMinus1;
-      uint32_t maxDiff = ( hrd->getTickDivisorMinus2() + 2 ) - 1;
-
-      for( i = 0; i < numDU; i ++ )
-      {
-        pictureTimingSEI->m_numNalusInDuMinus1[ i ]       = ( i == 0 ) ? ( duData[i].accumNalsDU - 1 ) : ( duData[i].accumNalsDU- duData[i-1].accumNalsDU - 1 );
-      }
-
-      if( numDU == 1 )
+      for( int i = temporalId ; i < maxNumSubLayers ; i ++ )
       {
-        rDuCpbRemovalDelayMinus1[ 0 ] = 0; /* don't care */
+        m_lastBPSEI[i] = m_totalCoded[i];
       }
-      else
+      if( (slice->getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_W_RADL)||(slice->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA) )
       {
-        rDuCpbRemovalDelayMinus1[ numDU - 1 ] = 0;/* by definition */
-        uint32_t tmp = 0;
-        uint32_t accum = 0;
-
-        for( i = ( numDU - 2 ); i >= 0; i -- )
-        {
-          ui64Tmp = ( ( ( duData[numDU - 1].accumBitsDU  - duData[i].accumBitsDU ) * ( vui->getTimingInfo()->getTimeScale() / vui->getTimingInfo()->getNumUnitsInTick() ) * ( hrd->getTickDivisorMinus2() + 2 ) ) / ( m_pcCfg->getTargetBitrate() ) );
-          if( (uint32_t)ui64Tmp > maxDiff )
-          {
-            tmp ++;
-          }
-        }
-        uiPrev = 0;
-
-        uint32_t flag = 0;
-        for( i = ( numDU - 2 ); i >= 0; i -- )
-        {
-          flag = 0;
-          ui64Tmp = ( ( ( duData[numDU - 1].accumBitsDU  - duData[i].accumBitsDU ) * ( vui->getTimingInfo()->getTimeScale() / vui->getTimingInfo()->getNumUnitsInTick() ) * ( hrd->getTickDivisorMinus2() + 2 ) ) / ( m_pcCfg->getTargetBitrate() ) );
-
-          if( (uint32_t)ui64Tmp > maxDiff )
-          {
-            if(uiPrev >= maxDiff - tmp)
-            {
-              ui64Tmp = uiPrev + 1;
-              flag = 1;
-            }
-            else                            ui64Tmp = maxDiff - tmp + 1;
-          }
-          rDuCpbRemovalDelayMinus1[ i ] = (uint32_t)ui64Tmp - uiPrev - 1;
-          if( (int)rDuCpbRemovalDelayMinus1[ i ] < 0 )
-          {
-            rDuCpbRemovalDelayMinus1[ i ] = 0;
-          }
-          else if (tmp > 0 && flag == 1)
-          {
-            tmp --;
-          }
-          accum += rDuCpbRemovalDelayMinus1[ i ] + 1;
-          uiPrev = accum;
-        }
+        m_rapWithLeading = true;
       }
     }
 
+
     if( m_pcCfg->getPictureTimingSEIEnabled() )
     {
-      pictureTimingSEI->m_picStruct = (isField && slice->getPic()->topField)? 1 : isField? 2 : 0;
       seiMessages.push_back(pictureTimingSEI);
 
+#if HEVC_SEI
       if ( m_pcCfg->getScalableNestingSEIEnabled() ) // put picture timing SEI into scalable nesting SEI
       {
         SEIPictureTiming *pictureTimingSEIcopy = new SEIPictureTiming();
         pictureTimingSEI->copyTo(*pictureTimingSEIcopy);
         nestedSeiMessages.push_back(pictureTimingSEIcopy);
       }
+#endif
     }
 
-    if( m_pcCfg->getDecodingUnitInfoSEIEnabled() && hrd->getSubPicCpbParamsPresentFlag() )
+    if( m_pcCfg->getDecodingUnitInfoSEIEnabled() && hrd->getGeneralDecodingUnitHrdParamsPresentFlag() )
     {
       for( int i = 0; i < ( pictureTimingSEI->m_numDecodingUnitsMinus1 + 1 ); i ++ )
       {
         SEIDecodingUnitInfo *duInfoSEI = new SEIDecodingUnitInfo();
         duInfoSEI->m_decodingUnitIdx = i;
-        duInfoSEI->m_duSptCpbRemovalDelay = pictureTimingSEI->m_duCpbRemovalDelayMinus1[i] + 1;
+        for( int j = temporalId; j < maxNumSubLayers; j++ ) // m_duCpbRemovalDelayMinus1 holds numDU * maxNumSubLayers entries, so j must stay below maxNumSubLayers
+          duInfoSEI->m_duSptCpbRemovalDelayIncrement[j] = pictureTimingSEI->m_duCpbRemovalDelayMinus1[i*maxNumSubLayers+j] + 1;
         duInfoSEI->m_dpbOutputDuDelayPresentFlag = false;
         duInfoSEI->m_picSptDpbOutputDuDelay = picSptDpbOutputDuDelay;
 
@@ -815,9 +1134,8 @@ void EncGOP::xUpdateTimingSEI(SEIPictureTiming *pictureTimingSEI, std::deque<DUD
   {
     return;
   }
-  const VUI *vui = sps->getVuiParameters();
-  const HRD *hrd = vui->getHrdParameters();
-  if( hrd->getSubPicCpbParamsPresentFlag() )
+  const HRDParameters *hrd = sps->getHrdParameters();
+  if( hrd->getGeneralDecodingUnitHrdParamsPresentFlag() )
   {
     int i;
     uint64_t ui64Tmp;
@@ -826,6 +1144,10 @@ void EncGOP::xUpdateTimingSEI(SEIPictureTiming *pictureTimingSEI, std::deque<DUD
     std::vector<uint32_t> &rDuCpbRemovalDelayMinus1 = pictureTimingSEI->m_duCpbRemovalDelayMinus1;
     uint32_t maxDiff = ( hrd->getTickDivisorMinus2() + 2 ) - 1;
 
+    int maxNumSubLayers = sps->getMaxTLayers();
+    for( int j = 0; j < maxNumSubLayers - 1; j++ )
+      pictureTimingSEI->m_ptSubLayerDelaysPresentFlag[j] = false;
+
     for( i = 0; i < numDU; i ++ )
     {
       pictureTimingSEI->m_numNalusInDuMinus1[ i ]       = ( i == 0 ) ? ( duData[i].accumNalsDU - 1 ) : ( duData[i].accumNalsDU- duData[i-1].accumNalsDU - 1 );
@@ -833,17 +1155,17 @@ void EncGOP::xUpdateTimingSEI(SEIPictureTiming *pictureTimingSEI, std::deque<DUD
 
     if( numDU == 1 )
     {
-      rDuCpbRemovalDelayMinus1[ 0 ] = 0; /* don't care */
+      rDuCpbRemovalDelayMinus1[ 0 + maxNumSubLayers - 1 ] = 0; /* don't care */
     }
     else
     {
-      rDuCpbRemovalDelayMinus1[ numDU - 1 ] = 0;/* by definition */
+      rDuCpbRemovalDelayMinus1[ (numDU - 1) * maxNumSubLayers + maxNumSubLayers - 1 ] = 0;/* by definition */
       uint32_t tmp = 0;
       uint32_t accum = 0;
 
       for( i = ( numDU - 2 ); i >= 0; i -- )
       {
-        ui64Tmp = ( ( ( duData[numDU - 1].accumBitsDU  - duData[i].accumBitsDU ) * ( vui->getTimingInfo()->getTimeScale() / vui->getTimingInfo()->getNumUnitsInTick() ) * ( hrd->getTickDivisorMinus2() + 2 ) ) / ( m_pcCfg->getTargetBitrate() ) );
+        ui64Tmp = ( ( ( duData[numDU - 1].accumBitsDU  - duData[i].accumBitsDU ) * ( sps->getTimingInfo()->getTimeScale() / sps->getTimingInfo()->getNumUnitsInTick() ) * ( hrd->getTickDivisorMinus2() + 2 ) ) / ( m_pcCfg->getTargetBitrate() ) );
         if( (uint32_t)ui64Tmp > maxDiff )
         {
           tmp ++;
@@ -855,7 +1177,7 @@ void EncGOP::xUpdateTimingSEI(SEIPictureTiming *pictureTimingSEI, std::deque<DUD
       for( i = ( numDU - 2 ); i >= 0; i -- )
       {
         flag = 0;
-        ui64Tmp = ( ( ( duData[numDU - 1].accumBitsDU  - duData[i].accumBitsDU ) * ( vui->getTimingInfo()->getTimeScale() / vui->getTimingInfo()->getNumUnitsInTick() ) * ( hrd->getTickDivisorMinus2() + 2 ) ) / ( m_pcCfg->getTargetBitrate() ) );
+        ui64Tmp = ( ( ( duData[numDU - 1].accumBitsDU  - duData[i].accumBitsDU ) * ( sps->getTimingInfo()->getTimeScale() / sps->getTimingInfo()->getNumUnitsInTick() ) * ( hrd->getTickDivisorMinus2() + 2 ) ) / ( m_pcCfg->getTargetBitrate() ) );
 
         if( (uint32_t)ui64Tmp > maxDiff )
         {
@@ -866,22 +1188,22 @@ void EncGOP::xUpdateTimingSEI(SEIPictureTiming *pictureTimingSEI, std::deque<DUD
           }
           else                            ui64Tmp = maxDiff - tmp + 1;
         }
-        rDuCpbRemovalDelayMinus1[ i ] = (uint32_t)ui64Tmp - uiPrev - 1;
-        if( (int)rDuCpbRemovalDelayMinus1[ i ] < 0 )
+        rDuCpbRemovalDelayMinus1[ i * maxNumSubLayers + maxNumSubLayers - 1 ] = (uint32_t)ui64Tmp - uiPrev - 1;
+        if( (int)rDuCpbRemovalDelayMinus1[ i * maxNumSubLayers + maxNumSubLayers - 1 ] < 0 )
         {
-          rDuCpbRemovalDelayMinus1[ i ] = 0;
+          rDuCpbRemovalDelayMinus1[ i * maxNumSubLayers + maxNumSubLayers - 1 ] = 0;
         }
         else if (tmp > 0 && flag == 1)
         {
           tmp --;
         }
-        accum += rDuCpbRemovalDelayMinus1[ i ] + 1;
+        accum += rDuCpbRemovalDelayMinus1[ i * maxNumSubLayers + maxNumSubLayers - 1 ] + 1;
         uiPrev = accum;
       }
     }
   }
 }
-void EncGOP::xUpdateDuInfoSEI(SEIMessages &duInfoSeiMessages, SEIPictureTiming *pictureTimingSEI)
+void EncGOP::xUpdateDuInfoSEI(SEIMessages &duInfoSeiMessages, SEIPictureTiming *pictureTimingSEI, int maxSubLayers)
 {
   if (duInfoSeiMessages.empty() || (pictureTimingSEI == NULL))
   {
@@ -894,7 +1216,11 @@ void EncGOP::xUpdateDuInfoSEI(SEIMessages &duInfoSeiMessages, SEIPictureTiming *
   {
     SEIDecodingUnitInfo *duInfoSEI = (SEIDecodingUnitInfo*) (*du);
     duInfoSEI->m_decodingUnitIdx = i;
-    duInfoSEI->m_duSptCpbRemovalDelay = pictureTimingSEI->m_duCpbRemovalDelayMinus1[i] + 1;
+    for ( int j = 0; j < maxSubLayers; j++ )
+    {
+      duInfoSEI->m_duiSubLayerDelaysPresentFlag[j] = pictureTimingSEI->m_ptSubLayerDelaysPresentFlag[j];
+      duInfoSEI->m_duSptCpbRemovalDelayIncrement[j] = pictureTimingSEI->m_duCpbRemovalDelayMinus1[i*maxSubLayers+j] + 1;
+    }
     duInfoSEI->m_dpbOutputDuDelayPresentFlag = false;
     i++;
   }
@@ -908,10 +1234,10 @@ cabac_zero_word_padding(Slice *const pcSlice, Picture *const pcPic, const std::s
   const int log2subWidthCxsubHeightC = (::getComponentScaleX(COMPONENT_Cb, format)+::getComponentScaleY(COMPONENT_Cb, format));
   const int minCuWidth  = pcPic->cs->pcv->minCUWidth;
   const int minCuHeight = pcPic->cs->pcv->minCUHeight;
-  const int paddedWidth = ((sps.getPicWidthInLumaSamples()  + minCuWidth  - 1) / minCuWidth) * minCuWidth;
-  const int paddedHeight= ((sps.getPicHeightInLumaSamples() + minCuHeight - 1) / minCuHeight) * minCuHeight;
+  const int paddedWidth = ( ( pcSlice->getPPS()->getPicWidthInLumaSamples() + minCuWidth - 1 ) / minCuWidth ) * minCuWidth;
+  const int paddedHeight = ( ( pcSlice->getPPS()->getPicHeightInLumaSamples() + minCuHeight - 1 ) / minCuHeight ) * minCuHeight;
   const int rawBits = paddedWidth * paddedHeight *
-                         (sps.getBitDepth(CHANNEL_TYPE_LUMA) + 2*(sps.getBitDepth(CHANNEL_TYPE_CHROMA)>>log2subWidthCxsubHeightC));
+                         (sps.getBitDepth(CHANNEL_TYPE_LUMA) + ((2*sps.getBitDepth(CHANNEL_TYPE_CHROMA))>>log2subWidthCxsubHeightC));
   const std::size_t threshold = (32/3)*numBytesInVclNalUnits + (rawBits/32);
   if (binCountsInNalUnits >= threshold)
   {
@@ -985,11 +1311,7 @@ void EfficientFieldIRAPMapping::initialize(const bool isField, const int gopSize
 
       // check if POC corresponds to IRAP
       NalUnitType tmpUnitType = pEncGop->getNalUnitType(pocCurr, lastIDR, isField);
-#if !JVET_M0101_HLS
-      if(tmpUnitType >= NAL_UNIT_CODED_SLICE_BLA_W_LP && tmpUnitType <= NAL_UNIT_CODED_SLICE_CRA) // if picture is an IRAP
-#else
       if (tmpUnitType >= NAL_UNIT_CODED_SLICE_IDR_W_RADL && tmpUnitType <= NAL_UNIT_CODED_SLICE_CRA) // if picture is an IRAP
-#endif
       {
         if(pocCurr%2 == 0 && iGOPid < gopSize-1 && pCfg->getGOPEntry(iGOPid).m_POC == pCfg->getGOPEntry(iGOPid+1).m_POC-1)
         { // if top field and following picture in enc order is associated bottom field
@@ -1074,71 +1396,11 @@ int EfficientFieldIRAPMapping::restoreGOPid(const int GOPid)
 }
 
 
-#if X0038_LAMBDA_FROM_QP_CAPABILITY
-static uint32_t calculateCollocatedFromL0Flag(const Slice *pSlice)
-{
-  const int refIdx = 0; // Zero always assumed
-  const Picture *refPicL0 = pSlice->getRefPic(REF_PIC_LIST_0, refIdx);
-  const Picture *refPicL1 = pSlice->getRefPic(REF_PIC_LIST_1, refIdx);
-  return refPicL0->slices[0]->getSliceQp() > refPicL1->slices[0]->getSliceQp();
-}
-#else
-static uint32_t calculateCollocatedFromL1Flag(EncCfg *pCfg, const int GOPid, const int gopSize)
+static void
+printHash(const HashType hashType, const std::string &digestStr)
 {
-  int iCloseLeft=1, iCloseRight=-1;
-  for(int i = 0; i<pCfg->getGOPEntry(GOPid).m_numRefPics; i++)
-  {
-    int iRef = pCfg->getGOPEntry(GOPid).m_referencePics[i];
-    if(iRef>0&&(iRef<iCloseRight||iCloseRight==-1))
-    {
-      iCloseRight=iRef;
-    }
-    else if(iRef<0&&(iRef>iCloseLeft||iCloseLeft==1))
-    {
-      iCloseLeft=iRef;
-    }
-  }
-  if(iCloseRight>-1)
-  {
-    iCloseRight=iCloseRight+pCfg->getGOPEntry(GOPid).m_POC-1;
-  }
-  if(iCloseLeft<1)
-  {
-    iCloseLeft=iCloseLeft+pCfg->getGOPEntry(GOPid).m_POC-1;
-    while(iCloseLeft<0)
-    {
-      iCloseLeft+=gopSize;
-    }
-  }
-  int iLeftQP=0, iRightQP=0;
-  for(int i=0; i<gopSize; i++)
-  {
-    if(pCfg->getGOPEntry(i).m_POC==(iCloseLeft%gopSize)+1)
-    {
-      iLeftQP= pCfg->getGOPEntry(i).m_QPOffset;
-    }
-    if (pCfg->getGOPEntry(i).m_POC==(iCloseRight%gopSize)+1)
-    {
-      iRightQP=pCfg->getGOPEntry(i).m_QPOffset;
-    }
-  }
-  if(iCloseRight>-1&&iRightQP<iLeftQP)
-  {
-    return 0;
-  }
-  else
-  {
-    return 1;
-  }
-}
-#endif
-
-
-static void
-printHash(const HashType hashType, const std::string &digestStr)
-{
-  const char *decodedPictureHashModeName;
-  switch (hashType)
+  const char *decodedPictureHashModeName;
+  switch (hashType)
   {
     case HASHTYPE_MD5:
       decodedPictureHashModeName = "MD5";
@@ -1295,8 +1557,7 @@ void trySkipOrDecodePicture( bool& decPic, bool& encPic, const EncCfg& cfg, Pict
             // patch IDR-slice to CRA-Intra-slice
             pcPic->slices[ i ]->setNalUnitType    ( slice0.getNalUnitType()    );
             pcPic->slices[ i ]->setLastIDR        ( slice0.getLastIDR()        );
-            pcPic->slices[ i ]->setEnableTMVPFlag ( slice0.getEnableTMVPFlag() );
-            if ( slice0.getEnableTMVPFlag() )
+            if ( pcPic->cs->picHeader->getEnableTMVPFlag() )
             {
               pcPic->slices[ i ]->setColFromL0Flag( slice0.getColFromL0Flag()  );
               pcPic->slices[ i ]->setColRefIdx    ( slice0.getColRefIdx()      );
@@ -1337,6 +1598,326 @@ void trySkipOrDecodePicture( bool& decPic, bool& encPic, const EncCfg& cfg, Pict
   }
 }
 
+void EncGOP::xPicInitHashME( Picture *pic, const PPS *pps, PicList &rcListPic )  // Adds the referenced pictures of rcListPic (other than pic) to their inter hash maps when hash ME is enabled.
+{
+  if (! m_pcCfg->getUseHashME())  // hash-based ME is switched off in the configuration: nothing to do
+  {
+    return;
+  }
+
+  PicList::iterator iterPic = rcListPic.begin();
+  while (iterPic != rcListPic.end())
+  {
+    Picture* refPic = *(iterPic++);
+
+    if (refPic->poc != pic->poc && refPic->referenced)  // only referenced pictures whose POC differs from the current picture
+    {
+      if (!refPic->getHashMap()->isInitial())  // presumably isInitial() reports an already built map - TODO confirm
+      {
+        if (refPic->getPOC() == 0)  // the content analysis below is run only for the POC-0 picture
+        {
+          Pel* picSrc = refPic->getOrigBuf().get(COMPONENT_Y).buf;  // luma plane of the buffer returned by getOrigBuf()
+          int stridePic = refPic->getOrigBuf().get(COMPONENT_Y).stride;
+          int picWidth = pps->getPicWidthInLumaSamples();
+          int picHeight = pps->getPicHeightInLumaSamples();
+          int blockSize = 4;  // analysis granularity: non-overlapping 4x4 luma blocks
+          int allNum = 0;  // number of blocks examined
+          int simpleNum = 0;  // number of blocks with flat rows or flat columns
+          for (int j = 0; j <= picHeight - blockSize; j += blockSize)  // only blocks lying completely inside the picture
+          {
+            for (int i = 0; i <= picWidth - blockSize; i += blockSize)
+            {
+              Pel* curBlock = picSrc + j * stridePic + i;  // top-left sample of the current block
+              bool isHorSame = true;  // stays true while every sample equals the first sample of its row
+              for (int m = 0; m < blockSize&&isHorSame; m++)
+              {
+                for (int n = 1; n < blockSize&&isHorSame; n++)
+                {
+                  if (curBlock[m*stridePic] != curBlock[m*stridePic + n])
+                  {
+                    isHorSame = false;  // also terminates both loops through their conditions
+                  }
+                }
+              }
+              bool isVerSame = true;  // stays true while every row equals the first row of the block
+              for (int m = 1; m < blockSize&&isVerSame; m++)
+              {
+                for (int n = 0; n < blockSize&&isVerSame; n++)
+                {
+                  if (curBlock[n] != curBlock[m*stridePic + n])
+                  {
+                    isVerSame = false;  // also terminates both loops through their conditions
+                  }
+                }
+              }
+              allNum++;
+              if (isHorSame || isVerSame)
+              {
+                simpleNum++;
+              }
+            }
+          }
+
+          if (simpleNum < 0.3*allNum)  // fewer than 30% of the blocks are flat
+          {
+            m_pcCfg->setUseHashME(false);  // turns hash ME off in the shared configuration object
+            break;  // leaves the picture loop: neither this nor any later picture is added to a hash map
+          }
+        }
+        refPic->addPictureToHashMapForInter();
+      }
+    }
+  }
+}
+
+void EncGOP::xPicInitRateControl(int &estimatedBits, int gopId, double &lambda, Picture *pic, Slice *slice)  // Picture-level rate-control setup: writes estimatedBits and lambda, then applies QP/lambda through resetQP().
+{
+  if ( !m_pcCfg->getUseRateCtrl() ) // TODO: does this work with multiple slices and slice-segments?
+  {
+    return;  // rate control disabled: estimatedBits and lambda are left untouched
+  }
+  int frameLevel = m_pcRateCtrl->getRCSeq()->getGOPID2Level( gopId );
+  if ( pic->slices[0]->isIRAP() )  // decided from the first slice of the picture only
+  {
+    frameLevel = 0;  // IRAP pictures always use level 0
+  }
+  m_pcRateCtrl->initRCPic( frameLevel );
+  estimatedBits = m_pcRateCtrl->getRCPic()->getTargetBits();
+
+#if U0132_TARGET_BITS_SATURATION
+  if (m_pcRateCtrl->getCpbSaturationEnabled() && frameLevel != 0)  // level-0 pictures are handled in the intra branch below
+  {
+    int estimatedCpbFullness = m_pcRateCtrl->getCpbState() + m_pcRateCtrl->getBufferingRate();
+
+    // prevent overflow: keep (fullness - bits) at or below 90% of the CPB size
+    if (estimatedCpbFullness - estimatedBits > (int)(m_pcRateCtrl->getCpbSize()*0.9f))
+    {
+      estimatedBits = estimatedCpbFullness - (int)(m_pcRateCtrl->getCpbSize()*0.9f);
+    }
+
+    estimatedCpbFullness -= m_pcRateCtrl->getBufferingRate();  // back to the plain CPB state for the underflow test
+    // prevent underflow: keep (fullness - bits) at or above the lower bound, but never go below 200 bits
+#if V0078_ADAPTIVE_LOWER_BOUND
+    if (estimatedCpbFullness - estimatedBits < m_pcRateCtrl->getRCPic()->getLowerBound())
+    {
+      estimatedBits = std::max(200, estimatedCpbFullness - m_pcRateCtrl->getRCPic()->getLowerBound());
+    }
+#else
+    if (estimatedCpbFullness - estimatedBits < (int)(m_pcRateCtrl->getCpbSize()*0.1f))  // fixed bound: 10% of the CPB size
+    {
+      estimatedBits = std::max(200, estimatedCpbFullness - (int)(m_pcRateCtrl->getCpbSize()*0.1f));
+    }
+#endif
+
+    m_pcRateCtrl->getRCPic()->setTargetBits(estimatedBits);
+  }
+#endif
+
+  int sliceQP = m_pcCfg->getInitialQP();
+  if ( ( slice->getPOC() == 0 && m_pcCfg->getInitialQP() > 0 ) || ( frameLevel == 0 && m_pcCfg->getForceIntraQP() ) ) // QP is specified
+  {
+    int    NumberBFrames = ( m_pcCfg->getGOPSize() - 1 );
+    double dLambda_scale = 1.0 - Clip3( 0.0, 0.5, 0.05*(double)NumberBFrames );
+    double dQPFactor     = 0.57*dLambda_scale;
+    int    SHIFT_QP      = 12;
+    int bitdepth_luma_qp_scale = 6 * (slice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA) - 8
+                                - DISTORTION_PRECISION_ADJUSTMENT(slice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA)));
+    double qp_temp = (double) sliceQP + bitdepth_luma_qp_scale - SHIFT_QP;
+    lambda = dQPFactor*pow( 2.0, qp_temp/3.0 );  // lambda derived directly from the configured initial QP
+  }
+  else if ( frameLevel == 0 )   // intra case, but use the model
+  {
+    m_pcSliceEncoder->calCostSliceI(pic); // TODO: This only analyses the first slice segment - what about the others?
+
+    if ( m_pcCfg->getIntraPeriod() != 1 )   // do not refine allocated bits for all intra case
+    {
+      int bits = m_pcRateCtrl->getRCSeq()->getLeftAverageBits();
+      bits = m_pcRateCtrl->getRCPic()->getRefineBitsForIntra( bits );
+
+#if U0132_TARGET_BITS_SATURATION
+      if (m_pcRateCtrl->getCpbSaturationEnabled() )
+      {
+        int estimatedCpbFullness = m_pcRateCtrl->getCpbState() + m_pcRateCtrl->getBufferingRate();
+
+        // prevent overflow: keep (fullness - bits) at or below 90% of the CPB size
+        if (estimatedCpbFullness - bits > (int)(m_pcRateCtrl->getCpbSize()*0.9f))
+        {
+          bits = estimatedCpbFullness - (int)(m_pcRateCtrl->getCpbSize()*0.9f);
+        }
+
+        estimatedCpbFullness -= m_pcRateCtrl->getBufferingRate();  // back to the plain CPB state for the underflow test
+        // prevent underflow: no 200-bit floor here, it is applied after this #if block
+#if V0078_ADAPTIVE_LOWER_BOUND
+        if (estimatedCpbFullness - bits < m_pcRateCtrl->getRCPic()->getLowerBound())
+        {
+          bits = estimatedCpbFullness - m_pcRateCtrl->getRCPic()->getLowerBound();
+        }
+#else
+        if (estimatedCpbFullness - bits < (int)(m_pcRateCtrl->getCpbSize()*0.1f))  // fixed bound: 10% of the CPB size
+        {
+          bits = estimatedCpbFullness - (int)(m_pcRateCtrl->getCpbSize()*0.1f);
+        }
+#endif
+      }
+#endif
+
+      if ( bits < 200 )  // enforce the same 200-bit minimum as the non-intra path
+      {
+        bits = 200;
+      }
+      m_pcRateCtrl->getRCPic()->setTargetBits( bits );
+    }
+
+    list<EncRCPic*> listPreviousPicture = m_pcRateCtrl->getPicList();  // NOTE(review): declared by value, so the list is copied - confirm a reference was not intended
+    m_pcRateCtrl->getRCPic()->getLCUInitTargetBits();  // any return value is discarded; presumably called for its side effects - TODO confirm
+    lambda  = m_pcRateCtrl->getRCPic()->estimatePicLambda( listPreviousPicture, slice->isIRAP());
+    sliceQP = m_pcRateCtrl->getRCPic()->estimatePicQP( lambda, listPreviousPicture );
+  }
+  else    // normal case
+  {
+    list<EncRCPic*> listPreviousPicture = m_pcRateCtrl->getPicList();  // NOTE(review): declared by value, so the list is copied - confirm a reference was not intended
+    lambda  = m_pcRateCtrl->getRCPic()->estimatePicLambda( listPreviousPicture, slice->isIRAP());
+    sliceQP = m_pcRateCtrl->getRCPic()->estimatePicQP( lambda, listPreviousPicture );
+  }
+
+  sliceQP = Clip3( -slice->getSPS()->getQpBDOffset(CHANNEL_TYPE_LUMA), MAX_QP, sliceQP );  // clamp to [-QpBDOffset(luma), MAX_QP]
+  m_pcRateCtrl->getRCPic()->setPicEstQP( sliceQP );
+
+  m_pcSliceEncoder->resetQP( pic, sliceQP, lambda );  // hands the final QP and lambda to the slice encoder
+}
+
+void EncGOP::xPicInitLMCS(Picture *pic, PicHeader *picHeader, Slice *slice)  // Per-picture LMCS (reshaper) setup: runs the pre-analysis, builds the model when needed and fills the picture header and LMCS APS.
+{
+  if (slice->getSPS()->getUseLmcs())  // when LMCS is off in the SPS only the CTU flag is cleared (else branch at the end)
+  {
+    const SliceType sliceType = slice->getSliceType();
+
+    m_pcReshaper->getReshapeCW()->rspTid = slice->getTLayer() + (slice->isIntra() ? 0 : 1);  // temporal layer, plus one for non-intra slices
+    m_pcReshaper->getReshapeCW()->rspSliceQP = slice->getSliceQp();
+
+    m_pcReshaper->setSrcReshaped(false);
+    m_pcReshaper->setRecReshaped(true);
+
+    m_pcReshaper->getSliceReshaperInfo().chrResScalingOffset = m_pcCfg->getReshapeCSoffset();
+
+    if (m_pcCfg->getReshapeSignalType() == RESHAPE_SIGNAL_PQ)  // the pre-analysis routine is selected by the configured signal type
+    {
+      m_pcReshaper->preAnalyzerHDR(pic, sliceType, m_pcCfg->getReshapeCW(), m_pcCfg->getDualITree());
+    }
+    else if (m_pcCfg->getReshapeSignalType() == RESHAPE_SIGNAL_SDR || m_pcCfg->getReshapeSignalType() == RESHAPE_SIGNAL_HLG)
+    {
+      m_pcReshaper->preAnalyzerLMCS(pic, m_pcCfg->getReshapeSignalType(), sliceType, m_pcCfg->getReshapeCW());
+    }
+    else
+    {
+      THROW("Reshaper for other signal currently not defined!");
+    }
+
+    if (sliceType == I_SLICE )  // intra slices: (re)build the model and reshape the source picture
+    {
+      if (m_pcCfg->getReshapeSignalType() == RESHAPE_SIGNAL_PQ)
+      {
+        m_pcReshaper->initLUTfromdQPModel();
+        m_pcEncLib->getRdCost()->updateReshapeLumaLevelToWeightTableChromaMD(m_pcReshaper->getInvLUT());
+      }
+      else if (m_pcCfg->getReshapeSignalType() == RESHAPE_SIGNAL_SDR || m_pcCfg->getReshapeSignalType() == RESHAPE_SIGNAL_HLG)
+      {
+        if (m_pcReshaper->getReshapeFlag())
+        {
+          m_pcReshaper->constructReshaperLMCS();
+          m_pcEncLib->getRdCost()->updateReshapeLumaLevelToWeightTable(m_pcReshaper->getSliceReshaperInfo(), m_pcReshaper->getWeightTable(), m_pcReshaper->getCWeight());
+        }
+      }
+      else
+      {
+        THROW("Reshaper for other signal currently not defined!");
+      }
+
+      m_pcReshaper->setCTUFlag(false);
+
+      // reshape the luma plane of the original signal in place with the forward LUT
+      if (m_pcReshaper->getSliceReshaperInfo().getUseSliceReshaper())
+      {
+        pic->getOrigBuf(COMPONENT_Y).rspSignal(m_pcReshaper->getFwdLUT());
+        m_pcReshaper->setSrcReshaped(true);
+        m_pcReshaper->setRecReshaped(true);
+      }
+    }
+    else  // non-intra slices
+    {
+      if (!m_pcReshaper->getReshapeFlag())  // the CTU flag simply mirrors the reshape flag
+      {
+        m_pcReshaper->setCTUFlag(false);
+      }
+      else
+        m_pcReshaper->setCTUFlag(true);
+
+      m_pcReshaper->getSliceReshaperInfo().setSliceReshapeModelPresentFlag(false);  // default: no new model; set again below for SDR/HLG model updates
+
+      if (m_pcCfg->getReshapeSignalType() == RESHAPE_SIGNAL_PQ)
+      {
+        m_pcEncLib->getRdCost()->restoreReshapeLumaLevelToWeightTable();
+      }
+      else if (m_pcCfg->getReshapeSignalType() == RESHAPE_SIGNAL_SDR || m_pcCfg->getReshapeSignalType() == RESHAPE_SIGNAL_HLG)
+      {
+        int modIP = pic->getPOC() - pic->getPOC() / m_pcCfg->getReshapeCW().rspFpsToIp * m_pcCfg->getReshapeCW().rspFpsToIp;  // remainder of POC / rspFpsToIp; NOTE(review): confirm rspFpsToIp can never be 0
+        if (m_pcReshaper->getReshapeFlag() && m_pcCfg->getReshapeCW().updateCtrl == 2 && modIP == 0)  // model update only when updateCtrl is 2 and POC is a multiple of rspFpsToIp
+        {
+          m_pcReshaper->getSliceReshaperInfo().setSliceReshapeModelPresentFlag(true);
+          m_pcReshaper->constructReshaperLMCS();
+          m_pcEncLib->getRdCost()->updateReshapeLumaLevelToWeightTable(m_pcReshaper->getSliceReshaperInfo(), m_pcReshaper->getWeightTable(), m_pcReshaper->getCWeight());
+        }
+      }
+      else
+      {
+        THROW("Reshaper for other signal currently not defined!");
+      }
+    }
+
+    // set all necessary information in the LMCS APS and the picture header
+    picHeader->setLmcsEnabledFlag(m_pcReshaper->getSliceReshaperInfo().getUseSliceReshaper());
+    picHeader->setLmcsChromaResidualScaleFlag(m_pcReshaper->getSliceReshaperInfo().getSliceReshapeChromaAdj() == 1);
+    if (m_pcReshaper->getSliceReshaperInfo().getSliceReshapeModelPresentFlag())  // a model is present: store it in the LMCS APS
+    {
+      int apsId = std::min<int>( 3, m_pcEncLib->getVPS() == nullptr ? 0 : m_pcEncLib->getVPS()->getGeneralLayerIdx( m_pcEncLib->getLayerId() ) );  // general layer index capped at 3, or 0 without a VPS
+      picHeader->setLmcsAPSId(apsId);
+      APS* lmcsAPS = picHeader->getLmcsAPS();
+      if (lmcsAPS == nullptr)  // header has no APS attached yet: look it up in the APS map, allocating it if missing
+      {
+        ParameterSetMap<APS> *apsMap = m_pcEncLib->getApsMap();
+        lmcsAPS = apsMap->getPS((apsId << NUM_APS_TYPE_LEN) + LMCS_APS);  // map key combines the APS id with the APS type
+        if (lmcsAPS == NULL)  // NOTE(review): NULL here, nullptr a few lines above - same check, inconsistent spelling
+        {
+          lmcsAPS = apsMap->allocatePS((apsId << NUM_APS_TYPE_LEN) + LMCS_APS);
+          lmcsAPS->setAPSId(apsId);
+          lmcsAPS->setAPSType(LMCS_APS);
+        }
+        picHeader->setLmcsAPS(lmcsAPS);
+      }
+      //m_pcReshaper->copySliceReshaperInfo(lmcsAPS->getReshaperAPSInfo(), m_pcReshaper->getSliceReshaperInfo());
+      SliceReshapeInfo& tInfo = lmcsAPS->getReshaperAPSInfo();  // destination: reshaper info stored in the APS
+      SliceReshapeInfo& sInfo = m_pcReshaper->getSliceReshaperInfo();  // source: reshaper info computed for this picture
+      tInfo.reshaperModelMaxBinIdx = sInfo.reshaperModelMaxBinIdx;
+      tInfo.reshaperModelMinBinIdx = sInfo.reshaperModelMinBinIdx;
+      memcpy(tInfo.reshaperModelBinCWDelta, sInfo.reshaperModelBinCWDelta, sizeof(int)*(PIC_CODE_CW_BINS));  // assumes both arrays hold at least PIC_CODE_CW_BINS ints - TODO confirm
+      tInfo.maxNbitsNeededDeltaCW = sInfo.maxNbitsNeededDeltaCW;
+      tInfo.chrResScalingOffset = sInfo.chrResScalingOffset;
+      m_pcEncLib->getApsMap()->setChangedFlag((lmcsAPS->getAPSId() << NUM_APS_TYPE_LEN) + LMCS_APS);  // flags the map entry as changed
+    }
+
+
+    if (picHeader->getLmcsEnabledFlag())  // LMCS enabled for this picture: set the APS id even when no new model was built
+    {
+      int apsId = std::min<int>( 3, m_pcEncLib->getVPS() == nullptr ? 0 : m_pcEncLib->getVPS()->getGeneralLayerIdx( m_pcEncLib->getLayerId() ) );  // NOTE(review): same expression as in the model-present branch above
+      picHeader->setLmcsAPSId(apsId);
+    }
+  }
+  else
+  {
+    m_pcReshaper->setCTUFlag(false);
+  }
+}
+
 // ====================================================================================================================
 // Public member functions
 // ====================================================================================================================
@@ -1344,19 +1925,20 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
                           std::list<PelUnitBuf*>& rcListPicYuvRecOut,
                           bool isField, bool isTff, const InputColourSpaceConversion snr_conversion, const bool printFrameMSE
                         , bool isEncodeLtRef
+                        , const int picIdInGOP
 )
 {
   // TODO: Split this function up.
 
   Picture*        pcPic = NULL;
+  PicHeader*      picHeader = NULL;
   Slice*      pcSlice;
   OutputBitstream  *pcBitstreamRedirect;
   pcBitstreamRedirect = new OutputBitstream;
   AccessUnit::iterator  itLocationToPushSliceHeaderNALU; // used to store location where NALU containing slice header is to be inserted
+  Picture* scaledRefPic[MAX_NUM_REF] = {};
 
-  xInitGOP(iPOCLast, iNumPicRcvd, isField
-         , isEncodeLtRef
-  );
+  xInitGOP( iPOCLast, iNumPicRcvd, isField, isEncodeLtRef );
 
   m_iNumPicCoded = 0;
   SEIMessages leadingSeiMessages;
@@ -1364,7 +1946,6 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
   SEIMessages duInfoSeiMessages;
   SEIMessages trailingSeiMessages;
   std::deque<DUData> duData;
-  SEIDecodingUnitInfo decodingUnitInfoSEI;
 
   EfficientFieldIRAPMapping effFieldIRAPMap;
   if (m_pcCfg->getEfficientFieldIRAPEnabled())
@@ -1372,14 +1953,17 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
     effFieldIRAPMap.initialize(isField, m_iGopSize, iPOCLast, iNumPicRcvd, m_iLastIDR, this, m_pcCfg);
   }
 
-  // reset flag indicating whether pictures have been encoded
-  for ( int iGOPid=0; iGOPid < m_iGopSize; iGOPid++ )
+  if( isField && picIdInGOP == 0 )
   {
-    m_pcCfg->setEncodedFlag(iGOPid, false);
+    for( int iGOPid = 0; iGOPid < max(2, m_iGopSize); iGOPid++ )
+    {
+      m_pcCfg->setEncodedFlag( iGOPid, false );
+    }
   }
-
-  for ( int iGOPid=0; iGOPid < m_iGopSize; iGOPid++ )
+  for( int iGOPid = picIdInGOP; iGOPid <= picIdInGOP; iGOPid++ )
   {
+    // reset flag indicating whether pictures have been encoded
+    m_pcCfg->setEncodedFlag( iGOPid, false );
     if (m_pcCfg->getEfficientFieldIRAPEnabled())
     {
       iGOPid=effFieldIRAPMap.adjustGOPid(iGOPid);
@@ -1434,9 +2018,15 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
 
     // start a new access unit: create an entry in the list of output access units
     AccessUnit accessUnit;
+    accessUnit.temporalId = m_pcCfg->getGOPEntry( iGOPid ).m_temporalId;
     xGetBuffer( rcListPic, rcListPicYuvRecOut,
                 iNumPicRcvd, iTimeOffset, pcPic, pocCurr, isField );
+    picHeader = pcPic->cs->picHeader;
+    picHeader->setSPSId( pcPic->cs->pps->getSPSId() );
+    picHeader->setPPSId( pcPic->cs->pps->getPPSId() );
+    picHeader->setSplitConsOverrideFlag(false);
 
+#if ER_CHROMA_QP_WCG_PPS
     // th this is a hot fix for the choma qp control
     if( m_pcEncLib->getWCGChromaQPControl().isEnabled() && m_pcEncLib->getSwitchPOC() != -1 )
     {
@@ -1449,16 +2039,23 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
       // replace the pps with a more appropriated one
       pcPic->cs->pps = pPPS;
     }
+#endif
+
+    // create objects based on the picture size
+    const int picWidth = pcPic->cs->pps->getPicWidthInLumaSamples();
+    const int picHeight = pcPic->cs->pps->getPicHeightInLumaSamples();
+    const int maxCUWidth = pcPic->cs->sps->getMaxCUWidth();
+    const int maxCUHeight = pcPic->cs->sps->getMaxCUHeight();
+    const ChromaFormat chromaFormatIDC = pcPic->cs->sps->getChromaFormatIdc();
+    const int maxTotalCUDepth = pcPic->cs->sps->getMaxCodingDepth();
+
+    m_pcSliceEncoder->create( picWidth, picHeight, chromaFormatIDC, maxCUWidth, maxCUHeight, maxTotalCUDepth );
 
-#if ENABLE_SPLIT_PARALLELISM && ENABLE_WPP_PARALLELISM
-    pcPic->scheduler.init( pcPic->cs->pcv->heightInCtus, pcPic->cs->pcv->widthInCtus, m_pcCfg->getNumWppThreads(), m_pcCfg->getNumWppExtraLines(), m_pcCfg->getNumSplitThreads() );
-#elif ENABLE_SPLIT_PARALLELISM
+#if ENABLE_SPLIT_PARALLELISM
     pcPic->scheduler.init( pcPic->cs->pcv->heightInCtus, pcPic->cs->pcv->widthInCtus, 1                          , 0                             , m_pcCfg->getNumSplitThreads() );
-#elif ENABLE_WPP_PARALLELISM
-    pcPic->scheduler.init( pcPic->cs->pcv->heightInCtus, pcPic->cs->pcv->widthInCtus, m_pcCfg->getNumWppThreads(), m_pcCfg->getNumWppExtraLines(), 1                             );
 #endif
     pcPic->createTempBuffers( pcPic->cs->pps->pcv->maxCUWidth );
-    pcPic->cs->createCoeffs();
+    pcPic->cs->createCoeffs((bool)pcPic->cs->sps->getPLTMode());
 
     //  Slice data initialization
     pcPic->clearSliceBuffer();
@@ -1477,15 +2074,8 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
     pcPic->fieldPic = isField;
 #endif
 
-    int pocBits = pcSlice->getSPS()->getBitsForPOC();
-    int pocMask = (1 << pocBits) - 1;
-    pcSlice->setLastIDR(m_iLastIDR & ~pocMask);
-#if HEVC_DEPENDENT_SLICES
-    pcSlice->setSliceSegmentIdx(0);
-#endif
+    pcSlice->setLastIDR(m_iLastIDR);
     pcSlice->setIndependentSliceIdx(0);
-    //set default slice level flag to the same as SPS level flag
-    pcSlice->setLFCrossSliceBoundaryFlag(  pcSlice->getPPS()->getLoopFilterAcrossSlicesEnabledFlag()  );
 
     if(pcSlice->getSliceType()==B_SLICE&&m_pcCfg->getGOPEntry(iGOPid).m_sliceType=='P')
     {
@@ -1497,40 +2087,12 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
     }
     // Set the nal unit type
     pcSlice->setNalUnitType(getNalUnitType(pocCurr, m_iLastIDR, isField));
-#if !JVET_M0101_HLS
-    if(pcSlice->getTemporalLayerNonReferenceFlag())
-    {
-      if (pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_TRAIL_R &&
-          !(m_iGopSize == 1 && pcSlice->getSliceType() == I_SLICE))
-        // Add this condition to avoid POC issues with encoder_intra_main.cfg configuration (see #1127 in bug tracker)
-      {
-        pcSlice->setNalUnitType(NAL_UNIT_CODED_SLICE_TRAIL_N);
-      }
-      if(pcSlice->getNalUnitType()==NAL_UNIT_CODED_SLICE_RADL_R)
-      {
-        pcSlice->setNalUnitType(NAL_UNIT_CODED_SLICE_RADL_N);
-      }
-      if(pcSlice->getNalUnitType()==NAL_UNIT_CODED_SLICE_RASL_R)
-      {
-        pcSlice->setNalUnitType(NAL_UNIT_CODED_SLICE_RASL_N);
-      }
-    }
-#endif
 
     if (m_pcCfg->getEfficientFieldIRAPEnabled())
     {
-#if !JVET_M0101_HLS
-      if ( pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_W_LP
-        || pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_W_RADL
-        || pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_N_LP
-        || pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_W_RADL
-        || pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_N_LP
-        || pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA )  // IRAP picture
-#else
       if ( pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_W_RADL
         || pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_N_LP
         || pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA)  // IRAP picture
-#endif
       {
         m_associatedIRAPType = pcSlice->getNalUnitType();
         m_associatedIRAPPOC = pocCurr;
@@ -1558,26 +2120,17 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
 
     if (m_pcCfg->getUseCompositeRef() && m_picBg->getSpliceFull() && getUseLTRef())
     {
-      m_pcEncLib->selectReferencePictureSet(pcSlice, pocCurr, iGOPid, m_bgPOC);
+      m_pcEncLib->selectReferencePictureList(pcSlice, pocCurr, iGOPid, m_bgPOC);
     }
     else
     {
-      m_pcEncLib->selectReferencePictureSet(pcSlice, pocCurr, iGOPid, -1);
+      m_pcEncLib->selectReferencePictureList(pcSlice, pocCurr, iGOPid, -1);
     }
     if (!m_pcCfg->getEfficientFieldIRAPEnabled())
     {
-#if !JVET_M0101_HLS
-      if ( pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_W_LP
-        || pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_W_RADL
-        || pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_N_LP
-        || pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_W_RADL
-        || pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_N_LP
-        || pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA )  // IRAP picture
-#else
       if ( pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_W_RADL
         || pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_N_LP
         || pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA)  // IRAP picture
-#endif
       {
         m_associatedIRAPType = pcSlice->getNalUnitType();
         m_associatedIRAPPOC = pocCurr;
@@ -1586,185 +2139,139 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
       pcSlice->setAssociatedIRAPPOC(m_associatedIRAPPOC);
     }
 
-    if ((pcSlice->checkThatAllRefPicsAreAvailable(rcListPic, pcSlice->getRPS(), false, m_iLastRecoveryPicPOC, m_pcCfg->getDecodingRefreshType() == 3) != 0) || (pcSlice->isIRAP())
-#if !JVET_M0101_HLS
-      || (m_pcCfg->getEfficientFieldIRAPEnabled() && isField && pcSlice->getAssociatedIRAPType() >= NAL_UNIT_CODED_SLICE_BLA_W_LP && pcSlice->getAssociatedIRAPType() <= NAL_UNIT_CODED_SLICE_CRA && pcSlice->getAssociatedIRAPPOC() == pcSlice->getPOC()+1)
-#else
-      || (m_pcCfg->getEfficientFieldIRAPEnabled() && isField && pcSlice->getAssociatedIRAPType() >= NAL_UNIT_CODED_SLICE_IDR_W_RADL && pcSlice->getAssociatedIRAPType() <= NAL_UNIT_CODED_SLICE_CRA && pcSlice->getAssociatedIRAPPOC() == pcSlice->getPOC() + 1)
-#endif
+    pcSlice->setEnableDRAPSEI(m_pcEncLib->getDependentRAPIndicationSEIEnabled());
+    if (m_pcEncLib->getDependentRAPIndicationSEIEnabled())
+    {
+      // Only mark the picture as DRAP if all of the following applies:
+      //  1) DRAP indication SEI messages are enabled
+      //  2) The current picture is not an intra picture
+      //  3) The current picture is in the DRAP period
+      //  4) The current picture is a trailing picture
+      pcSlice->setDRAP(m_pcEncLib->getDependentRAPIndicationSEIEnabled() && m_pcEncLib->getDrapPeriod() > 0 && !pcSlice->isIntra() &&
+              pocCurr % m_pcEncLib->getDrapPeriod() == 0 && pocCurr > pcSlice->getAssociatedIRAPPOC());
+
+      if (pcSlice->isDRAP())
+      {
+        int pocCycle = 1 << (pcSlice->getSPS()->getBitsForPOC());
+        int deltaPOC = pocCurr > pcSlice->getAssociatedIRAPPOC() ? pocCurr - pcSlice->getAssociatedIRAPPOC() : pocCurr - ( pcSlice->getAssociatedIRAPPOC() & (pocCycle -1) );
+        CHECK(deltaPOC > (pocCycle >> 1), "Use a greater value for POC wraparound to enable a POC distance between IRAP and DRAP of " << deltaPOC << ".");
+        m_latestDRAPPOC = pocCurr;
+        pcSlice->setTLayer(0); // Force DRAP picture to have temporal layer 0
+      }
+      pcSlice->setLatestDRAPPOC(m_latestDRAPPOC);
+      pcSlice->setUseLTforDRAP(false); // When set, sets the associated IRAP as long-term in RPL0 at slice level, unless the associated IRAP is already included in RPL0 or RPL1 defined in SPS
+
+      PicList::iterator iterPic = rcListPic.begin();
+      Picture *rpcPic;
+      while (iterPic != rcListPic.end())
+      {
+        rpcPic = *(iterPic++);
+        if ( pcSlice->isDRAP() && rpcPic->getPOC() != pocCurr )
+        {
+            rpcPic->precedingDRAP = true;
+        }
+        else if ( !pcSlice->isDRAP() && rpcPic->getPOC() == pocCurr )
+        {
+          rpcPic->precedingDRAP = false;
+        }
+      }
+    }
+
+    if (pcSlice->checkThatAllRefPicsAreAvailable(rcListPic, pcSlice->getRPL0(), 0, false) != 0 || pcSlice->checkThatAllRefPicsAreAvailable(rcListPic, pcSlice->getRPL1(), 1, false) != 0 ||
+        (m_pcEncLib->getDependentRAPIndicationSEIEnabled() && !pcSlice->isIRAP() && ( pcSlice->isDRAP() || !pcSlice->isPOCInRefPicList(pcSlice->getRPL0(), pcSlice->getAssociatedIRAPPOC())) )
+      || ( !pcSlice->isIRAP() && pcSlice->getPic()->cs->vps && m_pcEncLib->getNumRefLayers( pcSlice->getPic()->cs->vps->getGeneralLayerIdx( m_pcEncLib->getLayerId() ) ) )
       )
     {
-      pcSlice->createExplicitReferencePictureSetFromReference(rcListPic, pcSlice->getRPS(), pcSlice->isIRAP(), m_iLastRecoveryPicPOC, m_pcCfg->getDecodingRefreshType() == 3, m_pcCfg->getEfficientFieldIRAPEnabled()
-                                                            , isEncodeLtRef, m_pcCfg->getUseCompositeRef()
-      );
+      xCreateExplicitReferencePictureSetFromReference( pcSlice, rcListPic, pcSlice->getRPL0(), pcSlice->getRPL1() );
     }
 
-    pcSlice->applyReferencePictureSet(rcListPic, pcSlice->getRPS());
+    pcSlice->applyReferencePictureListBasedMarking( rcListPic, pcSlice->getRPL0(), pcSlice->getRPL1(), pcSlice->getPic()->layerId );
 
     if(pcSlice->getTLayer() > 0
-#if !JVET_M0101_HLS
-      &&  !( pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_RADL_N     // Check if not a leading picture
-          || pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_RADL_R
-          || pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_RASL_N
-          || pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_RASL_R )
-#else
       && !(pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_RADL     // Check if not a leading picture
         || pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_RASL)
-#endif
         )
     {
-#if !JVET_M0101_HLS
-      if(pcSlice->isTemporalLayerSwitchingPoint(rcListPic) || pcSlice->getSPS()->getTemporalIdNestingFlag())
-      {
-        if(pcSlice->getTemporalLayerNonReferenceFlag())
-        {
-          pcSlice->setNalUnitType(NAL_UNIT_CODED_SLICE_TSA_N);
-        }
-        else
-        {
-          pcSlice->setNalUnitType(NAL_UNIT_CODED_SLICE_TSA_R);
-        }
-      }
-      else if(pcSlice->isStepwiseTemporalLayerSwitchingPointCandidate(rcListPic))
-#else
     if (pcSlice->isStepwiseTemporalLayerSwitchingPointCandidate(rcListPic))
-#endif
       {
         bool isSTSA=true;
+
+        if( !m_pcEncLib->getVPS()->getAllIndependentLayersFlag() && m_pcEncLib->getVPS()->getGeneralLayerIdx( m_pcEncLib->getLayerId() ) )
+        {
+          isSTSA = false;
+        }
+
         for(int ii=iGOPid+1;(ii<m_pcCfg->getGOPSize() && isSTSA==true);ii++)
         {
-          int lTid= m_pcCfg->getGOPEntry(ii).m_temporalId;
-          if(lTid==pcSlice->getTLayer())
+          int lTid = m_pcCfg->getRPLEntry(0, ii).m_temporalId;
+
+          if (lTid == pcSlice->getTLayer())
           {
-            const ReferencePictureSet* nRPS = pcSlice->getSPS()->getRPSList()->getReferencePictureSet(ii);
-            for(int jj=0;jj<nRPS->getNumberOfPictures();jj++)
+            const ReferencePictureList* rpl0 = pcSlice->getSPS()->getRPLList0()->getReferencePictureList(ii);
+            for (int jj = 0; jj < pcSlice->getRPL0()->getNumberOfActivePictures(); jj++)
             {
-              if(nRPS->getUsed(jj))
+              int tPoc = m_pcCfg->getRPLEntry(0, ii).m_POC + rpl0->getRefPicIdentifier(jj);
+              int kk = 0;
+              for (kk = 0; kk<m_pcCfg->getGOPSize(); kk++)
               {
-                int tPoc=m_pcCfg->getGOPEntry(ii).m_POC+nRPS->getDeltaPOC(jj);
-                int kk=0;
-                for(kk=0;kk<m_pcCfg->getGOPSize();kk++)
+                if (m_pcCfg->getRPLEntry(0, kk).m_POC == tPoc)
                 {
-                  if(m_pcCfg->getGOPEntry(kk).m_POC==tPoc)
-                  {
-                    break;
-                  }
+                  break;
                 }
-                int tTid=m_pcCfg->getGOPEntry(kk).m_temporalId;
-                if(tTid >= pcSlice->getTLayer())
+              }
+              int tTid = m_pcCfg->getRPLEntry(0, kk).m_temporalId;
+              if (tTid >= pcSlice->getTLayer())
+              {
+                isSTSA = false;
+                break;
+              }
+            }
+            const ReferencePictureList* rpl1 = pcSlice->getSPS()->getRPLList1()->getReferencePictureList(ii);
+            for (int jj = 0; jj < pcSlice->getRPL1()->getNumberOfActivePictures(); jj++)
+            {
+              int tPoc = m_pcCfg->getRPLEntry(1, ii).m_POC + rpl1->getRefPicIdentifier(jj);
+              int kk = 0;
+              for (kk = 0; kk<m_pcCfg->getGOPSize(); kk++)
+              {
+                if (m_pcCfg->getRPLEntry(1, kk).m_POC == tPoc)
                 {
-                  isSTSA=false;
                   break;
                 }
               }
+              int tTid = m_pcCfg->getRPLEntry(1, kk).m_temporalId;
+              if (tTid >= pcSlice->getTLayer())
+              {
+                isSTSA = false;
+                break;
+              }
             }
           }
         }
         if(isSTSA==true)
         {
-#if !JVET_M0101_HLS
-          if(pcSlice->getTemporalLayerNonReferenceFlag())
-          {
-            pcSlice->setNalUnitType(NAL_UNIT_CODED_SLICE_STSA_N);
-          }
-          else
-          {
-            pcSlice->setNalUnitType(NAL_UNIT_CODED_SLICE_STSA_R);
-          }
-#else
           pcSlice->setNalUnitType(NAL_UNIT_CODED_SLICE_STSA);
-#endif
         }
       }
     }
-    if (pcSlice->getRPSidx() == -1)
-      arrangeLongtermPicturesInRPS(pcSlice, rcListPic);
-    RefPicListModification* refPicListModification = pcSlice->getRefPicListModification();
-    refPicListModification->setRefPicListModificationFlagL0(0);
-    refPicListModification->setRefPicListModificationFlagL1(0);
 
     if (m_pcCfg->getUseCompositeRef() && getUseLTRef() && (pocCurr > getLastLTRefPoc()))
     {
-      pcSlice->setNumRefIdx(REF_PIC_LIST_0, min(m_pcCfg->getGOPEntry(iGOPid).m_numRefPicsActive + 1, pcSlice->getRPS()->getNumberOfPictures()));
-      pcSlice->setNumRefIdx(REF_PIC_LIST_1, min(m_pcCfg->getGOPEntry(iGOPid).m_numRefPicsActive + 1, pcSlice->getRPS()->getNumberOfPictures()));
+      pcSlice->setNumRefIdx(REF_PIC_LIST_0, (pcSlice->isIntra()) ? 0 : min(m_pcCfg->getRPLEntry(0, iGOPid).m_numRefPicsActive + 1, pcSlice->getRPL0()->getNumberOfActivePictures()));
+      pcSlice->setNumRefIdx(REF_PIC_LIST_1, (!pcSlice->isInterB()) ? 0 : min(m_pcCfg->getRPLEntry(1, iGOPid).m_numRefPicsActive + 1, pcSlice->getRPL1()->getNumberOfActivePictures()));
     }
     else
     {
-      pcSlice->setNumRefIdx(REF_PIC_LIST_0, std::min(m_pcCfg->getGOPEntry(iGOPid).m_numRefPicsActive, pcSlice->getRPS()->getNumberOfPictures()));
-      pcSlice->setNumRefIdx(REF_PIC_LIST_1, std::min(m_pcCfg->getGOPEntry(iGOPid).m_numRefPicsActive, pcSlice->getRPS()->getNumberOfPictures()));
+      pcSlice->setNumRefIdx(REF_PIC_LIST_0, (pcSlice->isIntra()) ? 0 : pcSlice->getRPL0()->getNumberOfActivePictures());
+      pcSlice->setNumRefIdx(REF_PIC_LIST_1, (!pcSlice->isInterB()) ? 0 : pcSlice->getRPL1()->getNumberOfActivePictures());
     }
     if (m_pcCfg->getUseCompositeRef() && getPrepareLTRef()) {
       arrangeCompositeReference(pcSlice, rcListPic, pocCurr);
     }
     //  Set reference list
-    pcSlice->setRefPicList ( rcListPic );
-
-    if (m_pcCfg->getUseHashME())
-    {
-      PicList::iterator iterPic = rcListPic.begin();
-      while (iterPic != rcListPic.end())
-      {
-        Picture* refPic = *(iterPic++);
-
-        if (refPic->poc != pcPic->poc && refPic->referenced)
-        {
-          if (!refPic->getHashMap()->isInitial())
-          {
-            if (refPic->getPOC() == 0)
-            {
-              Pel* picSrc = refPic->getOrigBuf().get(COMPONENT_Y).buf;
-              int stridePic = refPic->getOrigBuf().get(COMPONENT_Y).stride;
-              int picWidth = pcSlice->getSPS()->getPicWidthInLumaSamples();
-              int picHeight = pcSlice->getSPS()->getPicHeightInLumaSamples();
-              int blockSize = 4;
-              int allNum = 0;
-              int simpleNum = 0;
-              for (int j = 0; j <= picHeight - blockSize; j += blockSize)
-              {
-                for (int i = 0; i <= picWidth - blockSize; i += blockSize)
-                {
-                  Pel* curBlock = picSrc + j * stridePic + i;
-                  bool isHorSame = true;
-                  for (int m = 0; m < blockSize&&isHorSame; m++)
-                  {
-                    for (int n = 1; n < blockSize&&isHorSame; n++)
-                    {
-                      if (curBlock[m*stridePic] != curBlock[m*stridePic + n])
-                      {
-                        isHorSame = false;
-                      }
-                    }
-                  }
-                  bool isVerSame = true;
-                  for (int m = 1; m < blockSize&&isVerSame; m++)
-                  {
-                    for (int n = 0; n < blockSize&&isVerSame; n++)
-                    {
-                      if (curBlock[n] != curBlock[m*stridePic + n])
-                      {
-                        isVerSame = false;
-                      }
-                    }
-                  }
-                  allNum++;
-                  if (isHorSame || isVerSame)
-                  {
-                    simpleNum++;
-                  }
-                }
-              }
+    pcSlice->constructRefPicList(rcListPic);
+    
+    xPicInitHashME( pcPic, pcSlice->getPPS(), rcListPic );
 
-              if (simpleNum < 0.3*allNum)
-              {
-                m_pcCfg->setUseHashME(false);
-                break;
-              }
-            }
-            refPic->addPictureToHashMapForInter();
-          }
-        }
-      }
-    }
     if( m_pcCfg->getUseAMaxBT() )
     {
       if( !pcSlice->isIRAP() )
@@ -1781,19 +2288,19 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
 
         if( refLayer >= 0 && m_uiNumBlk[refLayer] != 0 )
         {
-          pcSlice->setSplitConsOverrideFlag(true);
+          picHeader->setSplitConsOverrideFlag(true);
           double dBlkSize = sqrt( ( double ) m_uiBlkSize[refLayer] / m_uiNumBlk[refLayer] );
-          if( dBlkSize < AMAXBT_TH32 )
+          if( dBlkSize < AMAXBT_TH32 || pcPic->cs->sps->getCTUSize()==32 )
           {
-            pcSlice->setMaxBTSize( 32 > MAX_BT_SIZE_INTER ? MAX_BT_SIZE_INTER : 32 );
+            picHeader->setMaxBTSize( 1, 32 > MAX_BT_SIZE_INTER ? MAX_BT_SIZE_INTER : 32 );
           }
-          else if( dBlkSize < AMAXBT_TH64 )
+          else if( dBlkSize < AMAXBT_TH64 || pcPic->cs->sps->getCTUSize()==64 )
           {
-            pcSlice->setMaxBTSize( 64 > MAX_BT_SIZE_INTER ? MAX_BT_SIZE_INTER : 64 );
+            picHeader->setMaxBTSize( 1, 64 > MAX_BT_SIZE_INTER ? MAX_BT_SIZE_INTER : 64 );
           }
           else
           {
-            pcSlice->setMaxBTSize( 128 > MAX_BT_SIZE_INTER ? MAX_BT_SIZE_INTER : 128 );
+            picHeader->setMaxBTSize( 1, 128 > MAX_BT_SIZE_INTER ? MAX_BT_SIZE_INTER : 128 );
           }
 
           m_uiBlkSize[refLayer] = 0;
@@ -1832,12 +2339,7 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
 
     if (pcSlice->getSliceType() == B_SLICE)
     {
-#if X0038_LAMBDA_FROM_QP_CAPABILITY
-      const uint32_t uiColFromL0 = calculateCollocatedFromL0Flag(pcSlice);
-      pcSlice->setColFromL0Flag(uiColFromL0);
-#else
-      pcSlice->setColFromL0Flag(1-uiColDir);
-#endif
+
       bool bLowDelay = true;
       int  iCurrPOC  = pcSlice->getPOC();
       int iRefIdx = 0;
@@ -1864,9 +2366,6 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
       pcSlice->setCheckLDC(true);
     }
 
-#if !X0038_LAMBDA_FROM_QP_CAPABILITY
-    uiColDir = 1-uiColDir;
-#endif
 
     //-------------------------------------------------------------
     pcSlice->setRefPOCList();
@@ -1878,28 +2377,107 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
     {
       if (iGOPid == 0) // first picture in SOP (i.e. forward B)
       {
-        pcSlice->setEnableTMVPFlag(0);
+        picHeader->setEnableTMVPFlag(0);
       }
       else
       {
         // Note: pcSlice->getColFromL0Flag() is assumed to be always 0 and getcolRefIdx() is always 0.
-        pcSlice->setEnableTMVPFlag(1);
+        picHeader->setEnableTMVPFlag(1);
       }
     }
     else if (m_pcEncLib->getTMVPModeId() == 1)
     {
-      pcSlice->setEnableTMVPFlag(1);
+      picHeader->setEnableTMVPFlag(1);
     }
     else
     {
-      pcSlice->setEnableTMVPFlag(0);
+      picHeader->setEnableTMVPFlag(0);
+    }
+
+    // disable TMVP when current picture is the only ref picture
+    if (pcSlice->isIRAP() && pcSlice->getSPS()->getIBCFlag())
+    {
+      picHeader->setEnableTMVPFlag(0);
+    }
+
+    if( pcSlice->getSliceType() != I_SLICE && picHeader->getEnableTMVPFlag() )
+    {
+      int colRefIdxL0 = -1, colRefIdxL1 = -1;
+
+      for( int refIdx = 0; refIdx < pcSlice->getNumRefIdx( REF_PIC_LIST_0 ); refIdx++ )
+      {
+#if JVET_Q0487_SCALING_WINDOW_ISSUES
+        if( pcSlice->getRefPic( REF_PIC_LIST_0, refIdx )->isRefScaled( pcSlice->getPPS() ) == false )
+#else
+        int refPicWidth = pcSlice->getRefPic( REF_PIC_LIST_0, refIdx )->unscaledPic->cs->pps->getPicWidthInLumaSamples();
+        int refPicHeight = pcSlice->getRefPic( REF_PIC_LIST_0, refIdx )->unscaledPic->cs->pps->getPicHeightInLumaSamples();
+        int curPicWidth = pcSlice->getPPS()->getPicWidthInLumaSamples();
+        int curPicHeight = pcSlice->getPPS()->getPicHeightInLumaSamples();
+
+        if( refPicWidth == curPicWidth && refPicHeight == curPicHeight )
+#endif
+        {
+          colRefIdxL0 = refIdx;
+          break;
+        }
+      }
+
+      if( pcSlice->getSliceType() == B_SLICE )
+      {
+        for( int refIdx = 0; refIdx < pcSlice->getNumRefIdx( REF_PIC_LIST_1 ); refIdx++ )
+        {
+#if JVET_Q0487_SCALING_WINDOW_ISSUES
+          if( pcSlice->getRefPic( REF_PIC_LIST_1, refIdx )->isRefScaled( pcSlice->getPPS() ) == false )
+#else
+          int refPicWidth = pcSlice->getRefPic( REF_PIC_LIST_1, refIdx )->unscaledPic->cs->pps->getPicWidthInLumaSamples();
+          int refPicHeight = pcSlice->getRefPic( REF_PIC_LIST_1, refIdx )->unscaledPic->cs->pps->getPicHeightInLumaSamples();
+          int curPicWidth = pcSlice->getPPS()->getPicWidthInLumaSamples();
+          int curPicHeight = pcSlice->getPPS()->getPicHeightInLumaSamples();
+
+          if( refPicWidth == curPicWidth && refPicHeight == curPicHeight )
+#endif
+          {
+            colRefIdxL1 = refIdx;
+            break;
+          }
+        }
+      }
+
+      if( colRefIdxL0 >= 0 && colRefIdxL1 >= 0 )
+      {
+        const Picture *refPicL0 = pcSlice->getRefPic( REF_PIC_LIST_0, colRefIdxL0 );
+        if( !refPicL0->slices.size() )
+        {
+          refPicL0 = refPicL0->unscaledPic;
+        }
+
+        const Picture *refPicL1 = pcSlice->getRefPic( REF_PIC_LIST_1, colRefIdxL1 );
+        if( !refPicL1->slices.size() )
+        {
+          refPicL1 = refPicL1->unscaledPic;
+        }
+
+        const uint32_t uiColFromL0 = refPicL0->slices[0]->getSliceQp() > refPicL1->slices[0]->getSliceQp();
+        pcSlice->setColFromL0Flag( uiColFromL0 );
+        pcSlice->setColRefIdx( uiColFromL0 ? colRefIdxL0 : colRefIdxL1 );
+      }
+      else if( colRefIdxL0 < 0 && colRefIdxL1 >= 0 )
+      {
+        pcSlice->setColFromL0Flag( false );
+        pcSlice->setColRefIdx( colRefIdxL1 );
+      }
+      else if( colRefIdxL0 >= 0 && colRefIdxL1 < 0 )
+      {
+        pcSlice->setColFromL0Flag( true );
+        pcSlice->setColRefIdx( colRefIdxL0 );
+      }
+      else
+      {
+        picHeader->setEnableTMVPFlag( 0 );
+      }
     }
 
-    // disable TMVP when current picture is the only ref picture
-    if (pcSlice->isIRAP() && pcSlice->getSPS()->getIBCFlag())
-    {
-      pcSlice->setEnableTMVPFlag(0);
-    }
+    pcSlice->scaleRefPicList( scaledRefPic, pcPic->cs->picHeader, m_pcEncLib->getApss(), picHeader->getLmcsAPS(), picHeader->getScalingListAPS(), false );
 
     // set adaptive search range for non-intra-slices
     if (m_pcCfg->getUseASR() && !pcSlice->isIRAP())
@@ -1926,17 +2504,16 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
     }
     if(bGPBcheck)
     {
-      pcSlice->setMvdL1ZeroFlag(true);
+      picHeader->setMvdL1ZeroFlag(true);
     }
     else
     {
-      pcSlice->setMvdL1ZeroFlag(false);
+      picHeader->setMvdL1ZeroFlag(false);
     }
-#if HEVC_DEPENDENT_SLICES
-    pcPic->slices[pcSlice->getSliceSegmentIdx()]->setMvdL1ZeroFlag(pcSlice->getMvdL1ZeroFlag());
-#endif
 
-    if ( pcSlice->getCheckLDC() == false && pcSlice->getMvdL1ZeroFlag() == false )
+    if ( pcSlice->getSPS()->getUseSMVD() && pcSlice->getCheckLDC() == false
+      && picHeader->getMvdL1ZeroFlag() == false
+      )
     {
       int currPOC = pcSlice->getPOC();
 
@@ -1948,7 +2525,8 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
       for ( ref = 0; ref < pcSlice->getNumRefIdx( REF_PIC_LIST_0 ); ref++ )
       {
         int poc = pcSlice->getRefPic( REF_PIC_LIST_0, ref )->getPOC();
-        if ( poc < currPOC && (poc > forwardPOC || refIdx0 == -1) )
+        const bool isRefLongTerm = pcSlice->getRefPic(REF_PIC_LIST_0, ref)->longTerm;
+        if ( poc < currPOC && (poc > forwardPOC || refIdx0 == -1) && !isRefLongTerm )
         {
           forwardPOC = poc;
           refIdx0 = ref;
@@ -1959,7 +2537,8 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
       for ( ref = 0; ref < pcSlice->getNumRefIdx( REF_PIC_LIST_1 ); ref++ )
       {
         int poc = pcSlice->getRefPic( REF_PIC_LIST_1, ref )->getPOC();
-        if ( poc > currPOC && (poc < backwardPOC || refIdx1 == -1) )
+        const bool isRefLongTerm = pcSlice->getRefPic(REF_PIC_LIST_1, ref)->longTerm;
+        if ( poc > currPOC && (poc < backwardPOC || refIdx1 == -1) && !isRefLongTerm )
         {
           backwardPOC = poc;
           refIdx1 = ref;
@@ -1977,7 +2556,8 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
         for ( ref = 0; ref < pcSlice->getNumRefIdx( REF_PIC_LIST_0 ); ref++ )
         {
           int poc = pcSlice->getRefPic( REF_PIC_LIST_0, ref )->getPOC();
-          if ( poc > currPOC && (poc < backwardPOC || refIdx0 == -1) )
+          const bool isRefLongTerm = pcSlice->getRefPic(REF_PIC_LIST_0, ref)->longTerm;
+          if ( poc > currPOC && (poc < backwardPOC || refIdx0 == -1) && !isRefLongTerm )
           {
             backwardPOC = poc;
             refIdx0 = ref;
@@ -1988,7 +2568,8 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
         for ( ref = 0; ref < pcSlice->getNumRefIdx( REF_PIC_LIST_1 ); ref++ )
         {
           int poc = pcSlice->getRefPic( REF_PIC_LIST_1, ref )->getPOC();
-          if ( poc < currPOC && (poc > forwardPOC || refIdx1 == -1) )
+          const bool isRefLongTerm = pcSlice->getRefPic(REF_PIC_LIST_1, ref)->longTerm;
+          if ( poc < currPOC && (poc > forwardPOC || refIdx1 == -1) && !isRefLongTerm )
           {
             forwardPOC = poc;
             refIdx1 = ref;
@@ -2015,119 +2596,8 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
     int actualTotalBits      = 0;
     int estimatedBits        = 0;
     int tmpBitsBeforeWriting = 0;
-    if ( m_pcCfg->getUseRateCtrl() ) // TODO: does this work with multiple slices and slice-segments?
-    {
-      int frameLevel = m_pcRateCtrl->getRCSeq()->getGOPID2Level( iGOPid );
-      if ( pcPic->slices[0]->isIRAP() )
-      {
-        frameLevel = 0;
-      }
-      m_pcRateCtrl->initRCPic( frameLevel );
-      estimatedBits = m_pcRateCtrl->getRCPic()->getTargetBits();
-
-#if U0132_TARGET_BITS_SATURATION
-      if (m_pcRateCtrl->getCpbSaturationEnabled() && frameLevel != 0)
-      {
-        int estimatedCpbFullness = m_pcRateCtrl->getCpbState() + m_pcRateCtrl->getBufferingRate();
-
-        // prevent overflow
-        if (estimatedCpbFullness - estimatedBits > (int)(m_pcRateCtrl->getCpbSize()*0.9f))
-        {
-          estimatedBits = estimatedCpbFullness - (int)(m_pcRateCtrl->getCpbSize()*0.9f);
-        }
-
-        estimatedCpbFullness -= m_pcRateCtrl->getBufferingRate();
-        // prevent underflow
-#if V0078_ADAPTIVE_LOWER_BOUND
-        if (estimatedCpbFullness - estimatedBits < m_pcRateCtrl->getRCPic()->getLowerBound())
-        {
-          estimatedBits = std::max(200, estimatedCpbFullness - m_pcRateCtrl->getRCPic()->getLowerBound());
-        }
-#else
-        if (estimatedCpbFullness - estimatedBits < (int)(m_pcRateCtrl->getCpbSize()*0.1f))
-        {
-          estimatedBits = std::max(200, estimatedCpbFullness - (int)(m_pcRateCtrl->getCpbSize()*0.1f));
-        }
-#endif
-
-        m_pcRateCtrl->getRCPic()->setTargetBits(estimatedBits);
-      }
-#endif
-
-      int sliceQP = m_pcCfg->getInitialQP();
-      if ( ( pcSlice->getPOC() == 0 && m_pcCfg->getInitialQP() > 0 ) || ( frameLevel == 0 && m_pcCfg->getForceIntraQP() ) ) // QP is specified
-      {
-        int    NumberBFrames = ( m_pcCfg->getGOPSize() - 1 );
-        double dLambda_scale = 1.0 - Clip3( 0.0, 0.5, 0.05*(double)NumberBFrames );
-        double dQPFactor     = 0.57*dLambda_scale;
-        int    SHIFT_QP      = 12;
-        int bitdepth_luma_qp_scale =
-          6
-          * (pcSlice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA) - 8
-             - DISTORTION_PRECISION_ADJUSTMENT(pcSlice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA)));
-        double qp_temp = (double) sliceQP + bitdepth_luma_qp_scale - SHIFT_QP;
-        lambda = dQPFactor*pow( 2.0, qp_temp/3.0 );
-      }
-      else if ( frameLevel == 0 )   // intra case, but use the model
-      {
-        m_pcSliceEncoder->calCostSliceI(pcPic); // TODO: This only analyses the first slice segment - what about the others?
-
-        if ( m_pcCfg->getIntraPeriod() != 1 )   // do not refine allocated bits for all intra case
-        {
-          int bits = m_pcRateCtrl->getRCSeq()->getLeftAverageBits();
-          bits = m_pcRateCtrl->getRCPic()->getRefineBitsForIntra( bits );
-
-#if U0132_TARGET_BITS_SATURATION
-          if (m_pcRateCtrl->getCpbSaturationEnabled() )
-          {
-            int estimatedCpbFullness = m_pcRateCtrl->getCpbState() + m_pcRateCtrl->getBufferingRate();
-
-            // prevent overflow
-            if (estimatedCpbFullness - bits > (int)(m_pcRateCtrl->getCpbSize()*0.9f))
-            {
-              bits = estimatedCpbFullness - (int)(m_pcRateCtrl->getCpbSize()*0.9f);
-            }
-
-            estimatedCpbFullness -= m_pcRateCtrl->getBufferingRate();
-            // prevent underflow
-#if V0078_ADAPTIVE_LOWER_BOUND
-            if (estimatedCpbFullness - bits < m_pcRateCtrl->getRCPic()->getLowerBound())
-            {
-              bits = estimatedCpbFullness - m_pcRateCtrl->getRCPic()->getLowerBound();
-            }
-#else
-            if (estimatedCpbFullness - bits < (int)(m_pcRateCtrl->getCpbSize()*0.1f))
-            {
-              bits = estimatedCpbFullness - (int)(m_pcRateCtrl->getCpbSize()*0.1f);
-            }
-#endif
-          }
-#endif
-
-          if ( bits < 200 )
-          {
-            bits = 200;
-          }
-          m_pcRateCtrl->getRCPic()->setTargetBits( bits );
-        }
-
-        list<EncRCPic*> listPreviousPicture = m_pcRateCtrl->getPicList();
-        m_pcRateCtrl->getRCPic()->getLCUInitTargetBits();
-        lambda  = m_pcRateCtrl->getRCPic()->estimatePicLambda( listPreviousPicture, pcSlice->isIRAP());
-        sliceQP = m_pcRateCtrl->getRCPic()->estimatePicQP( lambda, listPreviousPicture );
-      }
-      else    // normal case
-      {
-        list<EncRCPic*> listPreviousPicture = m_pcRateCtrl->getPicList();
-        lambda  = m_pcRateCtrl->getRCPic()->estimatePicLambda( listPreviousPicture, pcSlice->isIRAP());
-        sliceQP = m_pcRateCtrl->getRCPic()->estimatePicQP( lambda, listPreviousPicture );
-      }
 
-      sliceQP = Clip3( -pcSlice->getSPS()->getQpBDOffset(CHANNEL_TYPE_LUMA), MAX_QP, sliceQP );
-      m_pcRateCtrl->getRCPic()->setPicEstQP( sliceQP );
-
-      m_pcSliceEncoder->resetQP( pcPic, sliceQP, lambda );
-    }
+    xPicInitRateControl(estimatedBits, iGOPid, lambda, pcPic, pcSlice);
 
     uint32_t uiNumSliceSegments = 1;
 
@@ -2137,27 +2607,19 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
 
     // Allocate some coders, now the number of tiles are known.
     const uint32_t numberOfCtusInFrame = pcPic->cs->pcv->sizeInCtus;
-#if HEVC_TILES_WPP
-    const int numSubstreamsColumns = (pcSlice->getPPS()->getNumTileColumnsMinus1() + 1);
-    const int numSubstreamRows     = pcSlice->getPPS()->getEntropyCodingSyncEnabledFlag() ? pcPic->cs->pcv->heightInCtus : (pcSlice->getPPS()->getNumTileRowsMinus1() + 1);
-    const int numSubstreams        = numSubstreamRows * numSubstreamsColumns;
-#else
-    const int numSubstreams        = 1;
-#endif
+    const int numSubstreamsColumns = pcSlice->getPPS()->getNumTileColumns();
+    const int numSubstreamRows     = pcSlice->getPPS()->getEntropyCodingSyncEnabledFlag() ? pcPic->cs->pcv->heightInCtus : (pcSlice->getPPS()->getNumTileRows());
+    const int numSubstreams        = std::max<int> (numSubstreamRows * numSubstreamsColumns, (int) pcPic->cs->pps->getNumSlicesInPic());
     std::vector<OutputBitstream> substreamsOut(numSubstreams);
 
 #if ENABLE_QPA
     pcPic->m_uEnerHpCtu.resize (numberOfCtusInFrame);
     pcPic->m_iOffsetCtu.resize (numberOfCtusInFrame);
 #if ENABLE_QPA_SUB_CTU
-    if (pcSlice->getPPS()->getUseDQP() && pcSlice->getPPS()->getCuQpDeltaSubdiv() > 0)
+    if (pcSlice->getPPS()->getUseDQP() && pcSlice->getCuQpDeltaSubdiv() > 0)
     {
       const PreCalcValues &pcv = *pcPic->cs->pcv;
-#if MAX_TB_SIZE_SIGNALLING
-      const unsigned   mtsLog2 = (unsigned)g_aucLog2[std::min (pcPic->cs->sps->getMaxTbSize(), pcv.maxCUWidth)];
-#else
-      const unsigned   mtsLog2 = (unsigned)g_aucLog2[std::min<uint32_t> (MAX_TB_SIZEY, pcv.maxCUWidth)];
-#endif
+      const unsigned   mtsLog2 = (unsigned)floorLog2(std::min (pcPic->cs->sps->getMaxTbSize(), pcv.maxCUWidth));
       pcPic->m_subCtuQP.resize ((pcv.maxCUWidth >> mtsLog2) * (pcv.maxCUHeight >> mtsLog2));
     }
 #endif
@@ -2172,9 +2634,8 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
     if( pcSlice->getSPS()->getALFEnabledFlag() )
     {
       pcPic->resizeAlfCtuEnableFlag( numberOfCtusInFrame );
-      // reset the APS ALF parameters
-      AlfSliceParam newALFParam;
-      pcSlice->getAPS()->setAlfAPSParam(newALFParam);
+      pcPic->resizeAlfCtuAlternative( numberOfCtusInFrame );
+      pcPic->resizeAlfCtbFilterIndex(numberOfCtusInFrame);
     }
 
     bool decPic = false;
@@ -2192,147 +2653,53 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
       // overwrite chroma qp offset for dual tree
       pcSlice->setSliceChromaQpDelta(COMPONENT_Cb, m_pcCfg->getChromaCbQpOffsetDualTree());
       pcSlice->setSliceChromaQpDelta(COMPONENT_Cr, m_pcCfg->getChromaCrQpOffsetDualTree());
-      m_pcSliceEncoder->setUpLambda(pcSlice, pcSlice->getLambdas()[0], pcSlice->getSliceQp());
-    }
-    if (pcSlice->getSPS()->getUseReshaper())
-    {
-      m_pcReshaper->getReshapeCW()->rspTid = pcSlice->getTLayer() + (pcSlice->isIntra() ? 0 : 1);
-      m_pcReshaper->getReshapeCW()->rspSliceQP = pcSlice->getSliceQp();
-
-      m_pcReshaper->setSrcReshaped(false);
-      m_pcReshaper->setRecReshaped(true);
-
-      if (m_pcCfg->getReshapeSignalType() == RESHAPE_SIGNAL_PQ)
-      {
-        m_pcReshaper->preAnalyzerHDR(pcPic, pcSlice->getSliceType(), m_pcCfg->getReshapeCW(), m_pcCfg->getDualITree());
-      }
-      else if (m_pcCfg->getReshapeSignalType() == RESHAPE_SIGNAL_SDR)
-      {
-        m_pcReshaper->preAnalyzerSDR(pcPic, pcSlice->getSliceType(), m_pcCfg->getReshapeCW(), m_pcCfg->getDualITree());
-      }
-      else
+      if (pcSlice->getSPS()->getJointCbCrEnabledFlag())
       {
-        THROW("Reshaper for other signal currently not defined!");
+        pcSlice->setSliceChromaQpDelta(JOINT_CbCr, m_pcCfg->getChromaCbCrQpOffsetDualTree());
       }
+      m_pcSliceEncoder->setUpLambda(pcSlice, pcSlice->getLambdas()[0], pcSlice->getSliceQp());
+    }
 
-      if (pcSlice->getSliceType() == I_SLICE )
-      {
-        if (m_pcCfg->getReshapeSignalType() == RESHAPE_SIGNAL_PQ)
-        {
-          m_pcReshaper->initLUTfromdQPModel();
-          m_pcEncLib->getRdCost()->updateReshapeLumaLevelToWeightTableChromaMD(m_pcReshaper->getInvLUT());
-        }
-        else if (m_pcCfg->getReshapeSignalType() == RESHAPE_SIGNAL_SDR)
-        {
-          if (m_pcReshaper->getReshapeFlag())
-          {
-            m_pcReshaper->constructReshaperSDR();
-            m_pcEncLib->getRdCost()->updateReshapeLumaLevelToWeightTable(m_pcReshaper->getSliceReshaperInfo(), m_pcReshaper->getWeightTable(), m_pcReshaper->getCWeight());
-          }
-        }
-        else
-        {
-          THROW("Reshaper for other signal currently not defined!");
-        }
-
-        m_pcReshaper->setCTUFlag(false);
+    xPicInitLMCS(pcPic, picHeader, pcSlice);
 
-        //reshape original signal
-        if (m_pcReshaper->getSliceReshaperInfo().getUseSliceReshaper())
-        {
-          pcPic->getOrigBuf(COMPONENT_Y).rspSignal(m_pcReshaper->getFwdLUT());
-          m_pcReshaper->setSrcReshaped(true);
-          m_pcReshaper->setRecReshaped(true);
-        }
-      }
-      else
-      {
-        if (!m_pcReshaper->getReshapeFlag())
-        {
-          m_pcReshaper->setCTUFlag(false);
-        }
-        else
-          m_pcReshaper->setCTUFlag(true);
+    if( pcSlice->getSPS()->getScalingListFlag() && m_pcCfg->getUseScalingListId() == SCALING_LIST_FILE_READ )
+    {
+      picHeader->setScalingListPresentFlag( true );
 
-        m_pcReshaper->getSliceReshaperInfo().setSliceReshapeModelPresentFlag(false);
+      int apsId = std::min<int>( 7, m_pcEncLib->getVPS() == nullptr ? 0 : m_pcEncLib->getVPS()->getGeneralLayerIdx( m_pcEncLib->getLayerId() ) );
+      picHeader->setScalingListAPSId( apsId );
 
-        if (m_pcCfg->getReshapeSignalType() == RESHAPE_SIGNAL_PQ)
-        {
-          m_pcEncLib->getRdCost()->restoreReshapeLumaLevelToWeightTable();
-        }
-        else if (m_pcCfg->getReshapeSignalType() == RESHAPE_SIGNAL_SDR)
-        {
-          int modIP = pcPic->getPOC() - pcPic->getPOC() / m_pcCfg->getReshapeCW().rspFpsToIp * m_pcCfg->getReshapeCW().rspFpsToIp;
-          if (m_pcReshaper->getReshapeFlag() && m_pcCfg->getReshapeCW().rspIntraPeriod == -1 && modIP == 0)           // for LDB, update reshaping curve every second
-          {
-            m_pcReshaper->getSliceReshaperInfo().setSliceReshapeModelPresentFlag(true);
-            m_pcReshaper->constructReshaperSDR();
-            m_pcEncLib->getRdCost()->updateReshapeLumaLevelToWeightTable(m_pcReshaper->getSliceReshaperInfo(), m_pcReshaper->getWeightTable(), m_pcReshaper->getCWeight());
-          }
-        }
-        else
-        {
-          THROW("Reshaper for other signal currently not defined!");
-        }
-      }
+      ParameterSetMap<APS> *apsMap = m_pcEncLib->getApsMap();
+      APS*  scalingListAPS = apsMap->getPS( ( apsId << NUM_APS_TYPE_LEN ) + SCALING_LIST_APS );
+      assert( scalingListAPS != NULL );
+      picHeader->setScalingListAPS( scalingListAPS );
+    }
 
-      m_pcReshaper->copySliceReshaperInfo(pcSlice->getReshapeInfo(), m_pcReshaper->getSliceReshaperInfo());
+    pcPic->cs->picHeader->setPic(pcPic);
+    pcPic->cs->picHeader->setValid();
+    if(pcPic->cs->sps->getFpelMmvdEnabledFlag()) 
+    {
+      // cannot set pic_fpel_mmvd_enabled_flag at slice level - need new picture-level version of checkDisFracMmvd algorithm?
+      // m_pcSliceEncoder->checkDisFracMmvd( pcPic, 0, numberOfCtusInFrame );
+      bool useIntegerMVD = (pcPic->lwidth()*pcPic->lheight() > 1920 * 1080);
+      pcPic->cs->picHeader->setDisFracMMVD( useIntegerMVD );
     }
-    else
+    if (pcSlice->getSPS()->getJointCbCrEnabledFlag())
     {
-      m_pcReshaper->setCTUFlag(false);
+      m_pcSliceEncoder->setJointCbCrModes(*pcPic->cs, Position(0, 0), pcPic->cs->area.lumaSize());
     }
-
     if( encPic )
     // now compress (trial encode) the various slice segments (slices, and dependent slices)
     {
       DTRACE_UPDATE( g_trace_ctx, ( std::make_pair( "poc", pocCurr ) ) );
 
-      pcSlice->setSliceCurStartCtuTsAddr( 0 );
-#if HEVC_DEPENDENT_SLICES
-      pcSlice->setSliceSegmentCurStartCtuTsAddr( 0 );
-#endif
-
-      for(uint32_t nextCtuTsAddr = 0; nextCtuTsAddr < numberOfCtusInFrame; )
+      for(uint32_t sliceIdx = 0; sliceIdx < pcPic->cs->pps->getNumSlicesInPic(); sliceIdx++ )
       {
+        pcSlice->setSliceMap( pcPic->cs->pps->getSliceMap( sliceIdx ) );
         m_pcSliceEncoder->precompressSlice( pcPic );
         m_pcSliceEncoder->compressSlice   ( pcPic, false, false );
 
-#if HEVC_DEPENDENT_SLICES
-        const uint32_t curSliceSegmentEnd = pcSlice->getSliceSegmentCurEndCtuTsAddr();
-        if (curSliceSegmentEnd < numberOfCtusInFrame)
-        {
-          const bool bNextSegmentIsDependentSlice = curSliceSegmentEnd < pcSlice->getSliceCurEndCtuTsAddr();
-          const uint32_t sliceBits                    = pcSlice->getSliceBits();
-          uint32_t independentSliceIdx                = pcSlice->getIndependentSliceIdx();
-          pcPic->allocateNewSlice();
-          // prepare for next slice
-          m_pcSliceEncoder->setSliceSegmentIdx      ( uiNumSliceSegments   );
-          pcSlice = pcPic->slices                   [ uiNumSliceSegments   ];
-          CHECK(!(pcSlice->getPPS()!=0), "Unspecified error");
-          pcSlice->copySliceInfo                    ( pcPic->slices[uiNumSliceSegments-1]  );
-          pcSlice->setSliceSegmentIdx               ( uiNumSliceSegments   );
-          if (bNextSegmentIsDependentSlice)
-          {
-            pcSlice->setSliceBits(sliceBits);
-          }
-          else
-          {
-            pcSlice->setSliceCurStartCtuTsAddr      ( curSliceSegmentEnd );
-            pcSlice->setSliceBits(0);
-            independentSliceIdx ++;
-          }
-          pcSlice->setIndependentSliceIdx( independentSliceIdx );
-          pcSlice->setDependentSliceSegmentFlag( bNextSegmentIsDependentSlice );
-          pcSlice->setSliceSegmentCurStartCtuTsAddr ( curSliceSegmentEnd );
-          // TODO: optimise cabac_init during compress slice to improve multi-slice operation
-          // pcSlice->setEncCABACTableIdx(m_pcSliceEncoder->getEncCABACTableIdx());
-          uiNumSliceSegments ++;
-        }
-        nextCtuTsAddr = curSliceSegmentEnd;
-#else
-        const uint32_t curSliceEnd = pcSlice->getSliceCurEndCtuTsAddr();
-        if(curSliceEnd < numberOfCtusInFrame)
+        if(sliceIdx < pcPic->cs->pps->getNumSlicesInPic() - 1)
         {
           uint32_t independentSliceIdx = pcSlice->getIndependentSliceIdx();
           pcPic->allocateNewSlice();
@@ -2341,14 +2708,11 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
           pcSlice = pcPic->slices[uiNumSliceSegments];
           CHECK(!(pcSlice->getPPS() != 0), "Unspecified error");
           pcSlice->copySliceInfo(pcPic->slices[uiNumSliceSegments - 1]);
-          pcSlice->setSliceCurStartCtuTsAddr(curSliceEnd);
           pcSlice->setSliceBits(0);
           independentSliceIdx++;
           pcSlice->setIndependentSliceIdx(independentSliceIdx);
           uiNumSliceSegments++;
         }
-        nextCtuTsAddr = curSliceEnd;
-#endif
       }
 
       duData.clear();
@@ -2356,8 +2720,13 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
       CodingStructure& cs = *pcPic->cs;
       pcSlice = pcPic->slices[0];
 
-      if (pcSlice->getSPS()->getUseReshaper() && m_pcReshaper->getSliceReshaperInfo().getUseSliceReshaper())
+      if (pcSlice->getSPS()->getUseLmcs() && m_pcReshaper->getSliceReshaperInfo().getUseSliceReshaper())
       {
+        picHeader->setLmcsEnabledFlag(true);
+
+        int apsId = std::min<int>( 3, m_pcEncLib->getVPS() == nullptr ? 0 : m_pcEncLib->getVPS()->getGeneralLayerIdx( m_pcEncLib->getLayerId() ) );
+
+        picHeader->setLmcsAPSId(apsId);
           CHECK((m_pcReshaper->getRecReshaped() == false), "Rec picture is not reshaped!");
           pcPic->getRecoBuf(COMPONENT_Y).rspSignal(m_pcReshaper->getInvLUT());
           m_pcReshaper->setRecReshaped(false);
@@ -2365,6 +2734,34 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
           pcPic->getOrigBuf().copyFrom(pcPic->getTrueOrigBuf());
       }
 
+      // create SAO object based on the picture size
+      if( pcSlice->getSPS()->getSAOEnabledFlag() )
+      {
+        const uint32_t widthInCtus = ( picWidth + maxCUWidth - 1 ) / maxCUWidth;
+        const uint32_t heightInCtus = ( picHeight + maxCUHeight - 1 ) / maxCUHeight;
+        const uint32_t numCtuInFrame = widthInCtus * heightInCtus;
+
+        const uint32_t log2SaoOffsetScaleLuma = pcPic->cs->slice->getPPS()->getPpsRangeExtension().getLog2SaoOffsetScale( CHANNEL_TYPE_LUMA );
+        const uint32_t log2SaoOffsetScaleChroma = pcPic->cs->slice->getPPS()->getPpsRangeExtension().getLog2SaoOffsetScale( CHANNEL_TYPE_CHROMA );
+
+        m_pcSAO->create( picWidth, picHeight, chromaFormatIDC, maxCUWidth, maxCUHeight, maxTotalCUDepth, log2SaoOffsetScaleLuma, log2SaoOffsetScaleChroma );
+        m_pcSAO->destroyEncData();
+        m_pcSAO->createEncData( m_pcCfg->getSaoCtuBoundary(), numCtuInFrame );
+        m_pcSAO->setReshaper( m_pcReshaper );
+      }
+
+      if( !m_pcEncLib->getLoopFilterDisable() )
+      {
+        m_pcEncLib->getLoopFilter()->initEncPicYuvBuffer( chromaFormatIDC, picWidth, picHeight );
+      }
+
+      if( pcSlice->getSPS()->getScalingListFlag() && m_pcCfg->getUseScalingListId() == SCALING_LIST_FILE_READ )
+      {
+        picHeader->setScalingListPresentFlag(true);
+        int apsId = 0;
+        picHeader->setScalingListAPSId( apsId );
+      }
+
       // SAO parameter estimation using non-deblocked pixels for CTU bottom and right boundary areas
       if( pcSlice->getSPS()->getSAOEnabledFlag() && m_pcCfg->getSaoCtuBoundary() )
       {
@@ -2402,11 +2799,7 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
 #if ENABLE_QPA
                              (m_pcCfg->getUsePerceptQPA() && !m_pcCfg->getUseRateCtrl() && pcSlice->getPPS()->getUseDQP() ? m_pcEncLib->getRdCost (PARL_PARAM0 (0))->getChromaWeight() : 0.0),
 #endif
-#if K0238_SAO_GREEDY_MERGE_ENCODING
                              m_pcCfg->getTestSAODisableAtPictureLevel(), m_pcCfg->getSaoEncodingRate(), m_pcCfg->getSaoEncodingRateChroma(), m_pcCfg->getSaoCtuBoundary(), m_pcCfg->getSaoGreedyMergeEnc() );
-#else
-                             m_pcCfg->getTestSAODisableAtPictureLevel(), m_pcCfg->getSaoEncodingRate(), m_pcCfg->getSaoEncodingRateChroma(), m_pcCfg->getSaoCtuBoundary() );
-#endif
         //assign SAO slice header
         for(int s=0; s< uiNumSliceSegments; s++)
         {
@@ -2418,16 +2811,38 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
 
       if( pcSlice->getSPS()->getALFEnabledFlag() )
       {
-        AlfSliceParam alfSliceParam;
-        m_pcALF->initCABACEstimator( m_pcEncLib->getCABACEncoder(), m_pcEncLib->getCtxCache(), pcSlice );
+        m_pcALF->destroy();
+        m_pcALF->create( m_pcCfg, picWidth, picHeight, chromaFormatIDC, maxCUWidth, maxCUHeight, maxTotalCUDepth, m_pcCfg->getBitDepth(), m_pcCfg->getInputBitDepth() );
 
-        m_pcALF->ALFProcess( cs, pcSlice->getLambdas(),
+        for (int s = 0; s < uiNumSliceSegments; s++)
+        {
+          pcPic->slices[s]->setTileGroupAlfEnabledFlag(COMPONENT_Y, false);
+        }
+        m_pcALF->initCABACEstimator(m_pcEncLib->getCABACEncoder(), m_pcEncLib->getCtxCache(), pcSlice, m_pcEncLib->getApsMap());
+        m_pcALF->ALFProcess(cs, pcSlice->getLambdas()
 #if ENABLE_QPA
-                             (m_pcCfg->getUsePerceptQPA() && !m_pcCfg->getUseRateCtrl() && pcSlice->getPPS()->getUseDQP() ? m_pcEncLib->getRdCost (PARL_PARAM0 (0))->getChromaWeight() : 0.0),
+          , (m_pcCfg->getUsePerceptQPA() && !m_pcCfg->getUseRateCtrl() && pcSlice->getPPS()->getUseDQP() ? m_pcEncLib->getRdCost(PARL_PARAM0(0))->getChromaWeight() : 0.0)
 #endif
-                             alfSliceParam );
+        );
+
         //assign ALF slice header
-        pcPic->cs->aps->setAlfAPSParam(alfSliceParam);
+        for (int s = 0; s < uiNumSliceSegments; s++)
+        {
+          pcPic->slices[s]->setTileGroupAlfEnabledFlag(COMPONENT_Y, cs.slice->getTileGroupAlfEnabledFlag(COMPONENT_Y));
+          pcPic->slices[s]->setTileGroupAlfEnabledFlag(COMPONENT_Cb, cs.slice->getTileGroupAlfEnabledFlag(COMPONENT_Cb));
+          pcPic->slices[s]->setTileGroupAlfEnabledFlag(COMPONENT_Cr, cs.slice->getTileGroupAlfEnabledFlag(COMPONENT_Cr));
+          if (pcPic->slices[s]->getTileGroupAlfEnabledFlag(COMPONENT_Y))
+          {
+            pcPic->slices[s]->setTileGroupNumAps(cs.slice->getTileGroupNumAps());
+            pcPic->slices[s]->setAlfAPSs(cs.slice->getTileGroupApsIdLuma());
+          }
+          else
+          {
+            pcPic->slices[s]->setTileGroupNumAps(0);
+          }
+          pcPic->slices[s]->setAlfAPSs(cs.slice->getAlfAPSs());
+          pcPic->slices[s]->setTileGroupApsIdChroma(cs.slice->getTileGroupApsIdChroma());
+        }
       }
       if (m_pcCfg->getUseCompositeRef() && getPrepareLTRef())
       {
@@ -2455,6 +2870,8 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
       }
     }
 
+    pcSlice->freeScaledRefPicList( scaledRefPic );
+
     if( m_pcCfg->getUseAMaxBT() )
     {
       for( const CodingUnit *cu : pcPic->cs->cus )
@@ -2474,20 +2891,18 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
       /////////////////////////////////////////////////////////////////////////////////////////////////// File writing
 
       // write various parameter sets
-#if JCTVC_Y0038_PARAMS
       bool writePS = m_bSeqFirst || (m_pcCfg->getReWriteParamSets() && (pcSlice->isIRAP()));
       if (writePS)
       {
         m_pcEncLib->setParamSetChanged(pcSlice->getSPS()->getSPSId(), pcSlice->getPPS()->getPPSId());
       }
-      actualTotalBits += xWriteParameterSets(accessUnit, pcSlice, writePS);
 
-      if (writePS)
-#else
-      actualTotalBits += xWriteParameterSets( accessUnit, pcSlice, m_bSeqFirst );
+      int layerIdx = m_pcEncLib->getVPS() == nullptr ? 0 : m_pcEncLib->getVPS()->getGeneralLayerIdx( m_pcEncLib->getLayerId() );
 
-      if ( m_bSeqFirst )
-#endif
+      // it is assumed that layerIdx equal to 0 is always present
+      actualTotalBits += xWriteParameterSets( accessUnit, pcSlice, writePS && !layerIdx );
+
+      if (writePS)
       {
         // create prefix SEI messages at the beginning of the sequence
         CHECK(!(leadingSeiMessages.empty()), "Unspecified error");
@@ -2495,19 +2910,68 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
 
         m_bSeqFirst = false;
       }
-      if (m_pcCfg->getAccessUnitDelimiter())
+
+      // it is assumed that layerIdx equal to 0 is always present
+      if( m_pcCfg->getAccessUnitDelimiter() && !layerIdx )
       {
         xWriteAccessUnitDelimiter(accessUnit, pcSlice);
       }
-      if (pcSlice->getSPS()->getALFEnabledFlag() && pcSlice->getAPS()->getAlfAPSParam().enabledFlag[COMPONENT_Y])
+
+      //send LMCS APS when LMCSModel is updated. It can be updated even current slice does not enable reshaper.
+      //For example, in RA, update is on intra slice, but intra slice may not use reshaper
+      if (pcSlice->getSPS()->getUseLmcs())
       {
-        pcSlice->setTileGroupAlfEnabledFlag(true);
-        pcSlice->setAPSId(pcSlice->getAPS()->getAPSId());
-        actualTotalBits += xWriteAPS(accessUnit, pcSlice->getAPS());
+        //only 1 LMCS data for 1 picture
+        int apsId = picHeader->getLmcsAPSId();
+        ParameterSetMap<APS> *apsMap = m_pcEncLib->getApsMap();
+        APS* aps = apsMap->getPS((apsId << NUM_APS_TYPE_LEN) + LMCS_APS);
+        bool writeAPS = aps && apsMap->getChangedFlag((apsId << NUM_APS_TYPE_LEN) + LMCS_APS);
+        if (writeAPS)
+        {
+          actualTotalBits += xWriteAPS( accessUnit, aps, m_pcEncLib->getLayerId(), true );
+          apsMap->clearChangedFlag((apsId << NUM_APS_TYPE_LEN) + LMCS_APS);
+          CHECK(aps != picHeader->getLmcsAPS(), "Wrong LMCS APS pointer in compressGOP");
+        }
       }
-      else
+
+      // only 1 SCALING LIST data for 1 picture
+      if( pcSlice->getSPS()->getScalingListFlag() && ( m_pcCfg->getUseScalingListId() == SCALING_LIST_FILE_READ ) )
+      {
+        int apsId = picHeader->getScalingListAPSId();
+        ParameterSetMap<APS> *apsMap = m_pcEncLib->getApsMap();
+        APS* aps = apsMap->getPS( ( apsId << NUM_APS_TYPE_LEN ) + SCALING_LIST_APS );
+        bool writeAPS = aps && apsMap->getChangedFlag( ( apsId << NUM_APS_TYPE_LEN ) + SCALING_LIST_APS );
+        if( writeAPS )
+        {
+          actualTotalBits += xWriteAPS( accessUnit, aps, m_pcEncLib->getLayerId(), true );
+          apsMap->clearChangedFlag( ( apsId << NUM_APS_TYPE_LEN ) + SCALING_LIST_APS );
+          CHECK( aps != picHeader->getScalingListAPS(), "Wrong SCALING LIST APS pointer in compressGOP" );
+        }
+      }
+
+      if (pcSlice->getSPS()->getALFEnabledFlag() && pcSlice->getTileGroupAlfEnabledFlag(COMPONENT_Y))
       {
-        pcSlice->setTileGroupAlfEnabledFlag(false);
+        for (int apsId = 0; apsId < ALF_CTB_MAX_NUM_APS; apsId++)
+        {
+          ParameterSetMap<APS> *apsMap = m_pcEncLib->getApsMap();
+
+          APS* aps = apsMap->getPS((apsId << NUM_APS_TYPE_LEN) + ALF_APS);
+          bool writeAPS = aps && apsMap->getChangedFlag((apsId << NUM_APS_TYPE_LEN) + ALF_APS);
+          if (!aps && pcSlice->getAlfAPSs() && pcSlice->getAlfAPSs()[apsId])
+          {
+            writeAPS = true;
+            aps = pcSlice->getAlfAPSs()[apsId]; // use asp from slice header
+            *apsMap->allocatePS(apsId) = *aps; //allocate and cpy
+            m_pcALF->setApsIdStart( apsId );
+          }
+
+          if (writeAPS )
+          {
+            actualTotalBits += xWriteAPS( accessUnit, aps, m_pcEncLib->getLayerId(), true );
+            apsMap->clearChangedFlag((apsId << NUM_APS_TYPE_LEN) + ALF_APS);
+            CHECK(aps != pcSlice->getAlfAPSs()[apsId], "Wrong APS pointer in compressGOP");
+          }
+        }
       }
 
       // reset presence of BP SEI indication
@@ -2519,11 +2983,7 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
       std::size_t binCountsInNalUnits   = 0; // For implementation of cabac_zero_word stuffing (section 7.4.3.10)
       std::size_t numBytesInVclNalUnits = 0; // For implementation of cabac_zero_word stuffing (section 7.4.3.10)
 
-#if HEVC_DEPENDENT_SLICES
-      for( uint32_t sliceSegmentStartCtuTsAddr = 0, sliceSegmentIdxCount=0; sliceSegmentStartCtuTsAddr < numberOfCtusInFrame; sliceSegmentIdxCount++, sliceSegmentStartCtuTsAddr=pcSlice->getSliceSegmentCurEndCtuTsAddr() )
-#else
-      for(uint32_t sliceSegmentStartCtuTsAddr = 0, sliceSegmentIdxCount = 0; sliceSegmentStartCtuTsAddr < numberOfCtusInFrame; sliceSegmentIdxCount++, sliceSegmentStartCtuTsAddr = pcSlice->getSliceCurEndCtuTsAddr())
-#endif
+      for(uint32_t sliceSegmentIdxCount = 0; sliceSegmentIdxCount < pcPic->cs->pps->getNumSlicesInPic(); sliceSegmentIdxCount++ )
       {
         pcSlice = pcPic->slices[sliceSegmentIdxCount];
         if(sliceSegmentIdxCount > 0 && pcSlice->getSliceType()!= I_SLICE)
@@ -2532,40 +2992,102 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
         }
         m_pcSliceEncoder->setSliceSegmentIdx(sliceSegmentIdxCount);
 
-        pcSlice->setRPS   (pcPic->slices[0]->getRPS());
-        pcSlice->setRPSidx(pcPic->slices[0]->getRPSidx());
-
-        for ( uint32_t ui = 0 ; ui < numSubstreams; ui++ )
-        {
-          substreamsOut[ui].clear();
-        }
-
-        /* start slice NALunit */
-        OutputNALUnit nalu( pcSlice->getNalUnitType(), pcSlice->getTLayer() );
-        m_HLSWriter->setBitstream( &nalu.m_Bitstream );
+        pcSlice->setRPL0(pcPic->slices[0]->getRPL0());
+        pcSlice->setRPL1(pcPic->slices[0]->getRPL1());
+        pcSlice->setRPL0idx(pcPic->slices[0]->getRPL0idx());
+        pcSlice->setRPL1idx(pcPic->slices[0]->getRPL1idx());
 
-        pcSlice->setNoRaslOutputFlag(false);
+        pcSlice->setNoIncorrectPicOutputFlag(false);
         if (pcSlice->isIRAP())
         {
-#if !JVET_M0101_HLS
-          if (pcSlice->getNalUnitType() >= NAL_UNIT_CODED_SLICE_BLA_W_LP && pcSlice->getNalUnitType() <= NAL_UNIT_CODED_SLICE_IDR_N_LP)
-#else
           if (pcSlice->getNalUnitType() >= NAL_UNIT_CODED_SLICE_IDR_W_RADL && pcSlice->getNalUnitType() <= NAL_UNIT_CODED_SLICE_IDR_N_LP)
-#endif
           {
-            pcSlice->setNoRaslOutputFlag(true);
+            pcSlice->setNoIncorrectPicOutputFlag(true);
           }
           //the inference for NoOutputPriorPicsFlag
           // KJS: This cannot happen at the encoder
-          if (!m_bFirst && pcSlice->isIRAP() && pcSlice->getNoRaslOutputFlag())
+          if (!m_bFirst && (pcSlice->isIRAP() || pcSlice->getNalUnitType() >= NAL_UNIT_CODED_SLICE_GDR) && pcSlice->getNoIncorrectPicOutputFlag())
           {
-            if (pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA)
+            if (pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA || pcSlice->getNalUnitType() >= NAL_UNIT_CODED_SLICE_GDR)
             {
-              pcSlice->setNoOutputPriorPicsFlag(true);
+              picHeader->setNoOutputOfPriorPicsFlag(true);
             }
           }
         }
 
+        // code picture header before first slice
+        if(sliceSegmentIdxCount == 0) 
+        {
+          // code RPL in picture header or slice headers
+          if( !m_pcCfg->getSliceLevelRpl() && (!pcSlice->getIdrPicFlag() || pcSlice->getSPS()->getIDRRefParamListPresent()) )
+          {
+            picHeader->setPicRplPresentFlag(true);
+            picHeader->setRPL0idx(pcSlice->getRPL0idx());
+            picHeader->setRPL1idx(pcSlice->getRPL1idx());
+            picHeader->setRPL0(pcSlice->getRPL0());
+            picHeader->setRPL1(pcSlice->getRPL1());
+            *picHeader->getLocalRPL0() = *pcSlice->getLocalRPL0();
+            *picHeader->getLocalRPL1() = *pcSlice->getLocalRPL1();
+          }
+          else {
+            picHeader->setPicRplPresentFlag(false);
+          }
+          
+          // code DBLK in picture header or slice headers
+          if( !m_pcCfg->getSliceLevelDblk() )
+          {
+            picHeader->setDeblockingFilterOverridePresentFlag( true );
+            picHeader->setDeblockingFilterOverrideFlag   ( pcSlice->getDeblockingFilterOverrideFlag()   );
+            picHeader->setDeblockingFilterDisable        ( pcSlice->getDeblockingFilterDisable()        ); 
+            picHeader->setDeblockingFilterBetaOffsetDiv2 ( pcSlice->getDeblockingFilterBetaOffsetDiv2() ); 
+            picHeader->setDeblockingFilterTcOffsetDiv2   ( pcSlice->getDeblockingFilterTcOffsetDiv2()   );
+          }
+          else {
+            picHeader->setDeblockingFilterOverridePresentFlag( false );
+          }
+          
+          // code SAO parameters in picture header or slice headers
+          if( !m_pcCfg->getSliceLevelSao() )
+          {
+            picHeader->setSaoEnabledPresentFlag( true );
+            picHeader->setSaoEnabledFlag(CHANNEL_TYPE_LUMA,   pcSlice->getSaoEnabledFlag(CHANNEL_TYPE_LUMA  ));
+            picHeader->setSaoEnabledFlag(CHANNEL_TYPE_CHROMA, pcSlice->getSaoEnabledFlag(CHANNEL_TYPE_CHROMA));
+          }
+          else {
+            picHeader->setSaoEnabledPresentFlag( false );
+          }
+          
+          // code ALF parameters in picture header or slice headers
+          if( !m_pcCfg->getSliceLevelAlf() )
+          {
+            picHeader->setAlfEnabledPresentFlag( true );
+            picHeader->setAlfEnabledFlag(COMPONENT_Y,  pcSlice->getTileGroupAlfEnabledFlag(COMPONENT_Y ) );
+            picHeader->setAlfEnabledFlag(COMPONENT_Cb, pcSlice->getTileGroupAlfEnabledFlag(COMPONENT_Cb) );
+            picHeader->setAlfEnabledFlag(COMPONENT_Cr, pcSlice->getTileGroupAlfEnabledFlag(COMPONENT_Cr) );            
+            picHeader->setNumAlfAps(pcSlice->getTileGroupNumAps());
+            picHeader->setAlfAPSs(pcSlice->getTileGroupApsIdLuma());
+            picHeader->setAlfApsIdChroma(pcSlice->getTileGroupApsIdChroma());
+          }
+          else {
+            picHeader->setAlfEnabledPresentFlag( false );
+          }
+
+          pcPic->cs->picHeader->setPic(pcPic);
+          pcPic->cs->picHeader->setValid();
+          actualTotalBits += xWritePicHeader(accessUnit, pcPic->cs->picHeader);
+        }
+        pcSlice->setPicHeader( pcPic->cs->picHeader );
+
+        for ( uint32_t ui = 0 ; ui < numSubstreams; ui++ )
+        {
+          substreamsOut[ui].clear();
+        }
+
+        /* start slice NALunit */
+        OutputNALUnit nalu( pcSlice->getNalUnitType(), m_pcEncLib->getLayerId(), pcSlice->getTLayer() );
+        m_HLSWriter->setBitstream( &nalu.m_Bitstream );
+
+
         tmpBitsBeforeWriting = m_HLSWriter->getNumberOfWrittenBits();
         m_HLSWriter->codeSliceHeader( pcSlice );
         actualHeadBits += ( m_HLSWriter->getNumberOfWrittenBits() - tmpBitsBeforeWriting );
@@ -2583,27 +3105,14 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
           // The final bitstream is either nalu.m_Bitstream or pcBitstreamRedirect;
           // Complete the slice header info.
           m_HLSWriter->setBitstream( &nalu.m_Bitstream );
-#if HEVC_TILES_WPP
           m_HLSWriter->codeTilesWPPEntryPoint( pcSlice );
-#endif
 
           // Append substreams...
           OutputBitstream *pcOut = pcBitstreamRedirect;
-#if HEVC_TILES_WPP
-#if HEVC_DEPENDENT_SLICES
-
-          const int numZeroSubstreamsAtStartOfSlice = pcPic->tileMap->getSubstreamForCtuAddr(pcSlice->getSliceSegmentCurStartCtuTsAddr(), false, pcSlice);
-#else
-          const int numZeroSubstreamsAtStartOfSlice  = pcPic->tileMap->getSubstreamForCtuAddr(pcSlice->getSliceCurStartCtuTsAddr(), false, pcSlice);
-#endif
-          const int numSubstreamsToCode  = pcSlice->getNumberOfSubstreamSizes()+1;
-#else
-          const int numZeroSubstreamsAtStartOfSlice  = 0;
           const int numSubstreamsToCode  = pcSlice->getNumberOfSubstreamSizes()+1;
-#endif
           for ( uint32_t ui = 0 ; ui < numSubstreamsToCode; ui++ )
           {
-            pcOut->addSubstream(&(substreamsOut[ui+numZeroSubstreamsAtStartOfSlice]));
+            pcOut->addSubstream(&(substreamsOut[ui]));
           }
         }
 
@@ -2623,10 +3132,9 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
         }
 
         if( ( m_pcCfg->getPictureTimingSEIEnabled() || m_pcCfg->getDecodingUnitInfoSEIEnabled() ) &&
-            ( pcSlice->getSPS()->getVuiParametersPresentFlag() ) &&
-            ( ( pcSlice->getSPS()->getVuiParameters()->getHrdParameters()->getNalHrdParametersPresentFlag() )
-           || ( pcSlice->getSPS()->getVuiParameters()->getHrdParameters()->getVclHrdParametersPresentFlag() ) ) &&
-            ( pcSlice->getSPS()->getVuiParameters()->getHrdParameters()->getSubPicCpbParamsPresentFlag() ) )
+            ( ( pcSlice->getSPS()->getHrdParameters()->getNalHrdParametersPresentFlag() )
+           || ( pcSlice->getSPS()->getHrdParameters()->getVclHrdParametersPresentFlag() ) ) &&
+            ( pcSlice->getSPS()->getHrdParameters()->getGeneralDecodingUnitHrdParamsPresentFlag() ) )
         {
             uint32_t numNalus = 0;
           uint32_t numRBSPBytes = 0;
@@ -2661,10 +3169,9 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
       m_pcCfg->setEncodedFlag(iGOPid, true);
 
       double PSNR_Y;
-      xCalculateAddPSNRs(isField, isTff, iGOPid, pcPic, accessUnit, rcListPic, encTime, snr_conversion, printFrameMSE, &PSNR_Y
-                       , isEncodeLtRef
-      );
+      xCalculateAddPSNRs(isField, isTff, iGOPid, pcPic, accessUnit, rcListPic, encTime, snr_conversion, printFrameMSE, &PSNR_Y, isEncodeLtRef );
 
+#if HEVC_SEI
       // Only produce the Green Metadata SEI message with the last picture.
       if( m_pcCfg->getSEIGreenMetadataInfoSEIEnable() && pcSlice->getPOC() == ( m_pcCfg->getFramesToBeEncoded() - 1 )  )
       {
@@ -2672,6 +3179,7 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
         m_seiEncoder.initSEIGreenMetadataInfo(seiGreenMetadataInfo, (uint32_t)(PSNR_Y * 100 + 0.5));
         trailingSeiMessages.push_back(seiGreenMetadataInfo);
       }
+#endif
 
       xWriteTrailingSEIMessages(trailingSeiMessages, accessUnit, pcSlice->getTLayer(), pcSlice->getSPS());
 
@@ -2706,12 +3214,14 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
         }
   #endif
       }
-
+      xCreateFrameFieldInfoSEI( leadingSeiMessages, pcSlice, isField );
       xCreatePictureTimingSEI( m_pcCfg->getEfficientFieldIRAPEnabled() ? effFieldIRAPMap.GetIRAPGOPid() : 0, leadingSeiMessages, nestedSeiMessages, duInfoSeiMessages, pcSlice, isField, duData );
-      if( m_pcCfg->getScalableNestingSEIEnabled() )
+#if HEVC_SEI
+     if( m_pcCfg->getScalableNestingSEIEnabled() )
       {
         xCreateScalableNestingSEI( leadingSeiMessages, nestedSeiMessages );
       }
+#endif
       xWriteLeadingSEIMessages( leadingSeiMessages, duInfoSeiMessages, accessUnit, pcSlice->getTLayer(), pcSlice->getSPS(), duData );
       xWriteDuSEIMessages( duInfoSeiMessages, accessUnit, pcSlice->getTLayer(), pcSlice->getSPS(), duData );
 
@@ -2728,7 +3238,12 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
     m_bFirst = false;
     m_iNumPicCoded++;
     if (!(m_pcCfg->getUseCompositeRef() && isEncodeLtRef))
-      m_totalCoded ++;
+    {
+      for( int i = pcSlice->getTLayer() ; i < pcSlice->getSPS()->getMaxTLayers() ; i ++ )
+      {
+        m_totalCoded[i]++;
+      }
+    }
     /* logging: insert a newline at end of picture period */
 
     if (m_pcCfg->getEfficientFieldIRAPEnabled())
@@ -2743,17 +3258,16 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
 
   delete pcBitstreamRedirect;
 
-  CHECK(!( (m_iNumPicCoded == iNumPicRcvd) ), "Unspecified error");
-
+  CHECK( m_iNumPicCoded > 1, "Unspecified error" ); // NOTE(review): presumably at most one picture is coded per compressGOP call now -- confirm against the caller
 }
 
-void EncGOP::printOutSummary(uint32_t uiNumAllPicCoded, bool isField, const bool printMSEBasedSNR, const bool printSequenceMSE, const bool printHexPsnr, const BitDepths &bitDepths)
+void EncGOP::printOutSummary( uint32_t uiNumAllPicCoded, bool isField, const bool printMSEBasedSNR, const bool printSequenceMSE, const bool printHexPsnr, const bool printRprPSNR, const BitDepths &bitDepths ) // printRprPSNR is handed on to the Analyze printOut() summaries
 {
 #if ENABLE_QPA
   const bool    useWPSNR = m_pcEncLib->getUseWPSNR();
 #endif
 #if WCG_WPSNR
-  const bool    useLumaWPSNR = m_pcEncLib->getLumaLevelToDeltaQPMapping().isEnabled() || (m_pcCfg->getReshaper() && m_pcCfg->getReshapeSignalType() == RESHAPE_SIGNAL_PQ);
+  const bool    useLumaWPSNR = m_pcEncLib->getLumaLevelToDeltaQPMapping().isEnabled() || (m_pcCfg->getLmcs() && m_pcCfg->getReshapeSignalType() == RESHAPE_SIGNAL_PQ); // on when a luma-level dQP mapping is enabled, or LMCS is used with the PQ signal type
 #endif
 
   if( m_pcCfg->getDecodeBitstream(0).empty() && m_pcCfg->getDecodeBitstream(1).empty() && !m_pcCfg->useFastForwardToPOC() )
@@ -2779,25 +3293,36 @@ void EncGOP::printOutSummary(uint32_t uiNumAllPicCoded, bool isField, const bool
   //-- all
   msg( INFO, "\n" );
   msg( DETAILS,"\nSUMMARY --------------------------------------------------------\n" );
+#if JVET_O0756_CALCULATE_HDRMETRICS
+  const bool calculateHdrMetrics = m_pcEncLib->getCalcluateHdrMetrics(); // appended as the last printOut() argument for the overall summary only
+#endif
 #if ENABLE_QPA
-  m_gcAnalyzeAll.printOut('a', chFmt, printMSEBasedSNR, printSequenceMSE, printHexPsnr, bitDepths, useWPSNR);
+  m_gcAnalyzeAll.printOut( 'a', chFmt, printMSEBasedSNR, printSequenceMSE, printHexPsnr, printRprPSNR, bitDepths, useWPSNR
+#if JVET_O0756_CALCULATE_HDRMETRICS
+                          , calculateHdrMetrics
+#endif
+                          );
 #else
-  m_gcAnalyzeAll.printOut('a', chFmt, printMSEBasedSNR, printSequenceMSE, printHexPsnr, bitDepths);
+  m_gcAnalyzeAll.printOut( 'a', chFmt, printMSEBasedSNR, printSequenceMSE, printHexPsnr, printRprPSNR, bitDepths // printRprPSNR precedes bitDepths, as in every other printOut() call
+#if JVET_O0756_CALCULATE_HDRMETRICS
+                          , calculateHdrMetrics
 #endif
-  msg( DETAILS,"\n\nI Slices--------------------------------------------------------\n" );
-  m_gcAnalyzeI.printOut('i', chFmt, printMSEBasedSNR, printSequenceMSE, printHexPsnr, bitDepths);
+                          );
+#endif
+  msg( DETAILS, "\n\nI Slices--------------------------------------------------------\n" );
+  m_gcAnalyzeI.printOut( 'i', chFmt, printMSEBasedSNR, printSequenceMSE, printHexPsnr, printRprPSNR, bitDepths );
 
-  msg( DETAILS,"\n\nP Slices--------------------------------------------------------\n" );
-  m_gcAnalyzeP.printOut('p', chFmt, printMSEBasedSNR, printSequenceMSE, printHexPsnr, bitDepths);
+  msg( DETAILS, "\n\nP Slices--------------------------------------------------------\n" );
+  m_gcAnalyzeP.printOut( 'p', chFmt, printMSEBasedSNR, printSequenceMSE, printHexPsnr, printRprPSNR, bitDepths );
 
-  msg( DETAILS,"\n\nB Slices--------------------------------------------------------\n" );
-  m_gcAnalyzeB.printOut('b', chFmt, printMSEBasedSNR, printSequenceMSE, printHexPsnr, bitDepths);
+  msg( DETAILS, "\n\nB Slices--------------------------------------------------------\n" );
+  m_gcAnalyzeB.printOut( 'b', chFmt, printMSEBasedSNR, printSequenceMSE, printHexPsnr, printRprPSNR, bitDepths );
 
 #if WCG_WPSNR
   if (useLumaWPSNR)
   {
     msg(DETAILS, "\nWPSNR SUMMARY --------------------------------------------------------\n");
-    m_gcAnalyzeWPSNR.printOut('w', chFmt, printMSEBasedSNR, printSequenceMSE, printHexPsnr, bitDepths, useLumaWPSNR);
+    m_gcAnalyzeWPSNR.printOut( 'w', chFmt, printMSEBasedSNR, printSequenceMSE, printHexPsnr, printRprPSNR, bitDepths, useLumaWPSNR );
   }
 #endif
   if (!m_pcCfg->getSummaryOutFilename().empty())
@@ -2825,9 +3350,9 @@ void EncGOP::printOutSummary(uint32_t uiNumAllPicCoded, bool isField, const bool
     m_gcAnalyzeAll_in.setBits(m_gcAnalyzeAll.getBits());
     // prior to the above statement, the interlace analyser does not contain the correct total number of bits.
 
-    msg( DETAILS,"\n\nSUMMARY INTERLACED ---------------------------------------------\n" );
+    msg( INFO,"\n\nSUMMARY INTERLACED ---------------------------------------------\n" );
 #if ENABLE_QPA
-    m_gcAnalyzeAll_in.printOut('a', chFmt, printMSEBasedSNR, printSequenceMSE, printHexPsnr, bitDepths, useWPSNR);
+    m_gcAnalyzeAll_in.printOut( 'a', chFmt, printMSEBasedSNR, printSequenceMSE, printHexPsnr, printRprPSNR, bitDepths, useWPSNR );
 #else
-    m_gcAnalyzeAll_in.printOut('a', chFmt, printMSEBasedSNR, printSequenceMSE, printHexPsnr, bitDepths);
+    m_gcAnalyzeAll_in.printOut( 'a', chFmt, printMSEBasedSNR, printSequenceMSE, printHexPsnr, printRprPSNR, bitDepths ); // non-QPA build: same argument order, printRprPSNR before bitDepths
 #endif
@@ -2920,7 +3445,7 @@ void EncGOP::xGetBuffer( PicList&                  rcListPic,
   while (iterPic != rcListPic.end())
   {
     rpcPic = *(iterPic);
-    if (rpcPic->getPOC() == pocCurr)
+    if( rpcPic->getPOC() == pocCurr && rpcPic->layerId == m_pcEncLib->getLayerId() ) // stop at the picture matching both the POC and this encoder instance's layer id
     {
       break;
     }
@@ -3001,7 +3526,7 @@ static inline double calcWeightedSquaredError(const CPelBuf& org,        const C
 
 uint64_t EncGOP::xFindDistortionPlane(const CPelBuf& pic0, const CPelBuf& pic1, const uint32_t rshift
 #if ENABLE_QPA
-                                    , const uint32_t chromaShift /*= 0*/
+                                    , const uint32_t chromaShiftHor /*= 0*/, const uint32_t chromaShiftVer /*= 0*/ // separate horizontal / vertical shift amounts (callers pass getComponentScaleX / getComponentScaleY)
 #endif
                                       )
 {
@@ -3021,7 +3546,7 @@ uint64_t EncGOP::xFindDistortionPlane(const CPelBuf& pic0, const CPelBuf& pic1,
       const uint32_t   W = pic0.width;  // image width
       const uint32_t   H = pic0.height; // image height
       const double     R = double(W * H) / (1920.0 * 1080.0);
-      const uint32_t   B = Clip3<uint32_t>(0, 128 >> chromaShift, 4 * uint32_t(16.0 * sqrt(R) + 0.5)); // WPSNR block size in integer multiple of 4 (for SIMD, = 64 at full-HD)
+      const uint32_t   B = Clip3<uint32_t>(0, 128 >> chromaShiftVer, 4 * uint32_t(16.0 * sqrt(R) + 0.5)); // WPSNR block size in integer multiple of 4 (for SIMD, = 64 at full-HD); the upper clip bound is 128 reduced by the vertical shift
 
       uint32_t x, y;
 
@@ -3056,7 +3581,7 @@ uint64_t EncGOP::xFindDistortionPlane(const CPelBuf& pic0, const CPelBuf& pic1,
       }
 
       // integer weighted distortion
-      sumAct = 16.0 * sqrt ((3840.0 * 2160.0) / double((W << chromaShift) * (H << chromaShift))) * double(1 << BD);
+      sumAct = 16.0 * sqrt ((3840.0 * 2160.0) / double((W << chromaShiftHor) * (H << chromaShiftVer))) * double(1 << BD); // plane width / height are scaled up by their own shift before relating the area to 3840x2160
 
       return (wmse <= 0.0) ? 0 : uint64_t(wmse * pow(sumAct, BETA) + 0.5);
     }
@@ -3094,7 +3619,7 @@ uint64_t EncGOP::xFindDistortionPlane(const CPelBuf& pic0, const CPelBuf& pic1,
 double EncGOP::xFindDistortionPlaneWPSNR(const CPelBuf& pic0, const CPelBuf& pic1, const uint32_t rshift, const CPelBuf& picLuma0,
   ComponentID compID, const ChromaFormat chfmt    )
 {
-  const bool    useLumaWPSNR = m_pcEncLib->getLumaLevelToDeltaQPMapping().isEnabled() || (m_pcCfg->getReshaper() && m_pcCfg->getReshapeSignalType() == RESHAPE_SIGNAL_PQ);
+  const bool    useLumaWPSNR = m_pcEncLib->getLumaLevelToDeltaQPMapping().isEnabled() || (m_pcCfg->getLmcs() && m_pcCfg->getReshapeSignalType() == RESHAPE_SIGNAL_PQ); // luma-level dQP mapping enabled, or LMCS with the PQ signal type; otherwise 0 is returned below
   if (!useLumaWPSNR)
   {
     return 0;
@@ -3227,16 +3752,17 @@ void EncGOP::xCalculateAddPSNR(Picture* pcPic, PelUnitBuf cPicD, const AccessUni
   const CPelUnitBuf& pic = cPicD;
   CHECK(!(conversion == IPCOLOURSPACE_UNCHANGED), "Unspecified error");
 //  const CPelUnitBuf& org = (conversion != IPCOLOURSPACE_UNCHANGED) ? pcPic->getPicYuvTrueOrg()->getBuf() : pcPic->getPicYuvOrg()->getBuf();
-  const CPelUnitBuf& org = sps.getUseReshaper() ? pcPic->getTrueOrigBuf() : pcPic->getOrigBuf();
+  const CPelUnitBuf& org = (sps.getUseLmcs() || m_pcCfg->getGopBasedTemporalFilterEnabled()) ? pcPic->getTrueOrigBuf() : pcPic->getOrigBuf(); // use the "true original" buffer as the reference when LMCS or the GOP-based temporal filter is enabled
 #if ENABLE_QPA
   const bool    useWPSNR = m_pcEncLib->getUseWPSNR();
 #endif
   double  dPSNR[MAX_NUM_COMPONENT];
 #if WCG_WPSNR
-  const bool    useLumaWPSNR = m_pcEncLib->getLumaLevelToDeltaQPMapping().isEnabled() || (m_pcCfg->getReshaper() && m_pcCfg->getReshapeSignalType() == RESHAPE_SIGNAL_PQ);
+  const bool    useLumaWPSNR = m_pcEncLib->getLumaLevelToDeltaQPMapping().isEnabled() || (m_pcCfg->getLmcs() && m_pcCfg->getReshapeSignalType() == RESHAPE_SIGNAL_PQ); // luma-level dQP mapping enabled, or LMCS with the PQ signal type
   double  dPSNRWeighted[MAX_NUM_COMPONENT];
   double  MSEyuvframeWeighted[MAX_NUM_COMPONENT];
 #endif
+  double  upscaledPSNR[MAX_NUM_COMPONENT]; // per-component PSNR of the rescaled reconstruction; only computed when RPR is enabled, 0.0 otherwise
   for(int i=0; i<MAX_NUM_COMPONENT; i++)
   {
     dPSNR[i]=0.0;
@@ -3244,7 +3770,17 @@ void EncGOP::xCalculateAddPSNR(Picture* pcPic, PelUnitBuf cPicD, const AccessUni
     dPSNRWeighted[i]=0.0;
     MSEyuvframeWeighted[i] = 0.0;
 #endif
+    upscaledPSNR[i] = 0.0;
+  }
+#if JVET_O0756_CALCULATE_HDRMETRICS
+  double deltaE[hdrtoolslib::NB_REF_WHITE]; // HDR-metric outputs, sized by hdrtoolslib::NB_REF_WHITE and zero-initialised below
+  double psnrL[hdrtoolslib::NB_REF_WHITE];
+  for (int i=0; i<hdrtoolslib::NB_REF_WHITE; i++)
+  {
+    deltaE[i] = 0.0;
+    psnrL[i] = 0.0;
   }
+#endif
 
   PelStorage interm;
 
@@ -3264,6 +3800,22 @@ void EncGOP::xCalculateAddPSNR(Picture* pcPic, PelUnitBuf cPicD, const AccessUni
   const bool bPicIsField     = pcPic->fieldPic;
   const Slice*  pcSlice      = pcPic->slices[0];
 
+  PelStorage upscaledRec; // reconstruction rescaled to the size of the original-input buffer; only created when RPR is enabled
+
+  if( m_pcEncLib->isRPREnabled() )
+  {
+    const CPelBuf& upscaledOrg = sps.getUseLmcs() ? pcPic->M_BUFS( 0, PIC_TRUE_ORIGINAL_INPUT).get( COMPONENT_Y ) : pcPic->M_BUFS( 0, PIC_ORIGINAL_INPUT).get( COMPONENT_Y ); // luma plane of the input picture; only its dimensions are used here
+    upscaledRec.create( pic.chromaFormat, Area( Position(), upscaledOrg ) );
+
+    int xScale, yScale; // filled in by CU::getRprScaling below
+    // it is assumed that full resolution picture PPS has ppsId 0
+    const PPS* pps = m_pcEncLib->getPPS(0);
+    CU::getRprScaling( &sps, pps, pcPic, xScale, yScale );
+    std::pair<int, int> scalingRatio = std::pair<int, int>( xScale, yScale );
+
+    Picture::rescalePicture( scalingRatio, picC, pcPic->getScalingWindow(), upscaledRec, pps->getScalingWindow(), format, sps.getBitDepths(), false, false, sps.getHorCollocatedChromaFlag(), sps.getVerCollocatedChromaFlag() ); // picC -> upscaledRec, between this picture's scaling window and that of PPS 0
+  }
+
   for (int comp = 0; comp < ::getNumberValidComponents(formatD); comp++)
   {
     const ComponentID compID = ComponentID(comp);
@@ -3273,15 +3825,27 @@ void EncGOP::xCalculateAddPSNR(Picture* pcPic, PelUnitBuf cPicD, const AccessUni
     CHECK(!( p.width  == o.width), "Unspecified error");
     CHECK(!( p.height == o.height), "Unspecified error");
 
-    const uint32_t   width  = p.width  - (m_pcEncLib->getPad(0) >> ::getComponentScaleX(compID, format));
-    const uint32_t   height = p.height - (m_pcEncLib->getPad(1) >> (!!bPicIsField+::getComponentScaleY(compID,format)));
+    int padX = m_pcEncLib->getPad( 0 ); // default: encoder-wide (full-resolution) padding
+    int padY = m_pcEncLib->getPad( 1 );
+
+    // when RPR is enabled, picture padding is picture specific due to possible different picture resolutions, however only full resolution padding is stored in EncLib
+    // get per picture padding from the conformance window, in this case if conformance window is set not equal to the padding then PSNR results may be inaccurate
+    if( m_pcEncLib->isRPREnabled() )
+    {
+      Window& conf = pcPic->getConformanceWindow();
+      padX = conf.getWindowRightOffset() * SPS::getWinUnitX( format ); // window offsets are multiplied by the window unit for this chroma format
+      padY = conf.getWindowBottomOffset() * SPS::getWinUnitY( format );
+    }
+
+    const uint32_t width = p.width - ( padX >> ::getComponentScaleX( compID, format ) ); // padding is scaled down to this component's resolution
+    const uint32_t height = p.height - ( padY >> ( !!bPicIsField + ::getComponentScaleY( compID, format ) ) ); // one extra shift for field pictures
 
     // create new buffers with correct dimensions
     const CPelBuf recPB(p.bufAt(0, 0), p.stride, width, height);
     const CPelBuf orgPB(o.bufAt(0, 0), o.stride, width, height);
     const uint32_t    bitDepth = sps.getBitDepth(toChannelType(compID));
 #if ENABLE_QPA
-    const uint64_t uiSSDtemp = xFindDistortionPlane(recPB, orgPB, useWPSNR ? bitDepth : 0, ::getComponentScaleX(compID, format));
+    const uint64_t uiSSDtemp = xFindDistortionPlane(recPB, orgPB, useWPSNR ? bitDepth : 0, ::getComponentScaleX(compID, format), ::getComponentScaleY(compID, format)); // horizontal and vertical chroma shifts are passed separately
 #else
     const uint64_t uiSSDtemp = xFindDistortionPlane(recPB, orgPB, 0);
 #endif
@@ -3298,12 +3862,43 @@ void EncGOP::xCalculateAddPSNR(Picture* pcPic, PelUnitBuf cPicD, const AccessUni
       MSEyuvframeWeighted[comp] = (double)uiSSDtempWeighted / size;
     }
 #endif
+
+    if( m_pcEncLib->isRPREnabled() ) // second PSNR: rescaled reconstruction against the input-resolution original
+    {
+      const CPelBuf& upscaledOrg = sps.getUseLmcs() ? pcPic->M_BUFS( 0, PIC_TRUE_ORIGINAL_INPUT ).get( compID ) : pcPic->M_BUFS( 0, PIC_ORIGINAL_INPUT ).get( compID );
+
+      const uint32_t upscaledWidth = upscaledOrg.width - ( m_pcEncLib->getPad( 0 ) >> ::getComponentScaleX( compID, format ) ); // the encoder-wide padding applies at input resolution
+      const uint32_t upscaledHeight = upscaledOrg.height - ( m_pcEncLib->getPad( 1 ) >> ( !!bPicIsField + ::getComponentScaleY( compID, format ) ) );
+
+      // create new buffers with correct dimensions
+      const CPelBuf upscaledRecPB( upscaledRec.get( compID ).bufAt( 0, 0 ), upscaledRec.get( compID ).stride, upscaledWidth, upscaledHeight );
+      const CPelBuf upscaledOrgPB( upscaledOrg.bufAt( 0, 0 ), upscaledOrg.stride, upscaledWidth, upscaledHeight );
+
+#if ENABLE_QPA
+      const uint64_t upscaledSSD = xFindDistortionPlane( upscaledRecPB, upscaledOrgPB, useWPSNR ? bitDepth : 0, ::getComponentScaleX( compID, format ), ::getComponentScaleY( compID, format ) ); // both chroma shifts, as for the coded-resolution SSD
+#else
+      const uint64_t upscaledSSD = xFindDistortionPlane( upscaledRecPB, upscaledOrgPB, 0 ); // must define upscaledSSD, which is read right below
+#endif
+
+      upscaledPSNR[comp] = upscaledSSD ? 10.0 * log10( (double)maxval * maxval * upscaledWidth * upscaledHeight / (double)upscaledSSD ) : 999.99; // 999.99 marks a zero-distortion plane
+    }
   }
 
 #if EXTENSION_360_VIDEO
   m_ext360.calculatePSNRs(pcPic);
 #endif
 
+#if JVET_O0756_CALCULATE_HDRMETRICS
+  const bool calculateHdrMetrics = m_pcEncLib->getCalcluateHdrMetrics();
+  if (calculateHdrMetrics)
+  {
+    auto beforeTime = std::chrono::steady_clock::now();
+    xCalculateHDRMetrics(pcPic, deltaE, psnrL);
+    auto elapsed = std::chrono::steady_clock::now() - beforeTime;
+    m_metricTime += elapsed; // time spent in the HDR metric computation is accumulated separately
+  }
+#endif
+
   /* calculate the size of the access unit, excluding:
    *  - any AnnexB contributions (start_code_prefix, zero_byte, etc.,)
    *  - SEI NAL units
@@ -3319,11 +3914,7 @@ void EncGOP::xCalculateAddPSNR(Picture* pcPic, PelUnitBuf cPicD, const AccessUni
     if( ( *it )->m_nalUnitType != NAL_UNIT_PREFIX_SEI && ( *it )->m_nalUnitType != NAL_UNIT_SUFFIX_SEI )
     {
       numRBSPBytes += numRBSPBytes_nal;
-#if HEVC_VPS
-      if( it == accessUnit.begin() || ( *it )->m_nalUnitType == NAL_UNIT_VPS || ( *it )->m_nalUnitType == NAL_UNIT_SPS || ( *it )->m_nalUnitType == NAL_UNIT_PPS )
-#else
-      if (it == accessUnit.begin() || (*it)->m_nalUnitType == NAL_UNIT_SPS || (*it)->m_nalUnitType == NAL_UNIT_PPS)
-#endif
+      if (it == accessUnit.begin() || (*it)->m_nalUnitType == NAL_UNIT_VPS || (*it)->m_nalUnitType == NAL_UNIT_DPS || (*it)->m_nalUnitType == NAL_UNIT_SPS || (*it)->m_nalUnitType == NAL_UNIT_PPS) // the first NAL unit of the access unit and every VPS/DPS/SPS/PPS NAL unit are charged 4 bytes, all others 3
       {
         numRBSPBytes += 4;
       }
@@ -3339,45 +3930,73 @@ void EncGOP::xCalculateAddPSNR(Picture* pcPic, PelUnitBuf cPicD, const AccessUni
 
   //===== add PSNR =====
   m_gcAnalyzeAll.addResult(dPSNR, (double)uibits, MSEyuvframe
+    , upscaledPSNR // RPR upscaled PSNR; all zeros when RPR is disabled
     , isEncodeLtRef
   );
 #if EXTENSION_360_VIDEO
   m_ext360.addResult(m_gcAnalyzeAll);
+#endif
+#if JVET_O0756_CALCULATE_HDRMETRICS
+  if (calculateHdrMetrics) // HDR metrics are accumulated in the overall analyser and, below, in the per-slice-type one
+  {
+    m_gcAnalyzeAll.addHDRMetricsResult(deltaE, psnrL);
+  }
 #endif
   if (pcSlice->isIntra())
   {
     m_gcAnalyzeI.addResult(dPSNR, (double)uibits, MSEyuvframe
+      , upscaledPSNR
       , isEncodeLtRef
     );
     *PSNR_Y = dPSNR[COMPONENT_Y];
 #if EXTENSION_360_VIDEO
     m_ext360.addResult(m_gcAnalyzeI);
+#endif
+#if JVET_O0756_CALCULATE_HDRMETRICS
+    if (calculateHdrMetrics)
+    {
+      m_gcAnalyzeI.addHDRMetricsResult(deltaE, psnrL);
+    }
 #endif
   }
   if (pcSlice->isInterP())
   {
     m_gcAnalyzeP.addResult(dPSNR, (double)uibits, MSEyuvframe
+      , upscaledPSNR
       , isEncodeLtRef
     );
     *PSNR_Y = dPSNR[COMPONENT_Y];
 #if EXTENSION_360_VIDEO
     m_ext360.addResult(m_gcAnalyzeP);
+#endif
+#if JVET_O0756_CALCULATE_HDRMETRICS
+    if (calculateHdrMetrics)
+    {
+      m_gcAnalyzeP.addHDRMetricsResult(deltaE, psnrL);
+    }
 #endif
   }
   if (pcSlice->isInterB())
   {
     m_gcAnalyzeB.addResult(dPSNR, (double)uibits, MSEyuvframe
+      , upscaledPSNR
       , isEncodeLtRef
     );
     *PSNR_Y = dPSNR[COMPONENT_Y];
 #if EXTENSION_360_VIDEO
     m_ext360.addResult(m_gcAnalyzeB);
+#endif
+#if JVET_O0756_CALCULATE_HDRMETRICS
+    if (calculateHdrMetrics)
+    {
+      m_gcAnalyzeB.addHDRMetricsResult(deltaE, psnrL);
+    }
 #endif
   }
 #if WCG_WPSNR
   if (useLumaWPSNR)
   {
-    m_gcAnalyzeWPSNR.addResult(dPSNRWeighted, (double)uibits, MSEyuvframeWeighted, isEncodeLtRef);
+    m_gcAnalyzeWPSNR.addResult( dPSNRWeighted, (double)uibits, MSEyuvframeWeighted, upscaledPSNR, isEncodeLtRef ); // NOTE(review): the upscaled PSNR handed to the WPSNR analyser is the same unweighted array as above
   }
 #endif
 
@@ -3386,11 +4005,13 @@ void EncGOP::xCalculateAddPSNR(Picture* pcPic, PelUnitBuf cPicD, const AccessUni
   {
     c += 32;
   }
+  if (m_pcCfg->getDependentRAPIndicationSEIEnabled() && pcSlice->isDRAP()) c = 'D'; // DRAP pictures are logged as "D-SLICE" when the DRAP indication SEI is enabled
 
   if( g_verbosity >= NOTICE )
   {
-    msg( NOTICE, "POC %4d TId: %1d ( %c-SLICE, QP %d ) %10d bits",
-         pcSlice->getPOC() - pcSlice->getLastIDR(),
+    msg( NOTICE, "POC %4d LId: %2d TId: %1d ( %c-SLICE, QP %d ) %10d bits",
+         pcSlice->getPOC(), // absolute POC; the last-IDR POC is no longer subtracted
+         pcSlice->getPic()->layerId, // layer id, printed as "LId"
          pcSlice->getTLayer(),
          c,
          pcSlice->getSliceQp(),
@@ -3426,6 +4047,55 @@ void EncGOP::xCalculateAddPSNR(Picture* pcPic, PelUnitBuf cPicD, const AccessUni
     if (useLumaWPSNR)
     {
       msg(NOTICE, " [WY %6.4lf dB    WU %6.4lf dB    WV %6.4lf dB]", dPSNRWeighted[COMPONENT_Y], dPSNRWeighted[COMPONENT_Cb], dPSNRWeighted[COMPONENT_Cr]);
+
+      if (m_pcEncLib->getPrintHexPsnr()) // also dump the raw bit patterns of the weighted PSNR doubles
+      {
+        uint64_t xPsnrWeighted[MAX_NUM_COMPONENT];
+        for (int i = 0; i < MAX_NUM_COMPONENT; i++)
+        {
+          copy(reinterpret_cast<uint8_t *>(&dPSNRWeighted[i]),
+               reinterpret_cast<uint8_t *>(&dPSNRWeighted[i]) + sizeof(dPSNRWeighted[i]),
+               reinterpret_cast<uint8_t *>(&xPsnrWeighted[i]));
+        }
+        msg(NOTICE, " [xWY %16" PRIx64 " xWU %16" PRIx64 " xWV %16" PRIx64 "]", xPsnrWeighted[COMPONENT_Y], xPsnrWeighted[COMPONENT_Cb], xPsnrWeighted[COMPONENT_Cr]);
+      }
+    }
+#endif
+#if JVET_O0756_CALCULATE_HDRMETRICS
+    if(calculateHdrMetrics)
+    {
+      for (int i=0; i<1; i++) // only the first entry is reported
+      {
+        msg(NOTICE, " [DeltaE%d %6.4lf dB]", (int)m_pcCfg->getWhitePointDeltaE(i), deltaE[i]);
+        if (m_pcEncLib->getPrintHexPsnr())
+        {
+          // Copy the bit pattern of deltaE[i] into an unsigned 64-bit value for the
+          // hex dump: PRIx64 expects uint64_t, and indexing with the outer `i`
+          // keeps the value and its label in step.
+          uint64_t xdeltaE = 0;
+          copy(reinterpret_cast<uint8_t *>(&deltaE[i]),
+               reinterpret_cast<uint8_t *>(&deltaE[i]) + sizeof(deltaE[i]),
+               reinterpret_cast<uint8_t *>(&xdeltaE));
+          msg(NOTICE, " [xDeltaE%d %16" PRIx64 "]", (int)m_pcCfg->getWhitePointDeltaE(i), xdeltaE);
+        }
+      }
+      for (int i=0; i<1; i++) // only the first entry is reported
+      {
+        msg(NOTICE, " [PSNRL%d %6.4lf dB]", (int)m_pcCfg->getWhitePointDeltaE(i), psnrL[i]);
+
+        if (m_pcEncLib->getPrintHexPsnr())
+        {
+          // Same bit-pattern dump as for deltaE, here for psnrL[i].
+          uint64_t xpsnrL = 0;
+          copy(reinterpret_cast<uint8_t *>(&psnrL[i]),
+               reinterpret_cast<uint8_t *>(&psnrL[i]) + sizeof(psnrL[i]),
+               reinterpret_cast<uint8_t *>(&xpsnrL));
+          msg(NOTICE, " [xPSNRL%d %16" PRIx64 "]", (int)m_pcCfg->getWhitePointDeltaE(i), xpsnrL);
+
+
+
+        }
+      }
     }
 #endif
     msg( NOTICE, " [ET %5.0f ]", dEncTime );
@@ -3437,10 +4107,36 @@ void EncGOP::xCalculateAddPSNR(Picture* pcPic, PelUnitBuf cPicD, const AccessUni
       msg( NOTICE, " [L%d ", iRefList );
       for( int iRefIndex = 0; iRefIndex < pcSlice->getNumRefIdx( RefPicList( iRefList ) ); iRefIndex++ )
       {
-        msg( NOTICE, "%d ", pcSlice->getRefPOC( RefPicList( iRefList ), iRefIndex ) - pcSlice->getLastIDR() );
+        const std::pair<int, int>& scaleRatio = pcSlice->getScalingRatio( RefPicList( iRefList ), iRefIndex ); // (x, y) ratio in units of 1 << SCALE_RATIO_BITS; exactly that value means unscaled
+        
+        if( pcPic->cs->picHeader->getEnableTMVPFlag() && pcSlice->getColFromL0Flag() == bool(1 - iRefList) && pcSlice->getColRefIdx() == iRefIndex ) // this entry is the collocated picture: its POC gets a 'c' suffix
+        {
+          if ( scaleRatio.first != 1 << SCALE_RATIO_BITS || scaleRatio.second != 1 << SCALE_RATIO_BITS )
+            msg( NOTICE, "%dc(%1.2lfx, %1.2lfx) ", pcSlice->getRefPOC( RefPicList( iRefList ), iRefIndex ), double( scaleRatio.first ) / ( 1 << SCALE_RATIO_BITS ), double( scaleRatio.second ) / ( 1 << SCALE_RATIO_BITS ) );
+          else
+            msg( NOTICE, "%dc ", pcSlice->getRefPOC( RefPicList( iRefList ), iRefIndex ) );
+        }
+        else
+        {
+          if ( scaleRatio.first != 1 << SCALE_RATIO_BITS || scaleRatio.second != 1 << SCALE_RATIO_BITS ) // scaled reference: print the ratios after the POC
+            msg( NOTICE, "%d(%1.2lfx, %1.2lfx) ", pcSlice->getRefPOC( RefPicList( iRefList ), iRefIndex ), double( scaleRatio.first ) / ( 1 << SCALE_RATIO_BITS ), double( scaleRatio.second ) / ( 1 << SCALE_RATIO_BITS ) );
+          else
+            msg( NOTICE, "%d ", pcSlice->getRefPOC( RefPicList( iRefList ), iRefIndex ) );
+        }
+
+        if( pcSlice->getRefPOC( RefPicList( iRefList ), iRefIndex ) == pcSlice->getPOC() ) // reference with the current picture's POC: append its layer id
+        {
+          msg( NOTICE, ".%d", pcSlice->getRefPic( RefPicList( iRefList ), iRefIndex )->layerId );
+        }
+
+        msg( NOTICE, " " );
       }
       msg( NOTICE, "]" );
     }
+    if( m_pcEncLib->isRPREnabled() ) // extra log line with the upscaled PSNR values
+    {
+      msg( NOTICE, "\nPSNR2: [Y %6.4lf dB    U %6.4lf dB    V %6.4lf dB]", upscaledPSNR[COMPONENT_Y], upscaledPSNR[COMPONENT_Cb], upscaledPSNR[COMPONENT_Cr] );
+    }
   }
   else if( g_verbosity >= INFO )
   {
@@ -3449,6 +4145,103 @@ void EncGOP::xCalculateAddPSNR(Picture* pcPic, PelUnitBuf cPicD, const AccessUni
   }
 }
 
+#if JVET_O0756_CALCULATE_HDRMETRICS
+void EncGOP::xCalculateHDRMetrics( Picture* pcPic, double deltaE[hdrtoolslib::NB_REF_WHITE], double psnrL[hdrtoolslib::NB_REF_WHITE]) // runs the hdrtools processing chain on pcPic; only element 0 of deltaE / psnrL is written
+{
+  copyBuftoFrame(pcPic); // fills frame slot 0, or slot 1 directly for 4:4:4 input
+
+  ChromaFormat chFmt =  pcPic->chromaFormat;
+
+  if (chFmt != CHROMA_444)
+  {
+    m_pcConvertFormat->process(m_ppcFrameOrg[1], m_ppcFrameOrg[0]); // slot 0 -> slot 1 (presumably chroma upsampling to 4:4:4 -- confirm)
+    m_pcConvertFormat->process(m_ppcFrameRec[1], m_ppcFrameRec[0]);
+  }
+
+  m_pcConvertIQuantize->process(m_ppcFrameOrg[2], m_ppcFrameOrg[1]); // slot 1 -> slot 2
+  m_pcConvertIQuantize->process(m_ppcFrameRec[2], m_ppcFrameRec[1]);
+
+  m_pcColorTransform->process(m_ppcFrameOrg[3], m_ppcFrameOrg[2]); // slot 2 -> slot 3
+  m_pcColorTransform->process(m_ppcFrameRec[3], m_ppcFrameRec[2]);
+
+  m_pcTransferFct->forward(m_ppcFrameOrg[4], m_ppcFrameOrg[3]); // slot 3 -> slot 4
+  m_pcTransferFct->forward(m_ppcFrameRec[4], m_ppcFrameRec[3]);
+
+  // Calculate the Metrics
+  m_pcDistortionDeltaE->computeMetric(m_ppcFrameOrg[4], m_ppcFrameRec[4]); // original vs. reconstruction, both taken from slot 4
+
+  *deltaE = m_pcDistortionDeltaE->getDeltaE(); // stored in deltaE[0]
+  *psnrL  = m_pcDistortionDeltaE->getPsnrL(); // stored in psnrL[0]
+
+}
+
+void EncGOP::copyBuftoFrame( Picture* pcPic ) // copies the cropped original and reconstructed planes of pcPic into the hdrtools frame buffers
+{
+  int cropOffsetLeft   = m_pcCfg->getCropOffsetLeft();
+  int cropOffsetTop    = m_pcCfg->getCropOffsetTop();
+  int cropOffsetRight  = m_pcCfg->getCropOffsetRight();
+  int cropOffsetBottom = m_pcCfg->getCropOffsetBottom();
+  // rows are cropped by the top/bottom offsets, columns by the left/right offsets (matching the indexing in the copy loops)
+  int height = pcPic->getOrigBuf(COMPONENT_Y).height - cropOffsetTop + cropOffsetBottom;
+  int width = pcPic->getOrigBuf(COMPONENT_Y).width - cropOffsetLeft + cropOffsetRight;
+  // NOTE(review): right/bottom offsets are added, so they are presumably <= 0 -- confirm against the configuration
+  ChromaFormat chFmt =  pcPic->chromaFormat;
+
+  Pel* pOrg = pcPic->getOrigBuf(COMPONENT_Y).buf;
+  Pel* pRec = pcPic->getRecoBuf(COMPONENT_Y).buf;
+  // default destination: frame slot 0 (used for non-4:4:4 input)
+  uint16_t* yOrg = m_ppcFrameOrg[0]->m_ui16Comp[hdrtoolslib::Y_COMP];
+  uint16_t* yRec = m_ppcFrameRec[0]->m_ui16Comp[hdrtoolslib::Y_COMP];
+  uint16_t* uOrg = m_ppcFrameOrg[0]->m_ui16Comp[hdrtoolslib::Cb_COMP];
+  uint16_t* uRec = m_ppcFrameRec[0]->m_ui16Comp[hdrtoolslib::Cb_COMP];
+  uint16_t* vOrg = m_ppcFrameOrg[0]->m_ui16Comp[hdrtoolslib::Cr_COMP];
+  uint16_t* vRec = m_ppcFrameRec[0]->m_ui16Comp[hdrtoolslib::Cr_COMP];
+  // 4:4:4 input is written straight into frame slot 1
+  if(chFmt == CHROMA_444){
+    yOrg = m_ppcFrameOrg[1]->m_ui16Comp[hdrtoolslib::Y_COMP];
+    yRec = m_ppcFrameRec[1]->m_ui16Comp[hdrtoolslib::Y_COMP];
+    uOrg = m_ppcFrameOrg[1]->m_ui16Comp[hdrtoolslib::Cb_COMP];
+    uRec = m_ppcFrameRec[1]->m_ui16Comp[hdrtoolslib::Cb_COMP];
+    vOrg = m_ppcFrameOrg[1]->m_ui16Comp[hdrtoolslib::Cr_COMP];
+    vRec = m_ppcFrameRec[1]->m_ui16Comp[hdrtoolslib::Cr_COMP];
+  }
+  // luma: source rows use the picture stride, destination rows are packed with a pitch of `width`
+  for (int i = 0; i < height; i++) {
+    for (int j = 0; j < width; j++) {
+      yOrg[i*width + j] = static_cast<uint16_t>(pOrg[(i + cropOffsetTop) * pcPic->getOrigBuf(COMPONENT_Y).stride + j + cropOffsetLeft]);
+      yRec[i*width + j] = static_cast<uint16_t>(pRec[(i + cropOffsetTop) * pcPic->getRecoBuf(COMPONENT_Y).stride + j + cropOffsetLeft]);
+    }
+  }
+  // NOTE(review): both chroma dimensions are halved for every non-4:4:4 format, which only matches 4:2:0 -- confirm 4:2:2 / 4:0:0 never reach here
+  if (chFmt != CHROMA_444) {
+    height >>= 1;
+    width  >>= 1;
+    cropOffsetLeft >>= 1;
+    cropOffsetTop >>= 1;
+  }
+
+  pOrg = pcPic->getOrigBuf(COMPONENT_Cb).buf;
+  pRec = pcPic->getRecoBuf(COMPONENT_Cb).buf;
+
+  for (int i = 0; i < height; i++) {
+    for (int j = 0; j < width; j++) {
+      uOrg[i*width + j] = static_cast<uint16_t>(pOrg[(i + cropOffsetTop) * pcPic->getOrigBuf(COMPONENT_Cb).stride + j + cropOffsetLeft]);
+      uRec[i*width + j] = static_cast<uint16_t>(pRec[(i + cropOffsetTop) * pcPic->getRecoBuf(COMPONENT_Cb).stride + j + cropOffsetLeft]);
+    }
+  }
+
+  pOrg = pcPic->getOrigBuf(COMPONENT_Cr).buf;
+  pRec = pcPic->getRecoBuf(COMPONENT_Cr).buf;
+
+  for (int i = 0; i < height; i++) {
+    for (int j = 0; j < width; j++) {
+      vOrg[i*width + j] = static_cast<uint16_t>(pOrg[(i + cropOffsetTop) * pcPic->getOrigBuf(COMPONENT_Cr).stride + j + cropOffsetLeft]);
+      vRec[i*width + j] = static_cast<uint16_t>(pRec[(i + cropOffsetTop) * pcPic->getRecoBuf(COMPONENT_Cr).stride + j + cropOffsetLeft]);
+    }
+  }
+}
+#endif
+
 void EncGOP::xCalculateInterlacedAddPSNR( Picture* pcPicOrgFirstField, Picture* pcPicOrgSecondField,
                                           PelUnitBuf cPicRecFirstField, PelUnitBuf cPicRecSecondField,
                                           const InputColourSpaceConversion conversion, const bool printFrameMSE, double* PSNR_Y
@@ -3501,7 +4294,7 @@ void EncGOP::xCalculateInterlacedAddPSNR( Picture* pcPicOrgFirstField, Picture*
     {
       CHECK(!(conversion == IPCOLOURSPACE_UNCHANGED), "Unspecified error");
 #if ENABLE_QPA
-      uiSSDtemp += xFindDistortionPlane( acPicRecFields[fieldNum].get(ch), apcPicOrgFields[fieldNum]->getOrigBuf().get(ch), useWPSNR ? bitDepth : 0, ::getComponentScaleX(ch, format) );
+      uiSSDtemp += xFindDistortionPlane( acPicRecFields[fieldNum].get(ch), apcPicOrgFields[fieldNum]->getOrigBuf().get(ch), useWPSNR ? bitDepth : 0, ::getComponentScaleX(ch, format), ::getComponentScaleY(ch, format) ); // horizontal and vertical chroma shifts are passed separately
 #else
       uiSSDtemp += xFindDistortionPlane( acPicRecFields[fieldNum].get(ch), apcPicOrgFields[fieldNum]->getOrigBuf().get(ch), 0 );
 #endif
@@ -3517,12 +4310,13 @@ void EncGOP::xCalculateInterlacedAddPSNR( Picture* pcPicOrgFirstField, Picture*
 
   //===== add PSNR =====
   m_gcAnalyzeAll_in.addResult (dPSNR, (double)uibits, MSEyuvframe
+    , MSEyuvframe // NOTE(review): this slot receives upscaledPSNR in xCalculateAddPSNR; here the MSE array is passed in its place -- presumably a placeholder, confirm it is never printed as PSNR
     , isEncodeLtRef
   );
 
   *PSNR_Y = dPSNR[COMPONENT_Y];
 
-  msg( DETAILS, "\n                                      Interlaced frame %d: [Y %6.4lf dB    U %6.4lf dB    V %6.4lf dB]", pcPicOrgSecondField->getPOC()/2 , dPSNR[COMPONENT_Y], dPSNR[COMPONENT_Cb], dPSNR[COMPONENT_Cr] );
+  msg( INFO, "\n                                      Interlaced frame %d: [Y %6.4lf dB    U %6.4lf dB    V %6.4lf dB]", pcPicOrgSecondField->getPOC()/2, dPSNR[COMPONENT_Y], dPSNR[COMPONENT_Cb], dPSNR[COMPONENT_Cr] ); // logged at INFO level; frame number is the second field's POC / 2
   if (printFrameMSE)
   {
     msg( DETAILS, " [Y MSE %6.4lf  U MSE %6.4lf  V MSE %6.4lf]", MSEyuvframe[COMPONENT_Y], MSEyuvframe[COMPONENT_Cb], MSEyuvframe[COMPONENT_Cr] );
@@ -3545,17 +4339,13 @@ NalUnitType EncGOP::getNalUnitType(int pocCurr, int lastIDR, bool isField)
 {
   if (pocCurr == 0)
   {
-    return NAL_UNIT_CODED_SLICE_IDR_W_RADL;
+    return NAL_UNIT_CODED_SLICE_IDR_N_LP; // POC 0 is coded as an IDR of the "no leading pictures" type
   }
 
   if (m_pcCfg->getEfficientFieldIRAPEnabled() && isField && pocCurr == (m_pcCfg->getUseCompositeRef() ? 2: 1))
   {
     // to avoid the picture becoming an IRAP
-#if !JVET_M0101_HLS
-    return NAL_UNIT_CODED_SLICE_TRAIL_R;
-#else
     return NAL_UNIT_CODED_SLICE_TRAIL;
-#endif
   }
 
   if (m_pcCfg->getDecodingRefreshType() != 3 && (pocCurr - isField) % (m_pcCfg->getIntraPeriod() * (m_pcCfg->getUseCompositeRef() ? 2 : 1)) == 0)
@@ -3578,29 +4368,17 @@ NalUnitType EncGOP::getNalUnitType(int pocCurr, int lastIDR, bool isField)
       // picture can be still decodable when random accessing to a CRA/CRANT/BLA/BLANT picture by
       // controlling the reference pictures used for encoding that leading picture. Such a leading
       // picture need not be marked as a TFD picture.
-#if !JVET_M0101_HLS
-      return NAL_UNIT_CODED_SLICE_RASL_R;
-#else
       return NAL_UNIT_CODED_SLICE_RASL;
-#endif
     }
   }
   if (lastIDR>0)
   {
     if (pocCurr < lastIDR)
     {
-#if !JVET_M0101_HLS
-      return NAL_UNIT_CODED_SLICE_RADL_R;
-#else
       return NAL_UNIT_CODED_SLICE_RADL;
-#endif
     }
   }
-#if !JVET_M0101_HLS
-  return NAL_UNIT_CODED_SLICE_TRAIL_R;
-#else
   return NAL_UNIT_CODED_SLICE_TRAIL;
-#endif
 }
 
 void EncGOP::xUpdateRasInit(Slice* slice)
@@ -3682,112 +4460,6 @@ void EncGOP::xAttachSliceDataToNalUnit (OutputNALUnit& rNalu, OutputBitstream* c
   codedSliceData->clear();
 }
 
-// Function will arrange the long-term pictures in the decreasing order of poc_lsb_lt,
-// and among the pictures with the same lsb, it arranges them in increasing delta_poc_msb_cycle_lt value
-void EncGOP::arrangeLongtermPicturesInRPS(Slice *pcSlice, PicList& rcListPic)
-{
-  if(pcSlice->getRPS()->getNumberOfLongtermPictures() == 0)
-  {
-    return;
-  }
-  // we can only modify the local RPS!
-  CHECK(!(pcSlice->getRPSidx()==-1), "Unspecified error");
-  ReferencePictureSet *rps = pcSlice->getLocalRPS();
-
-  // Arrange long-term reference pictures in the correct order of LSB and MSB,
-  // and assign values for pocLSBLT and MSB present flag
-  int longtermPicsPoc[MAX_NUM_REF_PICS], longtermPicsLSB[MAX_NUM_REF_PICS], indices[MAX_NUM_REF_PICS];
-  int longtermPicsMSB[MAX_NUM_REF_PICS];
-  bool mSBPresentFlag[MAX_NUM_REF_PICS];
-  ::memset(longtermPicsPoc, 0, sizeof(longtermPicsPoc));    // Store POC values of LTRP
-  ::memset(longtermPicsLSB, 0, sizeof(longtermPicsLSB));    // Store POC LSB values of LTRP
-  ::memset(longtermPicsMSB, 0, sizeof(longtermPicsMSB));    // Store POC LSB values of LTRP
-  ::memset(indices        , 0, sizeof(indices));            // Indices to aid in tracking sorted LTRPs
-  ::memset(mSBPresentFlag , 0, sizeof(mSBPresentFlag));     // Indicate if MSB needs to be present
-
-  // Get the long-term reference pictures
-  int offset = rps->getNumberOfNegativePictures() + rps->getNumberOfPositivePictures();
-  int i, ctr = 0;
-  int maxPicOrderCntLSB = 1 << pcSlice->getSPS()->getBitsForPOC();
-  for(i = rps->getNumberOfPictures() - 1; i >= offset; i--, ctr++)
-  {
-    longtermPicsPoc[ctr] = rps->getPOC(i);                                  // LTRP POC
-    longtermPicsLSB[ctr] = getLSB(longtermPicsPoc[ctr], maxPicOrderCntLSB); // LTRP POC LSB
-    indices[ctr]      = i;
-    longtermPicsMSB[ctr] = longtermPicsPoc[ctr] - longtermPicsLSB[ctr];
-  }
-  int numLongPics = rps->getNumberOfLongtermPictures();
-  CHECK(!(ctr == numLongPics), "Unspecified error");
-
-  // Arrange pictures in decreasing order of MSB;
-  for(i = 0; i < numLongPics; i++)
-  {
-    for(int j = 0; j < numLongPics - 1; j++)
-    {
-      if(longtermPicsMSB[j] < longtermPicsMSB[j+1])
-      {
-        std::swap(longtermPicsPoc[j], longtermPicsPoc[j+1]);
-        std::swap(longtermPicsLSB[j], longtermPicsLSB[j+1]);
-        std::swap(longtermPicsMSB[j], longtermPicsMSB[j+1]);
-        std::swap(indices[j]        , indices[j+1]        );
-      }
-    }
-  }
-
-  for(i = 0; i < numLongPics; i++)
-  {
-    // Check if MSB present flag should be enabled.
-    // Check if the buffer contains any pictures that have the same LSB.
-    PicList::iterator  iterPic = rcListPic.begin();
-    Picture*                      pcPic;
-    while ( iterPic != rcListPic.end() )
-    {
-      pcPic = *iterPic;
-      if( (getLSB(pcPic->getPOC(), maxPicOrderCntLSB) == longtermPicsLSB[i])   &&     // Same LSB
-                                      (pcPic->referenced)     &&    // Reference picture
-                                        (pcPic->getPOC() != longtermPicsPoc[i])    )  // Not the LTRP itself
-      {
-        mSBPresentFlag[i] = true;
-        break;
-      }
-      iterPic++;
-    }
-  }
-
-  // tempArray for usedByCurr flag
-  bool tempArray[MAX_NUM_REF_PICS]; ::memset(tempArray, 0, sizeof(tempArray));
-  for(i = 0; i < numLongPics; i++)
-  {
-    tempArray[i] = rps->getUsed(indices[i]);
-  }
-  // Now write the final values;
-  ctr = 0;
-  int currMSB = 0, currLSB = 0;
-  // currPicPoc = currMSB + currLSB
-  currLSB = getLSB(pcSlice->getPOC(), maxPicOrderCntLSB);
-  currMSB = pcSlice->getPOC() - currLSB;
-
-  for(i = rps->getNumberOfPictures() - 1; i >= offset; i--, ctr++)
-  {
-    rps->setPOC                   (i, longtermPicsPoc[ctr]);
-    rps->setDeltaPOC              (i, - pcSlice->getPOC() + longtermPicsPoc[ctr]);
-    rps->setUsed                  (i, tempArray[ctr]);
-    rps->setPocLSBLT              (i, longtermPicsLSB[ctr]);
-    rps->setDeltaPocMSBCycleLT    (i, (currMSB - (longtermPicsPoc[ctr] - longtermPicsLSB[ctr])) / maxPicOrderCntLSB);
-    rps->setDeltaPocMSBPresentFlag(i, mSBPresentFlag[ctr]);
-
-    CHECK(!(rps->getDeltaPocMSBCycleLT(i) >= 0), "Unspecified error");   // Non-negative value
-  }
-  for(i = rps->getNumberOfPictures() - 1, ctr = 1; i >= offset; i--, ctr++)
-  {
-    for(int j = rps->getNumberOfPictures() - 1 - ctr; j >= offset; j--)
-    {
-      // Here at the encoder we know that we have set the full POC value for the LTRPs, hence we
-      // don't have to check the MSB present flag values for this constraint.
-      CHECK(!( rps->getPOC(i) != rps->getPOC(j) ), "Unspecified error"); // If assert fails, LTRP entry repeated in RPS!!!
-    }
-  }
-}
 
 void EncGOP::arrangeCompositeReference(Slice* pcSlice, PicList& rcListPic, int pocCurr)
 {
@@ -4014,11 +4686,7 @@ void EncGOP::applyDeblockingFilterMetric( Picture* pcPic, uint32_t uiNumSlices )
 
   Pel* tempRec = Rec;
   const Slice* pcSlice = pcPic->slices[0];
-#if MAX_TB_SIZE_SIGNALLING
   const uint32_t log2maxTB = pcSlice->getSPS()->getLog2MaxTbSize();
-#else
-  const uint32_t log2maxTB = MAX_TB_LOG2_SIZEY;
-#endif
   const uint32_t maxTBsize = (1<<log2maxTB);
   const uint32_t minBlockArtSize = 8;
   const uint32_t noCol = (picWidth>>log2maxTB);
@@ -4262,4 +4930,273 @@ void EncGOP::applyDeblockingFilterParameterSelection( Picture* pcPic, const uint
   }
 }
 #endif
+
+void EncGOP::xCreateExplicitReferencePictureSetFromReference( Slice* slice, PicList& rcListPic, const ReferencePictureList *rpl0, const ReferencePictureList *rpl1 )
+{ // Rebuilds the slice-local RPL0/RPL1 from rpl0/rpl1, keeping only entries whose picture is found in rcListPic, then installs them on the slice.
+  Picture* rpcPic;
+  int pocCycle = 0;
+
+  Picture* pic = slice->getPic();
+  const VPS* vps = slice->getPic()->cs->vps;
+  int layerIdx = vps == nullptr ? 0 : vps->getGeneralLayerIdx( pic->layerId ); // layer index 0 is used when no VPS is attached
+
+  ReferencePictureList* pLocalRPL0 = slice->getLocalRPL0();
+  *pLocalRPL0 = ReferencePictureList( slice->getSPS()->getInterLayerPresentFlag() ); // reset the slice-local list before refilling it
+
+  uint32_t numOfSTRPL0 = 0; // short-term entries written to the local L0
+  uint32_t numOfLTRPL0 = 0; // long-term (non inter-layer) entries written to the local L0
+  uint32_t numOfILRPL0 = 0; // inter-layer entries written to the local L0
+  uint32_t numOfRefPic = rpl0->getNumberOfShorttermPictures() + rpl0->getNumberOfLongtermPictures();
+  uint32_t refPicIdxL0 = 0; // next write position in the local L0
+
+  for( int ii = 0; ii < numOfRefPic; ii++ )
+  {
+    // loop through all pictures in the reference picture buffer
+    PicList::iterator iterPic = rcListPic.begin();
+    bool isAvailable = false;
+
+    pocCycle = 1 << ( slice->getSPS()->getBitsForPOC() );
+    while( iterPic != rcListPic.end() )
+    {
+      rpcPic = *( iterPic++ );
+
+      if( rpcPic->layerId == pic->layerId )
+      {
+        if( !rpl0->isRefPicLongterm( ii ) && rpcPic->referenced && rpcPic->getPOC() == slice->getPOC() - rpl0->getRefPicIdentifier( ii ) && !slice->isPocRestrictedByDRAP( rpcPic->getPOC(), rpcPic->precedingDRAP ) )
+        {
+          isAvailable = true; // short-term entry: identifier is compared as a POC difference to the current slice
+          break;
+        }
+        else if( rpl0->isRefPicLongterm( ii ) && rpcPic->referenced && ( rpcPic->getPOC() & ( pocCycle - 1 ) ) == rpl0->getRefPicIdentifier( ii ) && !slice->isPocRestrictedByDRAP( rpcPic->getPOC(), rpcPic->precedingDRAP ) )
+        {
+          isAvailable = true; // long-term entry: identifier is compared against the POC LSBs
+          break;
+        }
+      }
+    }
+
+    if( isAvailable )
+    {
+      pLocalRPL0->setRefPicIdentifier( refPicIdxL0, rpl0->getRefPicIdentifier( ii ), rpl0->isRefPicLongterm( ii ), false, NOT_VALID );
+      refPicIdxL0++;
+      numOfSTRPL0 = numOfSTRPL0 + ( ( rpl0->isRefPicLongterm( ii ) ) ? 0 : 1 );
+      numOfLTRPL0 += ( rpl0->isRefPicLongterm( ii ) && !rpl0->isInterLayerRefPic( ii ) ) ? 1 : 0;
+      isAvailable = false;
+    }
+  }
+
+  // inter-layer reference pictures are added to the end of the reference picture list
+  if( layerIdx && vps && !vps->getAllIndependentLayersFlag() )
+  {
+    numOfRefPic = rpl0->getNumberOfInterLayerPictures() ? rpl0->getNumberOfInterLayerPictures() : m_pcEncLib->getNumRefLayers( layerIdx );
+
+    for( int ii = 0; ii < numOfRefPic; ii++ )
+    {
+      // loop through all pictures in the reference picture buffer (NOTE(review): the scan does not depend on ii - confirm for more than one reference layer)
+      PicList::iterator iterPic = rcListPic.begin();
+
+      while( iterPic != rcListPic.end() )
+      {
+        rpcPic = *( iterPic++ );
+        int refLayerIdx = vps->getGeneralLayerIdx( rpcPic->layerId );
+
+        if( rpcPic->referenced && rpcPic->getPOC() == pic->getPOC() && vps->getDirectRefLayerFlag( layerIdx, refLayerIdx ) )
+        {
+          pLocalRPL0->setRefPicIdentifier( refPicIdxL0, 0, true, true, vps->getInterLayerRefIdc( layerIdx, refLayerIdx ) );
+          refPicIdxL0++;
+          numOfILRPL0++;
+          break;
+        }
+      }
+    }
+  }
+
+  if( slice->getEnableDRAPSEI() )
+  {
+    pLocalRPL0->setNumberOfShorttermPictures( numOfSTRPL0 ); // counts are needed by isPOCInRefPicList() below
+    pLocalRPL0->setNumberOfLongtermPictures( numOfLTRPL0 );
+    pLocalRPL0->setNumberOfInterLayerPictures( numOfILRPL0 );
+
+    if( !slice->isIRAP() && !slice->isPOCInRefPicList( pLocalRPL0, slice->getAssociatedIRAPPOC() ) )
+    {
+      if( slice->getUseLTforDRAP() && !slice->isPOCInRefPicList( rpl1, slice->getAssociatedIRAPPOC() ) )
+      {
+        // Adding associated IRAP as longterm picture
+        pLocalRPL0->setRefPicIdentifier( refPicIdxL0, slice->getAssociatedIRAPPOC(), true, false, 0 );
+        refPicIdxL0++;
+        numOfLTRPL0++;
+      }
+      else
+      {
+        // Adding associated IRAP as shortterm picture
+        pLocalRPL0->setRefPicIdentifier( refPicIdxL0, slice->getPOC() - slice->getAssociatedIRAPPOC(), false, false, 0 );
+        refPicIdxL0++;
+        numOfSTRPL0++;
+      }
+    }
+  }
+
+  ReferencePictureList* pLocalRPL1 = slice->getLocalRPL1();
+  *pLocalRPL1 = ReferencePictureList( slice->getSPS()->getInterLayerPresentFlag() ); // reset the slice-local list before refilling it
+
+  uint32_t numOfSTRPL1 = 0; // short-term entries written to the local L1
+  uint32_t numOfLTRPL1 = 0; // long-term (non inter-layer) entries written to the local L1
+  uint32_t numOfILRPL1 = 0; // inter-layer entries written to the local L1
+  numOfRefPic = rpl1->getNumberOfShorttermPictures() + rpl1->getNumberOfLongtermPictures();
+  uint32_t refPicIdxL1 = 0; // next write position in the local L1
+
+  for( int ii = 0; ii < numOfRefPic; ii++ )
+  {
+    // loop through all pictures in the reference picture buffer
+    PicList::iterator iterPic = rcListPic.begin();
+    bool isAvailable = false;
+    pocCycle = 1 << ( slice->getSPS()->getBitsForPOC() );
+    while( iterPic != rcListPic.end() )
+    {
+      rpcPic = *( iterPic++ );
+
+      if( rpcPic->layerId == pic->layerId )
+      {
+        if( !rpl1->isRefPicLongterm( ii ) && rpcPic->referenced && rpcPic->getPOC() == slice->getPOC() - rpl1->getRefPicIdentifier( ii ) && !slice->isPocRestrictedByDRAP( rpcPic->getPOC(), rpcPic->precedingDRAP ) )
+        {
+          isAvailable = true; // short-term entry: identifier is compared as a POC difference to the current slice
+          break;
+        }
+        else if( rpl1->isRefPicLongterm( ii ) && rpcPic->referenced && ( rpcPic->getPOC() & ( pocCycle - 1 ) ) == rpl1->getRefPicIdentifier( ii ) && !slice->isPocRestrictedByDRAP( rpcPic->getPOC(), rpcPic->precedingDRAP ) )
+        {
+          isAvailable = true; // long-term entry: identifier is compared against the POC LSBs
+          break;
+        }
+      }
+    }
+
+    if( isAvailable )
+    {
+      pLocalRPL1->setRefPicIdentifier( refPicIdxL1, rpl1->getRefPicIdentifier( ii ), rpl1->isRefPicLongterm( ii ), false, NOT_VALID );
+      refPicIdxL1++;
+      numOfSTRPL1 = numOfSTRPL1 + ( ( rpl1->isRefPicLongterm( ii ) ) ? 0 : 1 );
+      numOfLTRPL1 += ( rpl1->isRefPicLongterm( ii ) && !rpl1->isInterLayerRefPic( ii ) ) ? 1 : 0;
+      isAvailable = false;
+    }
+  }
+
+
+  // inter-layer reference pictures are added to the end of the reference picture list
+  if( layerIdx && vps && !vps->getAllIndependentLayersFlag() )
+  {
+    numOfRefPic = rpl1->getNumberOfInterLayerPictures() ? rpl1->getNumberOfInterLayerPictures() : m_pcEncLib->getNumRefLayers( layerIdx );
+
+    for( int ii = 0; ii < numOfRefPic; ii++ )
+    {
+      // loop through all pictures in the reference picture buffer (NOTE(review): the scan does not depend on ii - confirm for more than one reference layer)
+      PicList::iterator iterPic = rcListPic.begin();
+
+      while( iterPic != rcListPic.end() )
+      {
+        rpcPic = *( iterPic++ );
+        int refLayerIdx = vps->getGeneralLayerIdx( rpcPic->layerId );
+
+        if( rpcPic->referenced && rpcPic->getPOC() == pic->getPOC() && vps->getDirectRefLayerFlag( layerIdx, refLayerIdx ) )
+        {
+          pLocalRPL1->setRefPicIdentifier( refPicIdxL1, 0, true, true, vps->getInterLayerRefIdc( layerIdx, refLayerIdx ) );
+          refPicIdxL1++;
+          numOfILRPL1++;
+          break;
+        }
+      }
+    }
+  }
+
+  //Copy from L1 if we have less than active ref pic
+  int numOfNeedToFill = rpl0->getNumberOfActivePictures() - (numOfLTRPL0 + numOfSTRPL0);
+  bool isDisallowMixedRefPic = ( slice->getSPS()->getAllActiveRplEntriesHasSameSignFlag() ) ? true : false;
+  int originalL0StrpNum = numOfSTRPL0; // L0 counts before any entry is borrowed from L1; they bound the later L0 -> L1 copy
+  int originalL0LtrpNum = numOfLTRPL0;
+  int originalL0IlrpNum = numOfILRPL0;
+
+  for( int ii = 0; numOfNeedToFill > 0 && ii < ( pLocalRPL1->getNumberOfLongtermPictures() + pLocalRPL1->getNumberOfShorttermPictures() + pLocalRPL1->getNumberOfInterLayerPictures() ); ii++ )
+  {
+    if( ii <= ( numOfLTRPL1 + numOfSTRPL1 + numOfILRPL1 - 1 ) )
+    {
+      //Make sure this copy is not already in L0
+      bool canIncludeThis = true;
+      for( int jj = 0; jj < refPicIdxL0; jj++ )
+      {
+        if( ( pLocalRPL1->getRefPicIdentifier( ii ) == pLocalRPL0->getRefPicIdentifier( jj ) ) && ( pLocalRPL1->isRefPicLongterm( ii ) == pLocalRPL0->isRefPicLongterm( jj ) ) && pLocalRPL1->getInterLayerRefPicIdx( ii ) == pLocalRPL0->getInterLayerRefPicIdx( jj ) )
+        {
+          canIncludeThis = false;
+        }
+
+        bool sameSign = ( pLocalRPL1->getRefPicIdentifier( ii ) > 0 ) == ( pLocalRPL0->getRefPicIdentifier( 0 ) > 0 );
+
+        if( isDisallowMixedRefPic && canIncludeThis && !pLocalRPL1->isRefPicLongterm( ii ) && !sameSign )
+        {
+          canIncludeThis = false; // short-term candidate lies on the other side of the current POC than the first L0 entry
+        }
+      }
+      if( canIncludeThis )
+      {
+        pLocalRPL0->setRefPicIdentifier( refPicIdxL0, pLocalRPL1->getRefPicIdentifier( ii ), pLocalRPL1->isRefPicLongterm( ii ), pLocalRPL1->isInterLayerRefPic( ii ), pLocalRPL1->getInterLayerRefPicIdx( ii ) );
+        refPicIdxL0++;
+        numOfSTRPL0 = numOfSTRPL0 + ( ( pLocalRPL1->isRefPicLongterm( ii ) ) ? 0 : 1 );
+        numOfLTRPL0 += ( pLocalRPL1->isRefPicLongterm( ii ) && !pLocalRPL1->isInterLayerRefPic( ii ) ) ? 1 : 0;
+        numOfILRPL0 += pLocalRPL1->isInterLayerRefPic( ii ) ? 1 : 0;
+        numOfNeedToFill--;
+      }
+    }
+  }
+  pLocalRPL0->setNumberOfLongtermPictures( numOfLTRPL0 );
+  pLocalRPL0->setNumberOfShorttermPictures( numOfSTRPL0 );
+  pLocalRPL0->setNumberOfInterLayerPictures( numOfILRPL0 );
+  int numPics = numOfLTRPL0 + numOfSTRPL0;
+
+  pLocalRPL0->setNumberOfActivePictures( ( numPics < rpl0->getNumberOfActivePictures() ? numPics : rpl0->getNumberOfActivePictures() ) + numOfILRPL0 );
+  pLocalRPL0->setLtrpInSliceHeaderFlag( rpl0->getLtrpInSliceHeaderFlag() );
+  slice->setRPL0idx( -1 ); // the index is set to -1 because the slice now uses its local list
+  slice->setRPL0( pLocalRPL0 );
+
+  //Copy from L0 if we have less than active ref pic
+  numOfNeedToFill = pLocalRPL0->getNumberOfActivePictures() - ( numOfLTRPL1 + numOfSTRPL1 );
+
+  for( int ii = 0; numOfNeedToFill > 0 && ii < ( pLocalRPL0->getNumberOfLongtermPictures() + pLocalRPL0->getNumberOfShorttermPictures() + pLocalRPL0->getNumberOfInterLayerPictures() ); ii++ )
+  {
+    if( ii <= ( originalL0StrpNum + originalL0LtrpNum + originalL0IlrpNum - 1 ) )
+    {
+      //Make sure this copy is not already in L1
+      bool canIncludeThis = true;
+      for( int jj = 0; jj < refPicIdxL1; jj++ )
+      {
+        if( ( pLocalRPL0->getRefPicIdentifier( ii ) == pLocalRPL1->getRefPicIdentifier( jj ) ) && ( pLocalRPL0->isRefPicLongterm( ii ) == pLocalRPL1->isRefPicLongterm( jj ) ) && pLocalRPL0->getInterLayerRefPicIdx( ii ) == pLocalRPL1->getInterLayerRefPicIdx( jj ) )
+        {
+          canIncludeThis = false;
+        }
+
+        bool sameSign = ( pLocalRPL0->getRefPicIdentifier( ii ) > 0 ) == ( pLocalRPL1->getRefPicIdentifier( 0 ) > 0 );
+
+        if( isDisallowMixedRefPic && canIncludeThis && !pLocalRPL0->isRefPicLongterm( ii ) && !sameSign )
+        {
+          canIncludeThis = false; // short-term candidate lies on the other side of the current POC than the first L1 entry
+        }
+      }
+      if( canIncludeThis )
+      {
+        pLocalRPL1->setRefPicIdentifier( refPicIdxL1, pLocalRPL0->getRefPicIdentifier( ii ), pLocalRPL0->isRefPicLongterm( ii ), pLocalRPL0->isInterLayerRefPic( ii ), pLocalRPL0->getInterLayerRefPicIdx( ii ) );
+        refPicIdxL1++;
+        numOfSTRPL1 = numOfSTRPL1 + ( ( pLocalRPL0->isRefPicLongterm( ii ) ) ? 0 : 1 );
+        numOfLTRPL1 += ( pLocalRPL0->isRefPicLongterm( ii ) && !pLocalRPL0->isInterLayerRefPic( ii ) ) ? 1 : 0;
+        numOfILRPL1 += pLocalRPL0->isInterLayerRefPic( ii ) ? 1 : 0; // inter-layer entries have their own counter, as in the L0 fill loop
+        numOfNeedToFill--;
+      }
+    }
+  }
+  pLocalRPL1->setNumberOfLongtermPictures( numOfLTRPL1 );
+  pLocalRPL1->setNumberOfShorttermPictures( numOfSTRPL1 );
+  pLocalRPL1->setNumberOfInterLayerPictures( numOfILRPL1 );
+  numPics = numOfLTRPL1 + numOfSTRPL1;
+
+  pLocalRPL1->setNumberOfActivePictures( ( isDisallowMixedRefPic ? numPics : ( numPics < rpl1->getNumberOfActivePictures() ? numPics : rpl1->getNumberOfActivePictures() ) ) + numOfILRPL1 );
+  pLocalRPL1->setLtrpInSliceHeaderFlag( rpl1->getLtrpInSliceHeaderFlag() );
+  slice->setRPL1idx( -1 ); // the index is set to -1 because the slice now uses its local list
+  slice->setRPL1( pLocalRPL1 );
+}
 //! \}
diff --git a/source/Lib/EncoderLib/EncGOP.h b/source/Lib/EncoderLib/EncGOP.h
index f0c85ca4d8afe14efe94d85bf611499c3565564f..36d28e5145f3f48c470e2208b5905491ed4754ee 100644
--- a/source/Lib/EncoderLib/EncGOP.h
+++ b/source/Lib/EncoderLib/EncGOP.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -60,6 +60,16 @@
 #include "Analyze.h"
 #include "RateCtrl.h"
 #include <vector>
+#include "EncHRD.h"
+
+#if JVET_O0756_CALCULATE_HDRMETRICS
+#include "HDRLib/inc/ConvertColorFormat.H"
+#include "HDRLib/inc/Convert.H"
+#include "HDRLib/inc/ColorTransform.H"
+#include "HDRLib/inc/TransferFunction.H"
+#include "HDRLib/inc/DistortionMetricDeltaE.H"
+#include <chrono>
+#endif
 
 //! \ingroup EncoderLib
 //! \{
@@ -117,6 +127,7 @@ private:
   int                     m_iNumPicCoded;
   bool                    m_bFirst;
   int                     m_iLastRecoveryPicPOC;
+  int                     m_latestDRAPPOC;
   int                     m_lastRasPoc;
 
   //  Access channel
@@ -145,6 +156,8 @@ private:
   // indicate sequence first
   bool                    m_bSeqFirst;
 
+  EncHRD*                 m_HRD;
+
   // clean decoding refresh
   bool                    m_bRefreshPending;
   int                     m_pocCRA;
@@ -152,8 +165,9 @@ private:
   int                     m_associatedIRAPPOC;
 
   std::vector<int>        m_vRVM_RP;
-  uint32_t                    m_lastBPSEI;
-  uint32_t                    m_totalCoded;
+  uint32_t                    m_lastBPSEI[MAX_TLAYER];
+  uint32_t                    m_totalCoded[MAX_TLAYER];
+  bool                        m_rapWithLeading;
   bool                    m_bufferingPeriodSEIPresentInAU;
   SEIEncoder              m_seiEncoder;
 #if W0038_DB_OPT
@@ -169,6 +183,23 @@ private:
 
   AUWriterIf*             m_AUWriterIf;
 
+#if JVET_O0756_CALCULATE_HDRMETRICS
+
+  hdrtoolslib::Frame **m_ppcFrameOrg;
+  hdrtoolslib::Frame **m_ppcFrameRec;
+
+  hdrtoolslib::ConvertColorFormat     *m_pcConvertFormat;
+  hdrtoolslib::Convert                *m_pcConvertIQuantize;
+  hdrtoolslib::ColorTransform         *m_pcColorTransform;
+  hdrtoolslib::DistortionMetricDeltaE *m_pcDistortionDeltaE;
+  hdrtoolslib::TransferFunction       *m_pcTransferFct;
+
+  hdrtoolslib::ColorTransformParams   *m_pcColorTransformParams;
+  hdrtoolslib::FrameFormat            *m_pcFrameFormat;
+
+  std::chrono::duration<long long, ratio<1, 1000000000>> m_metricTime;
+#endif
+
 public:
   EncGOP();
   virtual ~EncGOP();
@@ -177,9 +208,11 @@ public:
   void  destroy     ();
 
   void  init        ( EncLib* pcEncLib );
+
   void  compressGOP ( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, std::list<PelUnitBuf*>& rcListPicYuvRec,
                       bool isField, bool isTff, const InputColourSpaceConversion snr_conversion, const bool printFrameMSE
                     , bool isEncodeLtRef
+                    , const int picIdInGOP
   );
   void  xAttachSliceDataToNalUnit (OutputNALUnit& rNalu, OutputBitstream* pcBitstreamRedirect);
 
@@ -202,13 +235,12 @@ public:
   void      setLastLTRefPoc(int iLastLTRefPoc) { m_lastLTRefPoc = iLastLTRefPoc; }
   int       getLastLTRefPoc() const { return m_lastLTRefPoc; }
 
-  void  printOutSummary      ( uint32_t uiNumAllPicCoded, bool isField, const bool printMSEBasedSNR, const bool printSequenceMSE, const bool printHexPsnr, const BitDepths &bitDepths );
+  void  printOutSummary( uint32_t uiNumAllPicCoded, bool isField, const bool printMSEBasedSNR, const bool printSequenceMSE, const bool printHexPsnr, const bool printRprPSNR, const BitDepths &bitDepths );
 #if W0038_DB_OPT
   uint64_t  preLoopFilterPicAndCalcDist( Picture* pcPic );
 #endif
   EncSlice*  getSliceEncoder()   { return m_pcSliceEncoder; }
   NalUnitType getNalUnitType( int pocCurr, int lastIdr, bool isField );
-  void arrangeLongtermPicturesInRPS(Slice *, PicList& );
   void arrangeCompositeReference(Slice* pcSlice, PicList& rcListPic, int pocCurr);
   void updateCompositeReference(Slice* pcSlice, PicList& rcListPic, int pocCurr);
 
@@ -218,6 +250,9 @@ public:
   Analyze& getAnalyzePData() { return m_gcAnalyzeP; }
   Analyze& getAnalyzeBData() { return m_gcAnalyzeB; }
 #endif
+#if JVET_O0756_CALCULATE_HDRMETRICS
+  std::chrono::duration<long long, std::ratio<1, 1000000000>> getMetricTime()    const { return m_metricTime; } // returns m_metricTime (a duration counted in 1/1000000000 s units)
+#endif
 
 protected:
   RateCtrl* getRateCtrl()       { return m_pcRateCtrl;  }
@@ -227,9 +262,17 @@ protected:
   void  xInitGOP          ( int iPOCLast, int iNumPicRcvd, bool isField
     , bool isEncodeLtRef
   );
+  void  xPicInitHashME( Picture *pic, const PPS *pps, PicList &rcListPic );
+  void  xPicInitRateControl(int &estimatedBits, int gopId, double &lambda, Picture *pic, Slice *slice);
+  void  xPicInitLMCS       (Picture *pic, PicHeader *picHeader, Slice *slice);
   void  xGetBuffer        ( PicList& rcListPic, std::list<PelUnitBuf*>& rcListPicYuvRecOut,
                             int iNumPicRcvd, int iTimeOffset, Picture*& rpcPic, int pocCurr, bool isField );
 
+#if JVET_O0756_CALCULATE_HDRMETRICS
+  void xCalculateHDRMetrics ( Picture* pcPic, double deltaE[hdrtoolslib::NB_REF_WHITE], double psnrL[hdrtoolslib::NB_REF_WHITE]);
+  void copyBuftoFrame       ( Picture* pcPic );
+#endif
+
   void  xCalculateAddPSNRs(const bool isField, const bool isFieldTopFieldFirst, const int iGOPid, Picture* pcPic, const AccessUnit&accessUnit, PicList &rcListPic, int64_t dEncTime, const InputColourSpaceConversion snr_conversion, const bool printFrameMSE, double* PSNR_Y
     , bool isEncodeLtRef
   );
@@ -244,7 +287,7 @@ protected:
 
   uint64_t xFindDistortionPlane(const CPelBuf& pic0, const CPelBuf& pic1, const uint32_t rshift
 #if ENABLE_QPA
-                            , const uint32_t chromaShift = 0
+                            , const uint32_t chromaShiftHor = 0, const uint32_t chromaShiftVer = 0
 #endif
                              );
 #if WCG_WPSNR
@@ -258,12 +301,15 @@ protected:
 
   void xCreateIRAPLeadingSEIMessages (SEIMessages& seiMessages, const SPS *sps, const PPS *pps);
   void xCreatePerPictureSEIMessages (int picInGOP, SEIMessages& seiMessages, SEIMessages& nestedSeiMessages, Slice *slice);
+  void xCreateFrameFieldInfoSEI (SEIMessages& seiMessages, Slice *slice, bool isField);
   void xCreatePictureTimingSEI  (int IRAPGOPid, SEIMessages& seiMessages, SEIMessages& nestedSeiMessages, SEIMessages& duInfoSeiMessages, Slice *slice, bool isField, std::deque<DUData> &duData);
   void xUpdateDuData(AccessUnit &testAU, std::deque<DUData> &duData);
   void xUpdateTimingSEI(SEIPictureTiming *pictureTimingSEI, std::deque<DUData> &duData, const SPS *sps);
-  void xUpdateDuInfoSEI(SEIMessages &duInfoSeiMessages, SEIPictureTiming *pictureTimingSEI);
+  void xUpdateDuInfoSEI(SEIMessages &duInfoSeiMessages, SEIPictureTiming *pictureTimingSEI, int maxSubLayers);
 
+#if HEVC_SEI
   void xCreateScalableNestingSEI (SEIMessages& seiMessages, SEIMessages& nestedSeiMessages);
+#endif
   void xWriteSEI (NalUnitType naluType, SEIMessages& seiMessages, AccessUnit &accessUnit, AccessUnit::iterator &auPos, int temporalId, const SPS *sps);
   void xWriteSEISeparately (NalUnitType naluType, SEIMessages& seiMessages, AccessUnit &accessUnit, AccessUnit::iterator &auPos, int temporalId, const SPS *sps);
   void xClearSEIs(SEIMessages& seiMessages, bool deleteMessages);
@@ -272,18 +318,19 @@ protected:
   void xWriteTrailingSEIMessages (SEIMessages& seiMessages, AccessUnit &accessUnit, int temporalId, const SPS *sps);
   void xWriteDuSEIMessages       (SEIMessages& duInfoSeiMessages, AccessUnit &accessUnit, int temporalId, const SPS *sps, std::deque<DUData> &duData);
 
-#if HEVC_VPS
   int xWriteVPS (AccessUnit &accessUnit, const VPS *vps);
-#endif
-  int xWriteSPS (AccessUnit &accessUnit, const SPS *sps);
-  int xWritePPS (AccessUnit &accessUnit, const PPS *pps);
-  int xWriteAPS(AccessUnit &accessUnit, APS *aps);
+  int xWriteDPS (AccessUnit &accessUnit, const DPS *dps);
+  int xWriteSPS( AccessUnit &accessUnit, const SPS *sps, const int layerId = 0 );
+  int xWritePPS( AccessUnit &accessUnit, const PPS *pps, const SPS *sps, const int layerId = 0 );
+  int xWriteAPS( AccessUnit &accessUnit, APS *aps, const int layerId, const bool isPrefixNUT );
   int xWriteParameterSets (AccessUnit &accessUnit, Slice *slice, const bool bSeqFirst);
+  int xWritePicHeader( AccessUnit &accessUnit, PicHeader *picHeader );
 
   void applyDeblockingFilterMetric( Picture* pcPic, uint32_t uiNumSlices );
 #if W0038_DB_OPT
   void applyDeblockingFilterParameterSelection( Picture* pcPic, const uint32_t numSlices, const int gopID );
 #endif
+  void xCreateExplicitReferencePictureSetFromReference( Slice* slice, PicList& rcListPic, const ReferencePictureList *rpl0, const ReferencePictureList *rpl1 );
 };// END CLASS DEFINITION EncGOP
 
 //! \}
diff --git a/source/Lib/EncoderLib/EncHRD.cpp b/source/Lib/EncoderLib/EncHRD.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..342e9a747a550583d5e7400289f140c7e23ee167
--- /dev/null
+++ b/source/Lib/EncoderLib/EncHRD.cpp
@@ -0,0 +1,190 @@
+/* The copyright in this software is being made available under the BSD
+* License, included below. This software may be subject to other third party
+* and contributor rights, including patent rights, and no such rights are
+* granted under this license.
+*
+* Copyright (c) 2010-2020, ITU/ISO/IEC
+* All rights reserved.
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are met:
+*
+*  * Redistributions of source code must retain the above copyright notice,
+*    this list of conditions and the following disclaimer.
+*  * Redistributions in binary form must reproduce the above copyright notice,
+*    this list of conditions and the following disclaimer in the documentation
+*    and/or other materials provided with the distribution.
+*  * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
+*    be used to endorse or promote products derived from this software without
+*    specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
+* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+* THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "EncHRD.h"
+
+#if U0132_TARGET_BITS_SATURATION
+
+// Scale value for bitrate / initial delay: the number of trailing zero bits of x (0 when x is 0).
+int EncHRD::xCalcScale(int x)
+{
+  // Work on the raw 32-bit pattern, exactly as a test against a 0xffffffff mask would.
+  const uint32_t bits = static_cast<uint32_t>( x );
+  int trailingZeros = 0;
+
+  if( bits != 0 )
+  {
+    // Advance until the lowest set bit is reached.
+    while( ( ( bits >> trailingZeros ) & 1u ) == 0 )
+    {
+      trailingZeros++;
+    }
+  }
+
+  return trailingZeros;
+}
+#endif
+
+void EncHRD::initHRDParameters (EncCfg* encCfg) // Fills m_timingInfo and m_hrdParams from the encoder configuration
+{
+  bool useSubCpbParams = encCfg->getNoPicPartitionFlag() == false; // sub-CPB (decoding-unit) parameters are only considered when the picture is partitioned
+  int  bitRate         = encCfg->getTargetBitrate();
+# if U0132_TARGET_BITS_SATURATION
+  int cpbSize          = encCfg->getCpbSize();
+  CHECK(!(cpbSize!=0), "Unspecified error");  // CPB size may not be equal to zero. ToDo: have a better default and check for level constraints
+  if( !encCfg->getHrdParametersPresentFlag() && !encCfg->getCpbSaturationEnabled() )
+#else
+  if( !encCfg->getHrdParametersPresentFlag() )
+#endif
+  {
+    return; // nothing to initialise: HRD parameters (and, when compiled in, CPB saturation) are not requested
+  }
+
+  m_timingInfo.setTimingInfoPresentFlag( true );
+  switch( encCfg->getFrameRate() ) // pick numUnitsInTick / timeScale for the configured frame rate
+  {
+  case 24:
+    m_timingInfo.setNumUnitsInTick( 1125000 );    m_timingInfo.setTimeScale    ( 27000000 ); // 27000000 / 1125000 = 24 ticks per second
+    break;
+  case 25:
+    m_timingInfo.setNumUnitsInTick( 1080000 );    m_timingInfo.setTimeScale    ( 27000000 ); // 27000000 / 1080000 = 25 ticks per second
+    break;
+  case 30:
+    m_timingInfo.setNumUnitsInTick( 900900 );     m_timingInfo.setTimeScale    ( 27000000 ); // 27000000 / 900900 ~= 29.97 ticks per second
+    break;
+  case 50:
+    m_timingInfo.setNumUnitsInTick( 540000 );     m_timingInfo.setTimeScale    ( 27000000 ); // 27000000 / 540000 = 50 ticks per second
+    break;
+  case 60:
+    m_timingInfo.setNumUnitsInTick( 450450 );     m_timingInfo.setTimeScale    ( 27000000 ); // 27000000 / 450450 ~= 59.94 ticks per second
+    break;
+  default:
+    m_timingInfo.setNumUnitsInTick( 1001 );       m_timingInfo.setTimeScale    ( 60000 ); // 60000 / 1001 ~= 59.94 ticks per second for every other frame rate
+    break;
+  }
+
+  if (encCfg->getTemporalSubsampleRatio()>1)
+  {
+    uint32_t temporalSubsampleRatio = encCfg->getTemporalSubsampleRatio();
+    if ( double(m_timingInfo.getNumUnitsInTick()) * temporalSubsampleRatio > std::numeric_limits<uint32_t>::max() )
+    {
+      m_timingInfo.setTimeScale( m_timingInfo.getTimeScale() / temporalSubsampleRatio ); // multiplying the tick length would exceed uint32_t, so the time scale is divided instead
+    }
+    else
+    {
+      m_timingInfo.setNumUnitsInTick( m_timingInfo.getNumUnitsInTick() * temporalSubsampleRatio );
+    }
+  }
+  bool rateCnt = ( bitRate > 0 ); // NAL and VCL HRD parameters are signalled only for a positive target bitrate
+  m_hrdParams.setNalHrdParametersPresentFlag( rateCnt );
+  m_hrdParams.setVclHrdParametersPresentFlag( rateCnt );
+  useSubCpbParams &= ( m_hrdParams.getNalHrdParametersPresentFlag() || m_hrdParams.getVclHrdParametersPresentFlag() );
+  m_hrdParams.setGeneralDecodingUnitHrdParamsPresentFlag( useSubCpbParams );
+
+  if( m_hrdParams.getGeneralDecodingUnitHrdParamsPresentFlag() )
+  {
+    m_hrdParams.setTickDivisorMinus2( 100 - 2 ); // i.e. a tick divisor of 100
+  }
+
+#if U0132_TARGET_BITS_SATURATION
+  if (xCalcScale(bitRate) <= 6) // bit-rate scale is xCalcScale(bitRate) - 6, clamped at 0
+  {
+    m_hrdParams.setBitRateScale(0);
+  }
+  else
+  {
+    m_hrdParams.setBitRateScale(xCalcScale(bitRate) - 6);
+  }
+
+  if (xCalcScale(cpbSize) <= 4) // CPB-size scale is xCalcScale(cpbSize) - 4, clamped at 0
+  {
+    m_hrdParams.setCpbSizeScale(0);
+  }
+  else
+  {
+    m_hrdParams.setCpbSizeScale(xCalcScale(cpbSize) - 4);
+  }
+#else
+  m_hrdParams.setBitRateScale( 4 );                                       // in units of 2^( 6 + 4 ) = 1,024 bps
+  m_hrdParams.setCpbSizeScale( 6 );                                       // in units of 2^( 4 + 6 ) = 1,024 bit
+#endif
+
+  m_hrdParams.setCpbSizeDuScale( 6 );                                     // in units of 2^( 4 + 6 ) = 1,024 bit
+
+
+  // Note: parameters for all temporal layers are initialized with the same values
+  int i, j;
+  uint32_t bitrateValue, cpbSizeValue;
+  uint32_t duCpbSizeValue;
+  uint32_t duBitRateValue = 0;
+
+  for( i = 0; i < MAX_TLAYER; i ++ )
+  {
+    m_hrdParams.setFixedPicRateFlag( i, 1 );
+    m_hrdParams.setPicDurationInTcMinus1( i, 0 );
+    m_hrdParams.setLowDelayHrdFlag( i, 0 );
+    m_hrdParams.setCpbCntMinus1( i, 0 ); // a single CPB specification per temporal layer
+
+    //! \todo check for possible PTL violations
+    // BitRate[ i ] = ( bit_rate_value_minus1[ i ] + 1 ) * 2^( 6 + bit_rate_scale )
+    bitrateValue = bitRate / (1 << (6 + m_hrdParams.getBitRateScale()) );      // bitRate is in bits, so it needs to be scaled down
+                                                                        // CpbSize[ i ] = ( cpb_size_value_minus1[ i ] + 1 ) * 2^( 4 + cpb_size_scale )
+#if U0132_TARGET_BITS_SATURATION
+    cpbSizeValue = cpbSize / (1 << (4 + m_hrdParams.getCpbSizeScale()) );      // configured CPB size, scaled down to the signalled units
+#else
+    cpbSizeValue = bitRate / (1 << (4 + m_hrdParams.getCpbSizeScale()) );      // using bitRate results in 1 second CPB size
+#endif
+
+
+                                                                        // DU CPB size could be smaller (i.e. bitrateValue / number of DUs), but we don't know
+                                                                        // in how many DUs the slice segment settings will result
+    duCpbSizeValue = bitrateValue; // NOTE(review): the DU CPB size is taken from the bit-rate value ...
+    duBitRateValue = cpbSizeValue; // ... and the DU bit rate from the CPB-size value - confirm this cross-assignment is intended
+
+    for( j = 0; j < ( m_hrdParams.getCpbCntMinus1( i ) + 1 ); j ++ )
+    {
+      m_hrdParams.setBitRateValueMinus1( i, j, 0, ( bitrateValue - 1 ) ); // third argument 0: first of the two parameter sets written with identical values
+      m_hrdParams.setCpbSizeValueMinus1( i, j, 0, ( cpbSizeValue - 1 ) );
+      m_hrdParams.setDuCpbSizeValueMinus1( i, j, 0, ( duCpbSizeValue - 1 ) );
+      m_hrdParams.setDuBitRateValueMinus1( i, j, 0, ( duBitRateValue - 1 ) );
+      m_hrdParams.setCbrFlag( i, j, 0, false );
+
+      m_hrdParams.setBitRateValueMinus1( i, j, 1, ( bitrateValue - 1) ); // third argument 1: second parameter set, same values
+      m_hrdParams.setCpbSizeValueMinus1( i, j, 1, ( cpbSizeValue - 1 ) );
+      m_hrdParams.setDuCpbSizeValueMinus1( i, j, 1, ( duCpbSizeValue - 1 ) );
+      m_hrdParams.setDuBitRateValueMinus1( i, j, 1, ( duBitRateValue - 1 ) );
+      m_hrdParams.setCbrFlag( i, j, 1, false );
+    }
+  }
+}
+
diff --git a/source/Lib/EncoderLib/EncHRD.h b/source/Lib/EncoderLib/EncHRD.h
new file mode 100644
index 0000000000000000000000000000000000000000..1aa7c695f74ec6e08318ca22d23a44d044138fee
--- /dev/null
+++ b/source/Lib/EncoderLib/EncHRD.h
@@ -0,0 +1,55 @@
+/* The copyright in this software is being made available under the BSD
+* License, included below. This software may be subject to other third party
+* and contributor rights, including patent rights, and no such rights are
+* granted under this license.
+*
+* Copyright (c) 2010-2020, ITU/ISO/IEC
+* All rights reserved.
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are met:
+*
+*  * Redistributions of source code must retain the above copyright notice,
+*    this list of conditions and the following disclaimer.
+*  * Redistributions in binary form must reproduce the above copyright notice,
+*    this list of conditions and the following disclaimer in the documentation
+*    and/or other materials provided with the distribution.
+*  * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
+*    be used to endorse or promote products derived from this software without
+*    specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
+* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+* THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#ifndef __ENCHRD__
+#define __ENCHRD__
+
+#include "CommonLib/Common.h"
+#include "CommonLib/HRD.h"
+#include "CommonLib/Slice.h"
+#include "EncCfg.h"
+
+class EncHRD:public HRD // encoder-side subclass of HRD (base presumably declared in CommonLib/HRD.h)
+{
+public:
+  void initHRDParameters (EncCfg* encCfg); // initialise the HRD parameters using the encoder configuration encCfg (NOTE(review): definition not visible in this header)
+
+protected:
+  // calculate scale value of bitrate and initial delay for the input value x (helper, not part of the public interface)
+  int xCalcScale(int x);
+
+};
+
+
+#endif // __ENCHRD__
diff --git a/source/Lib/EncoderLib/EncLib.cpp b/source/Lib/EncoderLib/EncLib.cpp
index dcc383dc2ce7716b14edc62e0450bbb045d32ef4..4df4089a5c102e6b8800eb2e689c8f9caab4d438 100644
--- a/source/Lib/EncoderLib/EncLib.cpp
+++ b/source/Lib/EncoderLib/EncLib.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -34,7 +34,6 @@
 /** \file     EncLib.cpp
     \brief    encoder class
 */
-
 #include "EncLib.h"
 
 #include "EncModeCtrl.h"
@@ -47,6 +46,7 @@
 #if ENABLE_SPLIT_PARALLELISM
 #include <omp.h>
 #endif
+#include "EncLibCommon.h"
 
 //! \ingroup EncoderLib
 //! \{
@@ -55,16 +55,19 @@
 // Constructor / destructor / create / destroy
 // ====================================================================================================================
 
-
-
-EncLib::EncLib()
-  : m_spsMap( MAX_NUM_SPS )
-  , m_ppsMap( MAX_NUM_PPS )
-  , m_apsMap( MAX_NUM_APS )
+EncLib::EncLib( EncLibCommon* encLibCommon )
+  : m_cListPic( encLibCommon->getPictureBuffer() )
+  , m_cEncALF( encLibCommon->getApsIdStart() )
+  , m_spsMap( encLibCommon->getSpsMap() )
+  , m_ppsMap( encLibCommon->getPpsMap() )
+  , m_apsMap( encLibCommon->getApsMap() )
   , m_AUWriterIf( nullptr )
 #if JVET_J0090_MEMORY_BANDWITH_MEASURE
   , m_cacheModel()
 #endif
+  , m_lmcsAPS(nullptr)
+  , m_scalinglistAPS( nullptr )
+  , m_doPlt( true )
 {
   m_iPOCLast          = -1;
   m_iNumPicRcvd       =  0;
@@ -75,30 +78,33 @@ EncLib::EncLib()
 #if ENABLE_SIMD_OPT_BUFFER
   g_pelBufOP.initPelBufOpsX86();
 #endif
+
+#if JVET_O0756_CALCULATE_HDRMETRICS
+  m_metricTime = std::chrono::milliseconds(0);
+#endif
+
+  memset(m_apss, 0, sizeof(m_apss));
+
+  m_layerId = NOT_VALID;
+  m_picIdInGOP = NOT_VALID;
 }
 
 EncLib::~EncLib()
 {
 }
 
-void EncLib::create ()
+void EncLib::create( const int layerId )
 {
-  // initialize global variables
-  initROM();
-  TComHash::initBlockSizeToIndex();
+  m_layerId = layerId;
   m_iPOCLast = m_compositeRefEnabled ? -2 : -1;
   // create processing unit classes
   m_cGOPEncoder.        create( );
-  m_cSliceEncoder.      create( getSourceWidth(), getSourceHeight(), m_chromaFormatIDC, m_maxCUWidth, m_maxCUHeight, m_maxTotalCUDepth );
-#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM
+#if ENABLE_SPLIT_PARALLELISM
 #if ENABLE_SPLIT_PARALLELISM
   m_numCuEncStacks  = m_numSplitThreads == 1 ? 1 : NUM_RESERVERD_SPLIT_JOBS;
 #else
   m_numCuEncStacks  = 1;
 #endif
-#if ENABLE_WPP_PARALLELISM
-  m_numCuEncStacks *= ( m_numWppThreads + m_numWppExtraLines );
-#endif
 
   m_cCuEncoder      = new EncCu              [m_numCuEncStacks];
   m_cInterSearch    = new InterSearch        [m_numCuEncStacks];
@@ -118,32 +124,15 @@ void EncLib::create ()
 #if JVET_J0090_MEMORY_BANDWITH_MEASURE
   m_cInterSearch.cacheAssign( &m_cacheModel );
 #endif
-  const uint32_t widthInCtus   = (getSourceWidth()  + m_maxCUWidth  - 1)  / m_maxCUWidth;
-  const uint32_t heightInCtus  = (getSourceHeight() + m_maxCUHeight - 1) / m_maxCUHeight;
-  const uint32_t numCtuInFrame = widthInCtus * heightInCtus;
-
-  if (m_bUseSAO)
-  {
-    m_cEncSAO.create( getSourceWidth(), getSourceHeight(), m_chromaFormatIDC, m_maxCUWidth, m_maxCUHeight, m_maxTotalCUDepth, m_log2SaoOffsetScale[CHANNEL_TYPE_LUMA], m_log2SaoOffsetScale[CHANNEL_TYPE_CHROMA] );
-    m_cEncSAO.createEncData(getSaoCtuBoundary(), numCtuInFrame);
-  }
 
   m_cLoopFilter.create( m_maxTotalCUDepth );
-  if ( !m_bLoopFilterDisable )
-  {
-    m_cLoopFilter.initEncPicYuvBuffer( m_chromaFormatIDC, getSourceWidth(), getSourceHeight() );
-  }
-  if( m_alf )
-  {
-    m_cEncALF.create( getSourceWidth(), getSourceHeight(), m_chromaFormatIDC, m_maxCUWidth, m_maxCUHeight, m_maxTotalCUDepth, m_bitDepth, m_inputBitDepth );
-  }
 
-#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM
+#if ENABLE_SPLIT_PARALLELISM
   m_cReshaper = new EncReshape[m_numCuEncStacks];
 #endif
-  if (m_lumaReshapeEnable)
+  if (m_lmcsEnabled)
   {
-#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM
+#if ENABLE_SPLIT_PARALLELISM
     for (int jId = 0; jId < m_numCuEncStacks; jId++)
     {
       m_cReshaper[jId].createEnc(getSourceWidth(), getSourceHeight(), m_maxCUWidth, m_maxCUHeight, m_bitDepth[COMPONENT_Y]);
@@ -165,7 +154,7 @@ void EncLib::destroy ()
   // destroy processing unit classes
   m_cGOPEncoder.        destroy();
   m_cSliceEncoder.      destroy();
-#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM
+#if ENABLE_SPLIT_PARALLELISM
   for( int jId = 0; jId < m_numCuEncStacks; jId++ )
   {
     m_cCuEncoder[jId].destroy();
@@ -181,7 +170,7 @@ void EncLib::destroy ()
   m_cEncSAO.            destroy();
   m_cLoopFilter.        destroy();
   m_cRateCtrl.          destroy();
-#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM
+#if ENABLE_SPLIT_PARALLELISM
   for (int jId = 0; jId < m_numCuEncStacks; jId++)
   {
     m_cReshaper[jId].   destroy();
@@ -189,7 +178,7 @@ void EncLib::destroy ()
 #else
   m_cReshaper.          destroy();
 #endif
-#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM
+#if ENABLE_SPLIT_PARALLELISM
   for( int jId = 0; jId < m_numCuEncStacks; jId++ )
   {
     m_cInterSearch[jId].   destroy();
@@ -200,7 +189,7 @@ void EncLib::destroy ()
   m_cIntraSearch.       destroy();
 #endif
 
-#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM
+#if ENABLE_SPLIT_PARALLELISM
   delete[] m_cCuEncoder;
   delete[] m_cInterSearch;
   delete[] m_cIntraSearch;
@@ -210,11 +199,6 @@ void EncLib::destroy ()
   delete[] m_CtxCache;
 #endif
 
-
-
-
-  // destroy ROM
-  destroyROM();
   return;
 }
 
@@ -223,14 +207,18 @@ void EncLib::init( bool isFieldCoding, AUWriterIf* auWriterIf )
   m_AUWriterIf = auWriterIf;
 
   SPS &sps0=*(m_spsMap.allocatePS(0)); // NOTE: implementations that use more than 1 SPS need to be aware of activation issues.
-  PPS &pps0=*(m_ppsMap.allocatePS(0));
-  APS &aps0=*(m_apsMap.allocatePS(0));
+  PPS &pps0 = *( m_ppsMap.allocatePS( m_layerId ) );
+  APS &aps0 = *( m_apsMap.allocatePS( SCALING_LIST_APS ) );
+  aps0.setAPSId( 0 );
+  aps0.setAPSType( SCALING_LIST_APS );
 
   // initialize SPS
-  xInitSPS(sps0);
-#if HEVC_VPS
+  xInitSPS( sps0, m_cVPS );
   xInitVPS(m_cVPS, sps0);
-#endif
+
+  int dpsId = getDecodingParameterSetEnabled() ? 1 : 0;
+  xInitDPS(m_dps, sps0, dpsId);
+  sps0.setDecodingParameterSetId(m_dps.getDecodingParameterSetId());
 
 #if ENABLE_SPLIT_PARALLELISM
   if( omp_get_dynamic() )
@@ -240,7 +228,7 @@ void EncLib::init( bool isFieldCoding, AUWriterIf* auWriterIf )
   omp_set_nested( true );
 #endif
 
-  if (getUseCompositeRef())
+  if (getUseCompositeRef() || getDependentRAPIndicationSEIEnabled())
   {
     sps0.setLongTermRefsPresent(true);
   }
@@ -248,10 +236,10 @@ void EncLib::init( bool isFieldCoding, AUWriterIf* auWriterIf )
 #if U0132_TARGET_BITS_SATURATION
   if (m_RCCpbSaturationEnabled)
   {
-    m_cRateCtrl.initHrdParam(sps0.getVuiParameters()->getHrdParameters(), m_iFrameRate, m_RCInitialCpbFullness);
+    m_cRateCtrl.initHrdParam(sps0.getHrdParameters(), m_iFrameRate, m_RCInitialCpbFullness);
   }
 #endif
-#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM
+#if ENABLE_SPLIT_PARALLELISM
   for( int jId = 0; jId < m_numCuEncStacks; jId++ )
   {
     m_cRdCost[jId].setCostMode ( m_costMode );
@@ -261,10 +249,50 @@ void EncLib::init( bool isFieldCoding, AUWriterIf* auWriterIf )
 #endif
 
   // initialize PPS
+  pps0.setPicWidthInLumaSamples( m_iSourceWidth );
+  pps0.setPicHeightInLumaSamples( m_iSourceHeight );
+  pps0.setConformanceWindow( m_conformanceWindow );
   xInitPPS(pps0, sps0);
   // initialize APS
-  xInitAPS(aps0);
-  xInitRPS(sps0, isFieldCoding);
+  xInitRPL(sps0, isFieldCoding);
+
+  if( m_rprEnabled )
+  {
+    PPS &pps = *( m_ppsMap.allocatePS( ENC_PPS_ID_RPR ) );
+    Window& inputScalingWindow = pps0.getScalingWindow();
+#if JVET_Q0487_SCALING_WINDOW_ISSUES
+    int scaledWidth = int( ( pps0.getPicWidthInLumaSamples() - SPS::getWinUnitX( sps0.getChromaFormatIdc() ) * ( inputScalingWindow.getWindowLeftOffset() + inputScalingWindow.getWindowRightOffset() ) ) / m_scalingRatioHor );
+#else
+    int scaledWidth = int( ( pps0.getPicWidthInLumaSamples() - inputScalingWindow.getWindowLeftOffset() - inputScalingWindow.getWindowRightOffset() ) / m_scalingRatioHor );
+#endif
+    int minSizeUnit = std::max(8, (int)(sps0.getMaxCUHeight() >> (sps0.getMaxCodingDepth() - 1)));
+    int temp = scaledWidth / minSizeUnit;
+    int width = ( scaledWidth - ( temp * minSizeUnit) > 0 ? temp + 1 : temp ) * minSizeUnit;
+
+#if JVET_Q0487_SCALING_WINDOW_ISSUES
+    int scaledHeight = int( ( pps0.getPicHeightInLumaSamples() - SPS::getWinUnitY( sps0.getChromaFormatIdc() ) * ( inputScalingWindow.getWindowTopOffset() + inputScalingWindow.getWindowBottomOffset() ) ) / m_scalingRatioVer );
+#else
+    int scaledHeight = int( ( pps0.getPicHeightInLumaSamples() - inputScalingWindow.getWindowTopOffset() - inputScalingWindow.getWindowBottomOffset() ) / m_scalingRatioVer );
+#endif
+    temp = scaledHeight / minSizeUnit;
+    int height = ( scaledHeight - ( temp * minSizeUnit) > 0 ? temp + 1 : temp ) * minSizeUnit;
+
+    pps.setPicWidthInLumaSamples( width );
+    pps.setPicHeightInLumaSamples( height );
+
+    Window conformanceWindow;
+    conformanceWindow.setWindow( 0, ( width - scaledWidth ) / SPS::getWinUnitX( sps0.getChromaFormatIdc() ), 0, ( height - scaledHeight ) / SPS::getWinUnitY( sps0.getChromaFormatIdc() ) );
+    pps.setConformanceWindow( conformanceWindow );
+
+    Window scalingWindow;
+    scalingWindow.setWindow( 0, width - scaledWidth, 0, height - scaledHeight );
+    pps.setScalingWindow( scalingWindow );
+
+    // disable picture partitioning for scaled RPR pictures (slice/tile config only provided for the original resolution)
+    m_noPicPartitionFlag = true;
+
+    xInitPPS( pps, sps0 ); // will allocate memory for and initialize pps.pcv inside
+  }
 
 #if ER_CHROMA_QP_WCG_PPS
   if (m_wcgChromaQpControl.isEnabled())
@@ -279,11 +307,12 @@ void EncLib::init( bool isFieldCoding, AUWriterIf* auWriterIf )
     xInitPPS(pps2, sps0);
     xInitPPSforLT(pps2);
   }
+  xInitPicHeader(m_picHeader, sps0, pps0);
 
   // initialize processing unit classes
   m_cGOPEncoder.  init( this );
   m_cSliceEncoder.init( this, sps0 );
-#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM
+#if ENABLE_SPLIT_PARALLELISM
   for( int jId = 0; jId < m_numCuEncStacks; jId++ )
   {
     // precache a few objects
@@ -297,19 +326,14 @@ void EncLib::init( bool isFieldCoding, AUWriterIf* auWriterIf )
 
     // initialize transform & quantization class
     m_cTrQuant[jId].init( jId == 0 ? nullptr : m_cTrQuant[0].getQuant(),
-#if MAX_TB_SIZE_SIGNALLING
                           1 << m_log2MaxTbSize,
 
-#else
-                          MAX_TB_SIZEY,
-#endif
                           m_useRDOQ,
                           m_useRDOQTS,
 #if T0196_SELECTIVE_RDOQ
                           m_useSelectiveRDOQ,
 #endif
-                          true,
-                          m_useTransformSkipFast
+                          true
     );
 
     // initialize encoder search class
@@ -320,6 +344,7 @@ void EncLib::init( bool isFieldCoding, AUWriterIf* auWriterIf )
                               cabacEstimator,
                               getCtxCache( jId ), m_maxCUWidth, m_maxCUHeight, m_maxTotalCUDepth
                             , &m_cReshaper[jId]
+                            , sps0.getBitDepth(CHANNEL_TYPE_LUMA)
     );
     m_cInterSearch[jId].init( this,
                               &m_cTrQuant[jId],
@@ -339,18 +364,13 @@ void EncLib::init( bool isFieldCoding, AUWriterIf* auWriterIf )
 
   // initialize transform & quantization class
   m_cTrQuant.init( nullptr,
-#if MAX_TB_SIZE_SIGNALLING
                    1 << m_log2MaxTbSize,
-#else
-                   MAX_TB_SIZEY,
-#endif
                    m_useRDOQ,
                    m_useRDOQTS,
 #if T0196_SELECTIVE_RDOQ
                    m_useSelectiveRDOQ,
 #endif
-                   true,
-                   m_useTransformSkipFast
+                   true
   );
 
   // initialize encoder search class
@@ -361,6 +381,7 @@ void EncLib::init( bool isFieldCoding, AUWriterIf* auWriterIf )
                        cabacEstimator,
                        getCtxCache(), m_maxCUWidth, m_maxCUHeight, m_maxTotalCUDepth
                      , &m_cReshaper
+                     , sps0.getBitDepth(CHANNEL_TYPE_LUMA)
   );
   m_cInterSearch.init( this,
                        &m_cTrQuant,
@@ -378,47 +399,45 @@ void EncLib::init( bool isFieldCoding, AUWriterIf* auWriterIf )
 
   m_iMaxRefPicNum = 0;
 
-#if HEVC_USE_SCALING_LISTS
 #if ER_CHROMA_QP_WCG_PPS
   if( m_wcgChromaQpControl.isEnabled() )
   {
-    xInitScalingLists( sps0, *m_ppsMap.getPS(1) );
-    xInitScalingLists( sps0, pps0 );
+    xInitScalingLists( sps0, *m_apsMap.getPS( 1 ) );
+    xInitScalingLists( sps0, aps0 );
   }
   else
 #endif
   {
-    xInitScalingLists( sps0, pps0 );
+    xInitScalingLists( sps0, aps0 );
+  }
+  if( m_rprEnabled )
+  {
+    xInitScalingLists( sps0, *m_apsMap.getPS( ENC_PPS_ID_RPR ) );
   }
-#endif
-#if ENABLE_WPP_PARALLELISM
-  m_entropyCodingSyncContextStateVec.resize( pps0.pcv->heightInCtus );
-#endif
   if (getUseCompositeRef())
   {
     Picture *picBg = new Picture;
-    picBg->create(sps0.getChromaFormatIdc(), Size(sps0.getPicWidthInLumaSamples(), sps0.getPicHeightInLumaSamples()), sps0.getMaxCUWidth(), sps0.getMaxCUWidth() + 16, false);
+    picBg->create( sps0.getChromaFormatIdc(), Size( pps0.getPicWidthInLumaSamples(), pps0.getPicHeightInLumaSamples() ), sps0.getMaxCUWidth(), sps0.getMaxCUWidth() + 16, false, m_layerId );
     picBg->getRecoBuf().fill(0);
-    picBg->finalInit(sps0, pps0, aps0);
+    picBg->finalInit( &m_cVPS, sps0, pps0, &m_picHeader, m_apss, m_lmcsAPS, m_scalinglistAPS );
     picBg->allocateNewSlice();
     picBg->createSpliceIdx(pps0.pcv->sizeInCtus);
     m_cGOPEncoder.setPicBg(picBg);
     Picture *picOrig = new Picture;
-    picOrig->create(sps0.getChromaFormatIdc(), Size(sps0.getPicWidthInLumaSamples(), sps0.getPicHeightInLumaSamples()), sps0.getMaxCUWidth(), sps0.getMaxCUWidth() + 16, false);
+    picOrig->create( sps0.getChromaFormatIdc(), Size( pps0.getPicWidthInLumaSamples(), pps0.getPicHeightInLumaSamples() ), sps0.getMaxCUWidth(), sps0.getMaxCUWidth() + 16, false, m_layerId );
     picOrig->getOrigBuf().fill(0);
     m_cGOPEncoder.setPicOrig(picOrig);
   }
 }
 
-#if HEVC_USE_SCALING_LISTS
-void EncLib::xInitScalingLists(SPS &sps, PPS &pps)
+void EncLib::xInitScalingLists( SPS &sps, APS &aps )
 {
   // Initialise scaling lists
   // The encoder will only use the SPS scaling lists. The PPS will never be marked present.
   const int maxLog2TrDynamicRange[MAX_NUM_CHANNEL_TYPE] =
   {
-      sps.getMaxLog2TrDynamicRange(CHANNEL_TYPE_LUMA),
-      sps.getMaxLog2TrDynamicRange(CHANNEL_TYPE_CHROMA)
+    sps.getMaxLog2TrDynamicRange(CHANNEL_TYPE_LUMA),
+    sps.getMaxLog2TrDynamicRange(CHANNEL_TYPE_CHROMA)
   };
 
   Quant* quant = getTrQuant()->getQuant();
@@ -427,71 +446,60 @@ void EncLib::xInitScalingLists(SPS &sps, PPS &pps)
   {
     quant->setFlatScalingList(maxLog2TrDynamicRange, sps.getBitDepths());
     quant->setUseScalingList(false);
-#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM
+#if ENABLE_SPLIT_PARALLELISM
     for( int jId = 1; jId < m_numCuEncStacks; jId++ )
     {
       getTrQuant( jId )->getQuant()->setFlatScalingList( maxLog2TrDynamicRange, sps.getBitDepths() );
       getTrQuant( jId )->getQuant()->setUseScalingList( false );
     }
 #endif
-    sps.setScalingListPresentFlag(false);
-    pps.setScalingListPresentFlag(false);
   }
   else if(getUseScalingListId() == SCALING_LIST_DEFAULT)
   {
-    sps.getScalingList().setDefaultScalingList ();
-    sps.setScalingListPresentFlag(false);
-    pps.setScalingListPresentFlag(false);
-
-    quant->setScalingList(&(sps.getScalingList()), maxLog2TrDynamicRange, sps.getBitDepths());
+    aps.getScalingList().setDefaultScalingList ();
+    quant->setScalingList( &( aps.getScalingList() ), maxLog2TrDynamicRange, sps.getBitDepths() );
     quant->setUseScalingList(true);
-#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM
+#if ENABLE_SPLIT_PARALLELISM
     for( int jId = 1; jId < m_numCuEncStacks; jId++ )
     {
       getTrQuant( jId )->getQuant()->setUseScalingList( true );
     }
+    aps.getScalingList().setDisableScalingMatrixForLfnstBlks(getDisableScalingMatrixForLfnstBlks());
 #endif
   }
   else if(getUseScalingListId() == SCALING_LIST_FILE_READ)
   {
-    sps.getScalingList().setDefaultScalingList ();
-    if(sps.getScalingList().xParseScalingList(getScalingListFileName()))
+    aps.getScalingList().setDefaultScalingList();
+    CHECK( aps.getScalingList().xParseScalingList( getScalingListFileName() ), "Error Parsing Scaling List Input File" );
+    aps.getScalingList().checkDcOfMatrix();
+    if( aps.getScalingList().isNotDefaultScalingList() == false )
     {
-      THROW( "parse scaling list");
+      setUseScalingListId( SCALING_LIST_DEFAULT );
     }
-    sps.getScalingList().checkDcOfMatrix();
-    sps.setScalingListPresentFlag(sps.getScalingList().checkDefaultScalingList());
-    pps.setScalingListPresentFlag(false);
-
-    quant->setScalingList(&(sps.getScalingList()), maxLog2TrDynamicRange, sps.getBitDepths());
+    quant->setScalingList( &( aps.getScalingList() ), maxLog2TrDynamicRange, sps.getBitDepths() );
     quant->setUseScalingList(true);
-#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM
+#if ENABLE_SPLIT_PARALLELISM
     for( int jId = 1; jId < m_numCuEncStacks; jId++ )
     {
       getTrQuant( jId )->getQuant()->setUseScalingList( true );
     }
 #endif
+    aps.getScalingList().setDisableScalingMatrixForLfnstBlks(getDisableScalingMatrixForLfnstBlks());
   }
   else
   {
     THROW("error : ScalingList == " << getUseScalingListId() << " not supported\n");
   }
 
-  if (getUseScalingListId() != SCALING_LIST_OFF)
+  if( getUseScalingListId() == SCALING_LIST_FILE_READ )
   {
     // Prepare delta's:
-    for(uint32_t sizeId = 0; sizeId < SCALING_LIST_SIZE_NUM; sizeId++)
+    for (uint32_t scalingListId = 0; scalingListId < 28; scalingListId++)
     {
-      const int predListStep = (sizeId == SCALING_LIST_32x32? (SCALING_LIST_NUM/NUMBER_OF_PREDICTION_MODES) : 1); // if 32x32, skip over chroma entries.
-
-      for(uint32_t listId = 0; listId < SCALING_LIST_NUM; listId+=predListStep)
-      {
-        sps.getScalingList().checkPredMode( sizeId, listId );
-      }
+        aps.getScalingList().checkPredMode(scalingListId);
     }
   }
 }
-#endif
 
 void EncLib::xInitPPSforLT(PPS& pps)
 {
@@ -524,48 +532,39 @@ void EncLib::deletePicBuffer()
     delete pcPic;
     pcPic = NULL;
   }
+
+  m_cListPic.clear();
 }
 
-/**
- - Application has picture buffer list with size of GOP + 1
- - Picture buffer list acts like as ring buffer
- - End of the list has the latest picture
- .
- \param   flush               cause encoder to encode a partial GOP
- \param   pcPicYuvOrg         original YUV picture
- \param   pcPicYuvTrueOrg
- \param   snrCSC
- \retval  rcListPicYuvRecOut  list of reconstruction YUV pictures
- \retval  accessUnitsOut      list of output access units
- \retval  iNumEncoded         number of encoded pictures
- */
-void EncLib::encode( bool flush, PelStorage* pcPicYuvOrg, PelStorage* cPicYuvTrueOrg, const InputColourSpaceConversion snrCSC, std::list<PelUnitBuf*>& rcListPicYuvRecOut,
-                     int& iNumEncoded )
+bool EncLib::encodePrep( bool flush, PelStorage* pcPicYuvOrg, PelStorage* cPicYuvTrueOrg, const InputColourSpaceConversion snrCSC, std::list<PelUnitBuf*>& rcListPicYuvRecOut, int& iNumEncoded )
 {
-  if (m_compositeRefEnabled && m_cGOPEncoder.getPicBg()->getSpliceFull() && m_iPOCLast >= 10 && m_iNumPicRcvd == 0 && m_cGOPEncoder.getEncodedLTRef() == false)
+  if( m_compositeRefEnabled && m_cGOPEncoder.getPicBg()->getSpliceFull() && m_iPOCLast >= 10 && m_iNumPicRcvd == 0 && m_cGOPEncoder.getEncodedLTRef() == false )
   {
     Picture* picCurr = NULL;
-    xGetNewPicBuffer(rcListPicYuvRecOut, picCurr, 2);
-    const PPS *pps = m_ppsMap.getPS(2);
-    const SPS *sps = m_spsMap.getPS(pps->getSPSId());
+    xGetNewPicBuffer( rcListPicYuvRecOut, picCurr, 2 );
+    const PPS *pps = m_ppsMap.getPS( 2 );
+    const SPS *sps = m_spsMap.getPS( pps->getSPSId() );
 
-    picCurr->M_BUFS(0, PIC_ORIGINAL).copyFrom(m_cGOPEncoder.getPicBg()->getRecoBuf());
-    APS *aps = m_apsMap.getPS(0);
-    picCurr->finalInit(*sps, *pps, *aps);
+    picCurr->M_BUFS( 0, PIC_ORIGINAL ).copyFrom( m_cGOPEncoder.getPicBg()->getRecoBuf() );
+    picCurr->finalInit( &m_cVPS, *sps, *pps, &m_picHeader, m_apss, m_lmcsAPS, m_scalinglistAPS );
     picCurr->poc = m_iPOCLast - 1;
     m_iPOCLast -= 2;
-    if (getUseAdaptiveQP())
+    if( getUseAdaptiveQP() )
     {
-      AQpPreanalyzer::preanalyze(picCurr);
+      AQpPreanalyzer::preanalyze( picCurr );
     }
-    if (m_RCEnableRateControl)
+    if( m_RCEnableRateControl )
     {
-      m_cRateCtrl.initRCGOP(m_iNumPicRcvd);
+      m_cRateCtrl.initRCGOP( m_iNumPicRcvd );
     }
-    m_cGOPEncoder.compressGOP(m_iPOCLast, m_iNumPicRcvd, m_cListPic, rcListPicYuvRecOut,
-      false, false, snrCSC, m_printFrameMSE, true);
-    m_cGOPEncoder.setEncodedLTRef(true);
-    if (m_RCEnableRateControl)
+
+    m_cGOPEncoder.compressGOP( m_iPOCLast, m_iNumPicRcvd, m_cListPic, rcListPicYuvRecOut, false, false, snrCSC, m_printFrameMSE, true, 0 );
+
+#if JVET_O0756_CALCULATE_HDRMETRICS
+    m_metricTime = m_cGOPEncoder.getMetricTime();
+#endif
+    m_cGOPEncoder.setEncodedLTRef( true );
+    if( m_RCEnableRateControl )
     {
       m_cRateCtrl.destroyRCGOP();
     }
@@ -573,71 +572,160 @@ void EncLib::encode( bool flush, PelStorage* pcPicYuvOrg, PelStorage* cPicYuvTru
     iNumEncoded = 0;
     m_iNumPicRcvd = 0;
   }
+
   //PROF_ACCUM_AND_START_NEW_SET( getProfilerPic(), P_GOP_LEVEL );
-  if (pcPicYuvOrg != NULL)
+  if( pcPicYuvOrg != NULL )
   {
     // get original YUV
     Picture* pcPicCurr = NULL;
 
+    int ppsID = -1; // Use default PPS ID
 #if ER_CHROMA_QP_WCG_PPS
-    int ppsID=-1; // Use default PPS ID
-    if (getWCGChromaQPControl().isEnabled())
+    if( getWCGChromaQPControl().isEnabled() )
     {
-      ppsID = getdQPs()[m_iPOCLast / (m_compositeRefEnabled ? 2 : 1) + 1];
-      ppsID+=(getSwitchPOC() != -1 && (m_iPOCLast+1 >= getSwitchPOC())?1:0);
+      ppsID = getdQPs()[m_iPOCLast / ( m_compositeRefEnabled ? 2 : 1 ) + 1];
+      ppsID += ( getSwitchPOC() != -1 && ( m_iPOCLast + 1 >= getSwitchPOC() ) ? 1 : 0 );
     }
-    xGetNewPicBuffer( rcListPicYuvRecOut,
-                      pcPicCurr, ppsID );
-#else
-    xGetNewPicBuffer( rcListPicYuvRecOut,
-                      pcPicCurr, -1 ); // Uses default PPS ID. However, could be modified, for example, to use a PPS ID as a function of POC (m_iPOCLast+1)
 #endif
 
+    if( m_rprEnabled && m_uiIntraPeriod == -1 )
     {
-      const PPS *pPPS=(ppsID<0) ? m_ppsMap.getFirstPS() : m_ppsMap.getPS(ppsID);
-      const SPS *pSPS=m_spsMap.getPS(pPPS->getSPSId());
+      const int poc = m_iPOCLast + ( m_compositeRefEnabled ? 2 : 1 );
 
-      pcPicCurr->M_BUFS( 0, PIC_ORIGINAL ).swap( *pcPicYuvOrg );
-      pcPicCurr->M_BUFS( 0, PIC_TRUE_ORIGINAL ).swap(*cPicYuvTrueOrg );
+      if( poc / m_switchPocPeriod % 2 )
+      {
+        ppsID = ENC_PPS_ID_RPR;
+      }
+      else
+      {
+        ppsID = 0;
+      }
+    }
+
+    if( m_cVPS.getMaxLayers() > 1 )
+    {
+      ppsID = m_layerId;
+    }
+
+    xGetNewPicBuffer( rcListPicYuvRecOut, pcPicCurr, ppsID );
+
+    const PPS *pPPS = ( ppsID < 0 ) ? m_ppsMap.getFirstPS() : m_ppsMap.getPS( ppsID );
+    const SPS *pSPS = m_spsMap.getPS( pPPS->getSPSId() );
+
+    if( m_rprEnabled )
+    {
+      pcPicCurr->M_BUFS( 0, PIC_ORIGINAL_INPUT ).getBuf( COMPONENT_Y ).copyFrom( pcPicYuvOrg->getBuf( COMPONENT_Y ) );
+      pcPicCurr->M_BUFS( 0, PIC_ORIGINAL_INPUT ).getBuf( COMPONENT_Cb ).copyFrom( pcPicYuvOrg->getBuf( COMPONENT_Cb ) );
+      pcPicCurr->M_BUFS( 0, PIC_ORIGINAL_INPUT ).getBuf( COMPONENT_Cr ).copyFrom( pcPicYuvOrg->getBuf( COMPONENT_Cr ) );
+
+      pcPicCurr->M_BUFS( 0, PIC_TRUE_ORIGINAL_INPUT ).getBuf( COMPONENT_Y ).copyFrom( cPicYuvTrueOrg->getBuf( COMPONENT_Y ) );
+      pcPicCurr->M_BUFS( 0, PIC_TRUE_ORIGINAL_INPUT ).getBuf( COMPONENT_Cb ).copyFrom( cPicYuvTrueOrg->getBuf( COMPONENT_Cb ) );
+      pcPicCurr->M_BUFS( 0, PIC_TRUE_ORIGINAL_INPUT ).getBuf( COMPONENT_Cr ).copyFrom( cPicYuvTrueOrg->getBuf( COMPONENT_Cr ) );
+
+      const ChromaFormat chromaFormatIDC = pSPS->getChromaFormatIdc();
+
+      const PPS *refPPS = m_ppsMap.getPS( 0 );
+      const Window& curScalingWindow = pPPS->getScalingWindow();
+#if JVET_Q0487_SCALING_WINDOW_ISSUES
+      int curPicWidth = pPPS->getPicWidthInLumaSamples()   - SPS::getWinUnitX( pSPS->getChromaFormatIdc() ) * ( curScalingWindow.getWindowLeftOffset() + curScalingWindow.getWindowRightOffset() );
+      int curPicHeight = pPPS->getPicHeightInLumaSamples() - SPS::getWinUnitY( pSPS->getChromaFormatIdc() ) * ( curScalingWindow.getWindowTopOffset()  + curScalingWindow.getWindowBottomOffset() );
+#else
+      int curPicWidth = pPPS->getPicWidthInLumaSamples() - curScalingWindow.getWindowLeftOffset() - curScalingWindow.getWindowRightOffset();
+      int curPicHeight = pPPS->getPicHeightInLumaSamples() - curScalingWindow.getWindowTopOffset() - curScalingWindow.getWindowBottomOffset();
+#endif
+
+      const Window& refScalingWindow = refPPS->getScalingWindow();
+#if JVET_Q0487_SCALING_WINDOW_ISSUES
+      int refPicWidth = refPPS->getPicWidthInLumaSamples()   - SPS::getWinUnitX( pSPS->getChromaFormatIdc() ) * ( refScalingWindow.getWindowLeftOffset() + refScalingWindow.getWindowRightOffset() );
+      int refPicHeight = refPPS->getPicHeightInLumaSamples() - SPS::getWinUnitY( pSPS->getChromaFormatIdc() ) * ( refScalingWindow.getWindowTopOffset()  + refScalingWindow.getWindowBottomOffset() );
+#else
+      int refPicWidth = refPPS->getPicWidthInLumaSamples() - refScalingWindow.getWindowLeftOffset() - refScalingWindow.getWindowRightOffset();
+      int refPicHeight = refPPS->getPicHeightInLumaSamples() - refScalingWindow.getWindowTopOffset() - refScalingWindow.getWindowBottomOffset();
+#endif
 
-      APS *pAPS = m_apsMap.getPS(0);
-      pcPicCurr->finalInit(*pSPS, *pPPS, *pAPS);
+      int xScale = ( ( refPicWidth << SCALE_RATIO_BITS ) + ( curPicWidth >> 1 ) ) / curPicWidth;
+      int yScale = ( ( refPicHeight << SCALE_RATIO_BITS ) + ( curPicHeight >> 1 ) ) / curPicHeight;
+      std::pair<int, int> scalingRatio = std::pair<int, int>( xScale, yScale );
+
+      Picture::rescalePicture( scalingRatio, *pcPicYuvOrg, refPPS->getScalingWindow(), pcPicCurr->getOrigBuf(), pPPS->getScalingWindow(), chromaFormatIDC, pSPS->getBitDepths(), true, true,
+        pSPS->getHorCollocatedChromaFlag(), pSPS->getVerCollocatedChromaFlag() );
+      Picture::rescalePicture( scalingRatio, *cPicYuvTrueOrg, refPPS->getScalingWindow(), pcPicCurr->getTrueOrigBuf(), pPPS->getScalingWindow(), chromaFormatIDC, pSPS->getBitDepths(), true, true,
+        pSPS->getHorCollocatedChromaFlag(), pSPS->getVerCollocatedChromaFlag() );
+    }
+    else
+    {
+      pcPicCurr->M_BUFS( 0, PIC_ORIGINAL ).swap( *pcPicYuvOrg );
+      pcPicCurr->M_BUFS( 0, PIC_TRUE_ORIGINAL ).swap( *cPicYuvTrueOrg );
     }
+    pcPicCurr->finalInit( &m_cVPS, *pSPS, *pPPS, &m_picHeader, m_apss, m_lmcsAPS, m_scalinglistAPS );
 
     pcPicCurr->poc = m_iPOCLast;
 
     // compute image characteristics
-    if ( getUseAdaptiveQP() )
+    if( getUseAdaptiveQP() )
     {
       AQpPreanalyzer::preanalyze( pcPicCurr );
     }
   }
 
-  if ((m_iNumPicRcvd == 0) || (!flush && (m_iPOCLast != 0) && (m_iNumPicRcvd != m_iGOPSize) && (m_iGOPSize != 0)))
+  if( ( m_iNumPicRcvd == 0 ) || ( !flush && ( m_iPOCLast != 0 ) && ( m_iNumPicRcvd != m_iGOPSize ) && ( m_iGOPSize != 0 ) ) )
   {
     iNumEncoded = 0;
-    return;
+    return true;
   }
 
-  if ( m_RCEnableRateControl )
+  if( m_RCEnableRateControl )
   {
     m_cRateCtrl.initRCGOP( m_iNumPicRcvd );
   }
 
+  m_picIdInGOP = 0;
+
+  return false;
+}
+
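+/**
+ - Application has picture buffer list with size of GOP + 1
+ - Picture buffer list acts as a ring buffer
+ - End of the list has the latest picture
+ - Calls compressGOP once per invocation, passing the current m_picIdInGOP
+ .
+ \param   snrCSC              colour space conversion setting, forwarded to compressGOP
+ \retval  rcListPicYuvRecOut  list of reconstruction YUV pictures
+ \retval  iNumEncoded         number of encoded pictures (only written when false is returned)
+ \return  true if m_picIdInGOP has not yet reached m_iGOPSize and m_iPOCLast is non-zero,
+          false once the rate-control GOP has been closed and the picture counters updated
+ \note    flush, pcPicYuvOrg and pcPicYuvTrueOrg are parameters of encodePrep(), not of this function
+ */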
+
+bool EncLib::encode( const InputColourSpaceConversion snrCSC, std::list<PelUnitBuf*>& rcListPicYuvRecOut, int& iNumEncoded )
+{ 
   // compress GOP
-  m_cGOPEncoder.compressGOP(m_iPOCLast, m_iNumPicRcvd, m_cListPic, rcListPicYuvRecOut,
-                            false, false, snrCSC, m_printFrameMSE
-    , false
-  );
+  m_cGOPEncoder.compressGOP( m_iPOCLast, m_iNumPicRcvd, m_cListPic, rcListPicYuvRecOut,
+    false, false, snrCSC, m_printFrameMSE, false, m_picIdInGOP ); // last argument: current picture index within the GOP
 
-  if ( m_RCEnableRateControl )
+  m_picIdInGOP++; // advance the index that the next call will hand to compressGOP
+
+  // go over all pictures in a GOP excluding the first IRAP
+  if( m_picIdInGOP != m_iGOPSize && m_iPOCLast ) // when m_iPOCLast == 0 this early return is never taken
+  {
+    return true; // GOP not finished: presumably the caller invokes encode() again — TODO confirm against caller
+  }
+
+#if JVET_O0756_CALCULATE_HDRMETRICS
+  m_metricTime = m_cGOPEncoder.getMetricTime(); // copy the metric time accumulated by the GOP encoder
+#endif
+
+  if( m_RCEnableRateControl ) // counterpart of the initRCGOP() call made in encodePrep()
   {
     m_cRateCtrl.destroyRCGOP();
   }
 
-  iNumEncoded         = m_iNumPicRcvd;
-  m_iNumPicRcvd       = 0;
+  iNumEncoded = m_iNumPicRcvd; // report the number of received pictures as encoded
+  m_iNumPicRcvd = 0; // reset the received-picture counter for the next GOP
   m_uiNumAllPicCoded += iNumEncoded;
+
+  return false; // GOP-level bookkeeping has been completed
+}
 
 /**------------------------------------------------
@@ -662,72 +750,106 @@ void separateFields(Pel* org, Pel* dstField, uint32_t stride, uint32_t width, ui
 
 }
 
-void EncLib::encode( bool flush, PelStorage* pcPicYuvOrg, PelStorage* pcPicYuvTrueOrg, const InputColourSpaceConversion snrCSC, std::list<PelUnitBuf*>& rcListPicYuvRecOut,
-                     int& iNumEncoded, bool isTff )
+bool EncLib::encodePrep( bool flush, PelStorage* pcPicYuvOrg, PelStorage* pcPicYuvTrueOrg, const InputColourSpaceConversion snrCSC, std::list<PelUnitBuf*>& rcListPicYuvRecOut,
+  int& iNumEncoded, bool isTff )
 {
   iNumEncoded = 0;
+  bool keepDoing = true;
 
-  for (int fieldNum=0; fieldNum<2; fieldNum++)
+  for( int fieldNum = 0; fieldNum < 2; fieldNum++ )
   {
-    if (pcPicYuvOrg)
+    if( pcPicYuvOrg )
     {
       /* -- field initialization -- */
-      const bool isTopField=isTff==(fieldNum==0);
+      const bool isTopField = isTff == ( fieldNum == 0 );
 
       Picture *pcField;
       xGetNewPicBuffer( rcListPicYuvRecOut, pcField, -1 );
 
-      for (uint32_t comp = 0; comp < ::getNumberValidComponents(pcPicYuvOrg->chromaFormat); comp++)
+      for( uint32_t comp = 0; comp < ::getNumberValidComponents( pcPicYuvOrg->chromaFormat ); comp++ )
       {
-        const ComponentID compID = ComponentID(comp);
+        const ComponentID compID = ComponentID( comp );
         {
           PelBuf compBuf = pcPicYuvOrg->get( compID );
           separateFields( compBuf.buf,
-                         pcField->getOrigBuf().get(compID).buf,
-                         compBuf.stride,
-                         compBuf.width,
-                         compBuf.height,
-                         isTopField);
+            pcField->getOrigBuf().get( compID ).buf,
+            compBuf.stride,
+            compBuf.width,
+            compBuf.height,
+            isTopField );
+          // to get fields of true original buffer to avoid wrong PSNR calculation in summary
+          compBuf = pcPicYuvTrueOrg->get( compID );
+          separateFields( compBuf.buf,
+            pcField->getTrueOrigBuf().get(compID).buf,
+            compBuf.stride,
+            compBuf.width,
+            compBuf.height,
+            isTopField);
         }
       }
 
       {
-        int ppsID=-1; // Use default PPS ID
-        const PPS *pPPS=(ppsID<0) ? m_ppsMap.getFirstPS() : m_ppsMap.getPS(ppsID);
-        const SPS *pSPS=m_spsMap.getPS(pPPS->getSPSId());
-
-        APS *pAPS = m_apsMap.getPS(0);
-        pcField->finalInit(*pSPS, *pPPS, *pAPS);
+        int ppsID = -1; // Use default PPS ID
+        const PPS *pPPS = ( ppsID < 0 ) ? m_ppsMap.getFirstPS() : m_ppsMap.getPS( ppsID );
+        const SPS *pSPS = m_spsMap.getPS( pPPS->getSPSId() );
+        pcField->finalInit( &m_cVPS, *pSPS, *pPPS, &m_picHeader, m_apss, m_lmcsAPS, m_scalinglistAPS );
       }
 
       pcField->poc = m_iPOCLast;
       pcField->reconstructed = false;
 
-      pcField->setBorderExtension(false);// where is this normally?
+      pcField->setBorderExtension( false );// where is this normally?
 
       pcField->topField = isTopField;                  // interlaced requirement
 
       // compute image characteristics
-      if ( getUseAdaptiveQP() )
+      if( getUseAdaptiveQP() )
       {
         AQpPreanalyzer::preanalyze( pcField );
       }
     }
 
-    if ( m_iNumPicRcvd && ((flush&&fieldNum==1) || (m_iPOCLast/2)==0 || m_iNumPicRcvd==m_iGOPSize ) )
-    {
-      // compress GOP
-      m_cGOPEncoder.compressGOP(m_iPOCLast, m_iNumPicRcvd, m_cListPic, rcListPicYuvRecOut, true, isTff, snrCSC, m_printFrameMSE
-                              , false
-      );
-
-      iNumEncoded += m_iNumPicRcvd;
-      m_uiNumAllPicCoded += m_iNumPicRcvd;
-      m_iNumPicRcvd = 0;
-    }
   }
+
+  if( m_iNumPicRcvd && ( flush || m_iPOCLast == 1 || m_iNumPicRcvd == m_iGOPSize ) )
+  {
+    m_picIdInGOP = 0;
+    m_iPOCLast -= 2;
+    keepDoing = false;
+  }
+
+  return keepDoing;
 }
 
+bool EncLib::encode( const InputColourSpaceConversion snrCSC, std::list<PelUnitBuf*>& rcListPicYuvRecOut, int& iNumEncoded, bool isTff )
+{
+  iNumEncoded = 0;
+
+  for( int fieldNum = 0; fieldNum < 2; fieldNum++ )
+  {
+    m_iPOCLast = ( m_iNumPicRcvd == m_iGOPSize ) ? m_uiNumAllPicCoded + m_iNumPicRcvd - 1 : m_iPOCLast + 1;
+
+    // compress GOP
+    m_cGOPEncoder.compressGOP( m_iPOCLast, m_iPOCLast < 2 ? m_iPOCLast + 1 : m_iNumPicRcvd, m_cListPic, rcListPicYuvRecOut, true, isTff, snrCSC, m_printFrameMSE, false, m_picIdInGOP );
+#if JVET_O0756_CALCULATE_HDRMETRICS
+    m_metricTime = m_cGOPEncoder.getMetricTime();
+#endif
+
+    m_picIdInGOP++;
+  }
+   
+  // go over all pictures in a GOP excluding first top field and first bottom field
+  if( m_picIdInGOP != m_iGOPSize && m_iPOCLast > 1 )
+  {
+    return true;
+  }
+
+  iNumEncoded += m_iNumPicRcvd;
+  m_uiNumAllPicCoded += m_iNumPicRcvd;
+  m_iNumPicRcvd = 0;
+  
+  return false;
+}
 
 // ====================================================================================================================
 // Protected member functions
@@ -742,7 +864,7 @@ void EncLib::encode( bool flush, PelStorage* pcPicYuvOrg, PelStorage* pcPicYuvTr
  */
 void EncLib::xGetNewPicBuffer ( std::list<PelUnitBuf*>& rcListPicYuvRecOut, Picture*& rpcPic, int ppsId )
 {
-  // rotate he output buffer
+  // rotate the output buffer
   rcListPicYuvRecOut.push_back( rcListPicYuvRecOut.front() ); rcListPicYuvRecOut.pop_front();
 
   rpcPic=0;
@@ -759,23 +881,27 @@ void EncLib::xGetNewPicBuffer ( std::list<PelUnitBuf*>& rcListPicYuvRecOut, Pict
   Slice::sortPicList(m_cListPic);
 
   // use an entry in the buffered list if the maximum number that need buffering has been reached:
-  if (m_cListPic.size() >= (uint32_t)(m_iGOPSize + getMaxDecPicBuffering(MAX_TLAYER-1) + 2) )
+  if( m_cListPic.size() >= (uint32_t)( m_iGOPSize + getMaxDecPicBuffering( MAX_TLAYER - 1 ) + 2 ) )
   {
-    PicList::iterator iterPic  = m_cListPic.begin();
+    PicList::iterator iterPic = m_cListPic.begin();
     int iSize = int( m_cListPic.size() );
-    for ( int i = 0; i < iSize; i++ )
+    for( int i = 0; i < iSize; i++ )
     {
       rpcPic = *iterPic;
-      if( ! rpcPic->referenced )
+      if( !rpcPic->referenced && rpcPic->layerId == m_layerId )
       {
         break;
       }
+      else
+      {
+        rpcPic = nullptr;
+      }
       iterPic++;
     }
 
     // If PPS ID is the same, we will assume that it has not changed since it was last used
     // and return the old object.
-    if (pps.getPPSId() != rpcPic->cs->pps->getPPSId())
+    if( rpcPic && pps.getPPSId() != rpcPic->cs->pps->getPPSId() )
     {
       // the IDs differ - free up an entry in the list, and then create a new one, as with the case where the max buffering state has not been reached.
       rpcPic->destroy();
@@ -788,15 +914,19 @@ void EncLib::xGetNewPicBuffer ( std::list<PelUnitBuf*>& rcListPicYuvRecOut, Pict
   if (rpcPic==0)
   {
     rpcPic = new Picture;
-
-    rpcPic->create( sps.getChromaFormatIdc(), Size( sps.getPicWidthInLumaSamples(), sps.getPicHeightInLumaSamples()), sps.getMaxCUWidth(), sps.getMaxCUWidth()+16, false );
+    rpcPic->create( sps.getChromaFormatIdc(), Size( pps.getPicWidthInLumaSamples(), pps.getPicHeightInLumaSamples() ), sps.getMaxCUWidth(), sps.getMaxCUWidth() + 16, false, m_layerId );
+    if( m_rprEnabled )
+    {
+      rpcPic->M_BUFS( 0, PIC_ORIGINAL_INPUT ).create( sps.getChromaFormatIdc(), Area( Position(), Size( sps.getMaxPicWidthInLumaSamples(), sps.getMaxPicHeightInLumaSamples() ) ) );
+      rpcPic->M_BUFS( 0, PIC_TRUE_ORIGINAL_INPUT ).create( sps.getChromaFormatIdc(), Area( Position(), Size( sps.getMaxPicWidthInLumaSamples(), sps.getMaxPicHeightInLumaSamples() ) ) );
+    }
     if ( getUseAdaptiveQP() )
     {
-      const uint32_t iMaxDQPLayer = pps.getCuQpDeltaSubdiv()/2+1;
+      const uint32_t iMaxDQPLayer = m_picHeader.getCuQpDeltaSubdivIntra()/2+1;
       rpcPic->aqlayer.resize( iMaxDQPLayer );
       for (uint32_t d = 0; d < iMaxDQPLayer; d++)
       {
-        rpcPic->aqlayer[d] = new AQpLayer( sps.getPicWidthInLumaSamples(), sps.getPicHeightInLumaSamples(), sps.getMaxCUWidth()>>d, sps.getMaxCUHeight()>>d );
+        rpcPic->aqlayer[d] = new AQpLayer( pps.getPicWidthInLumaSamples(), pps.getPicHeightInLumaSamples(), sps.getMaxCUWidth() >> d, sps.getMaxCUHeight() >> d );
       }
     }
 
@@ -812,81 +942,27 @@ void EncLib::xGetNewPicBuffer ( std::list<PelUnitBuf*>& rcListPicYuvRecOut, Pict
   m_iNumPicRcvd++;
 }
 
-
-#if HEVC_VPS
-void EncLib::xInitVPS(VPS &vps, const SPS &sps)
+void EncLib::xInitVPS(VPS& vps, const SPS& sps)
 {
   // The SPS must have already been set up.
   // set the VPS profile information.
-  *vps.getPTL() = *sps.getPTL();
-  vps.setMaxOpSets(1);
-  vps.getTimingInfo()->setTimingInfoPresentFlag       ( false );
-  vps.setNumHrdParameters( 0 );
+  vps.setMaxSubLayers(sps.getMaxTLayers());
+}
 
-  vps.createHrdParamBuffer();
-  for( uint32_t i = 0; i < vps.getNumHrdParameters(); i ++ )
-  {
-    vps.setHrdOpSetIdx( 0, i );
-    vps.setCprmsPresentFlag( false, i );
-    // Set up HrdParameters here.
-  }
+void EncLib::xInitDPS(DPS &dps, const SPS &sps, const int dpsId)
+{
+  // The SPS must have already been set up.
+  // set the DPS profile information.
+  dps.setDecodingParameterSetId(dpsId);
+  dps.setMaxSubLayersMinus1(sps.getMaxTLayers()-1);
+  std::vector<ProfileTierLevel> ptls;
+  ptls.resize(1);
+  ptls[0] = *sps.getProfileTierLevel();
+  dps.setProfileTierLevel(ptls);
 }
-#endif
 
-void EncLib::xInitSPS(SPS &sps)
+void EncLib::xInitSPS( SPS& sps, VPS& vps )
 {
-#if !JVET_M0101_HLS
-  sps.setIntraOnlyConstraintFlag(m_bIntraOnlyConstraintFlag);
-  sps.setMaxBitDepthConstraintIdc(m_maxBitDepthConstraintIdc);
-  sps.setMaxChromaFormatConstraintIdc(m_maxChromaFormatConstraintIdc);
-  sps.setFrameConstraintFlag(m_frameOnlyConstraintFlag);
-  sps.setNoQtbttDualTreeIntraConstraintFlag(m_bNoQtbttDualTreeIntraConstraintFlag);
-  sps.setNoSaoConstraintFlag(m_bNoSaoConstraintFlag);
-  sps.setNoAlfConstraintFlag(m_bNoAlfConstraintFlag);
-  sps.setNoPcmConstraintFlag(m_bNoPcmConstraintFlag);
-  sps.setNoRefWraparoundConstraintFlag(m_bNoRefWraparoundConstraintFlag);
-  sps.setNoTemporalMvpConstraintFlag(m_bNoTemporalMvpConstraintFlag);
-  sps.setNoSbtmvpConstraintFlag(m_bNoSbtmvpConstraintFlag);
-  sps.setNoAmvrConstraintFlag(m_bNoAmvrConstraintFlag);
-  sps.setNoBdofConstraintFlag(m_bNoBdofConstraintFlag);
-  sps.setNoCclmConstraintFlag(m_bNoCclmConstraintFlag);
-  sps.setNoMtsConstraintFlag(m_bNoMtsConstraintFlag);
-  sps.setNoAffineMotionConstraintFlag(m_bNoAffineMotionConstraintFlag);
-  sps.setNoGbiConstraintFlag(m_bNoGbiConstraintFlag);
-  sps.setNoMhIntraConstraintFlag(m_bNoMhIntraConstraintFlag);
-  sps.setNoTriangleConstraintFlag(m_bNoTriangleConstraintFlag);
-  sps.setNoLadfConstraintFlag(m_bNoLadfConstraintFlag);
-  sps.setNoCurrPicRefConstraintFlag(m_bNoCurrPicRefConstraintFlag);
-  sps.setNoQpDeltaConstraintFlag(m_bNoQpDeltaConstraintFlag);
-  sps.setNoDepQuantConstraintFlag(m_bNoDepQuantConstraintFlag);
-  sps.setNoSignDataHidingConstraintFlag(m_bNoSignDataHidingConstraintFlag);
-
-  ProfileTierLevel& profileTierLevel = *sps.getPTL()->getGeneralPTL();
-  profileTierLevel.setLevelIdc                    (m_level);
-  profileTierLevel.setTierFlag                    (m_levelTier);
-  profileTierLevel.setProfileIdc                  (m_profile);
-  profileTierLevel.setProfileCompatibilityFlag    (m_profile, 1);
-  profileTierLevel.setProgressiveSourceFlag       (m_progressiveSourceFlag);
-  profileTierLevel.setInterlacedSourceFlag        (m_interlacedSourceFlag);
-  profileTierLevel.setNonPackedConstraintFlag     (m_nonPackedConstraintFlag);
-  profileTierLevel.setFrameOnlyConstraintFlag     (m_frameOnlyConstraintFlag);
-  profileTierLevel.setBitDepthConstraint          (m_bitDepthConstraintValue);
-  profileTierLevel.setChromaFormatConstraint      (m_chromaFormatConstraintValue);
-  profileTierLevel.setIntraConstraintFlag         (m_intraConstraintFlag);
-  profileTierLevel.setOnePictureOnlyConstraintFlag(m_onePictureOnlyConstraintFlag);
-  profileTierLevel.setLowerBitRateConstraintFlag  (m_lowerBitRateConstraintFlag);
-
-  if ((m_profile == Profile::MAIN10) && (m_bitDepth[CHANNEL_TYPE_LUMA] == 8) && (m_bitDepth[CHANNEL_TYPE_CHROMA] == 8))
-  {
-    /* The above constraint is equal to Profile::MAIN */
-    profileTierLevel.setProfileCompatibilityFlag(Profile::MAIN, 1);
-  }
-  if (m_profile == Profile::MAIN)
-  {
-    /* A Profile::MAIN10 decoder can always decode Profile::MAIN */
-    profileTierLevel.setProfileCompatibilityFlag( Profile::MAIN10, 1 );
-  }
-#else
   ProfileTierLevel* profileTierLevel = sps.getProfileTierLevel();
   ConstraintInfo* cinfo = profileTierLevel->getConstraintInfo();
   cinfo->setProgressiveSourceFlag       (m_progressiveSourceFlag);
@@ -897,38 +973,54 @@ void EncLib::xInitSPS(SPS &sps)
   cinfo->setMaxBitDepthConstraintIdc    (m_maxBitDepthConstraintIdc);
   cinfo->setMaxChromaFormatConstraintIdc((ChromaFormat)m_maxChromaFormatConstraintIdc);
   cinfo->setNoQtbttDualTreeIntraConstraintFlag(m_bNoQtbttDualTreeIntraConstraintFlag);
+  cinfo->setNoPartitionConstraintsOverrideConstraintFlag(m_noPartitionConstraintsOverrideConstraintFlag);
   cinfo->setNoSaoConstraintFlag(m_bNoSaoConstraintFlag);
   cinfo->setNoAlfConstraintFlag(m_bNoAlfConstraintFlag);
-  cinfo->setNoPcmConstraintFlag(m_bNoPcmConstraintFlag);
   cinfo->setNoRefWraparoundConstraintFlag(m_bNoRefWraparoundConstraintFlag);
   cinfo->setNoTemporalMvpConstraintFlag(m_bNoTemporalMvpConstraintFlag);
   cinfo->setNoSbtmvpConstraintFlag(m_bNoSbtmvpConstraintFlag);
   cinfo->setNoAmvrConstraintFlag(m_bNoAmvrConstraintFlag);
   cinfo->setNoBdofConstraintFlag(m_bNoBdofConstraintFlag);
+  cinfo->setNoDmvrConstraintFlag(m_noDmvrConstraintFlag);
   cinfo->setNoCclmConstraintFlag(m_bNoCclmConstraintFlag);
   cinfo->setNoMtsConstraintFlag(m_bNoMtsConstraintFlag);
+  cinfo->setNoSbtConstraintFlag(m_noSbtConstraintFlag);
   cinfo->setNoAffineMotionConstraintFlag(m_bNoAffineMotionConstraintFlag);
-  cinfo->setNoGbiConstraintFlag(m_bNoGbiConstraintFlag);
-  cinfo->setNoMhIntraConstraintFlag(m_bNoMhIntraConstraintFlag);
+  cinfo->setNoBcwConstraintFlag(m_bNoBcwConstraintFlag);
+  cinfo->setNoIbcConstraintFlag(m_noIbcConstraintFlag);
+  cinfo->setNoCiipConstraintFlag(m_bNoCiipConstraintFlag);
+  cinfo->setNoFPelMmvdConstraintFlag(m_noFPelMmvdConstraintFlag);
   cinfo->setNoTriangleConstraintFlag(m_bNoTriangleConstraintFlag);
   cinfo->setNoLadfConstraintFlag(m_bNoLadfConstraintFlag);
-  cinfo->setNoCurrPicRefConstraintFlag(m_bNoCurrPicRefConstraintFlag);
+  cinfo->setNoTransformSkipConstraintFlag(m_noTransformSkipConstraintFlag);
+  cinfo->setNoBDPCMConstraintFlag(m_noBDPCMConstraintFlag);
+  cinfo->setNoJointCbCrConstraintFlag(m_noJointCbCrConstraintFlag);
   cinfo->setNoQpDeltaConstraintFlag(m_bNoQpDeltaConstraintFlag);
   cinfo->setNoDepQuantConstraintFlag(m_bNoDepQuantConstraintFlag);
   cinfo->setNoSignDataHidingConstraintFlag(m_bNoSignDataHidingConstraintFlag);
+  cinfo->setNoTrailConstraintFlag(m_noTrailConstraintFlag);
+  cinfo->setNoStsaConstraintFlag(m_noStsaConstraintFlag);
+  cinfo->setNoRaslConstraintFlag(m_noRaslConstraintFlag);
+  cinfo->setNoRadlConstraintFlag(m_noRadlConstraintFlag);
+  cinfo->setNoIdrConstraintFlag(m_noIdrConstraintFlag);
+  cinfo->setNoCraConstraintFlag(m_noCraConstraintFlag);
+  cinfo->setNoGdrConstraintFlag(m_noGdrConstraintFlag);
+  cinfo->setNoApsConstraintFlag(m_noApsConstraintFlag);
 
   profileTierLevel->setLevelIdc                    (m_level);
   profileTierLevel->setTierFlag                    (m_levelTier);
   profileTierLevel->setProfileIdc                  (m_profile);
-
-#endif
+  profileTierLevel->setNumSubProfile(m_numSubProfile);
+  for (int k = 0; k < m_numSubProfile; k++)
+  {
+    profileTierLevel->setSubProfileIdc(k, m_subProfile[k]);
+  }
   /* XXX: should Main be marked as compatible with still picture? */
   /* XXX: may be a good idea to refactor the above into a function
    * that chooses the actual compatibility based upon options */
-
-  sps.setPicWidthInLumaSamples  ( m_iSourceWidth      );
-  sps.setPicHeightInLumaSamples ( m_iSourceHeight     );
-  sps.setConformanceWindow      ( m_conformanceWindow );
+  sps.setVPSId(m_cVPS.getVPSId());
+  sps.setMaxPicWidthInLumaSamples( m_iSourceWidth );
+  sps.setMaxPicHeightInLumaSamples( m_iSourceHeight );
   sps.setMaxCUWidth             ( m_maxCUWidth        );
   sps.setMaxCUHeight            ( m_maxCUHeight       );
   sps.setMaxCodingDepth         ( m_maxTotalCUDepth   );
@@ -938,24 +1030,45 @@ void EncLib::xInitSPS(SPS &sps)
   sps.setCTUSize                             ( m_CTUSize );
   sps.setSplitConsOverrideEnabledFlag        ( m_useSplitConsOverride );
   sps.setMinQTSizes                          ( m_uiMinQT );
-  sps.setMaxBTDepth                          ( m_uiMaxBTDepth, m_uiMaxBTDepthI, m_uiMaxBTDepthIChroma );
+  sps.setMaxMTTHierarchyDepth                ( m_uiMaxMTTHierarchyDepth, m_uiMaxMTTHierarchyDepthI, m_uiMaxMTTHierarchyDepthIChroma );
+  unsigned maxBtSize[3], maxTtSize[3];
+  memcpy(maxBtSize, m_uiMinQT, sizeof(maxBtSize));
+  memcpy(maxTtSize, m_uiMinQT, sizeof(maxTtSize));
+  if (m_uiMaxMTTHierarchyDepth)
+  {
+    maxBtSize[1] = std::min(m_CTUSize, (unsigned)MAX_BT_SIZE_INTER);
+    maxTtSize[1] = std::min(m_CTUSize, (unsigned)MAX_TT_SIZE_INTER);
+  }
+  if (m_uiMaxMTTHierarchyDepthI)
+  {
+    maxBtSize[0] = std::min(m_CTUSize, (unsigned)MAX_BT_SIZE);
+    maxTtSize[0] = std::min(m_CTUSize, (unsigned)MAX_TT_SIZE);
+  }
+  if (m_uiMaxMTTHierarchyDepthIChroma)
+  {
+    maxBtSize[2] = std::min(m_CTUSize, (unsigned)MAX_BT_SIZE_C);
+    maxTtSize[2] = std::min(m_CTUSize, (unsigned)MAX_TT_SIZE_C);
+  }
+  sps.setMaxBTSize                           ( maxBtSize[1], maxBtSize[0], maxBtSize[2] );
+  sps.setMaxTTSize                           ( maxTtSize[1], maxTtSize[0], maxTtSize[2] );
+  sps.setIDRRefParamListPresent              ( m_idrRefParamList );
   sps.setUseDualITree                        ( m_dualITree );
+  sps.setUseLFNST                            ( m_LFNST );
   sps.setSBTMVPEnabledFlag                  ( m_SubPuMvpMode );
   sps.setAMVREnabledFlag                ( m_ImvMode != IMV_OFF );
   sps.setBDOFEnabledFlag                    ( m_BIO );
   sps.setUseAffine             ( m_Affine );
   sps.setUseAffineType         ( m_AffineType );
+  sps.setUsePROF               ( m_PROF );
   sps.setUseLMChroma           ( m_LMChroma ? true : false );
-  sps.setCclmCollocatedChromaFlag( m_cclmCollocatedChromaFlag );
+  sps.setHorCollocatedChromaFlag( m_horCollocatedChromaFlag );
+  sps.setVerCollocatedChromaFlag( m_verCollocatedChromaFlag );
   sps.setUseMTS                ( m_IntraMTS || m_InterMTS || m_ImplicitMTS );
   sps.setUseIntraMTS           ( m_IntraMTS );
   sps.setUseInterMTS           ( m_InterMTS );
   sps.setUseSBT                             ( m_SBT );
-  if( sps.getUseSBT() )
-  {
-    sps.setMaxSbtSize                       ( m_iSourceWidth >= 1920 ? 64 : 32 );
-  }
-  sps.setUseGBi                ( m_GBi );
+  sps.setUseSMVD                ( m_SMVD );
+  sps.setUseBcw                ( m_bcw );
 #if LUMA_ADAPTIVE_DEBLOCKING_FILTER_QP_OFFSET
   sps.setLadfEnabled           ( m_LadfEnabled );
   if ( m_LadfEnabled )
@@ -970,17 +1083,25 @@ void EncLib::xInitSPS(SPS &sps)
   }
 #endif
 
-  sps.setUseMHIntra            ( m_MHIntra );
+  sps.setUseCiip            ( m_ciip );
   sps.setUseTriangle           ( m_Triangle );
-  sps.setDisFracMmvdEnabledFlag             ( m_allowDisFracMMVD );
+  sps.setUseMMVD               ( m_MMVD );
+  sps.setFpelMmvdEnabledFlag   (( m_MMVD ) ? m_allowDisFracMMVD : false);
+  sps.setBdofControlPresentFlag(m_BIO);
+  sps.setDmvrControlPresentFlag(m_DMVR);
+  sps.setProfControlPresentFlag(m_PROF);
   sps.setAffineAmvrEnabledFlag              ( m_AffineAmvr );
   sps.setUseDMVR                            ( m_DMVR );
-
+  sps.setUseColorTrans(m_useColorTrans);
+  sps.setPLTMode                            ( m_PLTMode);
   sps.setIBCFlag                            ( m_IBCMode);
   sps.setWrapAroundEnabledFlag                      ( m_wrapAround );
   sps.setWrapAroundOffset                   ( m_wrapAroundOffset );
   // ADD_NEW_TOOL : (encoder lib) set tool enabling flags and associated parameters here
-  sps.setUseReshaper                        ( m_lumaReshapeEnable );
+  sps.setUseISP                             ( m_ISP );
+  sps.setUseLmcs                            ( m_lmcsEnabled );
+  sps.setUseMRL                ( m_MRL );
+  sps.setUseMIP                ( m_MIP );
   int minCUSize =  sps.getMaxCUWidth() >> sps.getLog2DiffMaxMinCodingBlockSize();
   int log2MinCUSize = 0;
   while(minCUSize > 1)
@@ -990,26 +1111,30 @@ void EncLib::xInitSPS(SPS &sps)
   }
 
   sps.setLog2MinCodingBlockSize(log2MinCUSize);
+  CHECK(log2MinCUSize > std::min(6, floorLog2(sps.getMaxCUWidth())), "log2_min_luma_coding_block_size_minus2 shall be in the range of 0 to min (4, log2_ctu_size - 2)");
+  CHECK(m_uiMaxMTTHierarchyDepth > 2 * (floorLog2(sps.getCTUSize()) - sps.getLog2MinCodingBlockSize()), "sps_max_mtt_hierarchy_depth_inter_slice shall be in the range 0 to 2*(ctbLog2SizeY - log2MinCUSize)");
+  CHECK(m_uiMaxMTTHierarchyDepthI > 2 * (floorLog2(sps.getCTUSize()) - sps.getLog2MinCodingBlockSize()), "sps_max_mtt_hierarchy_depth_intra_slice_luma shall be in the range 0 to 2*(ctbLog2SizeY - log2MinCUSize)");
+  CHECK(m_uiMaxMTTHierarchyDepthIChroma > 2 * (floorLog2(sps.getCTUSize()) - sps.getLog2MinCodingBlockSize()), "sps_max_mtt_hierarchy_depth_intra_slice_chroma shall be in the range 0 to 2*(ctbLog2SizeY - log2MinCUSize)");
 
-  sps.setPCMLog2MinSize (m_uiPCMLog2MinSize);
-  sps.setPCMEnabledFlag        ( m_usePCM           );
-  sps.setPCMLog2MaxSize( m_pcmLog2MaxSize  );
+  sps.setTransformSkipEnabledFlag(m_useTransformSkip);
+  sps.setBDPCMEnabled(m_useBDPCM);
 
   sps.setSPSTemporalMVPEnabledFlag((getTMVPModeId() == 2 || getTMVPModeId() == 1));
 
-#if MAX_TB_SIZE_SIGNALLING
   sps.setLog2MaxTbSize   ( m_log2MaxTbSize );
-#endif
 
   for (uint32_t channelType = 0; channelType < MAX_NUM_CHANNEL_TYPE; channelType++)
   {
     sps.setBitDepth      (ChannelType(channelType), m_bitDepth[channelType] );
     sps.setQpBDOffset  (ChannelType(channelType), (6 * (m_bitDepth[channelType] - 8)));
-    sps.setPCMBitDepth (ChannelType(channelType), m_PCMBitDepth[channelType]         );
+    sps.setMinQpPrimeTsMinus4(ChannelType(channelType), (6 * (m_bitDepth[channelType] - m_inputBitDepth[channelType])));
   }
 
-  sps.setSAOEnabledFlag( m_bUseSAO );
+  sps.setUseWP( m_useWeightedPred );
+  sps.setUseWPBiPred( m_useWeightedBiPred );
 
+  sps.setSAOEnabledFlag( m_bUseSAO );
+  sps.setJointCbCrEnabledFlag( m_JointCbCrMode );
   sps.setMaxTLayers( m_maxTempLayer );
   sps.setTemporalIdNestingFlag( ( m_maxTempLayer == 1 ) ? true : false );
 
@@ -1019,13 +1144,7 @@ void EncLib::xInitSPS(SPS &sps)
     sps.setNumReorderPics(m_numReorderPics[i], i);
   }
 
-  sps.setPCMFilterDisableFlag  ( m_bPCMFilterDisableFlag );
-#if HEVC_USE_SCALING_LISTS
   sps.setScalingListFlag ( (m_useScalingListId == SCALING_LIST_OFF) ? 0 : 1 );
-#endif
-#if HEVC_USE_INTRA_SMOOTHING_T32 || HEVC_USE_INTRA_SMOOTHING_T64
-  sps.setUseStrongIntraSmoothing( m_useStrongIntraSmoothing );
-#endif
   sps.setALFEnabledFlag( m_alf );
   sps.setVuiParametersPresentFlag(getVuiParametersPresentFlag());
 
@@ -1033,38 +1152,22 @@ void EncLib::xInitSPS(SPS &sps)
   {
     VUI* pcVUI = sps.getVuiParameters();
     pcVUI->setAspectRatioInfoPresentFlag(getAspectRatioInfoPresentFlag());
+    pcVUI->setAspectRatioConstantFlag(!getSampleAspectRatioInfoSEIEnabled());
     pcVUI->setAspectRatioIdc(getAspectRatioIdc());
     pcVUI->setSarWidth(getSarWidth());
     pcVUI->setSarHeight(getSarHeight());
-    pcVUI->setOverscanInfoPresentFlag(getOverscanInfoPresentFlag());
-    pcVUI->setOverscanAppropriateFlag(getOverscanAppropriateFlag());
-    pcVUI->setVideoSignalTypePresentFlag(getVideoSignalTypePresentFlag());
-    pcVUI->setVideoFormat(getVideoFormat());
-    pcVUI->setVideoFullRangeFlag(getVideoFullRangeFlag());
     pcVUI->setColourDescriptionPresentFlag(getColourDescriptionPresentFlag());
     pcVUI->setColourPrimaries(getColourPrimaries());
     pcVUI->setTransferCharacteristics(getTransferCharacteristics());
     pcVUI->setMatrixCoefficients(getMatrixCoefficients());
+    pcVUI->setFieldSeqFlag(false);
     pcVUI->setChromaLocInfoPresentFlag(getChromaLocInfoPresentFlag());
     pcVUI->setChromaSampleLocTypeTopField(getChromaSampleLocTypeTopField());
     pcVUI->setChromaSampleLocTypeBottomField(getChromaSampleLocTypeBottomField());
-    pcVUI->setNeutralChromaIndicationFlag(getNeutralChromaIndicationFlag());
-    pcVUI->setDefaultDisplayWindow(getDefaultDisplayWindow());
-    pcVUI->setFrameFieldInfoPresentFlag(getFrameFieldInfoPresentFlag());
-    pcVUI->setFieldSeqFlag(false);
-    pcVUI->setHrdParametersPresentFlag(false);
-    pcVUI->getTimingInfo()->setPocProportionalToTimingFlag(getPocProportionalToTimingFlag());
-    pcVUI->getTimingInfo()->setNumTicksPocDiffOneMinus1   (getNumTicksPocDiffOneMinus1()   );
-    pcVUI->setBitstreamRestrictionFlag(getBitstreamRestrictionFlag());
-#if HEVC_TILES_WPP
-    pcVUI->setTilesFixedStructureFlag(getTilesFixedStructureFlag());
-#endif
-    pcVUI->setMotionVectorsOverPicBoundariesFlag(getMotionVectorsOverPicBoundariesFlag());
-    pcVUI->setMinSpatialSegmentationIdc(getMinSpatialSegmentationIdc());
-    pcVUI->setMaxBytesPerPicDenom(getMaxBytesPerPicDenom());
-    pcVUI->setMaxBitsPerMinCuDenom(getMaxBitsPerMinCuDenom());
-    pcVUI->setLog2MaxMvLengthHorizontal(getLog2MaxMvLengthHorizontal());
-    pcVUI->setLog2MaxMvLengthVertical(getLog2MaxMvLengthVertical());
+    pcVUI->setChromaSampleLocType(getChromaSampleLocType());
+    pcVUI->setOverscanInfoPresentFlag(getOverscanInfoPresentFlag());
+    pcVUI->setOverscanAppropriateFlag(getOverscanAppropriateFlag());
+    pcVUI->setVideoFullRangeFlag(getVideoFullRangeFlag());
   }
 
   sps.setNumLongTermRefPicSPS(NUM_LONG_TERM_REF_PIC_SPS);
@@ -1074,6 +1177,10 @@ void EncLib::xInitSPS(SPS &sps)
     sps.setLtRefPicPocLsbSps(k, 0);
     sps.setUsedByCurrPicLtSPSFlag(k, 0);
   }
+  int numQpTables = m_chromaQpMappingTableParams.getSameCQPTableForAllChromaFlag() ? 1 : (sps.getJointCbCrEnabledFlag() ? 3 : 2);
+  m_chromaQpMappingTableParams.setNumQpTables(numQpTables);
+  sps.setChromaQpMappingTableFromParams(m_chromaQpMappingTableParams, sps.getQpBDOffset(CHANNEL_TYPE_CHROMA));
+  sps.derivedChromaQPMappingTables();
 
 #if U0132_TARGET_BITS_SATURATION
   if( getPictureTimingSEIEnabled() || getDecodingUnitInfoSEIEnabled() || getCpbSaturationEnabled() )
@@ -1085,7 +1192,7 @@ void EncLib::xInitSPS(SPS &sps)
   }
   if( getBufferingPeriodSEIEnabled() || getPictureTimingSEIEnabled() || getDecodingUnitInfoSEIEnabled() )
   {
-    sps.getVuiParameters()->setHrdParametersPresentFlag( true );
+    sps.setHrdParametersPresentFlag( true );
   }
 
   // Set up SPS range extension settings
@@ -1100,182 +1207,70 @@ void EncLib::xInitSPS(SPS &sps)
   sps.getSpsRangeExtension().setHighPrecisionOffsetsEnabledFlag(m_highPrecisionOffsetsEnabledFlag);
   sps.getSpsRangeExtension().setPersistentRiceAdaptationEnabledFlag(m_persistentRiceAdaptationEnabledFlag);
   sps.getSpsRangeExtension().setCabacBypassAlignmentEnabledFlag(m_cabacBypassAlignmentEnabledFlag);
-}
-
-#if U0132_TARGET_BITS_SATURATION
-// calculate scale value of bitrate and initial delay
-int calcScale(int x)
-{
-  if (x==0)
-  {
-    return 0;
-  }
-  uint32_t iMask = 0xffffffff;
-  int ScaleValue = 32;
 
-  while ((x&iMask) != 0)
+  if( m_uiIntraPeriod < 0 )
   {
-    ScaleValue--;
-    iMask = (iMask >> 1);
+    sps.setRPL1CopyFromRPL0Flag( true );
   }
 
-  return ScaleValue;
-}
-#endif
-void EncLib::xInitHrdParameters(SPS &sps)
-{
-  bool useSubCpbParams = (getSliceMode() > 0) || (getSliceSegmentMode() > 0);
-  int  bitRate         = getTargetBitrate();
-  bool isRandomAccess  = getIntraPeriod() > 0;
-# if U0132_TARGET_BITS_SATURATION
-  int cpbSize          = getCpbSize();
-  CHECK(!(cpbSize!=0), "Unspecified error");  // CPB size may not be equal to zero. ToDo: have a better default and check for level constraints
-  if( !getVuiParametersPresentFlag() && !getCpbSaturationEnabled() )
-#else
-  if( !getVuiParametersPresentFlag() )
-#endif
+  sps.setSubPicPresentFlag(m_subPicPresentFlag);
+  if (m_subPicPresentFlag) 
   {
-    return;
-  }
-
-  VUI *vui = sps.getVuiParameters();
-  HRD *hrd = vui->getHrdParameters();
-
-  TimingInfo *timingInfo = vui->getTimingInfo();
-  timingInfo->setTimingInfoPresentFlag( true );
-  switch( getFrameRate() )
-  {
-  case 24:
-    timingInfo->setNumUnitsInTick( 1125000 );    timingInfo->setTimeScale    ( 27000000 );
-    break;
-  case 25:
-    timingInfo->setNumUnitsInTick( 1080000 );    timingInfo->setTimeScale    ( 27000000 );
-    break;
-  case 30:
-    timingInfo->setNumUnitsInTick( 900900 );     timingInfo->setTimeScale    ( 27000000 );
-    break;
-  case 50:
-    timingInfo->setNumUnitsInTick( 540000 );     timingInfo->setTimeScale    ( 27000000 );
-    break;
-  case 60:
-    timingInfo->setNumUnitsInTick( 450450 );     timingInfo->setTimeScale    ( 27000000 );
-    break;
-  default:
-    timingInfo->setNumUnitsInTick( 1001 );       timingInfo->setTimeScale    ( 60000 );
-    break;
-  }
-
-  if (getTemporalSubsampleRatio()>1)
-  {
-    uint32_t temporalSubsampleRatio = getTemporalSubsampleRatio();
-    if ( double(timingInfo->getNumUnitsInTick()) * temporalSubsampleRatio > std::numeric_limits<uint32_t>::max() )
+    sps.setNumSubPics(m_numSubPics);
+    for (int i = 0; i < m_numSubPics; i++) 
     {
-      timingInfo->setTimeScale( timingInfo->getTimeScale() / temporalSubsampleRatio );
+      sps.setSubPicCtuTopLeftX(i, m_subPicCtuTopLeftX[i] );
+      sps.setSubPicCtuTopLeftY(i, m_subPicCtuTopLeftY[i]);
+      sps.setSubPicWidth(i, m_subPicWidth[i]);
+      sps.setSubPicHeight(i, m_subPicHeight[i]);
+      sps.setSubPicTreatedAsPicFlag(i, m_subPicTreatedAsPicFlag[i]);
+      sps.setLoopFilterAcrossSubpicEnabledFlag(i, m_loopFilterAcrossSubpicEnabledFlag[i]);
     }
-    else
-    {
-      timingInfo->setNumUnitsInTick( timingInfo->getNumUnitsInTick() * temporalSubsampleRatio );
-    }
-  }
-
-  bool rateCnt = ( bitRate > 0 );
-  hrd->setNalHrdParametersPresentFlag( rateCnt );
-  hrd->setVclHrdParametersPresentFlag( rateCnt );
-  hrd->setSubPicCpbParamsPresentFlag( useSubCpbParams );
-
-  if( hrd->getSubPicCpbParamsPresentFlag() )
-  {
-    hrd->setTickDivisorMinus2( 100 - 2 );                          //
-    hrd->setDuCpbRemovalDelayLengthMinus1( 7 );                    // 8-bit precision ( plus 1 for last DU in AU )
-    hrd->setSubPicCpbParamsInPicTimingSEIFlag( true );
-    hrd->setDpbOutputDelayDuLengthMinus1( 5 + 7 );                 // With sub-clock tick factor of 100, at least 7 bits to have the same value as AU dpb delay
   }
-  else
-  {
-    hrd->setSubPicCpbParamsInPicTimingSEIFlag( false );
-  }
-
-#if U0132_TARGET_BITS_SATURATION
-  if (calcScale(bitRate) <= 6)
+  sps.setSubPicIdPresentFlag(m_subPicIdPresentFlag);
+  if (m_subPicIdPresentFlag) 
   {
-    hrd->setBitRateScale(0);
-  }
-  else
+    sps.setSubPicIdSignallingPresentFlag(m_subPicIdSignallingPresentFlag);
+    if (m_subPicIdSignallingPresentFlag)
+    {
+      sps.setSubPicIdLen(m_subPicIdLen);
+      for (int i = 0; i < m_numSubPics; i++)
   {
-    hrd->setBitRateScale(calcScale(bitRate) - 6);
+        sps.setSubPicId(i, m_subPicId[i]);
+      }
+    }
   }
 
-  if (calcScale(cpbSize) <= 4)
+  sps.setLoopFilterAcrossVirtualBoundariesDisabledFlag( m_loopFilterAcrossVirtualBoundariesDisabledFlag );
+  sps.setNumVerVirtualBoundaries            ( m_numVerVirtualBoundaries );
+  sps.setNumHorVirtualBoundaries            ( m_numHorVirtualBoundaries );
+  for( unsigned int i = 0; i < m_numVerVirtualBoundaries; i++ )
   {
-    hrd->setCpbSizeScale(0);
+    sps.setVirtualBoundariesPosX            ( m_virtualBoundariesPosX[i], i );
   }
-  else
+  for( unsigned int i = 0; i < m_numHorVirtualBoundaries; i++ )
   {
-    hrd->setCpbSizeScale(calcScale(cpbSize) - 4);
+    sps.setVirtualBoundariesPosY            ( m_virtualBoundariesPosY[i], i );
   }
-#else
-  hrd->setBitRateScale( 4 );                                       // in units of 2^( 6 + 4 ) = 1,024 bps
-  hrd->setCpbSizeScale( 6 );                                       // in units of 2^( 4 + 6 ) = 1,024 bit
-#endif
 
-  hrd->setDuCpbSizeScale( 6 );                                     // in units of 2^( 4 + 6 ) = 1,024 bit
-
-  hrd->setInitialCpbRemovalDelayLengthMinus1(15);                  // assuming 0.5 sec, log2( 90,000 * 0.5 ) = 16-bit
-  if( isRandomAccess )
-  {
-    hrd->setCpbRemovalDelayLengthMinus1(5);                        // 32 = 2^5 (plus 1)
-    hrd->setDpbOutputDelayLengthMinus1 (5);                        // 32 + 3 = 2^6
-  }
-  else
+  sps.setInterLayerPresentFlag( vps.getMaxLayers() > 1 && !vps.getAllIndependentLayersFlag() );
+  for (unsigned int i = 0; i < vps.getMaxLayers(); ++i)
   {
-    hrd->setCpbRemovalDelayLengthMinus1(9);                        // max. 2^10
-    hrd->setDpbOutputDelayLengthMinus1 (9);                        // max. 2^10
+    CHECK((vps.getIndependentLayerFlag(i) == 1) && (sps.getInterLayerPresentFlag() != 0), " When vps_independent_layer_flag[GeneralLayerIdx[nuh_layer_id ]]  is equal to 1, the value of inter_layer_ref_pics_present_flag shall be equal to 0.");
   }
 
-  // Note: parameters for all temporal layers are initialized with the same values
-  int i, j;
-  uint32_t bitrateValue, cpbSizeValue;
-  uint32_t duCpbSizeValue;
-  uint32_t duBitRateValue = 0;
-
-  for( i = 0; i < MAX_TLAYER; i ++ )
-  {
-    hrd->setFixedPicRateFlag( i, 1 );
-    hrd->setPicDurationInTcMinus1( i, 0 );
-    hrd->setLowDelayHrdFlag( i, 0 );
-    hrd->setCpbCntMinus1( i, 0 );
-
-    //! \todo check for possible PTL violations
-    // BitRate[ i ] = ( bit_rate_value_minus1[ i ] + 1 ) * 2^( 6 + bit_rate_scale )
-    bitrateValue = bitRate / (1 << (6 + hrd->getBitRateScale()) );      // bitRate is in bits, so it needs to be scaled down
-    // CpbSize[ i ] = ( cpb_size_value_minus1[ i ] + 1 ) * 2^( 4 + cpb_size_scale )
-#if U0132_TARGET_BITS_SATURATION
-    cpbSizeValue = cpbSize / (1 << (4 + hrd->getCpbSizeScale()) );      // using bitRate results in 1 second CPB size
-#else
-    cpbSizeValue = bitRate / (1 << (4 + hrd->getCpbSizeScale()) );      // using bitRate results in 1 second CPB size
-#endif
+  sps.setRprEnabledFlag( m_rprEnabled || sps.getInterLayerPresentFlag() );
+}
 
+void EncLib::xInitHrdParameters(SPS &sps)
+{
+  m_encHRD.initHRDParameters((EncCfg*) this);
 
-    // DU CPB size could be smaller (i.e. bitrateValue / number of DUs), but we don't know
-    // in how many DUs the slice segment settings will result
-    duCpbSizeValue = bitrateValue;
-    duBitRateValue = cpbSizeValue;
+  HRDParameters *hrdParams = sps.getHrdParameters();
+  *hrdParams = m_encHRD.getHRDParameters();
 
-    for( j = 0; j < ( hrd->getCpbCntMinus1( i ) + 1 ); j ++ )
-    {
-      hrd->setBitRateValueMinus1( i, j, 0, ( bitrateValue - 1 ) );
-      hrd->setCpbSizeValueMinus1( i, j, 0, ( cpbSizeValue - 1 ) );
-      hrd->setDuCpbSizeValueMinus1( i, j, 0, ( duCpbSizeValue - 1 ) );
-      hrd->setDuBitRateValueMinus1( i, j, 0, ( duBitRateValue - 1 ) );
-      hrd->setCbrFlag( i, j, 0, false );
-
-      hrd->setBitRateValueMinus1( i, j, 1, ( bitrateValue - 1) );
-      hrd->setCpbSizeValueMinus1( i, j, 1, ( cpbSizeValue - 1 ) );
-      hrd->setDuCpbSizeValueMinus1( i, j, 1, ( duCpbSizeValue - 1 ) );
-      hrd->setDuBitRateValueMinus1( i, j, 1, ( duBitRateValue - 1 ) );
-      hrd->setCbrFlag( i, j, 1, false );
-    }
-  }
+  TimingInfo *timingInfo = sps.getTimingInfo();
+  *timingInfo = m_encHRD.getTimingInfo();
 }
 
 void EncLib::xInitPPS(PPS &pps, const SPS &sps)
@@ -1283,7 +1278,22 @@ void EncLib::xInitPPS(PPS &pps, const SPS &sps)
   // pps ID already initialised.
   pps.setSPSId(sps.getSPSId());
 
-  pps.setConstrainedIntraPred( m_bUseConstrainedIntraPred );
+  pps.setConstantSliceHeaderParamsEnabledFlag(getConstantSliceHeaderParamsEnabledFlag());
+  pps.setPPSDepQuantEnabledIdc(getPPSDepQuantEnabledIdc());
+  pps.setPPSRefPicListSPSIdc0(getPPSRefPicListSPSIdc0());
+  pps.setPPSRefPicListSPSIdc1(getPPSRefPicListSPSIdc1());
+  pps.setPPSMvdL1ZeroIdc(getPPSMvdL1ZeroIdc());
+  pps.setPPSCollocatedFromL0Idc(getPPSCollocatedFromL0Idc());
+  pps.setPPSSixMinusMaxNumMergeCandPlus1(getPPSSixMinusMaxNumMergeCandPlus1());
+  pps.setPPSMaxNumMergeCandMinusMaxNumTriangleCandPlus1(getPPSMaxNumMergeCandMinusMaxNumTriangleCandPlus1());
+
+  pps.setNumSubPics(sps.getNumSubPics());
+  pps.setSubPicIdSignallingPresentFlag(false);
+  pps.setSubPicIdLen(sps.getSubPicIdLen());
+  for(int picIdx=0; picIdx<pps.getNumSubPics(); picIdx++)
+  {
+    pps.setSubPicId(picIdx, sps.getSubPicId(picIdx));
+  }
   bool bUseDQP = (getCuQpDeltaSubdiv() > 0)? true : false;
 
   if((getMaxDeltaQP() != 0 )|| getUseAdaptiveQP())
@@ -1314,30 +1324,25 @@ void EncLib::xInitPPS(PPS &pps, const SPS &sps)
   if ( m_RCEnableRateControl )
   {
     pps.setUseDQP(true);
-    pps.setCuQpDeltaSubdiv( 0 );
   }
   else if(bUseDQP)
   {
     pps.setUseDQP(true);
-    pps.setCuQpDeltaSubdiv( m_cuQpDeltaSubdiv );
   }
   else
   {
     pps.setUseDQP(false);
-    pps.setCuQpDeltaSubdiv( 0 );
   }
 
   if ( m_cuChromaQpOffsetSubdiv >= 0 )
   {
-    pps.getPpsRangeExtension().setCuChromaQpOffsetSubdiv(m_cuChromaQpOffsetSubdiv);
-    pps.getPpsRangeExtension().clearChromaQpOffsetList();
-    pps.getPpsRangeExtension().setChromaQpOffsetListEntry(1, 6, 6);
+    pps.clearChromaQpOffsetList();
+    pps.setChromaQpOffsetListEntry(1, 6, 6, 6);
     /* todo, insert table entries from command line (NB, 0 should not be touched) */
   }
   else
   {
-    pps.getPpsRangeExtension().setCuChromaQpOffsetSubdiv(0);
-    pps.getPpsRangeExtension().clearChromaQpOffsetList();
+    pps.clearChromaQpOffsetList();
   }
   pps.getPpsRangeExtension().setCrossComponentPredictionEnabledFlag(m_crossComponentPredictionEnabledFlag);
   pps.getPpsRangeExtension().setLog2SaoOffsetScale(CHANNEL_TYPE_LUMA,   m_log2SaoOffsetScale[CHANNEL_TYPE_LUMA  ]);
@@ -1359,6 +1364,15 @@ void EncLib::xInitPPS(PPS &pps, const SPS &sps)
     pps.setPicInitQPMinus26( std::min( maxDQP, std::max( minDQP, baseQp ) ));
   }
 
+  if (sps.getJointCbCrEnabledFlag() == false || getChromaFormatIdc() == CHROMA_400)
+  {
+    pps.setJointCbCrQpOffsetPresentFlag(false);
+  }
+  else
+  {
+    pps.setJointCbCrQpOffsetPresentFlag(true);
+  }
+
 #if ER_CHROMA_QP_WCG_PPS
   if (getWCGChromaQPControl().isEnabled())
   {
@@ -1370,12 +1384,20 @@ void EncLib::xInitPPS(PPS &pps, const SPS &sps)
     const int crQP =(int)(dcrQP + ( dcrQP < 0 ? -0.5 : 0.5) );
     pps.setQpOffset(COMPONENT_Cb, Clip3( -12, 12, min(0, cbQP) + m_chromaCbQpOffset ));
     pps.setQpOffset(COMPONENT_Cr, Clip3( -12, 12, min(0, crQP) + m_chromaCrQpOffset));
+    if(pps.getJointCbCrQpOffsetPresentFlag())
+      pps.setQpOffset(JOINT_CbCr, Clip3(-12, 12, (min(0, cbQP) + min(0, crQP)) / 2 + m_chromaCbCrQpOffset));
+    else
+      pps.setQpOffset(JOINT_CbCr, 0);
   }
   else
   {
 #endif
   pps.setQpOffset(COMPONENT_Cb, m_chromaCbQpOffset );
   pps.setQpOffset(COMPONENT_Cr, m_chromaCrQpOffset );
+  if (pps.getJointCbCrQpOffsetPresentFlag())
+    pps.setQpOffset(JOINT_CbCr, m_chromaCbCrQpOffset);
+  else
+    pps.setQpOffset(JOINT_CbCr, 0);
 #if ER_CHROMA_QP_WCG_PPS
   }
 #endif
@@ -1407,13 +1429,53 @@ void EncLib::xInitPPS(PPS &pps, const SPS &sps)
     !pps.getSliceChromaQpFlag() && sps.getUseDualITree()
     && (getChromaFormatIdc() != CHROMA_400))
   {
-    pps.setSliceChromaQpFlag(m_chromaCbQpOffsetDualTree != 0 || m_chromaCrQpOffsetDualTree != 0);
+    pps.setSliceChromaQpFlag(m_chromaCbQpOffsetDualTree != 0 || m_chromaCrQpOffsetDualTree != 0 || m_chromaCbCrQpOffsetDualTree != 0);
   }
 
-#if HEVC_TILES_WPP
   pps.setEntropyCodingSyncEnabledFlag( m_entropyCodingSyncEnabledFlag );
-  pps.setTilesEnabledFlag( (m_iNumColumnsMinus1 > 0 || m_iNumRowsMinus1 > 0) );
-#endif
+
+  pps.setNoPicPartitionFlag( m_noPicPartitionFlag );
+  if( m_noPicPartitionFlag == false )
+  {
+    pps.setLog2CtuSize( ceilLog2( sps.getCTUSize()) );
+    pps.setNumExpTileColumns( (uint32_t) m_tileColumnWidth.size() );
+    pps.setNumExpTileRows( (uint32_t) m_tileRowHeight.size() );
+    pps.setTileColumnWidths( m_tileColumnWidth );
+    pps.setTileRowHeights( m_tileRowHeight );
+    pps.initTiles();
+    pps.setRectSliceFlag( m_rectSliceFlag );
+    if( m_rectSliceFlag ) 
+    {
+      pps.setNumSlicesInPic( m_numSlicesInPic );
+      pps.setTileIdxDeltaPresentFlag( m_tileIdxDeltaPresentFlag );
+      pps.setRectSlices( m_rectSlices );
+      pps.initRectSliceMap( );
+    }
+    else
+    {
+      pps.initRasterSliceMap( m_rasterSliceSize );
+    }
+    pps.setLoopFilterAcrossTilesEnabledFlag( m_bLFCrossTileBoundaryFlag );
+    pps.setLoopFilterAcrossSlicesEnabledFlag( m_bLFCrossSliceBoundaryFlag );
+  }
+  else
+  {
+    pps.setLog2CtuSize( ceilLog2( sps.getCTUSize()) );
+    pps.setNumExpTileColumns(1);
+    pps.setNumExpTileRows(1);
+    pps.addTileColumnWidth( pps.getPicWidthInCtu( ) );
+    pps.addTileRowHeight( pps.getPicHeightInCtu( ) );
+    pps.initTiles();
+    pps.setRectSliceFlag( 1 );
+    pps.setNumSlicesInPic( 1 );
+    pps.initRectSlices( );
+    pps.setTileIdxDeltaPresentFlag( 0 );
+    pps.setSliceTileIdx( 0, 0 );
+    pps.initRectSliceMap( );
+    pps.setLoopFilterAcrossTilesEnabledFlag( true );
+    pps.setLoopFilterAcrossSlicesEnabledFlag( true );
+  }
+
   pps.setUseWP( m_useWeightedPred );
   pps.setWPBiPred( m_useWeightedBiPred );
   pps.setOutputFlagPresentFlag( false );
@@ -1448,7 +1510,6 @@ void EncLib::xInitPPS(PPS &pps, const SPS &sps)
 
   pps.setDeblockingFilterControlPresentFlag(deblockingFilterControlPresentFlag);
 
-  pps.setLog2ParallelMergeLevelMinus2   (m_log2ParallelMergeLevelMinus2 );
   pps.setCabacInitPresentFlag(CABAC_INIT_PRESENT_FLAG);
   pps.setLoopFilterAcrossSlicesEnabledFlag( m_bLFCrossSliceBoundaryFlag );
 
@@ -1460,8 +1521,8 @@ void EncLib::xInitPPS(PPS &pps, const SPS &sps)
   }
   for( int i = 0; i < getGOPSize(); i++)
   {
-    CHECK(!(getGOPEntry(i).m_numRefPicsActive >= 0 && getGOPEntry(i).m_numRefPicsActive <= MAX_NUM_REF), "Unspecified error");
-    histogram[getGOPEntry(i).m_numRefPicsActive]++;
+    CHECK(!(getRPLEntry(0, i).m_numRefPicsActive >= 0 && getRPLEntry(0, i).m_numRefPicsActive <= MAX_NUM_REF), "Unspecified error");
+    histogram[getRPLEntry(0, i).m_numRefPicsActive]++;
   }
 
   int maxHist=-1;
@@ -1477,400 +1538,362 @@ void EncLib::xInitPPS(PPS &pps, const SPS &sps)
   CHECK(!(bestPos <= 15), "Unspecified error");
     pps.setNumRefIdxL0DefaultActive(bestPos);
   pps.setNumRefIdxL1DefaultActive(bestPos);
-  pps.setTransquantBypassEnabledFlag(getTransquantBypassEnabledFlag());
-  pps.setUseTransformSkip( m_useTransformSkip );
-  pps.getPpsRangeExtension().setLog2MaxTransformSkipBlockSize( m_log2MaxTransformSkipBlockSize  );
+  pps.setLog2MaxTransformSkipBlockSize(m_log2MaxTransformSkipBlockSize);
+  pps.setPictureHeaderExtensionPresentFlag(false);
+
+  pps.pcv = new PreCalcValues( sps, pps, true );
+  pps.setRpl1IdxPresentFlag(sps.getRPL1IdxPresentFlag());
+}
 
-#if HEVC_DEPENDENT_SLICES
-  if (m_sliceSegmentMode != NO_SLICES)
+void EncLib::xInitPicHeader(PicHeader &picHeader, const SPS &sps, const PPS &pps) // Fills picHeader from the encoder configuration and from values already stored in sps / pps.
+{
+  int i; // shared index for the copy loops near the end of the function
+  picHeader.initPicHeader();
+
+  // parameter sets
+  picHeader.setSPSId( sps.getSPSId() );
+  picHeader.setPPSId( pps.getPPSId() );  
+  
+  // merge list sizes
+  picHeader.setMaxNumMergeCand      ( getMaxNumMergeCand()       );
+  picHeader.setMaxNumAffineMergeCand( getMaxNumAffineMergeCand() );
+  picHeader.setMaxNumTriangleCand   ( getMaxNumTriangleCand()    );
+  picHeader.setMaxNumIBCMergeCand   ( getMaxNumIBCMergeCand()    );
+  
+  // copy partitioning constraints from SPS
+  picHeader.setSplitConsOverrideFlag(false); // override flag off; the SPS values are copied unchanged below
+  picHeader.setMinQTSizes( sps.getMinQTSizes() );
+  picHeader.setMaxMTTHierarchyDepths( sps.getMaxMTTHierarchyDepths() );
+  picHeader.setMaxBTSizes( sps.getMaxBTSizes() );
+  picHeader.setMaxTTSizes( sps.getMaxTTSizes() );
+
+  // quantization
+  picHeader.setDepQuantEnabledFlag( getDepQuantEnabledFlag() );
+  picHeader.setSignDataHidingEnabledFlag( getSignDataHidingEnabledFlag() );
+  
+  bool bUseDQP = (getCuQpDeltaSubdiv() > 0)? true : false; // starts true when a CU QP-delta subdivision is configured; adjusted by the checks below
+
+  if( (getMaxDeltaQP() != 0 )|| getUseAdaptiveQP() )
   {
-    pps.setDependentSliceSegmentsEnabledFlag( true );
+    bUseDQP = true;
   }
-#endif
 
-#if HEVC_TILES_WPP
-  xInitPPSforTiles(pps);
+#if SHARP_LUMA_DELTA_QP
+  if( getLumaLevelToDeltaQPMapping().isEnabled() )
+  {
+    bUseDQP = true;
+  }
+#endif
+#if ENABLE_QPA
+  if( getUsePerceptQPA() && !bUseDQP )
+  {
+    CHECK( m_cuQpDeltaSubdiv != 0, "max. delta-QP subdiv must be zero!" );
+    bUseDQP = (getBaseQP() < 38) && (getSourceWidth() > 512 || getSourceHeight() > 320);
+  }
 #endif
 
-  pps.pcv = new PreCalcValues( sps, pps, true );
+  if( m_costMode==COST_SEQUENCE_LEVEL_LOSSLESS || m_costMode==COST_LOSSLESS_CODING ) // either lossless cost mode overrides every decision above
+  {
+    bUseDQP=false;
+  }
+
+  if( m_RCEnableRateControl ) // with rate control the subdivision is forced to 0 regardless of bUseDQP
+  {
+    picHeader.setCuQpDeltaSubdivIntra( 0 );
+    picHeader.setCuQpDeltaSubdivInter( 0 );
+  }
+  else if( bUseDQP ) // same configured subdivision is used for intra and inter
+  {
+    picHeader.setCuQpDeltaSubdivIntra( m_cuQpDeltaSubdiv );
+    picHeader.setCuQpDeltaSubdivInter( m_cuQpDeltaSubdiv );
+  }
+  else
+  {
+    picHeader.setCuQpDeltaSubdivIntra( 0 );
+    picHeader.setCuQpDeltaSubdivInter( 0 );
+  }
+
+  if( m_cuChromaQpOffsetSubdiv >= 0 ) // a negative configured value falls back to 0 in the else branch
+  {
+    picHeader.setCuChromaQpOffsetSubdivIntra(m_cuChromaQpOffsetSubdiv);
+    picHeader.setCuChromaQpOffsetSubdivInter(m_cuChromaQpOffsetSubdiv);
+  }
+  else
+  {
+    picHeader.setCuChromaQpOffsetSubdivIntra(0);
+    picHeader.setCuChromaQpOffsetSubdivInter(0);
+  }
+  
+  // sub-pictures
+  picHeader.setSubPicIdSignallingPresentFlag(sps.getSubPicIdSignallingPresentFlag());
+  picHeader.setSubPicIdLen(sps.getSubPicIdLen());
+  for(i=0; i<sps.getNumSubPics(); i++) {
+    picHeader.setSubPicId(i, sps.getSubPicId(i));
+  }
+
+  // virtual boundaries
+  picHeader.setLoopFilterAcrossVirtualBoundariesDisabledFlag(sps.getLoopFilterAcrossVirtualBoundariesDisabledFlag());
+  picHeader.setNumVerVirtualBoundaries(sps.getNumVerVirtualBoundaries());
+  picHeader.setNumHorVirtualBoundaries(sps.getNumHorVirtualBoundaries());
+  for(i=0; i<3; i++) { // NOTE(review): always copies 3 positions, independent of the counts set above; presumably 3 is the storage capacity - confirm
+    picHeader.setVirtualBoundariesPosX(sps.getVirtualBoundariesPosX(i), i);
+    picHeader.setVirtualBoundariesPosY(sps.getVirtualBoundariesPosY(i), i);
+  }
+
+  // gradual decoder refresh flag
+  picHeader.setGdrPicFlag(false);
+  
+  // BDOF / DMVR / PROF
+  picHeader.setDisBdofFlag(false);
+  picHeader.setDisDmvrFlag(false);
+  picHeader.setDisProfFlag(false);
 }
 
 void EncLib::xInitAPS(APS &aps)
 {
   //Do nothing now
 }
-//Function for initializing m_RPSList, a list of ReferencePictureSet, based on the GOPEntry objects read from the config file.
-void EncLib::xInitRPS(SPS &sps, bool isFieldCoding)
+
+void EncLib::xInitRPL(SPS &sps, bool isFieldCoding) // Builds the SPS list-0 / list-1 reference picture lists from the configured RPL entries, then derives the "all active entries have the same sign" flag.
 {
-  ReferencePictureSet*      rps;
+  ReferencePictureList*      rpl; // entry currently being filled
 
-  sps.createRPSList(getGOPSize() + m_extraRPSs + 1);
-  RPSList* rpsList = sps.getRPSList();
+  int numRPLCandidates = getRPLCandidateSize(0);
+  // Allocate one additional entry for the RPL of POC1 (first bottom field), which is not specified in the cfg file
+  sps.createRPLList0(numRPLCandidates + (isFieldCoding ? 1 : 0));
+  sps.createRPLList1(numRPLCandidates + (isFieldCoding ? 1 : 0));
+  RPLList* rplList = 0;
 
-  for( int i = 0; i < getGOPSize()+m_extraRPSs; i++)
+  for (int i = 0; i < 2; i++) // i == 0 fills list 0, i == 1 fills list 1
   {
-    const GOPEntry &ge = getGOPEntry(i);
-    rps = rpsList->getReferencePictureSet(i);
-    rps->setNumberOfPictures(ge.m_numRefPics);
-    rps->setNumRefIdc(ge.m_numRefIdc);
-    int numNeg = 0;
-    int numPos = 0;
-    for( int j = 0; j < ge.m_numRefPics; j++)
+    rplList = (i == 0) ? sps.getRPLList0() : sps.getRPLList1();
+    for (int j = 0; j < numRPLCandidates; j++) // the same candidate count (taken for list 0) is used for both lists
     {
-      rps->setDeltaPOC(j,ge.m_referencePics[j]);
-      rps->setUsed(j,ge.m_usedByCurrPic[j]);
-      if(ge.m_referencePics[j]>0)
-      {
-        numPos++;
-      }
-      else
+      const RPLEntry &ge = getRPLEntry(i, j);
+      rpl = rplList->getReferencePictureList(j);
+      rpl->setNumberOfShorttermPictures(ge.m_numRefPics);
+      rpl->setNumberOfLongtermPictures(0);   //Hardcoded as 0 for now. need to update this when implementing LTRP
+      rpl->setNumberOfActivePictures(ge.m_numRefPicsActive);
+      rpl->setLtrpInSliceHeaderFlag(ge.m_ltrp_in_slice_header_flag);
+      rpl->setInterLayerPresentFlag( sps.getInterLayerPresentFlag() );
+      // inter-layer reference picture is not signaled in SPS RPL, SPS is shared currently
+      rpl->setNumberOfInterLayerPictures( 0 );
+
+      for (int k = 0; k < ge.m_numRefPics; k++)
       {
-        numNeg++;
-      }
-    }
-    rps->setNumberOfNegativePictures(numNeg);
-    rps->setNumberOfPositivePictures(numPos);
-
-    // handle inter RPS intialization from the config file.
-    rps->setInterRPSPrediction(ge.m_interRPSPrediction > 0);  // not very clean, converting anything > 0 to true.
-    rps->setDeltaRIdxMinus1(0);                               // index to the Reference RPS is always the previous one.
-    ReferencePictureSet*     RPSRef = i>0 ? rpsList->getReferencePictureSet(i-1): NULL;  // get the reference RPS
-
-    if (ge.m_interRPSPrediction == 2)  // Automatic generation of the inter RPS idc based on the RIdx provided.
-    {
-      CHECK(!(RPSRef!=NULL), "Unspecified error");
-      int deltaRPS = getGOPEntry(i-1).m_POC - ge.m_POC;  // the ref POC - current POC
-      int numRefDeltaPOC = RPSRef->getNumberOfPictures();
-
-      rps->setDeltaRPS(deltaRPS);           // set delta RPS
-      rps->setNumRefIdc(numRefDeltaPOC+1);  // set the numRefIdc to the number of pictures in the reference RPS + 1.
-      int count=0;
-      for (int j = 0; j <= numRefDeltaPOC; j++ ) // cycle through pics in reference RPS.
-      {
-        int RefDeltaPOC = (j<numRefDeltaPOC)? RPSRef->getDeltaPOC(j): 0;  // if it is the last decoded picture, set RefDeltaPOC = 0
-        rps->setRefIdc(j, 0);
-        for (int k = 0; k < rps->getNumberOfPictures(); k++ )  // cycle through pics in current RPS.
-        {
-          if (rps->getDeltaPOC(k) == ( RefDeltaPOC + deltaRPS))  // if the current RPS has a same picture as the reference RPS.
-          {
-              rps->setRefIdc(j, (rps->getUsed(k)?1:2));
-              count++;
-              break;
-          }
-        }
-      }
-      if (count != rps->getNumberOfPictures())
-      {
-        msg( WARNING, "Warning: Unable fully predict all delta POCs using the reference RPS index given in the config file.  Setting Inter RPS to false for this RPS.\n");
-        rps->setInterRPSPrediction(0);
-      }
-    }
-    else if (ge.m_interRPSPrediction == 1)  // inter RPS idc based on the RefIdc values provided in config file.
-    {
-      CHECK(!(RPSRef!=NULL), "Unspecified error");
-      rps->setDeltaRPS(ge.m_deltaRPS);
-      rps->setNumRefIdc(ge.m_numRefIdc);
-      for (int j = 0; j < ge.m_numRefIdc; j++ )
-      {
-        rps->setRefIdc(j, ge.m_refIdc[j]);
-      }
-      // the following code overwrite the deltaPOC and Used by current values read from the config file with the ones
-      // computed from the RefIdc.  A warning is printed if they are not identical.
-      numNeg = 0;
-      numPos = 0;
-      ReferencePictureSet      RPSTemp;  // temporary variable
-
-      for (int j = 0; j < ge.m_numRefIdc; j++ )
-      {
-        if (ge.m_refIdc[j])
-        {
-          int deltaPOC = ge.m_deltaRPS + ((j < RPSRef->getNumberOfPictures())? RPSRef->getDeltaPOC(j) : 0);
-          RPSTemp.setDeltaPOC((numNeg+numPos),deltaPOC);
-          RPSTemp.setUsed((numNeg+numPos),ge.m_refIdc[j]==1?1:0);
-          if (deltaPOC<0)
-          {
-            numNeg++;
-          }
-          else
-          {
-            numPos++;
-          }
-        }
-      }
-      if (numNeg != rps->getNumberOfNegativePictures())
-      {
-        msg( WARNING, "Warning: number of negative pictures in RPS is different between intra and inter RPS specified in the config file.\n");
-        rps->setNumberOfNegativePictures(numNeg);
-        rps->setNumberOfPictures(numNeg+numPos);
-      }
-      if (numPos != rps->getNumberOfPositivePictures())
-      {
-        msg( WARNING, "Warning: number of positive pictures in RPS is different between intra and inter RPS specified in the config file.\n");
-        rps->setNumberOfPositivePictures(numPos);
-        rps->setNumberOfPictures(numNeg+numPos);
-      }
-      RPSTemp.setNumberOfPictures(numNeg+numPos);
-      RPSTemp.setNumberOfNegativePictures(numNeg);
-      RPSTemp.sortDeltaPOC();     // sort the created delta POC before comparing
-      // check if Delta POC and Used are the same
-      // print warning if they are not.
-      for (int j = 0; j < ge.m_numRefIdc; j++ )
-      {
-        if (RPSTemp.getDeltaPOC(j) != rps->getDeltaPOC(j))
-        {
-          msg( WARNING, "Warning: delta POC is different between intra RPS and inter RPS specified in the config file.\n");
-          rps->setDeltaPOC(j,RPSTemp.getDeltaPOC(j));
-        }
-        if (RPSTemp.getUsed(j) != rps->getUsed(j))
-        {
-          msg( WARNING, "Warning: Used by Current in RPS is different between intra and inter RPS specified in the config file.\n");
-          rps->setUsed(j,RPSTemp.getUsed(j));
-        }
+        rpl->setRefPicIdentifier( k, ge.m_deltaRefPics[k], 0, false, 0 ); // identifier for slot k is the configured delta from m_deltaRefPics
       }
     }
   }
-  //In case of field coding, we need to set special parameters for the first bottom field of the sequence, since it is not specified in the cfg file.
-  //The position = GOPSize + extraRPSs which is (a priori) unused is reserved for this field in the RPS.
+
   if (isFieldCoding)
   {
-    rps = rpsList->getReferencePictureSet(getGOPSize()+m_extraRPSs);
-    rps->setNumberOfPictures(1);
-    rps->setNumberOfNegativePictures(1);
-    rps->setNumberOfPositivePictures(0);
-    rps->setNumberOfLongtermPictures(0);
-    rps->setDeltaPOC(0,-1);
-    rps->setPOC(0,0);
-    rps->setUsed(0,true);
-    rps->setInterRPSPrediction(false);
-    rps->setDeltaRIdxMinus1(0);
-    rps->setDeltaRPS(0);
-    rps->setNumRefIdc(0);
+    // Set the RPL of POC1 (first bottom field), which is not specified in the cfg file
+    for (int i = 0; i < 2; i++)
+    {
+      rplList = (i == 0) ? sps.getRPLList0() : sps.getRPLList1();
+      rpl = rplList->getReferencePictureList(numRPLCandidates); // the extra entry allocated above, one past the configured candidates
+      rpl->setNumberOfShorttermPictures(1);
+      rpl->setNumberOfLongtermPictures(0);
+      rpl->setNumberOfActivePictures(1);
+      rpl->setLtrpInSliceHeaderFlag(0);
+      rpl->setRefPicIdentifier(0, 1, 0, false, 0);
+      rpl->setPOC(0, 0);
+    }
   }
-}
 
-   // This is a function that
-   // determines what Reference Picture Set to use
-   // for a specific slice (with POC = POCCurr)
-void EncLib::selectReferencePictureSet(Slice* slice, int POCCurr, int GOPid
-                                      , int ltPoc
-)
-{
-  bool isEncodeLtRef = (POCCurr == ltPoc);
-  if (m_compositeRefEnabled && isEncodeLtRef)
-  {
-    POCCurr++;
-  }
-  int rIdx = GOPid;
-  slice->setRPSidx(GOPid);
+  // Check whether all delta POCs of the active STRP entries in each RPL have the same sign
+  // Check RPL list 0 first
+  const RPLList* rplList0 = sps.getRPLList0();
+  const RPLList* rplList1 = sps.getRPLList1();
+  uint32_t numberOfRPL = sps.getNumRPL0();
 
-  for(int extraNum=m_iGOPSize; extraNum<m_extraRPSs+m_iGOPSize; extraNum++)
+  bool isAllEntriesinRPLHasSameSignFlag = true; // cleared on the first sign mismatch; both loops stop as soon as it is false
+  bool isFirstEntry = true; // NOTE(review): never reset inside the list-0 loop (the list-1 loop resets it per ii), so for ii > 0 and jj == 0 the else-if evaluates getRefPicIdentifier(jj - 1) with jj == 0 - confirm intent
+  bool lastSign = true;        //true = positive ; false = negative
+  for (uint32_t ii = 0; isAllEntriesinRPLHasSameSignFlag && ii < numberOfRPL; ii++)
   {
-    if(m_uiIntraPeriod > 0 && getDecodingRefreshType() > 0)
+    const ReferencePictureList* rpl = rplList0->getReferencePictureList(ii);
+    for (uint32_t jj = 0; isAllEntriesinRPLHasSameSignFlag && jj < rpl->getNumberOfActivePictures(); jj++)
     {
-      int POCIndex = POCCurr%m_uiIntraPeriod;
-      if(POCIndex == 0)
+      if (!rpl->isRefPicLongterm(jj) && isFirstEntry)
       {
-        POCIndex = m_uiIntraPeriod;
+        lastSign = (rpl->getRefPicIdentifier(jj) >= 0) ? true : false; // the first short-term entry fixes the reference sign
+        isFirstEntry = false;
       }
-      if(POCIndex == m_GOPList[extraNum].m_POC)
+      else if (!rpl->isRefPicLongterm(jj) && (((rpl->getRefPicIdentifier(jj) - rpl->getRefPicIdentifier(jj - 1)) >= 0 && lastSign == false) || ((rpl->getRefPicIdentifier(jj) - rpl->getRefPicIdentifier(jj - 1)) < 0 && lastSign == true)))
       {
-        slice->setRPSidx(extraNum);
-        rIdx = extraNum;
+        isAllEntriesinRPLHasSameSignFlag = false; // difference to the previous identifier disagrees with lastSign
       }
     }
-    else
+  }
+  // Check RPL list 1. Skipped if the flag is already false after list 0, or if RPL1 is copied from RPL0
+  numberOfRPL = sps.getNumRPL1();
+  isFirstEntry = true;
+  lastSign = true;
+  for (uint32_t ii = 0; isAllEntriesinRPLHasSameSignFlag && !sps.getRPL1CopyFromRPL0Flag() && ii < numberOfRPL; ii++)
+  {
+    isFirstEntry = true; // here the first-entry state is reset for every RPL
+    const ReferencePictureList* rpl = rplList1->getReferencePictureList(ii);
+    for (uint32_t jj = 0; isAllEntriesinRPLHasSameSignFlag && jj < rpl->getNumberOfActivePictures(); jj++)
     {
-      if(POCCurr==m_GOPList[extraNum].m_POC)
+      if (!rpl->isRefPicLongterm(jj) && isFirstEntry)
+      {
+        lastSign = (rpl->getRefPicIdentifier(jj) >= 0) ? true : false;
+        isFirstEntry = false;
+      }
+      else if (!rpl->isRefPicLongterm(jj) && (((rpl->getRefPicIdentifier(jj) - rpl->getRefPicIdentifier(jj - 1)) >= 0 && lastSign == false) || ((rpl->getRefPicIdentifier(jj) - rpl->getRefPicIdentifier(jj - 1)) < 0 && lastSign == true)))
       {
-        slice->setRPSidx(extraNum);
-        rIdx = extraNum;
+        isAllEntriesinRPLHasSameSignFlag = false;
       }
     }
   }
+  sps.setAllActiveRplEntriesHasSameSignFlag(isAllEntriesinRPLHasSameSignFlag); // publish the result of both scans in the SPS
+}
 
-  if(POCCurr == 1 && slice->getPic()->fieldPic)
+void EncLib::getActiveRefPicListNumForPOC(const SPS *sps, int POCCurr, int GOPid, uint32_t *activeL0, uint32_t *activeL1) // Writes to *activeL0 / *activeL1 the active-picture counts of the list-0 / list-1 RPL chosen in sps for POCCurr / GOPid.
+{
+  if (m_uiIntraPeriod < 0)  //Only for RA; with a negative intra period both outputs are set to 0
   {
-    slice->setRPSidx(m_iGOPSize+m_extraRPSs);
-    rIdx = m_iGOPSize + m_extraRPSs;
+    *activeL0 = *activeL1 = 0;
+    return;
   }
+  uint32_t rpl0Idx = GOPid; // default choice: the RPL at the GOP position
+  uint32_t rpl1Idx = GOPid;
 
-  ReferencePictureSet *rps = const_cast<ReferencePictureSet *>(slice->getSPS()->getRPSList()->getReferencePictureSet(slice->getRPSidx()));
-  if (m_compositeRefEnabled && ltPoc != -1 && !isEncodeLtRef)
+  int fullListNum = m_iGOPSize;
+  int partialListNum = getRPLCandidateSize(0) - m_iGOPSize; // candidates beyond the first m_iGOPSize entries
+  int extraNum = fullListNum;
+  if (m_uiIntraPeriod < 0) // NOTE(review): unreachable - the same condition already returned at the top of the function
   {
-    if (ltPoc != -1 && rps->getNumberOfLongtermPictures() != 1 && !isEncodeLtRef)
+    if (POCCurr < (2 * m_iGOPSize + 2))
     {
-      int idx = rps->getNumberOfPictures();
-      int maxPicOrderCntLSB = 1 << slice->getSPS()->getBitsForPOC();
-      int ltPocLsb = ltPoc % maxPicOrderCntLSB;
-
-      rps->setNumberOfPictures(rps->getNumberOfPictures() + 1);
-      rps->setNumberOfLongtermPictures(1);
-      rps->setPOC(idx, ltPoc);
-      rps->setPocLSBLT(idx, ltPocLsb);
-      rps->setDeltaPOC(idx, -POCCurr + ltPoc);
-      rps->setUsed(idx, true);
+      rpl0Idx = POCCurr + m_iGOPSize - 1;
+      rpl1Idx = POCCurr + m_iGOPSize - 1;
     }
-  }
-  else if (m_compositeRefEnabled && isEncodeLtRef)
-  {
-    ReferencePictureSet* localRPS = slice->getLocalRPS();
-    (*localRPS) = ReferencePictureSet();
-    int refPics = rps->getNumberOfPictures();
-    localRPS->setNumberOfPictures(rps->getNumberOfPictures());
-    for (int i = 0; i < refPics; i++)
+    else
     {
-      localRPS->setDeltaPOC(i, rps->getDeltaPOC(i) + 1);
-      localRPS->setUsed(i, rps->getUsed(i));
+      rpl0Idx = (POCCurr%m_iGOPSize == 0) ? m_iGOPSize - 1 : POCCurr%m_iGOPSize - 1;
+      rpl1Idx = (POCCurr%m_iGOPSize == 0) ? m_iGOPSize - 1 : POCCurr%m_iGOPSize - 1;
     }
-    localRPS->setNumberOfNegativePictures(rps->getNumberOfNegativePictures());
-    localRPS->setNumberOfPositivePictures(rps->getNumberOfPositivePictures());
-    localRPS->setInterRPSPrediction(true);
-    int deltaRPS = 1;
-    int newIdc = 0;
-    for (int i = 0; i < refPics; i++)
-    {
-      int deltaPOC = ((i != refPics) ? rps->getDeltaPOC(i) : 0);  // check if the reference abs POC is >= 0
-      int refIdc = 0;
-      for (int j = 0; j < localRPS->getNumberOfPictures(); j++) // loop through the  pictures in the new RPS
-      {
-        if ((deltaPOC + deltaRPS) == localRPS->getDeltaPOC(j))
-        {
-          if (localRPS->getUsed(j))
-          {
-            refIdc = 1;
-          }
-          else
-          {
-            refIdc = 2;
-          }
-        }
-      }
-      localRPS->setRefIdc(i, refIdc);
-      newIdc++;
-    }
-    localRPS->setNumRefIdc(newIdc + 1);
-    localRPS->setRefIdc(newIdc, 0);
-    localRPS->setDeltaRPS(deltaRPS);
-    localRPS->setDeltaRIdxMinus1(slice->getSPS()->getRPSList()->getNumberOfReferencePictureSets() - 1 - rIdx);
-    slice->setRPS(localRPS);
-    slice->setRPSidx(-1);
-    return;
+    extraNum = fullListNum + partialListNum; // would make the scan below a no-op
   }
-  slice->setRPS(rps);
-}
-
-int EncLib::getReferencePictureSetIdxForSOP(int POCCurr, int GOPid )
-{
-  int rpsIdx = GOPid;
-
-  for(int extraNum=m_iGOPSize; extraNum<m_extraRPSs+m_iGOPSize; extraNum++)
+  for (; extraNum<fullListNum + partialListNum; extraNum++) // scan the candidates after the first m_iGOPSize entries
   {
-    if(m_uiIntraPeriod > 0 && getDecodingRefreshType() > 0)
+    if (m_uiIntraPeriod > 0 && getDecodingRefreshType() > 0)
     {
       int POCIndex = POCCurr%m_uiIntraPeriod;
-      if(POCIndex == 0)
-      {
+      if (POCIndex == 0)
         POCIndex = m_uiIntraPeriod;
-      }
-      if(POCIndex == m_GOPList[extraNum].m_POC)
+      if (POCIndex == m_RPLList0[extraNum].m_POC) // the POC stored with the list-0 entry decides the index for both lists
       {
-        rpsIdx = extraNum;
+        rpl0Idx = extraNum;
+        rpl1Idx = extraNum;
+        extraNum++; // together with the loop increment this skips the entry right after a match
       }
     }
-    else
-    {
-      if(POCCurr==m_GOPList[extraNum].m_POC)
-      {
-        rpsIdx = extraNum;
-      }
-    }
-  }
-
-  return rpsIdx;
-}
-
-#if HEVC_TILES_WPP
-void  EncLib::xInitPPSforTiles(PPS &pps)
-{
-  pps.setTileUniformSpacingFlag( m_tileUniformSpacingFlag );
-  pps.setNumTileColumnsMinus1( m_iNumColumnsMinus1 );
-  pps.setNumTileRowsMinus1( m_iNumRowsMinus1 );
-  if( !m_tileUniformSpacingFlag )
-  {
-    pps.setTileColumnWidth( m_tileColumnWidth );
-    pps.setTileRowHeight( m_tileRowHeight );
   }
-  pps.setLoopFilterAcrossTilesEnabledFlag( m_loopFilterAcrossTilesEnabledFlag );
 
-  // # substreams is "per tile" when tiles are independent.
+  const ReferencePictureList *rpl0 = sps->getRPLList0()->getReferencePictureList(rpl0Idx);
+  *activeL0 = rpl0->getNumberOfActivePictures(); // result for list 0
+  const ReferencePictureList *rpl1 = sps->getRPLList1()->getReferencePictureList(rpl1Idx);
+  *activeL1 = rpl1->getNumberOfActivePictures(); // result for list 1
 }
-#endif
 
-void  EncCfg::xCheckGSParameters()
+void EncLib::selectReferencePictureList(Slice* slice, int POCCurr, int GOPid, int ltPoc) // Chooses the list-0 / list-1 RPL indices for the slice and attaches the matching SPS RPL entries to it.
 {
-#if HEVC_TILES_WPP
-  int   iWidthInCU = ( m_iSourceWidth%m_maxCUWidth ) ? m_iSourceWidth/m_maxCUWidth + 1 : m_iSourceWidth/m_maxCUWidth;
-  int   iHeightInCU = ( m_iSourceHeight%m_maxCUHeight ) ? m_iSourceHeight/m_maxCUHeight + 1 : m_iSourceHeight/m_maxCUHeight;
-  uint32_t  uiCummulativeColumnWidth = 0;
-  uint32_t  uiCummulativeRowHeight = 0;
-
-  //check the column relative parameters
-  if( m_iNumColumnsMinus1 >= (1<<(LOG2_MAX_NUM_COLUMNS_MINUS1+1)) )
+  bool isEncodeLtRef = (POCCurr == ltPoc); // true when the current POC equals ltPoc
+  if (m_compositeRefEnabled && isEncodeLtRef)
   {
-    EXIT( "The number of columns is larger than the maximum allowed number of columns." );
+    POCCurr++; // with composite reference enabled, the selection below uses POCCurr + 1 for this picture
   }
 
-  if( m_iNumColumnsMinus1 >= iWidthInCU )
-  {
-    EXIT( "The current picture can not have so many columns." );
-  }
+  slice->setRPL0idx(GOPid); // default choice: the RPL at the GOP position; may be overridden below
+  slice->setRPL1idx(GOPid);
 
-  if( m_iNumColumnsMinus1 && !m_tileUniformSpacingFlag )
+  int fullListNum = m_iGOPSize;
+  int partialListNum = getRPLCandidateSize(0) - m_iGOPSize; // candidates beyond the first m_iGOPSize entries
+  int extraNum = fullListNum;
+  if (m_uiIntraPeriod < 0) // negative intra period: index derived from POCCurr alone
   {
-    for(int i=0; i<m_iNumColumnsMinus1; i++)
+    if (POCCurr < (2 * m_iGOPSize + 2))
     {
-      uiCummulativeColumnWidth += m_tileColumnWidth[i];
+      slice->setRPL0idx(POCCurr + m_iGOPSize - 1);
+      slice->setRPL1idx(POCCurr + m_iGOPSize - 1);
     }
-
-    if( uiCummulativeColumnWidth >= iWidthInCU )
+    else
     {
-      EXIT( "The width of the column is too large." );
+      slice->setRPL0idx((POCCurr%m_iGOPSize == 0) ? m_iGOPSize - 1 : POCCurr%m_iGOPSize - 1);
+      slice->setRPL1idx((POCCurr%m_iGOPSize == 0) ? m_iGOPSize - 1 : POCCurr%m_iGOPSize - 1);
     }
+    extraNum = fullListNum + partialListNum; // makes the scan below a no-op in this mode
   }
-
-  //check the row relative parameters
-  if( m_iNumRowsMinus1 >= (1<<(LOG2_MAX_NUM_ROWS_MINUS1+1)) )
-  {
-    EXIT( "The number of rows is larger than the maximum allowed number of rows." );
-  }
-
-  if( m_iNumRowsMinus1 >= iHeightInCU )
+  for (; extraNum < fullListNum + partialListNum; extraNum++) // scan the candidates after the first m_iGOPSize entries
   {
-    EXIT( "The current picture can not have so many rows." );
+    if (m_uiIntraPeriod > 0 && getDecodingRefreshType() > 0)
+    {
+      int POCIndex = POCCurr%m_uiIntraPeriod;
+      if (POCIndex == 0)
+        POCIndex = m_uiIntraPeriod;
+      if (POCIndex == m_RPLList0[extraNum].m_POC) // the POC stored with the list-0 entry decides the index for both lists
+      {
+        slice->setRPL0idx(extraNum);
+        slice->setRPL1idx(extraNum);
+        extraNum++; // together with the loop increment this skips the entry right after a match
+      }
+    }
   }
 
-  if( m_iNumRowsMinus1 && !m_tileUniformSpacingFlag )
+  if (slice->getPic()->fieldPic) // field pictures override the indices chosen above
   {
-    for(int i=0; i<m_iNumRowsMinus1; i++)
+    // To set RPL index of POC1 (first bottom field)
+    if (POCCurr == 1)
     {
-      uiCummulativeRowHeight += m_tileRowHeight[i];
+      slice->setRPL0idx(getRPLCandidateSize(0)); // index one past the configured candidates
+      slice->setRPL1idx(getRPLCandidateSize(0));
     }
-
-    if( uiCummulativeRowHeight >= iHeightInCU )
+    else if (m_uiIntraPeriod < 0)
     {
-      EXIT( "The height of the row is too large." );
+      // To set RPL indexes for LD
+      int numRPLCandidates = getRPLCandidateSize(0);
+      if (POCCurr < numRPLCandidates - m_iGOPSize + 2)
+      {
+        slice->setRPL0idx(POCCurr + m_iGOPSize - 2);
+        slice->setRPL1idx(POCCurr + m_iGOPSize - 2);
+      }
+      else
+      {
+        if (POCCurr%m_iGOPSize == 0)
+        {
+          slice->setRPL0idx(m_iGOPSize - 2);
+          slice->setRPL1idx(m_iGOPSize - 2);
+        }
+        else if (POCCurr%m_iGOPSize == 1)
+        {
+          slice->setRPL0idx(m_iGOPSize - 1);
+          slice->setRPL1idx(m_iGOPSize - 1);
+        }
+        else
+        {
+          slice->setRPL0idx(POCCurr % m_iGOPSize - 2);
+          slice->setRPL1idx(POCCurr % m_iGOPSize - 2);
+        }
+      }
     }
   }
-#endif
+
+  const ReferencePictureList *rpl0 = (slice->getSPS()->getRPLList0()->getReferencePictureList(slice->getRPL0idx()));
+  const ReferencePictureList *rpl1 = (slice->getSPS()->getRPLList1()->getReferencePictureList(slice->getRPL1idx()));
+  slice->setRPL0(rpl0); // the slice receives pointers to the SPS entries, not copies
+  slice->setRPL1(rpl1);
 }
 
-#if JCTVC_Y0038_PARAMS
+
 void EncLib::setParamSetChanged(int spsId, int ppsId)
 {
   m_ppsMap.setChangedFlag(ppsId);
   m_spsMap.setChangedFlag(spsId);
 }
-#endif
 bool EncLib::APSNeedsWriting(int apsId)
 {
   bool isChanged = m_apsMap.getChangedFlag(apsId);
@@ -1892,6 +1915,37 @@ bool EncLib::SPSNeedsWriting(int spsId)
   return bChanged;
 }
 
+/** Re-evaluates m_doPlt from the prediction units of the given picture.
+ *  For every PU in pic->cs->pus the area of its first non-empty block is added to the total;
+ *  if the PU's CU is palette (PLT) or IBC coded, the same area also counts as palette area.
+ *  m_doPlt is set to false when pltArea * PLT_FAST_RATIO < totalArea and to true otherwise.
+ *  \param pic picture whose coding structure (pic->cs) is inspected
+ */
+void EncLib::checkPltStats( Picture* pic )
+{
+  // 64-bit accumulators: the scaled comparison below must not wrap a 32-bit int on large pictures
+  int64_t totalArea = 0;
+  int64_t pltArea = 0;
+  for (auto apu : pic->cs->pus)
+  {
+    for (int i = 0; i < MAX_NUM_TBLOCKS; ++i)
+    {
+      if (apu->blocks[i].width > 0 && apu->blocks[i].height > 0)
+      {
+        const int64_t puArea = int64_t(apu->blocks[i].width) * apu->blocks[i].height;
+        totalArea += puArea;
+        if (CU::isPLT(*apu->cu) || CU::isIBC(*apu->cu))
+        {
+          pltArea += puArea;
+        }
+        break; // only the first non-empty block of each PU is counted
+      }
+    }
+  }
+  // palette stays enabled only if the PLT/IBC share reaches 1/PLT_FAST_RATIO of the counted area
+  m_doPlt = (pltArea * PLT_FAST_RATIO >= totalArea);
+}
+
 #if X0038_LAMBDA_FROM_QP_CAPABILITY
 int EncCfg::getQPForPicture(const uint32_t gopIndex, const Slice *pSlice) const
 {
@@ -1900,7 +1954,15 @@ int EncCfg::getQPForPicture(const uint32_t gopIndex, const Slice *pSlice) const
 
   if (getCostMode()==COST_LOSSLESS_CODING)
   {
+#if JVET_AHG14_LOSSLESS
+#if JVET_AHG14_LOSSLESS_ENC_QP_FIX
+    qp = getBaseQP();
+#else
+    qp = LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP - ( ( pSlice->getSPS()->getBitDepth( CHANNEL_TYPE_LUMA ) - 8 ) * 6 );
+#endif
+#else
     qp=LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP;
+#endif
   }
   else
   {
@@ -1930,14 +1992,6 @@ int EncCfg::getQPForPicture(const uint32_t gopIndex, const Slice *pSlice) const
     }
     else
     {
-#if SHARP_LUMA_DELTA_QP
-      // Only adjust QP when not lossless
-      if (!(( getMaxDeltaQP() == 0 ) && (!getLumaLevelToDeltaQPMapping().isEnabled()) && (qp == -lumaQpBDOffset ) && (pSlice->getPPS()->getTransquantBypassEnabledFlag())))
-#else
-      if (!(( getMaxDeltaQP() == 0 ) && (qp == -lumaQpBDOffset ) && (pSlice->getPPS()->getTransquantBypassEnabledFlag())))
-#endif
-
-      {
         const GOPEntry &gopEntry=getGOPEntry(gopIndex);
         // adjust QP according to the QP offset for the GOP entry.
         qp +=gopEntry.m_QPOffset;
@@ -1947,7 +2001,6 @@ int EncCfg::getQPForPicture(const uint32_t gopIndex, const Slice *pSlice) const
         int qpOffset = (int)floor(Clip3<double>(0.0, 3.0, dqpOffset));
         qp += qpOffset ;
       }
-    }
 
 #if !QP_SWITCHING_FOR_PARALLEL
     // modify QP if a fractional QP was originally specified, cause dQPs to be 0 or 1.
@@ -1963,4 +2016,5 @@ int EncCfg::getQPForPicture(const uint32_t gopIndex, const Slice *pSlice) const
 }
 #endif
 
+
 //! \}
diff --git a/source/Lib/EncoderLib/EncLib.h b/source/Lib/EncoderLib/EncLib.h
index 137b70ffac0be012c8204ba6099d8a931fe6ac97..f9b13233e597e85169e374573f23fea701a2e419 100644
--- a/source/Lib/EncoderLib/EncLib.h
+++ b/source/Lib/EncoderLib/EncLib.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -48,6 +48,7 @@
 #include "EncCfg.h"
 #include "EncGOP.h"
 #include "EncSlice.h"
+#include "EncHRD.h"
 #include "VLCWriter.h"
 #include "CABACWriter.h"
 #include "InterSearch.h"
@@ -57,6 +58,7 @@
 #include "EncAdaptiveLoopFilter.h"
 #include "RateCtrl.h"
 
+class EncLibCommon;
 
 //! \ingroup EncoderLib
 //! \{
@@ -72,11 +74,12 @@ private:
   // picture
   int                       m_iPOCLast;                           ///< time index (POC)
   int                       m_iNumPicRcvd;                        ///< number of received pictures
-  uint32_t                      m_uiNumAllPicCoded;                   ///< number of coded pictures
-  PicList                   m_cListPic;                           ///< dynamic list of pictures
+  uint32_t                  m_uiNumAllPicCoded;                   ///< number of coded pictures
+  PicList&                  m_cListPic;                           ///< dynamic list of pictures
+  int                       m_layerId;
 
   // encoder search
-#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM
+#if ENABLE_SPLIT_PARALLELISM
   InterSearch              *m_cInterSearch;                       ///< encoder search class
   IntraSearch              *m_cIntraSearch;                       ///< encoder search class
 #else
@@ -84,7 +87,7 @@ private:
   IntraSearch               m_cIntraSearch;                       ///< encoder search class
 #endif
   // coding tool
-#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM
+#if ENABLE_SPLIT_PARALLELISM
   TrQuant                  *m_cTrQuant;                           ///< transform & quantization class
 #else
   TrQuant                   m_cTrQuant;                           ///< transform & quantization class
@@ -93,13 +96,13 @@ private:
   EncSampleAdaptiveOffset   m_cEncSAO;                            ///< sample adaptive offset class
   EncAdaptiveLoopFilter     m_cEncALF;
   HLSWriter                 m_HLSWriter;                          ///< CAVLC encoder
-#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM
+#if ENABLE_SPLIT_PARALLELISM
   CABACEncoder             *m_CABACEncoder;
 #else
   CABACEncoder              m_CABACEncoder;
 #endif
 
-#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM
+#if ENABLE_SPLIT_PARALLELISM
   EncReshape               *m_cReshaper;                        ///< reshaper class
 #else
   EncReshape                m_cReshaper;                        ///< reshaper class
@@ -108,17 +111,18 @@ private:
   // processing unit
   EncGOP                    m_cGOPEncoder;                        ///< GOP encoder
   EncSlice                  m_cSliceEncoder;                      ///< slice encoder
-#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM
+#if ENABLE_SPLIT_PARALLELISM
   EncCu                    *m_cCuEncoder;                         ///< CU encoder
 #else
   EncCu                     m_cCuEncoder;                         ///< CU encoder
 #endif
   // SPS
-  ParameterSetMap<SPS>      m_spsMap;                             ///< SPS. This is the base value. This is copied to PicSym
-  ParameterSetMap<PPS>      m_ppsMap;                             ///< PPS. This is the base value. This is copied to PicSym
-  ParameterSetMap<APS>      m_apsMap;                             ///< APS. This is the base value. This is copied to PicSym
+  ParameterSetMap<SPS>&     m_spsMap;                             ///< SPS. This is the base value. This is copied to PicSym
+  ParameterSetMap<PPS>&     m_ppsMap;                             ///< PPS. This is the base value. This is copied to PicSym
+  ParameterSetMap<APS>&     m_apsMap;                             ///< APS. This is the base value. This is copied to PicSym
+  PicHeader                 m_picHeader;                          ///< picture header
   // RD cost computation
-#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM
+#if ENABLE_SPLIT_PARALLELISM
   RdCost                   *m_cRdCost;                            ///< RD cost computation class
   CtxCache                 *m_CtxCache;                           ///< buffer for temporarily stored context models
 #else
@@ -130,7 +134,7 @@ private:
 
   AUWriterIf*               m_AUWriterIf;
 
-#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM
+#if ENABLE_SPLIT_PARALLELISM
   int                       m_numCuEncStacks;
 #endif
 
@@ -138,36 +142,43 @@ private:
   CacheModel                m_cacheModel;
 #endif
 
+  APS*                      m_apss[ALF_CTB_MAX_NUM_APS];
+
+  APS*                      m_lmcsAPS;
+  APS*                      m_scalinglistAPS;
+
+  EncHRD                    m_encHRD;
+
+  bool                      m_doPlt;
+#if JVET_O0756_CALCULATE_HDRMETRICS
+  std::chrono::duration<long long, ratio<1, 1000000000>> m_metricTime;
+#endif
+  int                       m_picIdInGOP;
+
 public:
+  SPS*                      getSPS( int spsId ) { return m_spsMap.getPS( spsId ); };
+  APS**                     getApss() { return m_apss; }
   Ctx                       m_entropyCodingSyncContextState;      ///< leave in addition to vector for compatibility
-#if ENABLE_WPP_PARALLELISM
-  std::vector<Ctx>          m_entropyCodingSyncContextStateVec;   ///< context storage for state of contexts at the wavefront/WPP/entropy-coding-sync second CTU of tile-row
-#endif
 
 protected:
   void  xGetNewPicBuffer  ( std::list<PelUnitBuf*>& rcListPicYuvRecOut, Picture*& rpcPic, int ppsId ); ///< get picture buffer which will be processed. If ppsId<0, then the ppsMap will be queried for the first match.
-#if HEVC_VPS
-  void  xInitVPS          (VPS &vps, const SPS &sps); ///< initialize VPS from encoder options
-#endif
-  void  xInitSPS          (SPS &sps);                 ///< initialize SPS from encoder options
+  void  xInitVPS(VPS& vps, const SPS& sps); ///< initialize VPS from encoder options
+  void  xInitDPS          (DPS &dps, const SPS &sps, const int dpsId); ///< initialize DPS from encoder options
+  void  xInitSPS          ( SPS& sps, VPS& vps );       ///< initialize SPS from encoder options
   void  xInitPPS          (PPS &pps, const SPS &sps); ///< initialize PPS from encoder options
+  void  xInitPicHeader    (PicHeader &picHeader, const SPS &sps, const PPS &pps); ///< initialize Picture Header from encoder options
   void  xInitAPS          (APS &aps);                 ///< initialize APS from encoder options
-#if HEVC_USE_SCALING_LISTS
-  void  xInitScalingLists (SPS &sps, PPS &pps);   ///< initialize scaling lists
-#endif
+  void  xInitScalingLists ( SPS &sps, APS &aps );     ///< initialize scaling lists
   void  xInitPPSforLT(PPS& pps);
-  void  xInitHrdParameters(SPS &sps);                 ///< initialize HRD parameters
+  void  xInitHrdParameters(SPS &sps);                 ///< initialize HRD parameters
 
-#if HEVC_TILES_WPP
-  void  xInitPPSforTiles  (PPS &pps);
-#endif
-  void  xInitRPS          (SPS &sps, bool isFieldCoding);           ///< initialize PPS from encoder options
+  void  xInitRPL(SPS &sps, bool isFieldCoding);           ///< initialize SPS from encoder options
 
 public:
-  EncLib();
+  EncLib( EncLibCommon* encLibCommon );
   virtual ~EncLib();
 
-  void      create          ();
+  void      create          ( const int layerId );
   void      destroy         ();
   void      init            ( bool isFieldCoding, AUWriterIf* auWriterIf );
   void      deletePicBuffer ();
@@ -178,7 +189,7 @@ public:
 
   AUWriterIf*             getAUWriterIf         ()              { return   m_AUWriterIf;           }
   PicList*                getListPic            ()              { return  &m_cListPic;             }
-#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM
+#if ENABLE_SPLIT_PARALLELISM
   InterSearch*            getInterSearch        ( int jId = 0 ) { return  &m_cInterSearch[jId];    }
   IntraSearch*            getIntraSearch        ( int jId = 0 ) { return  &m_cIntraSearch[jId];    }
 
@@ -194,13 +205,14 @@ public:
   EncAdaptiveLoopFilter*  getALF                ()              { return  &m_cEncALF;              }
   EncGOP*                 getGOPEncoder         ()              { return  &m_cGOPEncoder;          }
   EncSlice*               getSliceEncoder       ()              { return  &m_cSliceEncoder;        }
-#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM
+  EncHRD*                 getHRD                ()              { return  &m_encHRD;               }
+#if ENABLE_SPLIT_PARALLELISM
   EncCu*                  getCuEncoder          ( int jId = 0 ) { return  &m_cCuEncoder[jId];      }
 #else
   EncCu*                  getCuEncoder          ()              { return  &m_cCuEncoder;           }
 #endif
   HLSWriter*              getHLSWriter          ()              { return  &m_HLSWriter;            }
-#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM
+#if ENABLE_SPLIT_PARALLELISM
   CABACEncoder*           getCABACEncoder       ( int jId = 0 ) { return  &m_CABACEncoder[jId];    }
 
   RdCost*                 getRdCost             ( int jId = 0 ) { return  &m_cRdCost[jId];         }
@@ -214,51 +226,63 @@ public:
   RateCtrl*               getRateCtrl           ()              { return  &m_cRateCtrl;            }
 
 
-  void selectReferencePictureSet(Slice* slice, int POCCurr, int GOPid
-    , int ltPoc
-  );
-  int getReferencePictureSetIdxForSOP(int POCCurr, int GOPid );
+  void                    getActiveRefPicListNumForPOC(const SPS *sps, int POCCurr, int GOPid, uint32_t *activeL0, uint32_t *activeL1);
+  void                    selectReferencePictureList(Slice* slice, int POCCurr, int GOPid, int ltPoc);
 
-#if JCTVC_Y0038_PARAMS
   void                   setParamSetChanged(int spsId, int ppsId);
-#endif
   bool                   APSNeedsWriting(int apsId);
   bool                   PPSNeedsWriting(int ppsId);
   bool                   SPSNeedsWriting(int spsId);
   const PPS* getPPS( int Id ) { return m_ppsMap.getPS( Id); }
   const APS*             getAPS(int Id) { return m_apsMap.getPS(Id); }
 
-#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM
+#if ENABLE_SPLIT_PARALLELISM
   void                   setNumCuEncStacks( int n )             { m_numCuEncStacks = n; }
   int                    getNumCuEncStacks()              const { return m_numCuEncStacks; }
 #endif
 
-#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM
+#if ENABLE_SPLIT_PARALLELISM
   EncReshape*            getReshaper( int jId = 0 )             { return  &m_cReshaper[jId]; }
 #else
   EncReshape*            getReshaper()                          { return  &m_cReshaper; }
 #endif
+
+  ParameterSetMap<APS>*  getApsMap() { return &m_apsMap; }
+
+  bool                   getPltEnc()                      const { return   m_doPlt; }
+  void                   checkPltStats( Picture* pic );
+#if JVET_O0756_CALCULATE_HDRMETRICS
+  std::chrono::duration<long long, ratio<1, 1000000000>> getMetricTime()    const { return m_metricTime; };
+#endif
   // -------------------------------------------------------------------------------------------------------------------
   // encoder function
   // -------------------------------------------------------------------------------------------------------------------
 
   /// encode several number of pictures until end-of-sequence
-  void encode( bool bEos,
+  bool encodePrep( bool bEos,
                PelStorage* pcPicYuvOrg,
                PelStorage* pcPicYuvTrueOrg, const InputColourSpaceConversion snrCSC, // used for SNR calculations. Picture in original colour space.
                std::list<PelUnitBuf*>& rcListPicYuvRecOut,
                int& iNumEncoded );
 
-  /// encode several number of pictures until end-of-sequence
-  void encode( bool bEos,
+  bool encode( const InputColourSpaceConversion snrCSC, // used for SNR calculations. Picture in original colour space.
+               std::list<PelUnitBuf*>& rcListPicYuvRecOut,
+               int& iNumEncoded );
+
+  bool encodePrep( bool bEos,
                PelStorage* pcPicYuvOrg,
                PelStorage* pcPicYuvTrueOrg, const InputColourSpaceConversion snrCSC, // used for SNR calculations. Picture in original colour space.
                std::list<PelUnitBuf*>& rcListPicYuvRecOut,
                int& iNumEncoded, bool isTff );
 
+  bool encode( const InputColourSpaceConversion snrCSC, // used for SNR calculations. Picture in original colour space.
+               std::list<PelUnitBuf*>& rcListPicYuvRecOut,
+               int& iNumEncoded, bool isTff );
+
 
-  void printSummary(bool isField) { m_cGOPEncoder.printOutSummary (m_uiNumAllPicCoded, isField, m_printMSEBasedSequencePSNR, m_printSequenceMSE, m_printHexPsnr, m_spsMap.getFirstPS()->getBitDepths()); }
+  void printSummary( bool isField ) { m_cGOPEncoder.printOutSummary( m_uiNumAllPicCoded, isField, m_printMSEBasedSequencePSNR, m_printSequenceMSE, m_printHexPsnr, m_rprEnabled, m_spsMap.getFirstPS()->getBitDepths() ); }
 
+  int getLayerId() const { return m_layerId; }
 };
 
 //! \}
diff --git a/source/Lib/EncoderLib/EncLibCommon.cpp b/source/Lib/EncoderLib/EncLibCommon.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..2cbdc396efd15ff906123f2d7a73a2d429a48f1e
--- /dev/null
+++ b/source/Lib/EncoderLib/EncLibCommon.cpp
@@ -0,0 +1,51 @@
+/* The copyright in this software is being made available under the BSD
+ * License, included below. This software may be subject to other third party
+ * and contributor rights, including patent rights, and no such rights are
+ * granted under this license.
+ *
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *  * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
+ *    be used to endorse or promote products derived from this software without
+ *    specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file     EncLibCommon.cpp
+    \brief    Common encoder library class
+*/
+
+#include "CommonDef.h"
+#include "EncLibCommon.h"
+
+EncLibCommon::EncLibCommon() // m_cListPic is not listed below and is therefore default-constructed
+  : m_apsIdStart( ALF_CTB_MAX_NUM_APS ) // initial value later exposed (by reference) through getApsIdStart()
+  , m_spsMap( MAX_NUM_SPS ) // NOTE(review): the map arguments are presumably capacity / max-id limits - confirm against ParameterSetMap
+  , m_ppsMap( MAX_NUM_PPS )
+  , m_apsMap( MAX_NUM_APS * MAX_NUM_APS_TYPE ) // a single map sized for MAX_NUM_APS entries times the number of APS types
+{
+}
+
+EncLibCommon::~EncLibCommon() // virtual in the class declaration
+{ // intentionally empty: no explicit cleanup is performed here
+}
diff --git a/source/Lib/EncoderLib/EncLibCommon.h b/source/Lib/EncoderLib/EncLibCommon.h
new file mode 100644
index 0000000000000000000000000000000000000000..989fdf7e48a6c660bde88fe4418a916ae3f6fa84
--- /dev/null
+++ b/source/Lib/EncoderLib/EncLibCommon.h
@@ -0,0 +1,63 @@
+/* The copyright in this software is being made available under the BSD
+ * License, included below. This software may be subject to other third party
+ * and contributor rights, including patent rights, and no such rights are
+ * granted under this license.
+ *
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *  * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
+ *    be used to endorse or promote products derived from this software without
+ *    specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file     EncLibCommon.h
+    \brief    Common encoder library class (header)
+*/
+
+#pragma once
+#include <list>
+#include <fstream>
+#include "CommonLib/Slice.h"
+
+class EncLibCommon // container for the encoder state that is kept once and handed out by reference (see the accessors)
+{
+private:
+  int                       m_apsIdStart;         ///< ALF APS id (initialised in the constructor); the APS id space is shared across all layers
+  ParameterSetMap<SPS>      m_spsMap;             ///< SPS map, shared across all layers
+  ParameterSetMap<PPS>      m_ppsMap;             ///< PPS map, shared across all layers
+  ParameterSetMap<APS>      m_apsMap;             ///< APS map (all APS types), shared across all layers
+  PicList                   m_cListPic;           ///< picture list acting as DPB, shared across all layers
+
+public:
+  EncLibCommon();
+  virtual ~EncLibCommon();
+  // All accessors below return mutable references to the members themselves, not copies.
+  int&                     getApsIdStart()         { return m_apsIdStart; }
+  PicList&                 getPictureBuffer()      { return m_cListPic;   }
+  ParameterSetMap<SPS>&    getSpsMap()             { return m_spsMap;     }
+  ParameterSetMap<PPS>&    getPpsMap()             { return m_ppsMap;     }
+  ParameterSetMap<APS>&    getApsMap()             { return m_apsMap;     }
+
+};
+
diff --git a/source/Lib/EncoderLib/EncModeCtrl.cpp b/source/Lib/EncoderLib/EncModeCtrl.cpp
index 0c8a5f7c6d4711c90895a3dd1cceb67cad5563a9..922835d58ac9ce7643923b6436c7b5e87d726262 100644
--- a/source/Lib/EncoderLib/EncModeCtrl.cpp
+++ b/source/Lib/EncoderLib/EncModeCtrl.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -143,7 +143,7 @@ void EncModeCtrl::xGetMinMaxQP( int& minQP, int& maxQP, const CodingStructure& c
 
   const unsigned subdivIncr = (splitMode == CU_QUAD_SPLIT) ? 2 : (splitMode == CU_BT_SPLIT) ? 1 : 0;
   const bool qgEnable = partitioner.currQgEnable(); // QG possible at current level
-  const bool qgEnableChildren = qgEnable && ((partitioner.currSubdiv + subdivIncr) <= pps.getCuQpDeltaSubdiv()) && (subdivIncr > 0); // QG possible at next level
+  const bool qgEnableChildren = qgEnable && ((partitioner.currSubdiv + subdivIncr) <= cs.slice->getCuQpDeltaSubdiv()) && (subdivIncr > 0); // QG possible at next level
   const bool isLeafQG = (qgEnable && !qgEnableChildren);
 
   if( isLeafQG ) // QG at deepest level
@@ -436,6 +436,14 @@ bool CacheBlkInfoCtrl::isSkip( const UnitArea& area )
   return m_codedCUInfo[idx1][idx2][idx3][idx4]->isSkip;
 }
 
+char CacheBlkInfoCtrl::getSelectColorSpaceOption(const UnitArea& area) // returns the selectColorSpaceOption cached for 'area'
+{
+  unsigned idx1, idx2, idx3, idx4;
+  getAreaIdx(area.Y(), *m_slice_chblk->getPPS()->pcv, idx1, idx2, idx3, idx4);
+  // the four indices computed from the luma block of 'area' address the cached per-block record
+  return m_codedCUInfo[idx1][idx2][idx3][idx4]->selectColorSpaceOption;
+}
+
 bool CacheBlkInfoCtrl::isMMVDSkip(const UnitArea& area)
 {
   unsigned idx1, idx2, idx3, idx4;
@@ -586,19 +594,19 @@ bool CacheBlkInfoCtrl::getInter(const UnitArea& area)
 
   return m_codedCUInfo[idx1][idx2][idx3][idx4]->isInter;
 }
-void CacheBlkInfoCtrl::setGbiIdx(const UnitArea& area, uint8_t gBiIdx)
+void CacheBlkInfoCtrl::setBcwIdx(const UnitArea& area, uint8_t gBiIdx)
 {
   unsigned idx1, idx2, idx3, idx4;
   getAreaIdx(area.Y(), *m_slice_chblk->getPPS()->pcv, idx1, idx2, idx3, idx4);
 
-  m_codedCUInfo[idx1][idx2][idx3][idx4]->GBiIdx = gBiIdx;
+  m_codedCUInfo[idx1][idx2][idx3][idx4]->BcwIdx = gBiIdx;
 }
-uint8_t CacheBlkInfoCtrl::getGbiIdx(const UnitArea& area)
+uint8_t CacheBlkInfoCtrl::getBcwIdx(const UnitArea& area)
 {
   unsigned idx1, idx2, idx3, idx4;
   getAreaIdx(area.Y(), *m_slice_chblk->getPPS()->pcv, idx1, idx2, idx3, idx4);
 
-  return m_codedCUInfo[idx1][idx2][idx3][idx4]->GBiIdx;
+  return m_codedCUInfo[idx1][idx2][idx3][idx4]->BcwIdx;
 }
 
 #if REUSE_CU_RESULTS
@@ -625,29 +633,6 @@ static bool isTheSameNbHood( const CodingUnit &cu, const CodingStructure& cs, co
 
   const UnitArea &cmnAnc = ps[i - 1].parts[ps[i - 1].idx];
   const UnitArea cuArea  = CS::getArea( cs, cu, partitioner.chType );
-  bool sharedListReuseMode = true;
-  if(
-      pu.mergeFlag == true &&
-      cu.affine == false &&
-      cu.predMode == MODE_INTER
-    )
-  {
-    sharedListReuseMode = false;
-
-    if ((cu.lumaSize().width*cu.lumaSize().height) >= MRG_SHARELIST_SHARSIZE)
-    {
-      sharedListReuseMode = true;
-    }
-
-    if (((cmnAnc.lumaSize().width)*(cmnAnc.lumaSize().height) <= MRG_SHARELIST_SHARSIZE))
-    {
-      sharedListReuseMode = true;
-    }
-  }
-  else
-  {
-    sharedListReuseMode = true;
-  }
 //#endif
 
   for( int i = 0; i < cmnAnc.blocks.size(); i++ )
@@ -657,11 +642,6 @@ static bool isTheSameNbHood( const CodingUnit &cu, const CodingStructure& cs, co
       return false;
     }
   }
-  if(!sharedListReuseMode)
-  {
-    return false;
-  }
-
 
   return true;
 }
@@ -756,6 +736,12 @@ void BestEncInfoCache::destroy()
 
   delete[] m_pCoeff;
   delete[] m_pPcmBuf;
+
+  if (m_runType != nullptr)
+  {
+    delete[] m_runType;
+    m_runType = nullptr;
+  }
 }
 
 void BestEncInfoCache::init( const Slice &slice )
@@ -796,14 +782,22 @@ void BestEncInfoCache::init( const Slice &slice )
 #if REUSE_CU_RESULTS_WITH_MULTIPLE_TUS
   m_pCoeff  = new TCoeff[numCoeff*MAX_NUM_TUS];
   m_pPcmBuf = new Pel   [numCoeff*MAX_NUM_TUS];
+  if (slice.getSPS()->getPLTMode())
+  {
+    m_runType   = new bool[numCoeff*MAX_NUM_TUS];
+  }
 #else
   m_pCoeff  = new TCoeff[numCoeff];
   m_pPcmBuf = new Pel   [numCoeff];
+  if (slice.getSPS()->getPLTMode())
+  {
+    m_runType   = new bool[numCoeff];
+  }
 #endif
 
   TCoeff *coeffPtr = m_pCoeff;
   Pel    *pcmPtr   = m_pPcmBuf;
-
+  bool   *runTypePtr   = m_runType;
   m_dummyCS.pcv = m_slice_bencinf->getPPS()->pcv;
 
   for( unsigned x = 0; x < numPos; x++ )
@@ -818,6 +812,7 @@ void BestEncInfoCache::init( const Slice &slice )
           {
             TCoeff *coeff[MAX_NUM_TBLOCKS] = { 0, };
             Pel    *pcmbf[MAX_NUM_TBLOCKS] = { 0, };
+            bool   *runType[MAX_NUM_TBLOCKS - 1] = { 0, };
 
 #if REUSE_CU_RESULTS_WITH_MULTIPLE_TUS
             for( int i = 0; i < MAX_NUM_TUS; i++ )
@@ -829,10 +824,14 @@ void BestEncInfoCache::init( const Slice &slice )
               {
                 coeff[i] = coeffPtr; coeffPtr += area.blocks[i].area();
                 pcmbf[i] = pcmPtr;   pcmPtr += area.blocks[i].area();
+                if (i < 2)
+                {
+                  runType[i]   = runTypePtr;   runTypePtr   += area.blocks[i].area();
+                }
               }
 
               tu.cs = &m_dummyCS;
-              tu.init(coeff, pcmbf);
+              tu.init(coeff, pcmbf, runType);
             }
 #else
             const UnitArea &area = m_bestEncInfo[x][y][wIdx][hIdx]->tu;
@@ -841,10 +840,12 @@ void BestEncInfoCache::init( const Slice &slice )
             {
               coeff[i] = coeffPtr; coeffPtr += area.blocks[i].area();
               pcmbf[i] =   pcmPtr;   pcmPtr += area.blocks[i].area();
+              // runType[] has only MAX_NUM_TBLOCKS - 1 entries and no runLength buffer exists in this function
+              if (i < MAX_NUM_TBLOCKS - 1) { runType[i] = runTypePtr; runTypePtr += area.blocks[i].area(); }
             }
 
             m_bestEncInfo[x][y][wIdx][hIdx]->tu.cs = &m_dummyCS;
-            m_bestEncInfo[x][y][wIdx][hIdx]->tu.init( coeff, pcmbf );
+            m_bestEncInfo[x][y][wIdx][hIdx]->tu.init(coeff, pcmbf, runType); // same 3-argument form as the multi-TU path
 #endif
           }
         }
@@ -909,11 +910,19 @@ bool BestEncInfoCache::setFromCs( const CodingStructure& cs, const Partitioner&
 
 bool BestEncInfoCache::isValid( const CodingStructure& cs, const Partitioner& partitioner, int qp )
 {
+  if( partitioner.treeType == TREE_C )
+  {
+    return false; //if save & load is allowed for chroma CUs, we should check whether luma info (pred, recon, etc) is the same, which is quite complex
+  }
   unsigned idx1, idx2, idx3, idx4;
   getAreaIdx( cs.area.Y(), *m_slice_bencinf->getPPS()->pcv, idx1, idx2, idx3, idx4 );
 
   BestEncodingInfo& encInfo = *m_bestEncInfo[idx1][idx2][idx3][idx4];
 
+  if( encInfo.cu.treeType != partitioner.treeType || encInfo.cu.modeType != partitioner.modeType )
+  {
+    return false;
+  }
   if( encInfo.cu.qp != qp )
     return false;
   if( cs.picture->poc != encInfo.poc || CS::getArea( cs, cs.area, partitioner.chType ) != CS::getArea( cs, encInfo.cu, partitioner.chType ) || !isTheSameNbHood( encInfo.cu, cs, partitioner
@@ -1111,12 +1120,11 @@ void EncModeCtrlMTnoRQT::initCTUEncoding( const Slice &slice )
   }
 }
 
-
 void EncModeCtrlMTnoRQT::initCULevel( Partitioner &partitioner, const CodingStructure& cs )
 {
   // Min/max depth
   unsigned minDepth = 0;
-  unsigned maxDepth = g_aucLog2[cs.sps->getCTUSize()] - g_aucLog2[cs.sps->getMinQTSize( m_slice->getSliceType(), partitioner.chType )];
+  unsigned maxDepth = floorLog2(cs.sps->getCTUSize()) - floorLog2(cs.sps->getMinQTSize( m_slice->getSliceType(), partitioner.chType ));
   if( m_pcEncCfg->getUseFastLCTU() )
   {
     if( auto adPartitioner = dynamic_cast<AdaptiveDepthPartitioner*>( &partitioner ) )
@@ -1168,25 +1176,21 @@ void EncModeCtrlMTnoRQT::initCULevel( Partitioner &partitioner, const CodingStru
 
   // QP
   int baseQP = cs.baseQP;
-  if (!CS::isDualITree (cs) || isLuma (partitioner.chType))
+  if (!partitioner.isSepTree(cs) || isLuma(partitioner.chType))
   {
     if (m_pcEncCfg->getUseAdaptiveQP())
     {
       baseQP = Clip3(-cs.sps->getQpBDOffset(CHANNEL_TYPE_LUMA), MAX_QP, baseQP + xComputeDQP(cs, partitioner));
     }
 #if ENABLE_QPA_SUB_CTU
-    else if (m_pcEncCfg->getUsePerceptQPA() && !m_pcEncCfg->getUseRateCtrl() && cs.pps->getUseDQP() && cs.pps->getCuQpDeltaSubdiv() > 0)
+    else if (m_pcEncCfg->getUsePerceptQPA() && !m_pcEncCfg->getUseRateCtrl() && cs.pps->getUseDQP() && cs.slice->getCuQpDeltaSubdiv() > 0)
     {
       const PreCalcValues &pcv = *cs.pcv;
 
       if ((partitioner.currArea().lwidth() < pcv.maxCUWidth) && (partitioner.currArea().lheight() < pcv.maxCUHeight) && cs.picture)
       {
         const Position    &pos = partitioner.currQgPos;
-#if MAX_TB_SIZE_SIGNALLING
-        const unsigned mtsLog2 = (unsigned)g_aucLog2[std::min (cs.sps->getMaxTbSize(), pcv.maxCUWidth)];
-#else
-        const unsigned mtsLog2 = (unsigned)g_aucLog2[std::min<uint32_t> (MAX_TB_SIZEY, pcv.maxCUWidth)];
-#endif
+        const unsigned mtsLog2 = (unsigned)floorLog2(std::min (cs.sps->getMaxTbSize(), pcv.maxCUWidth));
         const unsigned  stride = pcv.maxCUWidth >> mtsLog2;
 
         baseQP = cs.picture->m_subCtuQP[((pos.x & pcv.maxCUWidthMask) >> mtsLog2) + stride * ((pos.y & pcv.maxCUHeightMask) >> mtsLog2)];
@@ -1209,25 +1213,9 @@ void EncModeCtrlMTnoRQT::initCULevel( Partitioner &partitioner, const CodingStru
 
   xGetMinMaxQP( minQP, maxQP, cs, partitioner, baseQP, *cs.sps, *cs.pps, CU_QUAD_SPLIT );
   bool checkIbc = true;
-  if (cs.chType == CHANNEL_TYPE_CHROMA)
+  if (partitioner.chType == CHANNEL_TYPE_CHROMA)
   {
-    IbcLumaCoverage ibcLumaCoverage = cs.getIbcLumaCoverage(cs.area.Cb());
-    switch (ibcLumaCoverage)
-    {
-    case IBC_LUMA_COVERAGE_FULL:
-      // check IBC
-      break;
-    case IBC_LUMA_COVERAGE_PARTIAL:
-      // do not check IBC
-      checkIbc = false;
-      break;
-    case IBC_LUMA_COVERAGE_NONE:
-      // do not check IBC
-      checkIbc = false;
-      break;
-    default:
-      THROW("Unknown IBC luma coverage type");
-    }
+    checkIbc = false;
   }
   // Add coding modes here
   // NOTE: Working back to front, as a stack, which is more efficient with the container
@@ -1240,7 +1228,7 @@ void EncModeCtrlMTnoRQT::initCULevel( Partitioner &partitioner, const CodingStru
   {
     for( int qp = maxQP; qp >= minQP; qp-- )
     {
-      m_ComprCUCtxList.back().testModes.push_back( { ETM_SPLIT_QT, ETO_STANDARD, qp, false } );
+      m_ComprCUCtxList.back().testModes.push_back( { ETM_SPLIT_QT, ETO_STANDARD, qp } );
     }
   }
 
@@ -1249,7 +1237,7 @@ void EncModeCtrlMTnoRQT::initCULevel( Partitioner &partitioner, const CodingStru
     // add split modes
     for( int qp = maxQP; qp >= minQP; qp-- )
     {
-      m_ComprCUCtxList.back().testModes.push_back( { ETM_SPLIT_TT_V, ETO_STANDARD, qp, false } );
+      m_ComprCUCtxList.back().testModes.push_back( { ETM_SPLIT_TT_V, ETO_STANDARD, qp } );
     }
   }
 
@@ -1258,7 +1246,7 @@ void EncModeCtrlMTnoRQT::initCULevel( Partitioner &partitioner, const CodingStru
     // add split modes
     for( int qp = maxQP; qp >= minQP; qp-- )
     {
-      m_ComprCUCtxList.back().testModes.push_back( { ETM_SPLIT_TT_H, ETO_STANDARD, qp, false } );
+      m_ComprCUCtxList.back().testModes.push_back( { ETM_SPLIT_TT_H, ETO_STANDARD, qp } );
     }
   }
 
@@ -1270,7 +1258,7 @@ void EncModeCtrlMTnoRQT::initCULevel( Partitioner &partitioner, const CodingStru
     // add split modes
     for( int qp = maxQP; qp >= minQP; qp-- )
     {
-      m_ComprCUCtxList.back().testModes.push_back( { ETM_SPLIT_BT_V, ETO_STANDARD, qp, false } );
+      m_ComprCUCtxList.back().testModes.push_back( { ETM_SPLIT_BT_V, ETO_STANDARD, qp } );
     }
     m_ComprCUCtxList.back().set( DID_VERT_SPLIT, true );
   }
@@ -1284,7 +1272,7 @@ void EncModeCtrlMTnoRQT::initCULevel( Partitioner &partitioner, const CodingStru
     // add split modes
     for( int qp = maxQP; qp >= minQP; qp-- )
     {
-      m_ComprCUCtxList.back().testModes.push_back( { ETM_SPLIT_BT_H, ETO_STANDARD, qp, false } );
+      m_ComprCUCtxList.back().testModes.push_back( { ETM_SPLIT_BT_H, ETO_STANDARD, qp } );
     }
     m_ComprCUCtxList.back().set( DID_HORZ_SPLIT, true );
   }
@@ -1297,7 +1285,7 @@ void EncModeCtrlMTnoRQT::initCULevel( Partitioner &partitioner, const CodingStru
   {
     for( int qp = maxQPq; qp >= minQPq; qp-- )
     {
-      m_ComprCUCtxList.back().testModes.push_back( { ETM_SPLIT_QT, ETO_STANDARD, qp, false } );
+      m_ComprCUCtxList.back().testModes.push_back( { ETM_SPLIT_QT, ETO_STANDARD, qp } );
     }
   }
 
@@ -1305,93 +1293,107 @@ void EncModeCtrlMTnoRQT::initCULevel( Partitioner &partitioner, const CodingStru
 
   xGetMinMaxQP( minQP, maxQP, cs, partitioner, baseQP, *cs.sps, *cs.pps, CU_DONT_SPLIT );
 
-  bool useLossless = false;
   int  lowestQP = minQP;
-  if( cs.pps->getTransquantBypassEnabledFlag() )
-  {
-    useLossless = true; // mark that the first iteration is to cost TQB mode.
-    minQP = minQP - 1;  // increase loop variable range by 1, to allow testing of TQB mode along with other QPs
-
-    if( m_pcEncCfg->getCUTransquantBypassFlagForceValue() )
-    {
-      maxQP = minQP;
-    }
-  }
 
   //////////////////////////////////////////////////////////////////////////
   // Add unit coding modes: Intra, InterME, InterMerge ...
+  bool tryIntraRdo = true;
+  bool tryInterRdo = true;
+  bool tryIBCRdo   = true;
+  if( partitioner.isConsIntra() )
+  {
+    tryInterRdo = false;
+  }
+  else if( partitioner.isConsInter() )
+  {
+    tryIntraRdo = tryIBCRdo = false;
+  }
+  checkIbc &= tryIBCRdo;
 
   for( int qpLoop = maxQP; qpLoop >= minQP; qpLoop-- )
   {
     const int  qp       = std::max( qpLoop, lowestQP );
-    const bool lossless = useLossless && qpLoop == minQP;
 #if REUSE_CU_RESULTS
     const bool isReusingCu = isValid( cs, partitioner, qp );
     cuECtx.set( IS_REUSING_CU, isReusingCu );
     if( isReusingCu )
     {
-      m_ComprCUCtxList.back().testModes.push_back( {ETM_RECO_CACHED, ETO_STANDARD, qp, lossless} );
+      m_ComprCUCtxList.back().testModes.push_back( {ETM_RECO_CACHED, ETO_STANDARD, qp} );
     }
 #endif
     // add intra modes
-    m_ComprCUCtxList.back().testModes.push_back( { ETM_IPCM,  ETO_STANDARD, qp, lossless } );
-    m_ComprCUCtxList.back().testModes.push_back( { ETM_INTRA, ETO_STANDARD, qp, lossless } );
+    if( tryIntraRdo )
+    {
+    if (cs.slice->getSPS()->getPLTMode() && ( cs.slice->isIRAP() || (cs.area.lwidth() == 4 && cs.area.lheight() == 4) ) && getPltEnc() )
+    {
+      m_ComprCUCtxList.back().testModes.push_back({ ETM_PALETTE, ETO_STANDARD, qp });
+    }
+    m_ComprCUCtxList.back().testModes.push_back( { ETM_INTRA, ETO_STANDARD, qp } );
+    if (cs.slice->getSPS()->getPLTMode() && !cs.slice->isIRAP() && !(cs.area.lwidth() == 4 && cs.area.lheight() == 4) && getPltEnc() )
+    {
+      m_ComprCUCtxList.back().testModes.push_back({ ETM_PALETTE,  ETO_STANDARD, qp });
+    }
+    }
     // add ibc mode to intra path
     if (cs.sps->getIBCFlag() && checkIbc)
     {
-      m_ComprCUCtxList.back().testModes.push_back({ ETM_IBC,         ETO_STANDARD,  qp, lossless });
-      if (cs.chType == CHANNEL_TYPE_LUMA)
+      m_ComprCUCtxList.back().testModes.push_back({ ETM_IBC,         ETO_STANDARD,  qp });
+      if (partitioner.chType == CHANNEL_TYPE_LUMA)
       {
-        m_ComprCUCtxList.back().testModes.push_back({ ETM_IBC_MERGE,   ETO_STANDARD,  qp, lossless });
+        m_ComprCUCtxList.back().testModes.push_back({ ETM_IBC_MERGE,   ETO_STANDARD,  qp });
       }
     }
   }
 
   // add first pass modes
-  if( !m_slice->isIRAP() )
+  if ( !m_slice->isIRAP() && !( cs.area.lwidth() == 4 && cs.area.lheight() == 4 ) && tryInterRdo )
   {
     for( int qpLoop = maxQP; qpLoop >= minQP; qpLoop-- )
     {
       const int  qp       = std::max( qpLoop, lowestQP );
-      const bool lossless = useLossless && qpLoop == minQP;
+      if (m_pcEncCfg->getIMV())
+      {
+        m_ComprCUCtxList.back().testModes.push_back({ ETM_INTER_ME,  EncTestModeOpts( 4 << ETO_IMV_SHIFT ), qp });
+      }
       if( m_pcEncCfg->getIMV() || m_pcEncCfg->getUseAffineAmvr() )
       {
         int imv = m_pcEncCfg->getIMV4PelFast() ? 3 : 2;
-        m_ComprCUCtxList.back().testModes.push_back( { ETM_INTER_ME, EncTestModeOpts( imv << ETO_IMV_SHIFT ), qp, lossless } );
-        m_ComprCUCtxList.back().testModes.push_back( { ETM_INTER_ME, EncTestModeOpts( 1 << ETO_IMV_SHIFT ), qp, lossless } );
+        m_ComprCUCtxList.back().testModes.push_back( { ETM_INTER_ME, EncTestModeOpts( imv << ETO_IMV_SHIFT ), qp } );
+        m_ComprCUCtxList.back().testModes.push_back( { ETM_INTER_ME, EncTestModeOpts( 1 << ETO_IMV_SHIFT ), qp } );
       }
       // add inter modes
       if( m_pcEncCfg->getUseEarlySkipDetection() )
       {
         if( cs.sps->getUseTriangle() && cs.slice->isInterB() )
         {
-          m_ComprCUCtxList.back().testModes.push_back( { ETM_MERGE_TRIANGLE, ETO_STANDARD, qp, lossless } );
+          m_ComprCUCtxList.back().testModes.push_back( { ETM_MERGE_TRIANGLE, ETO_STANDARD, qp } );
         }
-        m_ComprCUCtxList.back().testModes.push_back( { ETM_MERGE_SKIP,  ETO_STANDARD, qp, lossless } );
+        m_ComprCUCtxList.back().testModes.push_back( { ETM_MERGE_SKIP,  ETO_STANDARD, qp } );
         if ( cs.sps->getUseAffine() || cs.sps->getSBTMVPEnabledFlag() )
         {
-          m_ComprCUCtxList.back().testModes.push_back( { ETM_AFFINE,    ETO_STANDARD, qp, lossless } );
+          m_ComprCUCtxList.back().testModes.push_back( { ETM_AFFINE,    ETO_STANDARD, qp } );
         }
-        m_ComprCUCtxList.back().testModes.push_back( { ETM_INTER_ME,    ETO_STANDARD, qp, lossless } );
+        m_ComprCUCtxList.back().testModes.push_back( { ETM_INTER_ME,    ETO_STANDARD, qp } );
       }
       else
       {
-        m_ComprCUCtxList.back().testModes.push_back( { ETM_INTER_ME,    ETO_STANDARD, qp, lossless } );
+        m_ComprCUCtxList.back().testModes.push_back( { ETM_INTER_ME,    ETO_STANDARD, qp } );
         if( cs.sps->getUseTriangle() && cs.slice->isInterB() )
         {
-          m_ComprCUCtxList.back().testModes.push_back( { ETM_MERGE_TRIANGLE, ETO_STANDARD, qp, lossless } );
+          m_ComprCUCtxList.back().testModes.push_back( { ETM_MERGE_TRIANGLE, ETO_STANDARD, qp } );
         }
-        m_ComprCUCtxList.back().testModes.push_back( { ETM_MERGE_SKIP,  ETO_STANDARD, qp, lossless } );
+        m_ComprCUCtxList.back().testModes.push_back( { ETM_MERGE_SKIP,  ETO_STANDARD, qp } );
         if ( cs.sps->getUseAffine() || cs.sps->getSBTMVPEnabledFlag() )
         {
-          m_ComprCUCtxList.back().testModes.push_back( { ETM_AFFINE,    ETO_STANDARD, qp, lossless } );
+          m_ComprCUCtxList.back().testModes.push_back( { ETM_AFFINE,    ETO_STANDARD, qp } );
         }
       }
       if (m_pcEncCfg->getUseHashME())
       {
-        if ((cs.area.lwidth() == cs.area.lheight() && cs.area.lwidth() <= 64 && cs.area.lwidth() >= 4) || (cs.area.lwidth() == 4 && cs.area.lheight() == 8) || (cs.area.lwidth() == 8 && cs.area.lheight() == 4))
+        int minSize = min(cs.area.lwidth(), cs.area.lheight());
+        if (minSize < 128 && minSize >= 4)
         {
-          m_ComprCUCtxList.back().testModes.push_back({ ETM_HASH_INTER, ETO_STANDARD, qp, lossless });
+          m_ComprCUCtxList.back().testModes.push_back({ ETM_HASH_INTER, ETO_STANDARD, qp });
         }
       }
     }
@@ -1417,7 +1419,7 @@ bool EncModeCtrlMTnoRQT::tryMode( const EncTestMode& encTestmode, const CodingSt
   ComprCUCtx& cuECtx = m_ComprCUCtxList.back();
 
   // Fast checks, partitioning depended
-  if (cuECtx.isHashPerfectMatch && encTestmode.type != ETM_MERGE_SKIP && encTestmode.type != ETM_AFFINE && encTestmode.type != ETM_MERGE_TRIANGLE)
+  if (cuECtx.isHashPerfectMatch && encTestmode.type != ETM_MERGE_SKIP && encTestmode.type != ETM_INTER_ME && encTestmode.type != ETM_AFFINE && encTestmode.type != ETM_MERGE_TRIANGLE)
   {
     return false;
   }
@@ -1459,9 +1461,6 @@ bool EncModeCtrlMTnoRQT::tryMode( const EncTestMode& encTestmode, const CodingSt
   const SPS&             sps         = *slice.getSPS();
   const uint32_t             numComp     = getNumberValidComponents( slice.getSPS()->getChromaFormatIdc() );
   const uint32_t             width       = partitioner.currArea().lumaSize().width;
-#if FIX_PCM
-  const uint32_t             height       = partitioner.currArea().lumaSize().height;
-#endif
   const CodingStructure *bestCS      = cuECtx.bestCS;
   const CodingUnit      *bestCU      = cuECtx.bestCU;
   const EncTestMode      bestMode    = bestCS ? getCSEncMode( *bestCS ) : EncTestMode();
@@ -1513,6 +1512,14 @@ bool EncModeCtrlMTnoRQT::tryMode( const EncTestMode& encTestmode, const CodingSt
     // INTRA MODES
     if (cs.sps->getIBCFlag() && !cuECtx.bestTU)
       return true;
+    if( partitioner.isConsIntra() && !cuECtx.bestTU )
+    {
+      return true;
+    }
+    if ( partitioner.currArea().lumaSize().width == 4 && partitioner.currArea().lumaSize().height == 4 && !slice.isIntra() && !cuECtx.bestTU )
+    {
+      return true;
+    }
     if( !( slice.isIRAP() || bestMode.type == ETM_INTRA || !cuECtx.bestTU ||
       ((!m_pcEncCfg->getDisableIntraPUsInInterSlices()) && (!relatedCU.isInter || !relatedCU.isIBC) && (
                                          ( cuECtx.bestTU->cbf[0] != 0 ) ||
@@ -1533,46 +1540,57 @@ bool EncModeCtrlMTnoRQT::tryMode( const EncTestMode& encTestmode, const CodingSt
     if( lastTestMode().type != ETM_INTRA && cuECtx.bestCS && cuECtx.bestCU && interHadActive( cuECtx ) )
     {
       // Get SATD threshold from best Inter-CU
-      if( !cs.slice->isIRAP() && m_pcEncCfg->getUsePbIntraFast() )
+      if (!cs.slice->isIRAP() && m_pcEncCfg->getUsePbIntraFast() && !cs.slice->getDisableSATDForRD())
       {
         CodingUnit* bestCU = cuECtx.bestCU;
         if (bestCU && !CU::isIntra(*bestCU))
         {
           DistParam distParam;
-          const bool useHad = !bestCU->transQuantBypass;
+          const bool useHad = true;
           m_pcRdCost->setDistParam( distParam, cs.getOrgBuf( COMPONENT_Y ), cuECtx.bestCS->getPredBuf( COMPONENT_Y ), cs.sps->getBitDepth( CHANNEL_TYPE_LUMA ), COMPONENT_Y, useHad );
           cuECtx.interHad = distParam.distFunc( distParam );
         }
       }
     }
-
+    if (bestMode.type == ETM_PALETTE && !slice.isIRAP() && !( partitioner.currArea().lumaSize().width == 4 && partitioner.currArea().lumaSize().height == 4) ) // inter slice
+    {
+      return false;
+    }
+    if ( m_pcEncCfg->getUseFastISP() && relatedCU.relatedCuIsValid )
+    {
+      cuECtx.ispPredModeVal     = relatedCU.ispPredModeVal;
+      cuECtx.bestDCT2NonISPCost = relatedCU.bestDCT2NonISPCost;
+      cuECtx.relatedCuIsValid   = relatedCU.relatedCuIsValid;
+      cuECtx.bestNonDCT2Cost    = relatedCU.bestNonDCT2Cost;
+      cuECtx.bestISPIntraMode   = relatedCU.bestISPIntraMode;
+    }
     return true;
   }
-  else if( encTestmode.type == ETM_IPCM )
+  else if (encTestmode.type == ETM_PALETTE)
   {
-    if( getFastDeltaQp() )
+    if (partitioner.currArea().lumaSize().width > 64 || partitioner.currArea().lumaSize().height > 64)
     {
-      const SPS &sps = *cs.sps;
-      const uint32_t fastDeltaQPCuMaxPCMSize = Clip3( ( uint32_t ) 1 << sps.getPCMLog2MinSize(), ( uint32_t ) 1 << sps.getPCMLog2MaxSize(), 32u );
-
-      if( cs.area.lumaSize().width > fastDeltaQPCuMaxPCMSize )
+      return false;
+    }
+    const Area curr_cu = CS::getArea(cs, cs.area, partitioner.chType).blocks[getFirstComponentOfChannel(partitioner.chType)];
+    try
+    {
+      double stored_cost = slice.m_mapPltCost.at(curr_cu.pos()).at(curr_cu.size());
+      if (bestMode.type != ETM_INVALID && stored_cost > cuECtx.bestCS->cost)
       {
-        return false;   // only check necessary PCM in fast deltaqp mode
+        return false;
       }
     }
-
-    // PCM MODES
-#if FIX_PCM
-    return sps.getPCMEnabledFlag() && width <= ( 1 << sps.getPCMLog2MaxSize() ) && width >= ( 1 << sps.getPCMLog2MinSize() )
-            && height <= ( 1 << sps.getPCMLog2MaxSize() ) && height >= ( 1 << sps.getPCMLog2MinSize() );
-#else
-    return sps.getPCMEnabledFlag() && width <= ( 1 << sps.getPCMLog2MaxSize() ) && width >= ( 1 << sps.getPCMLog2MinSize() );
-#endif
+    catch (const std::out_of_range &)
+    {
+      // do nothing if no stored cost value was found.
+    }
+    return true;
   }
   else if (encTestmode.type == ETM_IBC || encTestmode.type == ETM_IBC_MERGE)
   {
     // IBC MODES
-    return sps.getIBCFlag() && width <= IBC_MAX_CAND_SIZE && partitioner.currArea().lumaSize().height <= IBC_MAX_CAND_SIZE;
+    return sps.getIBCFlag() && (partitioner.currArea().lumaSize().width < 128 && partitioner.currArea().lumaSize().height < 128);
   }
   else if( isModeInter( encTestmode ) )
   {
@@ -1817,7 +1835,15 @@ bool EncModeCtrlMTnoRQT::tryMode( const EncTestMode& encTestmode, const CodingSt
   else
   {
     CHECK( encTestmode.type != ETM_POST_DONT_SPLIT, "Unknown mode" );
-
+    if ((cuECtx.get<double>(BEST_NO_IMV_COST) == (MAX_DOUBLE * .5) || cuECtx.get<bool>(IS_REUSING_CU)) && !slice.isIntra())
+    {
+      unsigned idx1, idx2, idx3, idx4;
+      getAreaIdx(partitioner.currArea().Y(), *slice.getPPS()->pcv, idx1, idx2, idx3, idx4);
+      if (g_isReusedUniMVsFilled[idx1][idx2][idx3][idx4])
+      {
+        m_pcInterSearch->insertUniMvCands(partitioner.currArea().Y(), g_reusedUniMVs[idx1][idx2][idx3][idx4]);
+      }
+    }
     if( !bestCS || ( bestCS && isModeSplit( bestMode ) ) )
     {
       return false;
@@ -1828,30 +1854,104 @@ bool EncModeCtrlMTnoRQT::tryMode( const EncTestMode& encTestmode, const CodingSt
       setFromCs( *bestCS, partitioner );
 
 #endif
+      if( partitioner.modeType == MODE_TYPE_INTRA && partitioner.chType == CHANNEL_TYPE_LUMA )
+      {
+        return false; // do not set the best coding mode during the intra coding pass
+      }
       // assume the non-split modes are done and set the marks for the best found mode
       if( bestCS && bestCU )
       {
         if( CU::isInter( *bestCU ) )
         {
           relatedCU.isInter   = true;
-#if HM_CODED_CU_INFO
           relatedCU.isSkip   |= bestCU->skip;
           relatedCU.isMMVDSkip |= bestCU->mmvdSkip;
-#else
-          relatedCU.isSkip    = bestCU->skip;
-#endif
-          relatedCU.GBiIdx    = bestCU->GBiIdx;
+          relatedCU.BcwIdx    = bestCU->BcwIdx;
+          if (bestCU->slice->getSPS()->getUseColorTrans())
+          {
+            if (m_pcEncCfg->getRGBFormatFlag())
+            {
+              if (bestCU->colorTransform && bestCU->rootCbf)
+              {
+                relatedCU.selectColorSpaceOption = 1;
+              }
+              else
+              {
+                relatedCU.selectColorSpaceOption = 2;
+              }
+            }
+            else
+            {
+              if (!bestCU->colorTransform || !bestCU->rootCbf)
+              {
+                relatedCU.selectColorSpaceOption = 1;
+              }
+              else
+              {
+                relatedCU.selectColorSpaceOption = 2;
+              }
+            }
+          }
         }
         else if (CU::isIBC(*bestCU))
         {
           relatedCU.isIBC = true;
-#if HM_CODED_CU_INFO
           relatedCU.isSkip |= bestCU->skip;
-#endif
+          if (bestCU->slice->getSPS()->getUseColorTrans())
+          {
+            if (m_pcEncCfg->getRGBFormatFlag())
+            {
+              if (bestCU->colorTransform && bestCU->rootCbf)
+              {
+                relatedCU.selectColorSpaceOption = 1;
+              }
+              else
+              {
+                relatedCU.selectColorSpaceOption = 2;
+              }
+            }
+            else
+            {
+              if (!bestCU->colorTransform || !bestCU->rootCbf)
+              {
+                relatedCU.selectColorSpaceOption = 1;
+              }
+              else
+              {
+                relatedCU.selectColorSpaceOption = 2;
+              }
+            }
+          }
         }
         else if( CU::isIntra( *bestCU ) )
         {
           relatedCU.isIntra   = true;
+          if ( m_pcEncCfg->getUseFastISP() && cuECtx.ispWasTested && ( !relatedCU.relatedCuIsValid || bestCS->cost < relatedCU.bestCost ) )
+          {
+            // Compact data
+            int bit0 = true;
+            int bit1 = cuECtx.ispMode == NOT_INTRA_SUBPARTITIONS ? 1 : 0;
+            int bit2 = cuECtx.ispMode == VER_INTRA_SUBPARTITIONS;
+            int bit3 = cuECtx.ispLfnstIdx > 0;
+            int bit4 = cuECtx.ispLfnstIdx == 2;
+            int bit5 = cuECtx.mipFlag;
+            int bit6 = cuECtx.bestCostIsp < cuECtx.bestNonDCT2Cost * 0.95;
+            int val =
+              (bit0) |
+              (bit1 << 1) |
+              (bit2 << 2) |
+              (bit3 << 3) |
+              (bit4 << 4) |
+              (bit5 << 5) |
+              (bit6 << 6) |
+              ( cuECtx.bestPredModeDCT2 << 9 );
+            relatedCU.ispPredModeVal     = val;
+            relatedCU.bestDCT2NonISPCost = cuECtx.bestDCT2NonISPCost;
+            relatedCU.bestCost           = bestCS->cost;
+            relatedCU.bestNonDCT2Cost    = cuECtx.bestNonDCT2Cost;
+            relatedCU.bestISPIntraMode   = cuECtx.bestISPIntraMode;
+            relatedCU.relatedCuIsValid   = true;
+          }
         }
 #if ENABLE_SPLIT_PARALLELISM
 #if REUSE_CU_RESULTS
@@ -1867,6 +1967,25 @@ bool EncModeCtrlMTnoRQT::tryMode( const EncTestMode& encTestmode, const CodingSt
   }
 }
 
+// Returns true only for an ETM_INTRA test whose first CU has no root CBF, provided there is no best CS yet,
+// or the test did not beat a best CS made of one intra CU, or the test beat the best CS and is intra itself.
+bool EncModeCtrlMTnoRQT::checkSkipOtherLfnst( const EncTestMode& encTestmode, CodingStructure*& tempCS, Partitioner& partitioner )
+{
+  xExtractFeatures( encTestmode, *tempCS );
+  const ComprCUCtx& ctx = m_ComprCUCtxList.back();
+  if( encTestmode.type != ETM_INTRA )
+  {
+    return false;
+  }
+
+  const CodingStructure* best = ctx.bestCS;
+  const bool noBestYet   = !best;
+  const bool lostToIntra = !noBestYet && tempCS->cost >= best->cost && best->cus.size() == 1 && CU::isIntra( *best->cus[ 0 ] );
+  const bool wonAsIntra  = !noBestYet && tempCS->cost <  best->cost && CU::isIntra( *tempCS->cus[ 0 ] );
+  // rootCbf is read only when one of the three situations above applies (short-circuit evaluation)
+  return ( noBestYet || lostToIntra || wonAsIntra ) && !tempCS->cus[ 0 ]->rootCbf;
+}
+
 bool EncModeCtrlMTnoRQT::useModeResult( const EncTestMode& encTestmode, CodingStructure*& tempCS, Partitioner& partitioner )
 {
   xExtractFeatures( encTestmode, *tempCS );
@@ -1890,6 +2009,19 @@ bool EncModeCtrlMTnoRQT::useModeResult( const EncTestMode& encTestmode, CodingSt
   {
     cuECtx.set( BEST_TRIV_SPLIT_COST, tempCS->cost );
   }
+  else if( encTestmode.type == ETM_INTRA )
+  {
+    const CodingUnit cu = *tempCS->getCU( partitioner.chType );
+
+    if( !cu.mtsFlag )
+    {
+      cuECtx.bestMtsSize2Nx2N1stPass   = tempCS->cost;
+    }
+    if( !cu.ispMode )
+    {
+      cuECtx.bestCostMtsFirstPassNoIsp = tempCS->cost;
+    }
+  }
 
   if( m_pcEncCfg->getIMV4PelFast() && m_pcEncCfg->getIMV() && encTestmode.type == ETM_INTER_ME )
   {
@@ -2027,7 +2159,7 @@ bool EncModeCtrlMTnoRQT::isParallelSplit( const CodingStructure &cs, Partitioner
   const int parlAt  = m_pcEncCfg->getNumSplitThreads() <= 3 ? 1024 : 256;
   if(  cs.slice->isIntra() && numJobs > 2 && ( numPxl == parlAt || !partitioner.canSplit( CU_QUAD_SPLIT, cs ) ) ) return true;
   if( !cs.slice->isIntra() && numJobs > 1 && ( numPxl == parlAt || !partitioner.canSplit( CU_QUAD_SPLIT, cs ) ) ) return true;
-  return false; 
+  return false;
 }
 
 bool EncModeCtrlMTnoRQT::parallelJobSelector( const EncTestMode& encTestmode, const CodingStructure &cs, Partitioner& partitioner ) const
diff --git a/source/Lib/EncoderLib/EncModeCtrl.h b/source/Lib/EncoderLib/EncModeCtrl.h
index f8ae006d18aa7f44f82aa8df76e972259d5a5f44..3ab1b298bafc255d2754a5eb69f176fec5b7918a 100644
--- a/source/Lib/EncoderLib/EncModeCtrl.h
+++ b/source/Lib/EncoderLib/EncModeCtrl.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -43,6 +43,7 @@
 
 #include "CommonLib/CommonDef.h"
 #include "CommonLib/CodingStructure.h"
+#include "InterSearch.h"
 
 #include <typeinfo>
 #include <vector>
@@ -60,7 +61,7 @@ enum EncTestModeType
   ETM_AFFINE,
   ETM_MERGE_TRIANGLE,
   ETM_INTRA,
-  ETM_IPCM,
+  ETM_PALETTE,
   ETM_SPLIT_QT,
   ETM_SPLIT_BT_H,
   ETM_SPLIT_BT_V,
@@ -97,18 +98,18 @@ static void getAreaIdx(const Area& area, const PreCalcValues &pcv, unsigned &idx
 struct EncTestMode
 {
   EncTestMode()
-    : type( ETM_INVALID ), opts( ETO_INVALID  ), qp( -1  ), lossless( false ) {}
+    : type( ETM_INVALID ), opts( ETO_INVALID  ), qp( -1  ), maxCostAllowed( MAX_DOUBLE ) {}
   EncTestMode( EncTestModeType _type )
-    : type( _type       ), opts( ETO_STANDARD ), qp( -1  ), lossless( false ) {}
-  EncTestMode( EncTestModeType _type, int _qp, bool _lossless )
-    : type( _type       ), opts( ETO_STANDARD ), qp( _qp ), lossless( _lossless ) {}
-  EncTestMode( EncTestModeType _type, EncTestModeOpts _opts, int _qp, bool _lossless )
-    : type( _type       ), opts( _opts        ), qp( _qp ), lossless( _lossless ) {}
+    : type( _type       ), opts( ETO_STANDARD ), qp( -1  ), maxCostAllowed( MAX_DOUBLE ) {}
+  EncTestMode( EncTestModeType _type, int _qp )
+    : type( _type       ), opts( ETO_STANDARD ), qp( _qp ), maxCostAllowed( MAX_DOUBLE ) {}
+  EncTestMode( EncTestModeType _type, EncTestModeOpts _opts, int _qp )
+    : type( _type       ), opts( _opts        ), qp( _qp ), maxCostAllowed( MAX_DOUBLE ) {}
 
   EncTestModeType type;
   EncTestModeOpts opts;
   int             qp;
-  bool            lossless;
+  double          maxCostAllowed;   // initialised to MAX_DOUBLE by every constructor so it is never read indeterminate
 };
 
 
@@ -188,12 +189,35 @@ struct ComprCUCtx
     , extraFeatures (            )
     , extraFeaturesd(            )
     , bestInterCost ( MAX_DOUBLE )
+    , bestMtsSize2Nx2N1stPass
+                    ( MAX_DOUBLE )
+    , skipSecondMTSPass
+                    ( false )
     , interHad      (std::numeric_limits<Distortion>::max())
 #if ENABLE_SPLIT_PARALLELISM
     , isLevelSplitParallel
                     ( false )
 #endif
     , bestCostWithoutSplitFlags( MAX_DOUBLE )
+    , bestCostMtsFirstPassNoIsp( MAX_DOUBLE )
+    , bestCostIsp   ( MAX_DOUBLE )
+    , ispWasTested  ( false )
+    , bestPredModeDCT2
+                    ( UINT8_MAX )
+    , relatedCuIsValid
+                    ( false )
+    , ispPredModeVal( 0 )
+    , bestDCT2NonISPCost
+                    ( MAX_DOUBLE )
+    , bestNonDCT2Cost
+                    ( MAX_DOUBLE )
+    , bestISPIntraMode
+                    ( UINT8_MAX )
+    , mipFlag       ( false )
+    , ispMode       ( NOT_INTRA_SUBPARTITIONS )
+    , ispLfnstIdx   ( 0 )
+    , stopNonDCT2Transforms
+                    ( false )
   {
     getAreaIdx( cs.area.Y(), *cs.pcv, cuX, cuY, cuW, cuH );
     partIdx = ( ( cuX << 8 ) | cuY );
@@ -218,11 +242,26 @@ struct ComprCUCtx
   static_vector<int64_t,  30>         extraFeatures;
   static_vector<double, 30>         extraFeaturesd;
   double                            bestInterCost;
+  double                            bestMtsSize2Nx2N1stPass;
+  bool                              skipSecondMTSPass;
   Distortion                        interHad;
 #if ENABLE_SPLIT_PARALLELISM
   bool                              isLevelSplitParallel;
 #endif
   double                            bestCostWithoutSplitFlags;
+  double                            bestCostMtsFirstPassNoIsp;
+  double                            bestCostIsp;
+  bool                              ispWasTested;
+  uint16_t                          bestPredModeDCT2;
+  bool                              relatedCuIsValid;
+  uint16_t                          ispPredModeVal;
+  double                            bestDCT2NonISPCost;
+  double                            bestNonDCT2Cost;
+  uint8_t                           bestISPIntraMode;
+  bool                              mipFlag;
+  uint8_t                           ispMode;
+  uint8_t                           ispLfnstIdx;
+  bool                              stopNonDCT2Transforms;
 
   template<typename T> T    get( int ft )       const { return typeid(T) == typeid(double) ? (T&)extraFeaturesd[ft] : T(extraFeatures[ft]); }
   template<typename T> void set( int ft, T val )      { extraFeatures [ft] = int64_t( val ); }
@@ -250,6 +289,9 @@ protected:
 #if ENABLE_SPLIT_PARALLELISM
   int                   m_runNextInParallel;
 #endif
+  InterSearch*          m_pcInterSearch;
+
+  bool                  m_doPlt;
 
 public:
 
@@ -268,6 +310,7 @@ protected:
 public:
 
   virtual bool useModeResult        ( const EncTestMode& encTestmode, CodingStructure*& tempCS,  Partitioner& partitioner ) = 0;
+  virtual bool checkSkipOtherLfnst  ( const EncTestMode& encTestmode, CodingStructure*& tempCS,  Partitioner& partitioner ) = 0;
 #if ENABLE_SPLIT_PARALLELISM
   virtual void copyState            ( const EncModeCtrl& other, const UnitArea& area );
   virtual int  getNumParallelJobs   ( const CodingStructure &cs, Partitioner& partitioner )                                 const { return 1;     }
@@ -299,8 +342,37 @@ public:
   double getBestInterCost             ()                  const { return m_ComprCUCtxList.back().bestInterCost;           }
   Distortion getInterHad              ()                  const { return m_ComprCUCtxList.back().interHad;                }
   void enforceInterHad                ( Distortion had )        {        m_ComprCUCtxList.back().interHad = had;          }
+  double getMtsSize2Nx2NFirstPassCost ()                  const { return m_ComprCUCtxList.back().bestMtsSize2Nx2N1stPass; }
+  bool   getSkipSecondMTSPass         ()                  const { return m_ComprCUCtxList.back().skipSecondMTSPass;       }
+  void   setSkipSecondMTSPass         ( bool b )                { m_ComprCUCtxList.back().skipSecondMTSPass = b;          }
   double getBestCostWithoutSplitFlags ()                  const { return m_ComprCUCtxList.back().bestCostWithoutSplitFlags;         }
   void   setBestCostWithoutSplitFlags ( double cost )           { m_ComprCUCtxList.back().bestCostWithoutSplitFlags = cost;         }
+  double getMtsFirstPassNoIspCost     ()                  const { return m_ComprCUCtxList.back().bestCostMtsFirstPassNoIsp;         }
+  void   setMtsFirstPassNoIspCost     ( double cost )           { m_ComprCUCtxList.back().bestCostMtsFirstPassNoIsp = cost;         }
+  double getIspCost                   ()                  const { return m_ComprCUCtxList.back().bestCostIsp; }
+  void   setIspCost                   ( double val )            { m_ComprCUCtxList.back().bestCostIsp = val; }
+  bool   getISPWasTested              ()                  const { return m_ComprCUCtxList.back().ispWasTested; }
+  void   setISPWasTested              ( bool val )              { m_ComprCUCtxList.back().ispWasTested = val; }
+  void   setBestPredModeDCT2          ( uint16_t val )          { m_ComprCUCtxList.back().bestPredModeDCT2 = val; }
+  uint16_t getBestPredModeDCT2        ()                  const { return m_ComprCUCtxList.back().bestPredModeDCT2; }
+  bool   getRelatedCuIsValid          ()                  const { return m_ComprCUCtxList.back().relatedCuIsValid; }
+  void   setRelatedCuIsValid          ( bool val )              { m_ComprCUCtxList.back().relatedCuIsValid = val; }
+  uint16_t getIspPredModeValRelCU     ()                  const { return m_ComprCUCtxList.back().ispPredModeVal; }
+  void   setIspPredModeValRelCU       ( uint16_t val )          { m_ComprCUCtxList.back().ispPredModeVal = val; }
+  double getBestDCT2NonISPCostRelCU   ()                  const { return m_ComprCUCtxList.back().bestDCT2NonISPCost; }
+  void   setBestDCT2NonISPCostRelCU   ( double val )            { m_ComprCUCtxList.back().bestDCT2NonISPCost = val; }
+  double getBestNonDCT2Cost           ()                  const { return m_ComprCUCtxList.back().bestNonDCT2Cost; }
+  void   setBestNonDCT2Cost           ( double val )            { m_ComprCUCtxList.back().bestNonDCT2Cost = val; }
+  uint8_t getBestISPIntraModeRelCU    ()                  const { return m_ComprCUCtxList.back().bestISPIntraMode; }
+  void   setBestISPIntraModeRelCU     ( uint8_t val )           { m_ComprCUCtxList.back().bestISPIntraMode = val; }
+  void   setMIPFlagISPPass            ( bool val )              { m_ComprCUCtxList.back().mipFlag = val; }
+  void   setISPMode                   ( uint8_t val )           { m_ComprCUCtxList.back().ispMode = val; }
+  void   setISPLfnstIdx               ( uint8_t val )           { m_ComprCUCtxList.back().ispLfnstIdx = val; }
+  bool   getStopNonDCT2Transforms     ()                  const { return m_ComprCUCtxList.back().stopNonDCT2Transforms; }
+  void   setStopNonDCT2Transforms     ( bool val )              { m_ComprCUCtxList.back().stopNonDCT2Transforms = val; }
+  void setInterSearch                 (InterSearch* pcInterSearch)   { m_pcInterSearch = pcInterSearch; }
+  void   setPltEnc                    ( bool b )                { m_doPlt = b; }
+  bool   getPltEnc()                                      const { return m_doPlt; }
 
 protected:
   void xExtractFeatures ( const EncTestMode encTestmode, CodingStructure& cs );
@@ -359,7 +431,14 @@ struct CodedCUInfo
   bool validMv[NUM_REF_PIC_LIST_01][MAX_STORED_CU_INFO_REFS];
   Mv   saveMv [NUM_REF_PIC_LIST_01][MAX_STORED_CU_INFO_REFS];
 
-  uint8_t GBiIdx;
+  uint8_t BcwIdx;
+  char    selectColorSpaceOption;  // 0 - test both color spaces; 1 - only test the first color space; 2 - only test the second color space
+  uint16_t ispPredModeVal;
+  double   bestDCT2NonISPCost;
+  double   bestCost;
+  double   bestNonDCT2Cost;
+  bool     relatedCuIsValid;
+  uint8_t  bestISPIntraMode;
 
 #if ENABLE_SPLIT_PARALLELISM
 
@@ -409,8 +488,10 @@ public:
   void setMv  ( const UnitArea& area, const RefPicList refPicList, const int iRefIdx, const Mv& rMv );
 
   bool  getInter( const UnitArea& area );
-  void  setGbiIdx( const UnitArea& area, uint8_t gBiIdx );
-  uint8_t getGbiIdx( const UnitArea& area );
+  void  setBcwIdx( const UnitArea& area, uint8_t gBiIdx );
+  uint8_t getBcwIdx( const UnitArea& area );
+
+  char  getSelectColorSpaceOption(const UnitArea& area);
 };
 
 #if REUSE_CU_RESULTS
@@ -442,6 +523,7 @@ private:
   BestEncodingInfo ***m_bestEncInfo[MAX_CU_SIZE >> MIN_CU_LOG2][MAX_CU_SIZE >> MIN_CU_LOG2];
   TCoeff             *m_pCoeff;
   Pel                *m_pPcmBuf;
+  bool               *m_runType;
   CodingStructure     m_dummyCS;
   XUCache             m_dummyCache;
 #if ENABLE_SPLIT_PARALLELISM
@@ -527,6 +609,7 @@ public:
   virtual bool isParallelSplit    ( const CodingStructure &cs, Partitioner& partitioner ) const;
   virtual bool parallelJobSelector( const EncTestMode& encTestmode, const CodingStructure &cs, Partitioner& partitioner ) const;
 #endif
+  virtual bool checkSkipOtherLfnst( const EncTestMode& encTestmode, CodingStructure*& tempCS, Partitioner& partitioner );
 };
 
 
diff --git a/source/Lib/EncoderLib/EncReshape.cpp b/source/Lib/EncoderLib/EncReshape.cpp
index b09eb70d244916af5572a5b95c31fb7b701393bb..27d5ae535f845188a40c21b020262bf7cf6d89ea 100644
--- a/source/Lib/EncoderLib/EncReshape.cpp
+++ b/source/Lib/EncoderLib/EncReshape.cpp
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2019, ITU/ISO/IEC
+* Copyright (c) 2010-2020, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -78,6 +78,12 @@ void  EncReshape::createEnc(int picWidth, int picHeight, uint32_t maxCUWidth, ui
     m_binImportance.resize(PIC_ANALYZE_CW_BINS);
   if (m_reshapePivot.empty())
     m_reshapePivot.resize(PIC_CODE_CW_BINS + 1, 0);
+  if (m_inputPivot.empty())
+    m_inputPivot.resize(PIC_CODE_CW_BINS + 1, 0);
+  if (m_fwdScaleCoef.empty())
+    m_fwdScaleCoef.resize(PIC_CODE_CW_BINS, 1 << FP_PREC);
+  if (m_invScaleCoef.empty())
+    m_invScaleCoef.resize(PIC_CODE_CW_BINS, 1 << FP_PREC);
   if (m_chromaAdjHelpLUT.empty())
     m_chromaAdjHelpLUT.resize(PIC_CODE_CW_BINS, 1<<CSCALE_FP_PREC);
 
@@ -87,6 +93,7 @@ void  EncReshape::createEnc(int picWidth, int picHeight, uint32_t maxCUWidth, ui
   m_sliceReshapeInfo.reshaperModelMinBinIdx = 0;
   m_sliceReshapeInfo.reshaperModelMaxBinIdx = PIC_CODE_CW_BINS - 1;
   memset(m_sliceReshapeInfo.reshaperModelBinCWDelta, 0, (PIC_CODE_CW_BINS) * sizeof(int));
+  m_sliceReshapeInfo.chrResScalingOffset = 0;
 
   m_picWidth = picWidth;
   m_picHeight = picHeight;
@@ -95,6 +102,9 @@ void  EncReshape::createEnc(int picWidth, int picHeight, uint32_t maxCUWidth, ui
   m_widthInCtus = (m_picWidth + m_maxCUWidth - 1) / m_maxCUWidth;
   m_heightInCtus = (m_picHeight + m_maxCUHeight - 1) / m_maxCUHeight;
   m_numCtuInFrame = m_widthInCtus * m_heightInCtus;
+  m_binNum = PIC_CODE_CW_BINS;
+  initSeqStats(m_srcSeqStats);
+  initSeqStats(m_rspSeqStats);
 }
 
 void  EncReshape::destroy()
@@ -111,18 +121,9 @@ void EncReshape::preAnalyzerHDR(Picture *pcPic, const SliceType sliceType, const
   if (m_lumaBD >= 10)
   {
     m_sliceReshapeInfo.sliceReshaperEnableFlag = true;
-    if (reshapeCW.rspIntraPeriod == 1)
-    {
-      if (pcPic->getPOC() == 0)          { m_sliceReshapeInfo.sliceReshaperModelPresentFlag = true;  }
-      else                               { m_sliceReshapeInfo.sliceReshaperModelPresentFlag = false; }
-    }
-    else
-    {
       if (sliceType == I_SLICE )                                              { m_sliceReshapeInfo.sliceReshaperModelPresentFlag = true;  }
       else                                                                    { m_sliceReshapeInfo.sliceReshaperModelPresentFlag = false; }
-    }
-    if (sliceType == I_SLICE  && isDualT)                                     { m_sliceReshapeInfo.enableChromaAdj = 0;                   }
-    else                                                                      { m_sliceReshapeInfo.enableChromaAdj = 1;                   }
+    { m_sliceReshapeInfo.enableChromaAdj = 1;                   }
   }
   else
   {
@@ -137,343 +138,414 @@ void EncReshape::preAnalyzerHDR(Picture *pcPic, const SliceType sliceType, const
 \param   sliceType describe the slice type
 \param   reshapeCW describe some input info
 */
-void EncReshape::preAnalyzerSDR(Picture *pcPic, const SliceType sliceType, const ReshapeCW& reshapeCW, bool isDualT)
+void EncReshape::initSeqStats(SeqInfo &stats)
 {
-  m_sliceReshapeInfo.sliceReshaperModelPresentFlag = true;
-  m_sliceReshapeInfo.sliceReshaperEnableFlag = true;
-
-  int modIP = pcPic->getPOC() - pcPic->getPOC() / reshapeCW.rspFpsToIp * reshapeCW.rspFpsToIp;
-  if (sliceType == I_SLICE || (reshapeCW.rspIntraPeriod == -1 && modIP == 0))
+  for (int i = 0; i < m_binNum; i++)
   {
-    if (m_sliceReshapeInfo.sliceReshaperModelPresentFlag == true)
+    stats.binVar[i] = 0.0;
+    stats.binHist[i] = 0.0;
+    stats.normVar[i] = 0.0;
+  }
+  stats.nonZeroCnt = 0;
+  stats.weightVar = 0.0;
+  stats.weightNorm = 0.0;
+  stats.minBinVar = 0.0;
+  stats.maxBinVar = 0.0;
+  stats.meanBinVar = 0.0;
+  stats.ratioStdU = 0.0;
+  stats.ratioStdV = 0.0;
+}
+void EncReshape::calcSeqStats(Picture *pcPic, SeqInfo &stats)
+{
+  PelBuf picY = pcPic->getOrigBuf(COMPONENT_Y);
+  const int width = picY.width;
+  const int height = picY.height;
+  const int stride = picY.stride;
+  uint32_t winLens = (m_binNum == PIC_CODE_CW_BINS) ? (std::min(height, width) / 240) : 2;
+  winLens = winLens > 0 ? winLens : 1;
+
+  int64_t tempSq = 0;
+  int64_t topSum = 0, topSumSq = 0;
+  int64_t leftSum = 0, leftSumSq = 0;
+  int64_t *leftColSum = new int64_t[width];
+  int64_t *leftColSumSq = new int64_t[width];
+  int64_t *topRowSum = new int64_t[height];
+  int64_t *topRowSumSq = new int64_t[height];
+  int64_t *topColSum = new int64_t[width];
+  int64_t *topColSumSq = new int64_t[width];
+  uint32_t *binCnt = new uint32_t[m_binNum];
+  memset(leftColSum, 0, width * sizeof(int64_t));
+  memset(leftColSumSq, 0, width * sizeof(int64_t));
+  memset(topRowSum, 0, height * sizeof(int64_t));
+  memset(topRowSumSq, 0, height * sizeof(int64_t));
+  memset(topColSum, 0, width * sizeof(int64_t));
+  memset(topColSumSq, 0, width * sizeof(int64_t));
+  memset(binCnt, 0, m_binNum * sizeof(uint32_t));
+
+  initSeqStats(stats);
+  for (uint32_t y = 0; y < height; y++)
+  {
+    for (uint32_t x = 0; x < width; x++)
     {
-      int stdMin = 16 <<(m_lumaBD-8);
-      int stdMax = 235 << (m_lumaBD - 8);
-      int  binLen = m_reshapeLUTSize / PIC_ANALYZE_CW_BINS;
-
-      m_reshapeCW = reshapeCW;
-      m_initCWAnalyze = binLen;
-
-      for (int b = 0; b < PIC_ANALYZE_CW_BINS; b++)
+      const Pel pxlY = picY.buf[x];
+      int64_t sum = 0, sumSq = 0;
+      uint32_t numPixInPart = 0;
+      uint32_t y1 = std::max((int)(y - winLens), 0);
+      uint32_t y2 = std::min((int)(y + winLens), (height - 1));
+      uint32_t x1 = std::max((int)(x - winLens), 0);
+      uint32_t x2 = std::min((int)(x + winLens), (width - 1));
+      uint32_t bx = 0, by = 0;
+      const Pel *pWinY = &picY.buf[0];
+      numPixInPart = (x2 - x1 + 1) * (y2 - y1 + 1);
+
+      if (x == 0 && y == 0)
       {
-        m_binImportance[b] = 0;
-        m_binCW[b] = binLen;
+        for (by = y1; by <= y2; by++)
+        {
+          for (bx = x1; bx <= x2; bx++)
+          {
+            tempSq = pWinY[bx] * pWinY[bx];
+            leftSum += pWinY[bx];
+            leftSumSq += tempSq;
+            leftColSum[bx] += pWinY[bx];
+            leftColSumSq[bx] += tempSq;
+            topColSum[bx] += pWinY[bx];
+            topColSumSq[bx] += tempSq;
+            topRowSum[by] += pWinY[bx];
+            topRowSumSq[by] += tempSq;
+          }
+          pWinY += stride;
+        }
+        topSum = leftSum;
+        topSumSq = leftSumSq;
+        sum = leftSum;
+        sumSq = leftSumSq;
       }
-
-      int startBinIdx = stdMin / binLen;
-      int endBinIdx = stdMax / binLen;
-      m_sliceReshapeInfo.reshaperModelMinBinIdx = startBinIdx;
-      m_sliceReshapeInfo.reshaperModelMaxBinIdx = endBinIdx;
-
-      PelBuf picY = pcPic->getOrigBuf(COMPONENT_Y);
-      const int width = picY.width;
-      const int height = picY.height;
-      const int stride = picY.stride;
-
-      double blockBinVarSum[PIC_ANALYZE_CW_BINS] = { 0.0 };
-      uint32_t   bockBinCnt[PIC_ANALYZE_CW_BINS] = { 0 };
-
-      const int PIC_ANALYZE_WIN_SIZE = 5;
-      const uint32_t winSize = PIC_ANALYZE_WIN_SIZE;
-      const uint32_t winLens = (winSize - 1) >> 1;
-
-      int64_t tempSq = 0;
-      int64_t leftSum = 0, leftSumSq = 0;
-      int64_t *leftColSum = new int64_t[width];
-      int64_t *leftColSumSq = new int64_t[width];
-      memset(leftColSum, 0, width * sizeof(int64_t));
-      memset(leftColSumSq, 0, width * sizeof(int64_t));
-      int64_t topSum = 0, topSumSq = 0;
-      int64_t *topRowSum = new int64_t[height];
-      int64_t *topRowSumSq = new int64_t[height];
-      memset(topRowSum, 0, height * sizeof(int64_t));
-      memset(topRowSumSq, 0, height * sizeof(int64_t));
-      int64_t *topColSum = new int64_t[width];
-      int64_t *topColSumSq = new int64_t[width];
-      memset(topColSum, 0, width * sizeof(int64_t));
-      memset(topColSumSq, 0, width * sizeof(int64_t));
-
-      for (uint32_t y = 0; y < height; y++)
+      else if (x == 0 && y > 0)
       {
-        for (uint32_t x = 0; x < width; x++)
+        if (y < height - winLens)
         {
-          const Pel pxlY = picY.buf[x];
-          int64_t sum = 0;
-          int64_t sumSq = 0;
-          uint32_t numPixInPart = 0;
-
-          uint32_t y1 = std::max((int)(y - winLens), 0);
-          uint32_t y2 = std::min((int)(y + winLens), (height - 1));
-          uint32_t x1 = std::max((int)(x - winLens), 0);
-          uint32_t x2 = std::min((int)(x + winLens), (width - 1));
-
-
-          uint32_t bx = 0, by = 0;
-          const Pel *pWinY = &picY.buf[0];
-          numPixInPart = (x2 - x1 + 1) * (y2 - y1 + 1);
-
-          if (x == 0 && y == 0)           // for the 1st Pixel, calc all points
+          pWinY += winLens*stride;
+          topRowSum[y + winLens] = 0;
+          topRowSumSq[y + winLens] = 0;
+          for (bx = x1; bx <= x2; bx++)
           {
-            for (by = y1; by <= y2; by++)
-            {
-              for (bx = x1; bx <= x2; bx++)
-              {
-                tempSq = pWinY[bx] * pWinY[bx];
-                leftSum += pWinY[bx];
-                leftSumSq += tempSq;
-                leftColSum[bx] += pWinY[bx];
-                leftColSumSq[bx] += tempSq;
-                topColSum[bx] += pWinY[bx];
-                topColSumSq[bx] += tempSq;
-                topRowSum[by] += pWinY[bx];
-                topRowSumSq[by] += tempSq;
-              }
-              pWinY += stride;
-            }
-            topSum = leftSum;
-            topSumSq = leftSumSq;
-            sum = leftSum;
-            sumSq = leftSumSq;
+            topRowSum[y + winLens] += pWinY[bx];
+            topRowSumSq[y + winLens] += pWinY[bx] * pWinY[bx];
           }
-          else if (x == 0 && y > 0)       // for the 1st column, calc the bottom stripe
+          topSum += topRowSum[y + winLens];
+          topSumSq += topRowSumSq[y + winLens];
+        }
+        if (y > winLens)
+        {
+          topSum -= topRowSum[y - 1 - winLens];
+          topSumSq -= topRowSumSq[y - 1 - winLens];
+        }
+        memset(leftColSum, 0, width * sizeof(int64_t));
+        memset(leftColSumSq, 0, width * sizeof(int64_t));
+        pWinY = &picY.buf[0];
+        pWinY -= (y <= winLens ? y : winLens)*stride;
+        for (by = y1; by <= y2; by++)
+        {
+          for (bx = x1; bx <= x2; bx++)
           {
-            if (y < height - winLens)
-            {
-              pWinY += winLens*stride;
-              topRowSum[y + winLens] = 0;
-              topRowSumSq[y + winLens] = 0;
-              for (bx = x1; bx <= x2; bx++)
-              {
-                topRowSum[y + winLens] += pWinY[bx];
-                topRowSumSq[y + winLens] += pWinY[bx] * pWinY[bx];
-              }
-              topSum += topRowSum[y + winLens];
-              topSumSq += topRowSumSq[y + winLens];
-            }
-            if (y > winLens)
-            {
-              topSum -= topRowSum[y - 1 - winLens];
-              topSumSq -= topRowSumSq[y - 1 - winLens];
-            }
-
-            memset(leftColSum, 0, width * sizeof(int64_t));
-            memset(leftColSumSq, 0, width * sizeof(int64_t));
-            pWinY = &picY.buf[0];
-            pWinY -= (y <= winLens ? y : winLens)*stride;
+            leftColSum[bx] += pWinY[bx];
+            leftColSumSq[bx] += pWinY[bx] * pWinY[bx];
+          }
+          pWinY += stride;
+        }
+        leftSum = topSum;
+        leftSumSq = topSumSq;
+        sum = topSum;
+        sumSq = topSumSq;
+      }
+      else if (x > 0)
+      {
+        if (x < width - winLens)
+        {
+          pWinY -= (y <= winLens ? y : winLens)*stride;
+          if (y == 0)
+          {
+            leftColSum[x + winLens] = 0;
+            leftColSumSq[x + winLens] = 0;
             for (by = y1; by <= y2; by++)
             {
-              for (bx = x1; bx <= x2; bx++)
-              {
-                leftColSum[bx] += pWinY[bx];
-                leftColSumSq[bx] += pWinY[bx] * pWinY[bx];
-              }
+              leftColSum[x + winLens] += pWinY[x + winLens];
+              leftColSumSq[x + winLens] += pWinY[x + winLens] * pWinY[x + winLens];
               pWinY += stride;
             }
-
-            leftSum = topSum;
-            leftSumSq = topSumSq;
-            sum = topSum;
-            sumSq = topSumSq;
           }
-
-          else if (x > 0)
+          else
           {
-            if (x < width - winLens)
+            leftColSum[x + winLens] = topColSum[x + winLens];
+            leftColSumSq[x + winLens] = topColSumSq[x + winLens];
+            if (y < height - winLens)
             {
-              pWinY -= (y <= winLens ? y : winLens)*stride;
-              if (y == 0)                 // for the 1st row, calc the right stripe
-              {
-                leftColSum[x + winLens] = 0;
-                leftColSumSq[x + winLens] = 0;
-                for (by = y1; by <= y2; by++)
-                {
-                  leftColSum[x + winLens] += pWinY[x + winLens];
-                  leftColSumSq[x + winLens] += pWinY[x + winLens] * pWinY[x + winLens];
-                  pWinY += stride;
-                }
-              }
-              else                        // for the main area, calc the B-R point
-              {
-                leftColSum[x + winLens] = topColSum[x + winLens];
-                leftColSumSq[x + winLens] = topColSumSq[x + winLens];
-                if (y < height - winLens)
-                {
-                  pWinY = &picY.buf[0];
-                  pWinY += winLens * stride;
-                  leftColSum[x + winLens] += pWinY[x + winLens];
-                  leftColSumSq[x + winLens] += pWinY[x + winLens] * pWinY[x + winLens];
-                }
-                if (y > winLens)
-                {
-                  pWinY = &picY.buf[0];
-                  pWinY -= (winLens + 1) * stride;
-                  leftColSum[x + winLens] -= pWinY[x + winLens];
-                  leftColSumSq[x + winLens] -= pWinY[x + winLens] * pWinY[x + winLens];
-                }
-              }
-              topColSum[x + winLens] = leftColSum[x + winLens];
-              topColSumSq[x + winLens] = leftColSumSq[x + winLens];
-              leftSum += leftColSum[x + winLens];
-              leftSumSq += leftColSumSq[x + winLens];
+              pWinY = &picY.buf[0];
+              pWinY += winLens * stride;
+              leftColSum[x + winLens] += pWinY[x + winLens];
+              leftColSumSq[x + winLens] += pWinY[x + winLens] * pWinY[x + winLens];
             }
-            if (x > winLens)
+            if (y > winLens)
             {
-              leftSum -= leftColSum[x - 1 - winLens];
-              leftSumSq -= leftColSumSq[x - 1 - winLens];
+              pWinY = &picY.buf[0];
+              pWinY -= (winLens + 1) * stride;
+              leftColSum[x + winLens] -= pWinY[x + winLens];
+              leftColSumSq[x + winLens] -= pWinY[x + winLens] * pWinY[x + winLens];
             }
-            sum = leftSum;
-            sumSq = leftSumSq;
-          }
-
-          double average = double(sum) / numPixInPart;
-          double variance = double(sumSq) / numPixInPart - average * average;
-          uint32_t binNum = (uint32_t)(pxlY/PIC_ANALYZE_CW_BINS);
-
-          if (m_lumaBD > 10)
-          {
-            average = average / (double)(1<<(m_lumaBD - 10));
-            variance = variance / (double)(1 << (2*m_lumaBD - 20));
-            binNum = (uint32_t)((pxlY>>(m_lumaBD - 10)) / PIC_ANALYZE_CW_BINS);
-          }
-          else if (m_lumaBD < 10)
-          {
-            average = average * (double)(1 << (10 - m_lumaBD));
-            variance = variance * (double)(1 << (20-2*m_lumaBD));
-            binNum = (uint32_t)((pxlY << (10 - m_lumaBD)) / PIC_ANALYZE_CW_BINS);
           }
-          double varLog10 = log10(variance + 1.0);
-          blockBinVarSum[binNum] += varLog10;
-          bockBinCnt[binNum]++;
+          topColSum[x + winLens] = leftColSum[x + winLens];
+          topColSumSq[x + winLens] = leftColSumSq[x + winLens];
+          leftSum += leftColSum[x + winLens];
+          leftSumSq += leftColSumSq[x + winLens];
+        }
+        if (x > winLens)
+        {
+          leftSum -= leftColSum[x - 1 - winLens];
+          leftSumSq -= leftColSumSq[x - 1 - winLens];
         }
-        picY.buf += stride;
+        sum = leftSum;
+        sumSq = leftSumSq;
       }
 
-      delete[] topColSum;
-      delete[] topColSumSq;
-      delete[] topRowSum;
-      delete[] topRowSumSq;
-      delete[] leftColSum;
-      delete[] leftColSumSq;
-
-      for (int b = 0; b < PIC_ANALYZE_CW_BINS; b++)
+      double average = double(sum) / numPixInPart;
+      double variance = double(sumSq) / numPixInPart - average * average;
+      int binLen = m_reshapeLUTSize / m_binNum;
+      uint32_t binIdx = (uint32_t)(pxlY / binLen);
+      if (m_lumaBD > 10)
+      {
+        average = average / (double)(1 << (m_lumaBD - 10));
+        variance = variance / (double)(1 << (2 * m_lumaBD - 20));
+      }
+      else if (m_lumaBD < 10)
       {
-        if (bockBinCnt[b] > 0)
-          blockBinVarSum[b] = blockBinVarSum[b] / bockBinCnt[b];
+        average = average * (double)(1 << (10 - m_lumaBD));
+        variance = variance * (double)(1 << (20 - 2 * m_lumaBD));
       }
+      double varLog10 = log10(variance + 1.0);
+      stats.binVar[binIdx] += varLog10;
+      binCnt[binIdx]++;
+    }
+    picY.buf += stride;
+  }
+
+  for (int b = 0; b < m_binNum; b++)
+  {
+    stats.binHist[b] = (double)binCnt[b] / (double)(m_reshapeCW.rspPicSize);
+    stats.binVar[b] = (binCnt[b] > 0) ? (stats.binVar[b] / binCnt[b]) : 0.0;
+  }
+  delete[] binCnt;
+  delete[] topColSum;
+  delete[] topColSumSq;
+  delete[] topRowSum;
+  delete[] topRowSumSq;
+  delete[] leftColSum;
+  delete[] leftColSumSq;
+
+  stats.minBinVar = 5.0;
+  stats.maxBinVar = 0.0;
+  stats.meanBinVar = 0.0;
+  stats.nonZeroCnt = 0;
+  for (int b = 0; b < m_binNum; b++)
+  {
+    if (stats.binHist[b] > 0.001)
+    {
+      stats.nonZeroCnt++;
+      stats.meanBinVar += stats.binVar[b];
+      if (stats.binVar[b] > stats.maxBinVar) { stats.maxBinVar = stats.binVar[b]; }
+      if (stats.binVar[b] < stats.minBinVar) { stats.minBinVar = stats.binVar[b]; }
+    }
+  }
+  stats.meanBinVar /= (double)stats.nonZeroCnt;
+  for (int b = 0; b < m_binNum; b++)
+  {
+    if (stats.meanBinVar > 0.0)
+      stats.normVar[b] = stats.binVar[b] / stats.meanBinVar;
+    stats.weightVar += stats.binHist[b] * stats.binVar[b];
+    stats.weightNorm += stats.binHist[b] * stats.normVar[b];
+  }
+
+  picY = pcPic->getOrigBuf(COMPONENT_Y);
+  PelBuf picU = pcPic->getOrigBuf(COMPONENT_Cb);
+  PelBuf picV = pcPic->getOrigBuf(COMPONENT_Cr);
+  const int widthC = picU.width;
+  const int heightC = picU.height;
+  const int strideC = picU.stride;
+  double avgY = 0.0, avgU = 0.0, avgV = 0.0;
+  double varY = 0.0, varU = 0.0, varV = 0.0;
+  for (int y = 0; y < height; y++)
+  {
+    for (int x = 0; x < width; x++)
+    {
+      avgY += picY.buf[x];
+      varY += picY.buf[x] * picY.buf[x];
+    }
+    picY.buf += stride;
+  }
+  for (int y = 0; y < heightC; y++)
+  {
+    for (int x = 0; x < widthC; x++)
+    {
+      avgU += picU.buf[x];
+      avgV += picV.buf[x];
+      varU += picU.buf[x] * picU.buf[x];
+      varV += picV.buf[x] * picV.buf[x];
+    }
+    picU.buf += strideC;
+    picV.buf += strideC;
+  }
+  avgY = avgY / (width * height);
+  avgU = avgU / (widthC * heightC);
+  avgV = avgV / (widthC * heightC);
+  varY = varY / (width * height) - avgY * avgY;
+  varU = varU / (widthC * heightC) - avgU * avgU;
+  varV = varV / (widthC * heightC) - avgV * avgV;
+  if (varY > 0)
+  {
+    stats.ratioStdU = sqrt(varU) / sqrt(varY);
+    stats.ratioStdV = sqrt(varV) / sqrt(varY);
+  }
+}
+void EncReshape::preAnalyzerLMCS(Picture *pcPic, const uint32_t signalType, const SliceType sliceType, const ReshapeCW& reshapeCW)
+{
+  m_sliceReshapeInfo.sliceReshaperModelPresentFlag = true;
+  m_sliceReshapeInfo.sliceReshaperEnableFlag = true;
+  int modIP = pcPic->getPOC() - pcPic->getPOC() / reshapeCW.rspFpsToIp * reshapeCW.rspFpsToIp;
+  if (sliceType == I_SLICE || (reshapeCW.updateCtrl == 2 && modIP == 0))
+  {
+    if (m_sliceReshapeInfo.sliceReshaperModelPresentFlag == true)
+    {
+      m_reshapeCW = reshapeCW;
+      m_binNum = PIC_CODE_CW_BINS;
+      int stdMin = 16 << (m_lumaBD - 8);
+      int stdMax = 235 << (m_lumaBD - 8);
+      int binLen = m_reshapeLUTSize / m_binNum;
+      int startBinIdx = stdMin / binLen;
+      int endBinIdx = stdMax / binLen;
+      m_sliceReshapeInfo.reshaperModelMinBinIdx = startBinIdx;
+      m_sliceReshapeInfo.reshaperModelMaxBinIdx = endBinIdx;
+      m_initCWAnalyze = m_lumaBD > 10 ? (binLen >> (m_lumaBD - 10)) : m_lumaBD < 10 ? (binLen << (10 - m_lumaBD)) : binLen;
+      for (int b = 0; b < m_binNum; b++) { m_binCW[b] = m_initCWAnalyze; }
 
       m_reshape = true;
-      m_exceedSTD = false;
       m_useAdpCW = false;
+      m_exceedSTD = false;
       m_chromaWeight = 1.0;
       m_sliceReshapeInfo.enableChromaAdj = 1;
-      m_rateAdpMode                      = 0;
-      m_tcase                            = 0;
-      bool   intraAdp = false;
-      bool   interAdp = true;
-      double reshapeTH1 = 0.0;
-      double reshapeTH2 = 5.0;
-      deriveReshapeParametersSDRfromStats(bockBinCnt, blockBinVarSum, &reshapeTH1, &reshapeTH2, &intraAdp, &interAdp);
+      m_rateAdpMode = 0;  m_tcase = 0;
+      bool intraAdp = true, interAdp = true;
 
-      if (m_rateAdpMode == 2 && reshapeCW.rspBaseQP <= 22)
+      calcSeqStats(pcPic, m_srcSeqStats);
+      if (m_binNum == PIC_CODE_CW_BINS)
       {
-        intraAdp = false;
-        interAdp = false;
+        if ((m_srcSeqStats.binHist[0] + m_srcSeqStats.binHist[m_binNum - 1]) > 0.005) { m_exceedSTD = true; }
+        if (m_srcSeqStats.binHist[m_binNum - 1] > 0.0003) { intraAdp = false;  interAdp = false; }
+        if (m_srcSeqStats.binHist[0] > 0.03) { intraAdp = false;  interAdp = false; }
       }
-
-      m_sliceReshapeInfo.sliceReshaperEnableFlag = intraAdp;
-
-      if (!intraAdp && !interAdp)
+      else if (m_binNum == PIC_ANALYZE_CW_BINS)
       {
-        m_sliceReshapeInfo.sliceReshaperModelPresentFlag = false;
-        m_reshape = false;
-        return;
+        if ((m_srcSeqStats.binHist[0] + m_srcSeqStats.binHist[1] + m_srcSeqStats.binHist[m_binNum - 2] + m_srcSeqStats.binHist[m_binNum - 1]) > 0.01) { m_exceedSTD = true; }
+        if ((m_srcSeqStats.binHist[m_binNum - 2] + m_srcSeqStats.binHist[m_binNum - 1]) > 0.0003) { intraAdp = false;  interAdp = false; }
+        if ((m_srcSeqStats.binHist[0] + m_srcSeqStats.binHist[1]) > 0.03) { intraAdp = false;  interAdp = false; }
       }
-
       if (m_exceedSTD)
       {
-        startBinIdx = 2;
-        endBinIdx = 29;
-        for (int b = 0; b < PIC_ANALYZE_CW_BINS; b++)
+        for (int i = 0; i < m_binNum; i++)
         {
-          if (bockBinCnt[b] > 0 && b < startBinIdx)
-            startBinIdx = b;
-          if (bockBinCnt[b] > 0 && b > endBinIdx)
-            endBinIdx = b;
+          if (m_srcSeqStats.binHist[i] > 0 && i < startBinIdx) { startBinIdx = i; }
+          if (m_srcSeqStats.binHist[i] > 0 && i > endBinIdx) { endBinIdx = i; }
         }
         m_sliceReshapeInfo.reshaperModelMinBinIdx = startBinIdx;
         m_sliceReshapeInfo.reshaperModelMaxBinIdx = endBinIdx;
       }
 
-      m_initCWAnalyze = m_lumaBD > 10 ? (m_initCWAnalyze >> (m_lumaBD - 10)) : m_lumaBD < 10 ? (m_initCWAnalyze << (10 - m_lumaBD)) : m_initCWAnalyze;
-      if (reshapeCW.rspBaseQP <= 22 && m_rateAdpMode == 1)
+      if ((m_srcSeqStats.ratioStdU + m_srcSeqStats.ratioStdV) > 1.5 && m_srcSeqStats.binHist[1] > 0.5) { intraAdp = false;  interAdp = false; }
+      if (m_srcSeqStats.ratioStdU > 0.36 && m_srcSeqStats.ratioStdV > 0.2 && m_reshapeCW.rspPicSize > 5184000)
+      {
+        m_sliceReshapeInfo.enableChromaAdj = 0; m_chromaWeight = 1.05;
+        if ((m_srcSeqStats.ratioStdU + m_srcSeqStats.ratioStdV) < 0.69) { m_chromaWeight = 0.95; }
+      }
+
+      if (interAdp)
       {
-        for (int i = 0; i < PIC_ANALYZE_CW_BINS; i++)
+        if (m_reshapeCW.adpOption)
         {
-          if (i >= startBinIdx && i <= endBinIdx)
-            m_binCW[i] = m_initCWAnalyze + 1;
+          m_reshapeCW.binCW[0] = 0; m_reshapeCW.binCW[1] = m_reshapeCW.initialCW;
+          m_rateAdpMode = m_reshapeCW.adpOption - 2 * (m_reshapeCW.adpOption / 2);
+          if (m_reshapeCW.adpOption == 2) { m_tcase = 9; }
+          else if (m_reshapeCW.adpOption > 2) { intraAdp = false; }
+        }
+        else if (signalType == RESHAPE_SIGNAL_SDR)
+        {
+          m_reshapeCW.binCW[0] = 0; m_reshapeCW.binCW[1] = 1022;
+          deriveReshapeParametersSDR(&intraAdp, &interAdp);
+        }
+        else if (signalType == RESHAPE_SIGNAL_HLG)
+        {
+          if (m_reshapeCW.updateCtrl == 0)
+          {
+            m_rateAdpMode = 0;  m_tcase = 9;
+            m_reshapeCW.binCW[1] = 952;
+            if (m_srcSeqStats.meanBinVar < 2.5) { m_reshapeCW.binCW[1] = 840; }
+          }
           else
-            m_binCW[i] = 0;
+          {
+            m_useAdpCW = true;
+            m_rateAdpMode = 2;
+            if (m_binNum == PIC_CODE_CW_BINS) { m_reshapeCW.binCW[0] = 72;  m_reshapeCW.binCW[1] = 58; }
+            else if (m_binNum == PIC_ANALYZE_CW_BINS) { m_reshapeCW.binCW[0] = 36;  m_reshapeCW.binCW[1] = 30; }
+            if (m_srcSeqStats.meanBinVar < 2.5) { intraAdp = false; interAdp = false; }
+          }
         }
       }
-      else if (m_useAdpCW)
+
+      if (m_rateAdpMode == 2 && reshapeCW.rspBaseQP <= 22) { intraAdp = false; interAdp = false; }
+      m_sliceReshapeInfo.sliceReshaperEnableFlag = intraAdp;
+      if (!intraAdp && !interAdp)
       {
-        double Alpha = 1.0, Beta = 0.0;
-        deriveReshapeParameters(blockBinVarSum, startBinIdx, endBinIdx, m_reshapeCW, Alpha, Beta);
-        for (int i = 0; i < PIC_ANALYZE_CW_BINS; i++)
+        m_sliceReshapeInfo.sliceReshaperModelPresentFlag = false;
+        m_reshape = false;
+        return;
+      }
+
+      if (m_rateAdpMode == 1 && reshapeCW.rspBaseQP <= 22)
+      {
+        for (int i = 0; i < m_binNum; i++)
         {
-          if (i >= startBinIdx && i <= endBinIdx)
-            m_binCW[i] = (uint32_t)round(Alpha*blockBinVarSum[i] + Beta);
-          else
-            m_binCW[i] = 0;
+          if (i >= startBinIdx && i <= endBinIdx) { m_binCW[i] = m_initCWAnalyze + 2; }
+          else { m_binCW[i] = 0; }
         }
       }
-      else
+      else if (m_useAdpCW)
       {
-        for (int b = startBinIdx; b <= endBinIdx; b++)
+        if (signalType == RESHAPE_SIGNAL_SDR && m_reshapeCW.updateCtrl == 2)
         {
-          if (blockBinVarSum[b] < reshapeTH1)
-            m_binImportance[b] = 2;
-          else if (blockBinVarSum[b] > reshapeTH2)
-            m_binImportance[b] = 3;
-          else
-            m_binImportance[b] = 1;
+          m_binNum = PIC_ANALYZE_CW_BINS;
+          startBinIdx = startBinIdx * 2;
+          endBinIdx = endBinIdx * 2 + 1;
+          calcSeqStats(pcPic, m_srcSeqStats);
         }
-
-        for (int i = 0; i < PIC_ANALYZE_CW_BINS; i++)
+        double alpha = 1.0, beta = 0.0;
+        deriveReshapeParameters(m_srcSeqStats.binVar, startBinIdx, endBinIdx, m_reshapeCW, alpha, beta);
+        for (int i = 0; i < m_binNum; i++)
         {
-          if (m_binImportance[i] == 0)
-            m_binCW[i] = 0;
-          else if (m_binImportance[i] == 1)
-            m_binCW[i] = m_initCWAnalyze + 1;
-          else if (m_binImportance[i] == 2)
-            m_binCW[i] = m_reshapeCW.binCW[0];
-          else if (m_binImportance[i] == 3)
-            m_binCW[i] = m_reshapeCW.binCW[1];
-          else
-            THROW("SDR Reshape Bin Importance not supported");
+          if (i >= startBinIdx && i <= endBinIdx) { m_binCW[i] = (uint32_t)round(alpha*m_srcSeqStats.binVar[i] + beta); }
+          else { m_binCW[i] = 0; }
         }
       }
-      if (m_reshapeCW.rspPicSize <= 1497600 && reshapeCW.rspIntraPeriod == -1 && modIP == 0 && sliceType != I_SLICE)
+      else
       {
-        m_sliceReshapeInfo.sliceReshaperEnableFlag = false;
+        cwPerturbation(startBinIdx, endBinIdx, (uint16_t)m_reshapeCW.binCW[1]);
       }
-
+      cwReduction(startBinIdx, endBinIdx);
     }
     m_chromaAdj = m_sliceReshapeInfo.enableChromaAdj;
-    if (sliceType == I_SLICE && isDualT)
-    {
-        m_sliceReshapeInfo.enableChromaAdj = 0;
-    }
   }
   else // Inter slices
   {
     m_sliceReshapeInfo.sliceReshaperModelPresentFlag = false;
     m_sliceReshapeInfo.enableChromaAdj = m_chromaAdj;
-
-    if (!m_reshape)
-    {
-      m_sliceReshapeInfo.sliceReshaperEnableFlag = false;
-    }
+    if (!m_reshape) { m_sliceReshapeInfo.sliceReshaperEnableFlag = false; }
     else
     {
       const int cTid = m_reshapeCW.rspTid;
@@ -505,482 +577,303 @@ void EncReshape::bubbleSortDsd(double* array, int * idx, int n)
   }
 }
 
-void EncReshape::deriveReshapeParametersSDRfromStats(uint32_t * blockBinCnt, double *blockBinVarSum, double* reshapeTH1, double* reshapeTH2, bool *intraAdp, bool *interAdp)
+// Seeds the per-bin codeword counts and perturbs them using the source statistics: bins inside
+// [startBinIdx, endBinIdx] start from an equal share of maxCW, all other bins start from 0; bins with
+// histogram mass > 0.001 then gain codewords for low normVar and lose them for high normVar.
+void EncReshape::cwPerturbation(int startBinIdx, int endBinIdx, uint16_t maxCW)
+{
+  const int numActiveBins = endBinIdx - startBinIdx + 1;
+  for (int i = 0; i < m_binNum; i++)
+  {
+    const bool inRange = (i >= startBinIdx && i <= endBinIdx);  // true implies numActiveBins >= 1
+    m_binCW[i] = inRange ? (uint32_t)round((double)maxCW / numActiveBins) : (uint32_t)0;
+  }
+  for (int i = 0; i < m_binNum; i++)
+  {
+    if (!(m_srcSeqStats.binHist[i] > 0.001)) { continue; }  // leave (nearly) empty bins untouched
+    const double hist = m_srcSeqStats.binHist[i] > 0.4 ? 0.4 : m_srcSeqStats.binHist[i];
+    const int delta1 = (uint16_t)(10.0 * hist + 0.5);  // small step
+    const int delta2 = (uint16_t)(20.0 * hist + 0.5);  // large step
+    const double normVar = m_srcSeqStats.normVar[i];
+    int cw = (int)m_binCW[i];
+    cw += (normVar < 0.8) ? delta2 : ((normVar < 0.9) ? delta1 : 0);
+    cw -= (normVar > 1.2) ? delta2 : ((normVar > 1.1) ? delta1 : 0);
+    m_binCW[i] = (cw > 0) ? cw : 0;  // clamp: a reduction must not wrap around or go negative
+  }
+}
+// Trims the bin codewords so that their sum does not exceed totCW - 1, where totCW is m_reshapeLUTSize
+// rescaled by the bit-depth offset from 10 bits. The excess is removed evenly from the bins in
+// [startBinIdx, endBinIdx]; the remainder is taken one codeword at a time from the first non-zero bins.
+void EncReshape::cwReduction(int startBinIdx, int endBinIdx)
+{
+  const int bdShift = m_lumaBD - 10;
+  const int totCW = bdShift > 0 ? m_reshapeLUTSize / (1 << bdShift) : m_reshapeLUTSize * (1 << (-bdShift));
+  const int numBins = endBinIdx - startBinIdx + 1;
+  int usedCW = 0;
+  for (int i = 0; i < m_binNum; i++) { usedCW += m_binCW[i]; }
+  const int deltaCW = usedCW - (totCW - 1);
+  if (deltaCW <= 0 || numBins <= 0) { return; }  // within budget, or empty range (would divide by zero)
+  const int divCW = deltaCW / numBins;
+  int modCW = deltaCW - divCW * numBins;
+  // Even cut first, then at most one extra codeword per non-zero bin until the remainder is used up.
+  for (int i = startBinIdx; i <= endBinIdx; i++)
+  {
+    const int cw = (int)m_binCW[i];
+    m_binCW[i] = (cw > divCW) ? cw - divCW : 0;  // clamp: a bin smaller than the cut must not wrap around
+    if (modCW > 0 && m_binCW[i] > 0) { m_binCW[i]--; modCW--; }
+  }
+}
+void EncReshape::deriveReshapeParametersSDR(bool *intraAdp, bool *interAdp)
 {
-  int    binIdxSortDsd[PIC_ANALYZE_CW_BINS]    = { 0 };
-  double binVarSortDsd[PIC_ANALYZE_CW_BINS]    = { 0.0 };
-  double binHist[PIC_ANALYZE_CW_BINS]          = { 0.0 };
-  double binVarSortDsdCDF[PIC_ANALYZE_CW_BINS] = { 0.0 };
-  double maxBinVar = 0.0, meanBinVar = 0.0, minBinVar = 5.0;
-  int    nonZeroBinCt = 0;
+  bool   isSkipCase = false;
+  bool   isLowCase = false;
   int    firstBinVarLessThanVal1 = 0;
   int    firstBinVarLessThanVal2 = 0;
   int    firstBinVarLessThanVal3 = 0;
-  int    firstBinVarLessThanVal4 = 0;
-
-  for (int b = 0; b < PIC_ANALYZE_CW_BINS; b++)
+  double percBinVarLessThenVal1 = 0.0;
+  double percBinVarLessThenVal2 = 0.0;
+  double percBinVarLessThenVal3 = 0.0;
+  int    *binIdxSortDsd = new int[m_binNum];
+  double *binVarSortDsd = new double[m_binNum];
+  double *binVarSortDsdCDF = new double[m_binNum];
+  double ratioWeiVar = 0.0, ratioWeiVarNorm = 0.0;
+  int startBinIdx = m_sliceReshapeInfo.reshaperModelMinBinIdx;
+  int endBinIdx = m_sliceReshapeInfo.reshaperModelMaxBinIdx;
+
+  for (int b = 0; b < m_binNum; b++)
   {
-    binHist[b] = (double) blockBinCnt[b] / (double)(m_reshapeCW.rspPicSize);
-    if (binHist[b] > 0.001)
+    binVarSortDsd[b] = m_srcSeqStats.binVar[b];
+    binIdxSortDsd[b] = b;
+  }
+  bubbleSortDsd(binVarSortDsd, binIdxSortDsd, m_binNum);
+  binVarSortDsdCDF[0] = m_srcSeqStats.binHist[binIdxSortDsd[0]];
+  for (int b = 1; b < m_binNum; b++) { binVarSortDsdCDF[b] = binVarSortDsdCDF[b - 1] + m_srcSeqStats.binHist[binIdxSortDsd[b]]; }
+  for (int b = 0; b < m_binNum - 1; b++)
+  {
+    if (binVarSortDsd[b] > 3.4) { firstBinVarLessThanVal1 = b + 1; }
+    if (binVarSortDsd[b] > 2.8) { firstBinVarLessThanVal2 = b + 1; }
+    if (binVarSortDsd[b] > 2.5) { firstBinVarLessThanVal3 = b + 1; }
+  }
+  percBinVarLessThenVal1 = binVarSortDsdCDF[firstBinVarLessThanVal1];
+  percBinVarLessThenVal2 = binVarSortDsdCDF[firstBinVarLessThanVal2];
+  percBinVarLessThenVal3 = binVarSortDsdCDF[firstBinVarLessThanVal3];
+  delete[] binIdxSortDsd;
+  delete[] binVarSortDsd;
+  delete[] binVarSortDsdCDF;
+
+  cwPerturbation(startBinIdx, endBinIdx, (uint16_t)m_reshapeCW.binCW[1]);
+  cwReduction(startBinIdx, endBinIdx);
+  initSeqStats(m_rspSeqStats);
+  for (int b = 0; b < m_binNum; b++)
+  {
+    double scale = (m_binCW[b] > 0) ? ((double)m_binCW[b] / (double)m_initCWAnalyze) : 1.0;
+    m_rspSeqStats.binHist[b] = m_srcSeqStats.binHist[b];
+    m_rspSeqStats.binVar[b] = m_srcSeqStats.binVar[b] + 2.0 * log10(scale);
+  }
+  m_rspSeqStats.minBinVar = 5.0;
+  m_rspSeqStats.maxBinVar = 0.0;
+  m_rspSeqStats.meanBinVar = 0.0;
+  m_rspSeqStats.nonZeroCnt = 0;
+  for (int b = 0; b < m_binNum; b++)
+  {
+    if (m_rspSeqStats.binHist[b] > 0.001)
     {
-      nonZeroBinCt++;
-      meanBinVar += blockBinVarSum[b];
-      if (blockBinVarSum[b] > maxBinVar)        {        maxBinVar = blockBinVarSum[b];      }
-      if (blockBinVarSum[b] < minBinVar)        {        minBinVar = blockBinVarSum[b];      }
+      m_rspSeqStats.nonZeroCnt++;
+      m_rspSeqStats.meanBinVar += m_rspSeqStats.binVar[b];
+      if (m_rspSeqStats.binVar[b] > m_rspSeqStats.maxBinVar) { m_rspSeqStats.maxBinVar = m_rspSeqStats.binVar[b]; }
+      if (m_rspSeqStats.binVar[b] < m_rspSeqStats.minBinVar) { m_rspSeqStats.minBinVar = m_rspSeqStats.binVar[b]; }
     }
-    binVarSortDsd[b] = blockBinVarSum[b];
-    binIdxSortDsd[b] = b;
   }
-  if ((binHist[0] + binHist[1] + binHist[PIC_ANALYZE_CW_BINS - 2] + binHist[PIC_ANALYZE_CW_BINS - 1]) > 0.01)   {    m_exceedSTD = true;  }
-  if ((binHist[PIC_ANALYZE_CW_BINS - 2] + binHist[PIC_ANALYZE_CW_BINS - 1]) > 0.01)   {    *interAdp = false;    return;   }
-  else                                                                                {    *interAdp = true;               }
-
-  meanBinVar = meanBinVar / (double)nonZeroBinCt;
-  bubbleSortDsd(binVarSortDsd, binIdxSortDsd, PIC_ANALYZE_CW_BINS);
-  binVarSortDsdCDF[0] = binHist[binIdxSortDsd[0]];
-
-  for (int b = 1; b < PIC_ANALYZE_CW_BINS; b++)
+  m_rspSeqStats.meanBinVar /= (double)m_rspSeqStats.nonZeroCnt;
+  for (int b = 0; b < m_binNum; b++)
   {
-    binVarSortDsdCDF[b] = binVarSortDsdCDF[b - 1] + binHist[binIdxSortDsd[b]];
+    if (m_rspSeqStats.meanBinVar > 0.0)
+      m_rspSeqStats.normVar[b] = m_rspSeqStats.binVar[b] / m_rspSeqStats.meanBinVar;
+    m_rspSeqStats.weightVar += m_rspSeqStats.binHist[b] * m_rspSeqStats.binVar[b];
+    m_rspSeqStats.weightNorm += m_rspSeqStats.binHist[b] * m_rspSeqStats.normVar[b];
   }
+  ratioWeiVar = m_rspSeqStats.weightVar / m_srcSeqStats.weightVar;
+  ratioWeiVarNorm = m_rspSeqStats.weightNorm / m_srcSeqStats.weightNorm;
 
-  for (int b = 0; b < PIC_ANALYZE_CW_BINS - 1; b++)
+  if ((m_srcSeqStats.binHist[0] + m_srcSeqStats.binHist[m_binNum - 1]) > 0.0001 && m_srcSeqStats.binHist[m_binNum - 2] < 0.001)
   {
-    if (binVarSortDsd[b] > 3.5)     {      firstBinVarLessThanVal1 = b + 1;    }
-    if (binVarSortDsd[b] > 3.0)     {      firstBinVarLessThanVal2 = b + 1;    }
-    if (binVarSortDsd[b] > 2.5)     {      firstBinVarLessThanVal3 = b + 1;    }
-    if (binVarSortDsd[b] > 2.0)     {      firstBinVarLessThanVal4 = b + 1;    }
+    if (percBinVarLessThenVal3 > 0.8 && percBinVarLessThenVal2 > 0.4 && m_srcSeqStats.binVar[m_binNum - 2] > 4.8) { isSkipCase = true; }
+    else if (percBinVarLessThenVal3 < 0.1 && percBinVarLessThenVal1 < 0.05 && m_srcSeqStats.binVar[m_binNum - 2] < 4.0) { isSkipCase = true; }
   }
+  if (isSkipCase) { *intraAdp = false;  *interAdp = false;  return; }
 
-  m_reshapeCW.binCW[0] = 38;
-  m_reshapeCW.binCW[1] = 28;
+  if (m_reshapeCW.rspPicSize > 5184000) { isLowCase = true; }
+  else if (m_srcSeqStats.binVar[1] > 4.0) { isLowCase = true; }
+  else if (m_rspSeqStats.meanBinVar > 3.4 && ratioWeiVarNorm > 1.005 && ratioWeiVar > 1.02) { isLowCase = true; }
+  else if (m_rspSeqStats.meanBinVar > 3.1 && ratioWeiVarNorm > 1.005 && ratioWeiVar > 1.04) { isLowCase = true; }
+  else if (m_rspSeqStats.meanBinVar > 2.8 && ratioWeiVarNorm > 1.01 && ratioWeiVar > 1.04) { isLowCase = true; }
 
-  if (m_reshapeCW.rspIntraPeriod == -1)
+  if (m_reshapeCW.updateCtrl == 0)
   {
-    *intraAdp = true;
-    if (m_reshapeCW.rspPicSize > 1497600)
-    {
-      m_reshapeCW.binCW[0] = 36;
-      *reshapeTH1 = 2.4;
-      *reshapeTH2 = 4.5;
-      m_rateAdpMode = 2;
-
-      if (meanBinVar >= 2.52)
-      {
-        if (binVarSortDsdCDF[firstBinVarLessThanVal2] > 0.5)
-        {
-          *reshapeTH1 = 2.5;
-          *reshapeTH2 = 3.0;
-        }
-        else if (binVarSortDsdCDF[firstBinVarLessThanVal2] < 0.1 && binVarSortDsdCDF[firstBinVarLessThanVal1] > 0.02)
-        {
-          *reshapeTH1 = 2.2;
-        }
-        else if (binVarSortDsdCDF[firstBinVarLessThanVal2] > 0.25)
-        {
-          m_reshapeCW.binCW[1] = 30;
-          *reshapeTH1 = 2.0;
-          m_rateAdpMode = 0;
-        }
-        else
-        {
-          m_reshapeCW.binCW[1] = 30;
-          m_rateAdpMode = 1;
-        }
-      }
-    }
-    else if (m_reshapeCW.rspPicSize > 660480)
+    m_reshapeCW.binCW[1] = 1022;
+    if (isLowCase)
     {
-      m_reshapeCW.binCW[0] = 34;
-      *reshapeTH1 = 3.4;
-      *reshapeTH2 = 4.0;
-      m_rateAdpMode = 2;
-
-      if (binVarSortDsdCDF[firstBinVarLessThanVal4] > 0.6)
+      *intraAdp = false;
+      m_rateAdpMode = 1;
+      m_reshapeCW.binCW[1] = 980;
+      if (m_srcSeqStats.binHist[m_binNum - 2] > 0.05)
       {
-        if (maxBinVar < 3.5)
-        {
-          m_useAdpCW = true;
-          m_reshapeCW.binCW[0] = 38;
-        }
-        else
-        {
-          m_reshapeCW.binCW[0] = 40;
-          *reshapeTH1 = 2.2;
-          *reshapeTH2 = 4.5;
-          m_rateAdpMode = 0;
-        }
+        m_reshapeCW.binCW[1] = 896;
+        if (m_srcSeqStats.binVar[m_binNum - 2] < 1.2) { m_reshapeCW.binCW[1] = 938; }
       }
-      else
+      else if (percBinVarLessThenVal2 < 0.8 && percBinVarLessThenVal3 == 1.0)
       {
-        if (maxBinVar > 3.3)
-        {
-          m_reshapeCW.binCW[1] = 30;
-        }
-        else
-        {
-          m_reshapeCW.binCW[1] = 28;
-        }
+        m_rateAdpMode = 1;
+        m_reshapeCW.binCW[1] = 938;
       }
     }
-    else if (m_reshapeCW.rspPicSize > 249600)
+    if (m_srcSeqStats.binHist[m_binNum - 2] < 0.001)
     {
-      m_reshapeCW.binCW[0] = 36;
-      *reshapeTH1 = 2.5;
-      *reshapeTH2 = 4.5;
-
-      if (m_exceedSTD)
+      if (m_srcSeqStats.binHist[1] > 0.05 && m_srcSeqStats.binVar[1] > 3.0)
       {
-        m_reshapeCW.binCW[0] = 36;
-        m_reshapeCW.binCW[1] = 30;
+        *intraAdp = true;
+        m_rateAdpMode = 1;
+        m_reshapeCW.binCW[1] = 784;
       }
-      if (minBinVar > 2.6)
+      else if (m_srcSeqStats.binHist[1] < 0.006)
       {
-        *reshapeTH1 = 3.0;
+        *intraAdp = false;
+        m_rateAdpMode = 0;
+        m_reshapeCW.binCW[1] = 1008;
       }
-      else {
-        double diff1 = binVarSortDsdCDF[firstBinVarLessThanVal4] - binVarSortDsdCDF[firstBinVarLessThanVal3];
-        double diff2 = binVarSortDsdCDF[firstBinVarLessThanVal2] - binVarSortDsdCDF[firstBinVarLessThanVal1];
-        if (diff1 > 0.4 || binVarSortDsdCDF[firstBinVarLessThanVal1] > 0.1)
-        {
-          m_useAdpCW = true;
-          m_rateAdpMode = 1;
-        }
-        else if (diff2 <= 0.1 && binVarSortDsdCDF[firstBinVarLessThanVal4] > 0.99 && binVarSortDsdCDF[firstBinVarLessThanVal3] > 0.642 && binVarSortDsdCDF[firstBinVarLessThanVal2] > 0.03)
-        {
-          m_useAdpCW = true;
-          m_rateAdpMode = 1;
-        }
-        else
-        {
-          m_rateAdpMode = 2;
-        }
+      else if (percBinVarLessThenVal3 < 0.5)
+      {
+        *intraAdp = true;
+        m_rateAdpMode = 0;
+        m_reshapeCW.binCW[1] = 1022;
       }
     }
-    else
+    else if ((m_srcSeqStats.maxBinVar > 4.0 && m_rspSeqStats.meanBinVar > 3.2 && percBinVarLessThenVal2 < 0.25) || ratioWeiVar < 1.03)
     {
-      m_reshapeCW.binCW[0] = 36;
-      *reshapeTH1 = 2.6;
-      *reshapeTH2 = 4.5;
-
-      if (binVarSortDsdCDF[firstBinVarLessThanVal2] > 0.5 && maxBinVar < 4.7)
-      {
-        *reshapeTH1 = 3.2;
-        m_rateAdpMode = 1;
-      }
+      *intraAdp = true;
+      m_rateAdpMode = 0;
+      m_reshapeCW.binCW[1] = 1022;
     }
+    if (*intraAdp == true && m_rateAdpMode == 0) { m_tcase = 9; }
   }
-  else if (m_reshapeCW.rspIntraPeriod == 1)
+  else if (m_reshapeCW.updateCtrl == 1)
   {
-    *intraAdp = true;
-    if (m_reshapeCW.rspPicSize > 5184000)
+    m_reshapeCW.binCW[1] = 952;
+    if (isLowCase)
     {
-      *reshapeTH1 = 2.0;
-      *reshapeTH2 = 3.0;
-      m_rateAdpMode = 2;
-
-      if (maxBinVar > 2.4)
+      if (m_reshapeCW.rspPicSize > 5184000)
       {
-        if (binVarSortDsdCDF[firstBinVarLessThanVal4] > 0.88)
-        {
-          if (maxBinVar < 2.695)
-          {
-            *reshapeTH2 = 2.2;
-          }
-          else
-          {
-            if (binVarSortDsdCDF[firstBinVarLessThanVal3] < 0.45)
-            {
-              *reshapeTH1 = 2.5;
-              *reshapeTH2 = 4.0;
-              m_reshapeCW.binCW[0] = 36;
-              m_sliceReshapeInfo.enableChromaAdj = 0;
-              m_rateAdpMode = 0;
-            }
-            else
-            {
-              m_useAdpCW = true;
-              m_reshapeCW.binCW[0] = 36;
-              m_reshapeCW.binCW[1] = 30;
-            }
-          }
-        }
-        else
-        {
-          if (maxBinVar > 2.8)
-          {
-            *reshapeTH1 = 2.2;
-            *reshapeTH2 = 4.0;
-            m_reshapeCW.binCW[0] = 36;
-            m_sliceReshapeInfo.enableChromaAdj = 0;
-          }
-          else
-          {
-            m_useAdpCW = true;
-            m_reshapeCW.binCW[0] = 38;
-            m_reshapeCW.binCW[1] = 28;
-          }
-        }
+        m_rateAdpMode = 1;
+        m_reshapeCW.binCW[1] = 812;
       }
-      else
+      if (m_srcSeqStats.binHist[m_binNum - 2] > 0.05)
       {
-        if (maxBinVar > 2.24)
-        {
-          m_useAdpCW = true;
-          m_reshapeCW.binCW[0] = 34;
-          m_reshapeCW.binCW[1] = 30;
-        }
-      }
-    }
-    else if (m_reshapeCW.rspPicSize > 1497600)
-    {
-      *reshapeTH1 = 2.0;
-      *reshapeTH2 = 4.5;
-      m_rateAdpMode = 2;
-
-      if (binVarSortDsdCDF[firstBinVarLessThanVal2] > 0.25)
-      {
-        int firstVarCDFLargerThanVal = 1;
-        for (int b = 0; b < PIC_ANALYZE_CW_BINS; b++)
-        {
-          if (binVarSortDsdCDF[b] > 0.7)
-          {
-            firstVarCDFLargerThanVal = b;
-            break;
-          }
-        }
-        if (meanBinVar < 2.52 || binVarSortDsdCDF[firstBinVarLessThanVal2] > 0.5)
-        {
-          *reshapeTH1 = 2.2;
-          *reshapeTH2 = (binVarSortDsd[firstVarCDFLargerThanVal] + binVarSortDsd[firstVarCDFLargerThanVal - 1]) / 2.0;
-        }
-        else
+        m_rateAdpMode = 1;
+        m_reshapeCW.binCW[1] = 812;
+        if (m_srcSeqStats.binHist[m_binNum - 2] > 0.1 || m_srcSeqStats.binHist[1] > 0.1)
         {
-          m_reshapeCW.binCW[1] = 30;
-          *reshapeTH2 = 2.8;
+          m_rateAdpMode = 0;
+          m_reshapeCW.binCW[1] = 924;
         }
       }
-      else if (binVarSortDsdCDF[firstBinVarLessThanVal2] < 0.1 && binVarSortDsdCDF[firstBinVarLessThanVal1] > 0.02)
+      else if (percBinVarLessThenVal2 < 0.8 && percBinVarLessThenVal3 == 1.0)
       {
-        m_reshapeCW.binCW[0] = 36;
-        *reshapeTH1 = 3.5;
         m_rateAdpMode = 1;
+        m_reshapeCW.binCW[1] = 896;
       }
-    }
-    else if (m_reshapeCW.rspPicSize > 660480)
-    {
-      *reshapeTH1 = 2.5;
-      *reshapeTH2 = 4.5;
-      m_rateAdpMode = 1;
-
-      if (binVarSortDsdCDF[firstBinVarLessThanVal4] > 0.6)
+      else if (percBinVarLessThenVal2 > 0.98 && m_srcSeqStats.binHist[1] > 0.05)
       {
-        if (maxBinVar < 3.5)
-        {
-          *reshapeTH1 = 2.0;
-        }
+        m_rateAdpMode = 0;
+        m_reshapeCW.binCW[1] = 784;
       }
-      else
+      else if (percBinVarLessThenVal2 < 0.1)
       {
-        if (maxBinVar > 3.3)
-        {
-          m_reshapeCW.binCW[0] = 35;
-        }
-        else
-        {
-          *reshapeTH1 = 2.8;
-          m_reshapeCW.binCW[0] = 35;
-        }
+        m_rateAdpMode = 0;
+        m_reshapeCW.binCW[1] = 1022;
       }
     }
-    else if (m_reshapeCW.rspPicSize > 249600)
+    if (m_srcSeqStats.binHist[1] > 0.1 && (m_srcSeqStats.binVar[1] > 1.8 && m_srcSeqStats.binVar[1] < 3.0))
     {
       m_rateAdpMode = 1;
-      m_reshapeCW.binCW[0] = 36;
-      *reshapeTH1 = 2.5;
-      *reshapeTH2 = 4.5;
+      if (m_srcSeqStats.binVar[m_binNum - 2] > 1.2 && m_srcSeqStats.binVar[m_binNum - 2] < 4.0) { m_reshapeCW.binCW[1] = 784; }
     }
-    else
+    else if (m_srcSeqStats.binHist[m_binNum - 2] < 0.001)
     {
-      if (binVarSortDsdCDF[firstBinVarLessThanVal2] < 0.33 && m_reshapeCW.rspFps>40)
+      if (m_srcSeqStats.binHist[1] > 0.05 && m_srcSeqStats.binVar[1] > 3.0)
       {
-        *intraAdp = false;
-        *interAdp = false;
+        m_rateAdpMode = 1;
+        m_reshapeCW.binCW[1] = 784;
       }
-      else
+      else if (m_srcSeqStats.binHist[1] < 0.006)
       {
-        m_rateAdpMode = 1;
-        m_reshapeCW.binCW[0] = 36;
-        *reshapeTH1 = 3.0;
-        *reshapeTH2 = 4.0;
+        m_rateAdpMode = 0;
+        m_reshapeCW.binCW[1] = 980;
+      }
+      else if (percBinVarLessThenVal3 < 0.5)
+      {
+        m_rateAdpMode = 0;
+        m_reshapeCW.binCW[1] = 924;
       }
     }
+    else if ((m_srcSeqStats.maxBinVar > 4.0 && m_rspSeqStats.meanBinVar > 3.2 && percBinVarLessThenVal2 < 0.25) || ratioWeiVar < 1.03)
+    {
+      m_rateAdpMode = 0;
+      m_reshapeCW.binCW[1] = 980;
+    }
   }
   else
   {
-    if (m_reshapeCW.rspPicSize > 5184000)
+    m_useAdpCW = true;
+    m_reshapeCW.binCW[0] = 36;  m_reshapeCW.binCW[1] = 30;
+    if (isLowCase)
     {
-      m_reshapeCW.binCW[0] = 40;
-      *reshapeTH2 = 4.0;
-      m_rateAdpMode = 2;
-
-      if (maxBinVar < 2.4)
-      {
-        *reshapeTH1 = 3.0;
-        if (m_reshapeCW.rspBaseQP <= 22)
-          m_tcase = 3;
-      }
-      else if (maxBinVar > 3.0)
+      if (m_srcSeqStats.binHist[m_binNum - 2] > 0.05)
       {
-        if (minBinVar > 1)
-        {
-          m_reshapeCW.binCW[0] = 36;
-          *reshapeTH1 = 2.8;
-          *reshapeTH2 = 3.5;
-          m_sliceReshapeInfo.enableChromaAdj = 0;
-          m_chromaWeight = 1.05;
-          m_rateAdpMode = 0;
-        }
-        else
-        {
-          m_reshapeCW.binCW[0] = 36;
-          *reshapeTH1 = 2.2;
-          *reshapeTH2 = 3.5;
-          m_sliceReshapeInfo.enableChromaAdj = 0;
-          m_chromaWeight = 0.95;
-        }
-      }
-      else
-      {
-        *reshapeTH1 = 1.5;
+        m_useAdpCW = false;
+        m_rateAdpMode = 1;
+        m_reshapeCW.binCW[1] = 896;
+        if (m_srcSeqStats.binHist[1] > 0.005) { m_rateAdpMode = 0; }
       }
+      else if (percBinVarLessThenVal2 < 0.8 && percBinVarLessThenVal3 == 1.0) { m_reshapeCW.binCW[1] = 28; }
     }
-    else if (m_reshapeCW.rspPicSize > 1497600)
+    if (m_srcSeqStats.binHist[1] > 0.1 && m_srcSeqStats.binVar[1] > 1.8 && m_srcSeqStats.binVar[1] < 3.0)
     {
-      *reshapeTH1 = 2.5;
-      *reshapeTH2 = 4.5;
+      m_useAdpCW = false;
       m_rateAdpMode = 1;
-
-      if (meanBinVar < 2.52)
-      {
-        *intraAdp = true;
-        m_rateAdpMode = 0;
-        m_tcase = 9;
-      }
-      else
-      {
-        if (binVarSortDsdCDF[firstBinVarLessThanVal2] > 0.5)
-        {
-          *reshapeTH2 = 3.0;
-          *intraAdp = true;
-        }
-        else if (binVarSortDsdCDF[firstBinVarLessThanVal2] < 0.1 && binVarSortDsdCDF[firstBinVarLessThanVal1] > 0.02)
-        {
-          *reshapeTH1 = 3.0;
-          *intraAdp = true;
-          m_rateAdpMode = 0;
-          m_tcase = 9;
-        }
-        else if (binVarSortDsdCDF[firstBinVarLessThanVal2] > 0.25)
-        {
-          *reshapeTH1 = 2.4;
-          m_reshapeCW.binCW[0] = 36;
-        }
-        else
-        {
-          *reshapeTH1 = 2.4;
-          m_reshapeCW.binCW[0] = 36;
-        }
-      }
+      m_reshapeCW.binCW[1] = 952;
     }
-    else if (m_reshapeCW.rspPicSize > 660480)
+    else if (m_srcSeqStats.binHist[1] > 0.05 && m_srcSeqStats.binHist[m_binNum - 2] < 0.001 && m_srcSeqStats.binVar[1] > 3.0)
     {
-      *intraAdp = true;
+      m_useAdpCW = false;
       m_rateAdpMode = 1;
-
-      if (binVarSortDsdCDF[firstBinVarLessThanVal4] > 0.6)
-      {
-        if (maxBinVar < 3.5)
-        {
-          *reshapeTH1 = 2.1;
-          *reshapeTH2 = 3.5;
-        }
-        else
-        {
-          *reshapeTH1 = 2.4;
-          *reshapeTH2 = 4.5;
-          m_reshapeCW.binCW[0] = 40;
-          m_rateAdpMode = 0;
-        }
-      }
-      else
-      {
-        if (maxBinVar > 3.3)
-        {
-          *reshapeTH1 = 3.5;
-          *reshapeTH2 = 3.8;
-        }
-        else
-        {
-          *reshapeTH1 = 3.0;
-          *reshapeTH2 = 4.0;
-          m_reshapeCW.binCW[1] = 30;
-        }
-      }
+      m_reshapeCW.binCW[1] = 784;
     }
-    else if (m_reshapeCW.rspPicSize > 249600)
+    else if (m_srcSeqStats.binHist[1] > 0.05 && m_srcSeqStats.binHist[m_binNum - 2] < 0.005 && m_srcSeqStats.binVar[1] > 1.0 && m_srcSeqStats.binVar[1] < 1.5)
     {
-      m_reshapeCW.binCW[1] = 30;
-      *reshapeTH1 = 2.5;
-      *reshapeTH2 = 4.5;
-      *intraAdp = true;
-      m_rateAdpMode = 1;
-
-      if (minBinVar > 2.6)
-      {
-        *reshapeTH1 = 3.2;
-        m_rateAdpMode = 0;
-        m_tcase = 9;
-      }
-      else {
-        double diff1 = binVarSortDsdCDF[firstBinVarLessThanVal4] - binVarSortDsdCDF[firstBinVarLessThanVal3];
-        double diff2 = binVarSortDsdCDF[firstBinVarLessThanVal2] - binVarSortDsdCDF[firstBinVarLessThanVal1];
-        if (diff1 > 0.4 || binVarSortDsdCDF[firstBinVarLessThanVal1] > 0.1)
-        {
-          *reshapeTH1 = 2.9;
-          *intraAdp = false;
-        }
-        else
-        {
-          if (diff2 > 0.1)
-          {
-            *reshapeTH1 = 2.5;
-          }
-          else
-          {
-            *reshapeTH1 = 2.9;
-            if (binVarSortDsdCDF[firstBinVarLessThanVal4] > 0.99 && binVarSortDsdCDF[firstBinVarLessThanVal3] > 0.642 && binVarSortDsdCDF[firstBinVarLessThanVal2] > 0.03)
-            {
-              m_rateAdpMode = 0;
-              m_tcase = 9;
-            }
-          }
-        }
-      }
+      m_rateAdpMode = 2;
+      m_reshapeCW.binCW[0] = 38;
     }
-    else
+    else if (m_srcSeqStats.binHist[1] < 0.005 && m_srcSeqStats.binHist[m_binNum - 2] > 0.05 && m_srcSeqStats.binVar[m_binNum - 2] > 1.0 && m_srcSeqStats.binVar[m_binNum - 2] < 1.5)
     {
+      m_rateAdpMode = 2;
       m_reshapeCW.binCW[0] = 36;
-      m_reshapeCW.binCW[1] = 30;
-      *reshapeTH1 = 2.6;
-      *reshapeTH2 = 4.5;
-      *intraAdp = true;
+    }
+    else if (m_srcSeqStats.binHist[1] > 0.02 && m_srcSeqStats.binHist[m_binNum - 2] > 0.04 && m_srcSeqStats.binVar[1] < 2.0 && m_srcSeqStats.binVar[m_binNum - 2] < 1.5)
+    {
+      m_rateAdpMode = 2;
+      m_reshapeCW.binCW[0] = 34;
+    }
+    else if ((m_srcSeqStats.binHist[1] > 0.05 && m_srcSeqStats.binHist[m_binNum - 2] > 0.2 && m_srcSeqStats.binVar[1] > 3.0 && m_srcSeqStats.binVar[1] < 4.0) || ratioWeiVar < 1.03)
+    {
       m_rateAdpMode = 1;
-      if (binVarSortDsdCDF[firstBinVarLessThanVal2] > 0.5 && maxBinVar < 4.7)
-      {
-        *reshapeTH1 = 3.4;
-      }
+      m_reshapeCW.binCW[0] = 34;
+    }
+    else if (m_srcSeqStats.binVar[1] < 4.0 && percBinVarLessThenVal2 == 1.0 && percBinVarLessThenVal3 == 1.0)
+    {
+      m_rateAdpMode = 0;
+      m_reshapeCW.binCW[0] = 34;
     }
+    if (m_useAdpCW && !isLowCase) { m_reshapeCW.binCW[1] = 66 - m_reshapeCW.binCW[0]; }
   }
 }
 
@@ -1042,6 +935,12 @@ void EncReshape::initLUTfromdQPModel()
   {
     m_binCW[i] = m_reshapePivot[i + 1] - m_reshapePivot[i];
   }
+  for (int i = 0; i <= PIC_CODE_CW_BINS; i++)
+  {
+    m_inputPivot[i] = m_initCW * i;
+  }
+
+  adjustLmcsPivot();
 
   int maxAbsDeltaCW = 0, absDeltaCW = 0, deltaCW = 0;
   for (int i = m_sliceReshapeInfo.reshaperModelMinBinIdx; i <= m_sliceReshapeInfo.reshaperModelMaxBinIdx; i++)
@@ -1055,127 +954,50 @@ void EncReshape::initLUTfromdQPModel()
 
   for (int i = 0; i < pwlFwdLUTsize; i++)
   {
-    int16_t Y1 = m_reshapePivot[i];
-    int16_t Y2 = m_reshapePivot[i + 1];
-    m_fwdLUT[i*pwlFwdBinLen] = Clip3((Pel)0, (Pel)((1 << m_lumaBD) - 1), (Pel)Y1);
-    int log2PwlFwdBinLen = floorLog2(pwlFwdBinLen);
-    int32_t scale = ((int32_t)(Y2 - Y1) * (1 << FP_PREC) + (1 << (log2PwlFwdBinLen - 1))) >> (log2PwlFwdBinLen);
-    for (int j = 1; j < pwlFwdBinLen; j++)
+    m_fwdScaleCoef[i] = ((int32_t)m_binCW[i] * (1 << FP_PREC) + (1 << (floorLog2(pwlFwdBinLen) - 1))) >> floorLog2(pwlFwdBinLen);
+    if (m_binCW[i] == 0)
     {
-      int tempVal = Y1 + (((int32_t)scale * (int32_t)j + (1 << (FP_PREC - 1))) >> FP_PREC);
-      m_fwdLUT[i*pwlFwdBinLen + j] = Clip3((Pel)0, (Pel)((1<<m_lumaBD) -1), (Pel)tempVal);
+      m_invScaleCoef[i] = 0;
+      m_chromaAdjHelpLUT[i] = 1 << CSCALE_FP_PREC;
+    }
+    else
+    {
+      m_invScaleCoef[i] = (int32_t)(m_initCW * (1 << FP_PREC) / m_binCW[i]);
+      m_chromaAdjHelpLUT[i] = (int32_t)(m_initCW * (1 << FP_PREC) / (m_binCW[i] + m_sliceReshapeInfo.chrResScalingOffset));
     }
   }
-  reverseLUT(m_fwdLUT, m_invLUT, m_reshapeLUTSize);
-  updateChromaScaleLUT();
+  for (int lumaSample = 0; lumaSample < m_reshapeLUTSize; lumaSample++)
+  {
+    int idxY = lumaSample / m_initCW;
+    int tempVal = m_reshapePivot[idxY] + ((m_fwdScaleCoef[idxY] * (lumaSample - m_inputPivot[idxY]) + (1 << (FP_PREC - 1))) >> FP_PREC);
+    m_fwdLUT[lumaSample] = Clip3((Pel)0, (Pel)((1 << m_lumaBD) - 1), (Pel)(tempVal));
+
+    int idxYInv = getPWLIdxInv(lumaSample);
+    int invSample = m_inputPivot[idxYInv] + ((m_invScaleCoef[idxYInv] * (lumaSample - m_reshapePivot[idxYInv]) + (1 << (FP_PREC - 1))) >> FP_PREC);
+    m_invLUT[lumaSample] = Clip3((Pel)0, (Pel)((1 << m_lumaBD) - 1), (Pel)(invSample));
+  }
 }
 
-void EncReshape::constructReshaperSDR()
+void EncReshape::constructReshaperLMCS()
 {
   int bdShift = m_lumaBD - 10;
-  int usedCW = 0;
-  int totCW = bdShift != 0 ? (bdShift > 0 ? m_reshapeLUTSize / (1<<bdShift) : m_reshapeLUTSize * (1 << (-bdShift))) : m_reshapeLUTSize;
-  int histBins = PIC_ANALYZE_CW_BINS;
-  int histLenth = totCW/histBins;
+  int totCW = bdShift != 0 ? (bdShift > 0 ? m_reshapeLUTSize / (1 << bdShift) : m_reshapeLUTSize * (1 << (-bdShift))) : m_reshapeLUTSize;
+  int histLenth = totCW / m_binNum;
   int log2HistLenth = floorLog2(histLenth);
-  int16_t *tempFwdLUT = new int16_t[m_reshapeLUTSize + 1]();
-  int i, j;
-  int cwScaleBins1, cwScaleBins2;
-  int maxAllowedCW = totCW-1;
+  int i;
 
-  cwScaleBins1 = m_reshapeCW.binCW[0];
-  cwScaleBins2 = m_reshapeCW.binCW[1];
-
-  for (i = 0; i < histBins; i++)
-    usedCW += m_binCW[i];
-
-  if (usedCW > maxAllowedCW)
+  if (m_binNum == PIC_ANALYZE_CW_BINS)
   {
-    int cnt0 = 0, cnt1 = 0, cnt2 = 0;
-    for (i = 0; i < histBins; i++)
-    {
-      if (m_binCW[i] == histLenth + 1)             cnt0++;
-      else if (m_binCW[i] == cwScaleBins1)         cnt1++;
-      else if (m_binCW[i] == cwScaleBins2)         cnt2++;
-    }
-
-    int resCW = usedCW - maxAllowedCW;
-    int cwReduce1 = (cwScaleBins1 - histLenth - 1) * cnt1;
-    int cwReduce2 = (histLenth + 1 - cwScaleBins2) * cnt0;
-
-    if (resCW <= cwReduce1)
-    {
-      int idx = 0;
-      while (resCW > 0)
-      {
-        if (m_binCW[idx] > (histLenth + 1))
-        {
-          m_binCW[idx]--;
-          resCW--;
-        }
-        idx++;
-        if (idx == histBins)
-          idx = 0;
-      }
-    }
-    else if (resCW > cwReduce1 && resCW <= (cwReduce1 + cwReduce2))
-    {
-      resCW -= cwReduce1;
-      int idx = 0;
-      while (resCW > 0)
-      {
-        if (m_binCW[idx] > cwScaleBins2 && m_binCW[idx] < cwScaleBins1)
-        {
-          m_binCW[idx]--;
-          resCW--;
-        }
-        idx++;
-        if (idx == histBins)
-          idx = 0;
-      }
-      for (i = 0; i < histBins; i++)
-      {
-        if (m_binCW[i] == cwScaleBins1)
-          m_binCW[i] = histLenth + 1;
-      }
-    }
-    else if (resCW > (cwReduce1 + cwReduce2))
+    for (int i = 0; i < PIC_CODE_CW_BINS; i++)
     {
-      resCW -= (cwReduce1 + cwReduce2);
-      int idx = 0;
-      while (resCW > 0)
-      {
-        if (m_binCW[idx] > 0 && m_binCW[idx] < (histLenth + 1))
-        {
-          m_binCW[idx]--;
-          resCW--;
-        }
-        idx++;
-        if (idx == histBins)
-          idx = 0;
-      }
-      for (i = 0; i < histBins; i++)
-      {
-        if (m_binCW[i] == histLenth + 1)
-          m_binCW[i] = cwScaleBins2;
-        if (m_binCW[i] == cwScaleBins1)
-          m_binCW[i] = histLenth + 1;
-      }
+      m_binCW[i] = m_binCW[2 * i] + m_binCW[2 * i + 1];
     }
   }
-
-  if (bdShift != 0)
+  for (int i = 0; i <= PIC_CODE_CW_BINS; i++)
   {
-    for (int i = 0; i < PIC_ANALYZE_CW_BINS; i++)
-    {
-      m_binCW[i] = bdShift > 0 ? m_binCW[i] * (1 << bdShift) : m_binCW[i] / (1 << (-bdShift));
-    }
+    m_inputPivot[i] = m_initCW * i;
   }
 
-  for (int i = 0; i < PIC_CODE_CW_BINS; i++)
-  {
-    m_binCW[i] = m_binCW[2 * i] + m_binCW[2 * i + 1];
-  }
   m_sliceReshapeInfo.reshaperModelMinBinIdx = 0;
   m_sliceReshapeInfo.reshaperModelMaxBinIdx = PIC_CODE_CW_BINS - 1;
   for (int i = 0; i < PIC_CODE_CW_BINS; i++)
@@ -1195,13 +1017,23 @@ void EncReshape::constructReshaperSDR()
     }
   }
 
+  if (bdShift != 0)
+  {
+    for (int i = 0; i < PIC_ANALYZE_CW_BINS; i++)
+    {
+      m_binCW[i] = bdShift > 0 ? m_binCW[i] * (1 << bdShift) : m_binCW[i] / (1 << (-bdShift));
+    }
+  }
+
+  adjustLmcsPivot();
+
   int maxAbsDeltaCW = 0, absDeltaCW = 0, deltaCW = 0;
   for (int i = m_sliceReshapeInfo.reshaperModelMinBinIdx; i <= m_sliceReshapeInfo.reshaperModelMaxBinIdx; i++)
   {
     deltaCW = (int)m_binCW[i] - (int)m_initCW;
     m_sliceReshapeInfo.reshaperModelBinCWDelta[i] = deltaCW;
     absDeltaCW = (deltaCW < 0) ? (-deltaCW) : deltaCW;
-    if (absDeltaCW > maxAbsDeltaCW)      {      maxAbsDeltaCW = absDeltaCW;    }
+    if (absDeltaCW > maxAbsDeltaCW) { maxAbsDeltaCW = absDeltaCW; }
   }
   m_sliceReshapeInfo.maxNbitsNeededDeltaCW = std::max(1, 1 + floorLog2(maxAbsDeltaCW));
 
@@ -1209,40 +1041,109 @@ void EncReshape::constructReshaperSDR()
   log2HistLenth = floorLog2(histLenth);
 
   int sumBins = 0;
-  for (i = 0; i < PIC_CODE_CW_BINS; i++)   { sumBins += m_binCW[i];  }
+  for (i = 0; i < PIC_CODE_CW_BINS; i++) { sumBins += m_binCW[i]; }
   CHECK(sumBins >= m_reshapeLUTSize, "SDR CW assignment is wrong!!");
-  memset(tempFwdLUT, 0, (m_reshapeLUTSize + 1) * sizeof(int16_t));
-  tempFwdLUT[0] = 0;
-
-  for (i = 0; i < PIC_CODE_CW_BINS; i++)
+  for (int i = 0; i < PIC_CODE_CW_BINS; i++)
   {
-    tempFwdLUT[(i + 1)*histLenth] = tempFwdLUT[i*histLenth] + m_binCW[i];
-    int16_t Y1 = tempFwdLUT[i*histLenth];
-    int16_t Y2 = tempFwdLUT[(i + 1)*histLenth];
-    m_reshapePivot[i + 1] = Y2;
-    int32_t scale = ((int32_t)(Y2 - Y1) * (1 << FP_PREC) + (1 << (log2HistLenth - 1))) >> (log2HistLenth);
-    m_fwdLUT[i*histLenth] = Clip3((Pel)0, (Pel)((1 << m_lumaBD) - 1), (Pel)Y1);
-    for (j = 1; j < histLenth; j++)
+    m_reshapePivot[i + 1] = m_reshapePivot[i] + m_binCW[i];
+    m_fwdScaleCoef[i] = ((int32_t)m_binCW[i] * (1 << FP_PREC) + (1 << (log2HistLenth - 1))) >> log2HistLenth;
+    if (m_binCW[i] == 0)
+    {
+      m_invScaleCoef[i] = 0;
+      m_chromaAdjHelpLUT[i] = 1 << CSCALE_FP_PREC;
+    }
+    else
     {
-      tempFwdLUT[i*histLenth + j] = Y1 + (((int32_t)scale * (int32_t)j + (1 << (FP_PREC - 1))) >> FP_PREC);
-      m_fwdLUT[i*histLenth + j] = Clip3((Pel)0, (Pel)((1 << m_lumaBD) - 1), (Pel)tempFwdLUT[i*histLenth + j]);
+      m_invScaleCoef[i] = (int32_t)(m_initCW * (1 << FP_PREC) / m_binCW[i]);
+      m_chromaAdjHelpLUT[i] = (int32_t)(m_initCW * (1 << FP_PREC) / (m_binCW[i] + m_sliceReshapeInfo.chrResScalingOffset));
     }
   }
+  for (int lumaSample = 0; lumaSample < m_reshapeLUTSize; lumaSample++)
+  {
+    int idxY = lumaSample / m_initCW;
+    int tempVal = m_reshapePivot[idxY] + ((m_fwdScaleCoef[idxY] * (lumaSample - m_inputPivot[idxY]) + (1 << (FP_PREC - 1))) >> FP_PREC);
+    m_fwdLUT[lumaSample] = Clip3((Pel)0, (Pel)((1 << m_lumaBD) - 1), (Pel)(tempVal));
 
+    int idxYInv = getPWLIdxInv(lumaSample);
+    int invSample = m_inputPivot[idxYInv] + ((m_invScaleCoef[idxYInv] * (lumaSample - m_reshapePivot[idxYInv]) + (1 << (FP_PREC - 1))) >> FP_PREC);
+    m_invLUT[lumaSample] = Clip3((Pel)0, (Pel)((1 << m_lumaBD) - 1), (Pel)(invSample));
+  }
   for (i = 0; i < PIC_CODE_CW_BINS; i++)
   {
     int start = i*histLenth;
     int end = (i + 1)*histLenth - 1;
     m_cwLumaWeight[i] = m_fwdLUT[end] - m_fwdLUT[start];
   }
+}
+
+void EncReshape::adjustLmcsPivot()
+{
+  int bdShift = m_lumaBD - 10;
+  int totCW = bdShift != 0 ? (bdShift > 0 ? m_reshapeLUTSize / (1 << bdShift) : m_reshapeLUTSize * (1 << (-bdShift))) : m_reshapeLUTSize;
+  int orgCW = totCW / PIC_CODE_CW_BINS;
+  int log2SegSize = m_lumaBD - floorLog2(LMCS_SEG_NUM);
+
+  m_reshapePivot[0] = 0;
+  for (int i = 0; i < PIC_CODE_CW_BINS; i++)
+  {
+    m_reshapePivot[i + 1] = m_reshapePivot[i] + m_binCW[i];
+  }
+  int segIdxMax = (m_reshapePivot[m_sliceReshapeInfo.reshaperModelMaxBinIdx + 1] >> log2SegSize);
+  for (int i = m_sliceReshapeInfo.reshaperModelMinBinIdx; i <= m_sliceReshapeInfo.reshaperModelMaxBinIdx; i++)
+  {
+    m_reshapePivot[i + 1] = m_reshapePivot[i] + m_binCW[i];
+    int segIdxCurr = (m_reshapePivot[i]     >> log2SegSize);
+    int segIdxNext = (m_reshapePivot[i + 1] >> log2SegSize);
 
-  if (tempFwdLUT != nullptr)   {     delete[] tempFwdLUT;    tempFwdLUT = nullptr;  }
+    if ((segIdxCurr == segIdxNext) && (m_reshapePivot[i] != (segIdxCurr << log2SegSize)))
+    {
+      if (segIdxCurr == segIdxMax)
+      {
+        m_reshapePivot[i] = m_reshapePivot[m_sliceReshapeInfo.reshaperModelMaxBinIdx + 1];
+        for (int j = i; j <= m_sliceReshapeInfo.reshaperModelMaxBinIdx; j++)
+        {
+          m_reshapePivot[j + 1] = m_reshapePivot[i];
+          m_binCW[j] = 0;
+        }
+        m_binCW[i - 1] = m_reshapePivot[i] - m_reshapePivot[i - 1];
+        break;
+      }
+      else
+      {
+        int16_t adjustVal = ((segIdxCurr + 1) << log2SegSize) - m_reshapePivot[i + 1];
+        m_reshapePivot[i + 1] += adjustVal;
+        m_binCW[i] += adjustVal;
+
+        for (int j = i + 1; j <= m_sliceReshapeInfo.reshaperModelMaxBinIdx; j++)
+        {
+          if (m_binCW[j] < (adjustVal + (orgCW >> 3)))
+          {
+            adjustVal -= (m_binCW[j] - (orgCW >> 3));
+            m_binCW[j] = (orgCW >> 3);
+          }
+          else
+          {
+            m_binCW[j] -= adjustVal;
+            adjustVal = 0;
+          }
+          if (adjustVal == 0)
+            break;
+        }
+      }
+    }
+  }
 
-  reverseLUT(m_fwdLUT, m_invLUT, m_reshapeLUTSize);
-  updateChromaScaleLUT();
+  for (int i = PIC_CODE_CW_BINS - 1; i >= 0; i--)
+  {
+    if (m_binCW[i] > 0)
+    {
+      m_sliceReshapeInfo.reshaperModelMaxBinIdx = i;
+      break;
+    }
+  }
 }
 
-#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM
+#if ENABLE_SPLIT_PARALLELISM
 void EncReshape::copyState(const EncReshape &other)
 {
   m_srcReshaped     = other.m_srcReshaped;
@@ -1274,6 +1175,9 @@ void EncReshape::copyState(const EncReshape &other)
   m_initCW           = other.m_initCW;
   m_reshape          = other.m_reshape;
   m_reshapePivot     = other.m_reshapePivot;
+  m_inputPivot       = other.m_inputPivot;
+  m_fwdScaleCoef     = other.m_fwdScaleCoef;
+  m_invScaleCoef     = other.m_invScaleCoef;
   m_lumaBD           = other.m_lumaBD;
   m_reshapeLUTSize   = other.m_reshapeLUTSize;
 }
diff --git a/source/Lib/EncoderLib/EncReshape.h b/source/Lib/EncoderLib/EncReshape.h
index 6e4871866a05ae33047cd22d06befe504902211c..ba9b5195058d2c216e599f37ab1c4d6ed83f90f3 100644
--- a/source/Lib/EncoderLib/EncReshape.h
+++ b/source/Lib/EncoderLib/EncReshape.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -49,6 +49,20 @@
 // ====================================================================================================================
 // Class definition
 // ====================================================================================================================
+struct SeqInfo
+{
+  double binVar[PIC_ANALYZE_CW_BINS];
+  double binHist[PIC_ANALYZE_CW_BINS];
+  double normVar[PIC_ANALYZE_CW_BINS];
+  int    nonZeroCnt;
+  double weightVar;
+  double weightNorm;
+  double minBinVar;
+  double maxBinVar;
+  double meanBinVar;
+  double ratioStdU;
+  double ratioStdV;
+};
 
 class EncReshape : public Reshape
 {
@@ -71,6 +85,9 @@ private:
   Pel                     m_cwLumaWeight[PIC_CODE_CW_BINS];
   double                  m_chromaWeight;
   int                     m_chromaAdj;
+  int                     m_binNum;
+  SeqInfo                 m_srcSeqStats;
+  SeqInfo                 m_rspSeqStats;
 public:
 
   EncReshape();
@@ -81,20 +98,25 @@ public:
 
   bool getSrcReshaped() { return m_srcReshaped; }
   void setSrcReshaped(bool b) { m_srcReshaped = b; }
-  void preAnalyzerSDR(Picture *pcPic, const SliceType sliceType, const ReshapeCW& reshapeCW, bool isDualT);
+  void initSeqStats(SeqInfo &stats);
+  void calcSeqStats(Picture *pcPic, SeqInfo &stats);
+  void preAnalyzerLMCS(Picture *pcPic, const uint32_t signalType, const SliceType sliceType, const ReshapeCW& reshapeCW);
   void preAnalyzerHDR(Picture *pcPic, const SliceType sliceType, const ReshapeCW& reshapeCW, bool isDualT);
   void bubbleSortDsd(double *array, int * idx, int n);
   void swap(int *xp, int *yp) { int temp = *xp;  *xp = *yp;  *yp = temp; }
   void swap(double *xp, double *yp) { double temp = *xp;  *xp = *yp;  *yp = temp; }
-  void deriveReshapeParametersSDRfromStats(uint32_t *, double*, double* reshapeTH1, double* reshapeTH2, bool *intraAdp, bool *interAdp);
+  void cwPerturbation(int startBinIdx, int endBinIdx, uint16_t maxCW);
+  void cwReduction(int startBinIdx, int endBinIdx);
+  void deriveReshapeParametersSDR(bool *intraAdp, bool *interAdp);
   void deriveReshapeParameters(double *array, int start, int end, ReshapeCW respCW, double &alpha, double &beta);
   void initLUTfromdQPModel();
-  void constructReshaperSDR();
+  void constructReshaperLMCS();
   ReshapeCW * getReshapeCW() { return &m_reshapeCW; }
   Pel * getWeightTable() { return m_cwLumaWeight; }
   double getCWeight() { return m_chromaWeight; }
+  void adjustLmcsPivot();
 
-#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM
+#if ENABLE_SPLIT_PARALLELISM
   void copyState(const EncReshape& other);
 #endif
 };// END CLASS DEFINITION EncReshape
diff --git a/source/Lib/EncoderLib/EncSampleAdaptiveOffset.cpp b/source/Lib/EncoderLib/EncSampleAdaptiveOffset.cpp
index 8e467b2eec9fe3c335f9602e6efbe696cce1ab3a..15ed5b0e24d0cf823d5a44eeb7d3cb83702b4fbe 100644
--- a/source/Lib/EncoderLib/EncSampleAdaptiveOffset.cpp
+++ b/source/Lib/EncoderLib/EncSampleAdaptiveOffset.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -79,6 +79,8 @@ inline double xRoundIbdi(int bitDepth, double x)
 EncSampleAdaptiveOffset::EncSampleAdaptiveOffset()
 {
   m_CABACEstimator = NULL;
+
+  ::memset( m_saoDisabledRate, 0, sizeof( m_saoDisabledRate ) );
 }
 
 EncSampleAdaptiveOffset::~EncSampleAdaptiveOffset()
@@ -113,7 +115,6 @@ void EncSampleAdaptiveOffset::createEncData(bool isPreDBFSamplesUsed, uint32_t n
 
   }
 
-  ::memset(m_saoDisabledRate, 0, sizeof(m_saoDisabledRate));
 
   for(int typeIdc=0; typeIdc < NUM_SAO_NEW_TYPES; typeIdc++)
   {
@@ -210,11 +211,7 @@ void EncSampleAdaptiveOffset::SAOProcess( CodingStructure& cs, bool* sliceEnable
 #if ENABLE_QPA
                                           const double lambdaChromaWeight,
 #endif
-#if K0238_SAO_GREEDY_MERGE_ENCODING
                                           const bool bTestSAODisableAtPictureLevel, const double saoEncodingRate, const double saoEncodingRateChroma, const bool isPreDBFSamplesUsed, bool isGreedyMergeEncoding )
-#else
-                                          const bool bTestSAODisableAtPictureLevel, const double saoEncodingRate, const double saoEncodingRateChroma, const bool isPreDBFSamplesUsed )
-#endif
 {
   PelUnitBuf org = cs.getOrgBuf();
   PelUnitBuf res = cs.getRecoBuf();
@@ -239,11 +236,7 @@ void EncSampleAdaptiveOffset::SAOProcess( CodingStructure& cs, bool* sliceEnable
 #if ENABLE_QPA
                    lambdaChromaWeight,
 #endif
-#if K0238_SAO_GREEDY_MERGE_ENCODING
                    saoEncodingRate, saoEncodingRateChroma, isGreedyMergeEncoding );
-#else
-                   saoEncodingRate, saoEncodingRateChroma );
-#endif
 
   DTRACE_UPDATE(g_trace_ctx, (std::make_pair("poc", cs.slice->getPOC())));
   DTRACE_PIC_COMP(D_REC_CB_LUMA_SAO, cs, cs.getRecoBuf(), COMPONENT_Y);
@@ -253,7 +246,6 @@ void EncSampleAdaptiveOffset::SAOProcess( CodingStructure& cs, bool* sliceEnable
   DTRACE    ( g_trace_ctx, D_CRC, "SAO" );
   DTRACE_CRC( g_trace_ctx, D_CRC, cs, cs.getRecoBuf() );
 
-  xPCMLFDisableProcess(cs);
 }
 
 
@@ -311,6 +303,13 @@ void EncSampleAdaptiveOffset::getStatistics(std::vector<SAOStatData**>& blkStats
       isBelowAvail      = (yPos + pcv.maxCUHeight < pcv.lumaHeight);
       isAboveRightAvail = ((yPos > 0) && (isRightAvail));
 
+      int numHorVirBndry = 0, numVerVirBndry = 0;
+      int horVirBndryPos[] = { -1,-1,-1 };
+      int verVirBndryPos[] = { -1,-1,-1 };
+      int horVirBndryPosComp[] = { -1,-1,-1 };
+      int verVirBndryPosComp[] = { -1,-1,-1 };
+      bool isCtuCrossedByVirtualBoundaries = isCrossedByVirtualBoundaries(xPos, yPos, width, height, numHorVirBndry, numVerVirBndry, horVirBndryPos, verVirBndryPos, cs.picHeader );
+
       for(int compIdx = 0; compIdx < numberOfComponents; compIdx++)
       {
         const ComponentID compID = ComponentID(compIdx);
@@ -322,10 +321,20 @@ void EncSampleAdaptiveOffset::getStatistics(std::vector<SAOStatData**>& blkStats
         int  orgStride  = orgYuv.get(compID).stride;
         Pel* orgBlk     = orgYuv.get(compID).bufAt( compArea );
 
+        for (int i = 0; i < numHorVirBndry; i++)
+        {
+          horVirBndryPosComp[i] = (horVirBndryPos[i] >> ::getComponentScaleY(compID, area.chromaFormat)) - compArea.y;
+        }
+        for (int i = 0; i < numVerVirBndry; i++)
+        {
+          verVirBndryPosComp[i] = (verVirBndryPos[i] >> ::getComponentScaleX(compID, area.chromaFormat)) - compArea.x;
+        }
+
         getBlkStats(compID, cs.sps->getBitDepth(toChannelType(compID)), blkStats[ctuRsAddr][compID]
                   , srcBlk, orgBlk, srcStride, orgStride, compArea.width, compArea.height
                   , isLeftAvail,  isRightAvail, isAboveAvail, isBelowAvail, isAboveLeftAvail, isAboveRightAvail
                   , isCalculatePreDeblockSamples
+                  , isCtuCrossedByVirtualBoundaries, horVirBndryPosComp, verVirBndryPosComp, numHorVirBndry, numVerVirBndry
                   );
       }
       ctuRsAddr++;
@@ -604,7 +613,7 @@ void EncSampleAdaptiveOffset::deriveModeNewRDO(const BitDepths &bitDepths, int c
     m_CABACEstimator->resetBits();
     m_CABACEstimator->sao_offset_pars( modeParam[compIdx], compIdx, sliceEnabled[compIdx], bitDepths.recon[CHANNEL_TYPE_LUMA] );
     modeDist[compIdx] = 0;
-    minCost= m_lambda[compIdx]*(FracBitsScale*(double)m_CABACEstimator->getEstFracBits());
+    minCost           = m_lambda[compIdx] * (FRAC_BITS_SCALE * m_CABACEstimator->getEstFracBits());
     ctxBestLuma = SAOCtx( m_CABACEstimator->getCtx() );
     if(sliceEnabled[compIdx])
     {
@@ -626,7 +635,7 @@ void EncSampleAdaptiveOffset::deriveModeNewRDO(const BitDepths &bitDepths, int c
         m_CABACEstimator->getCtx() = SAOCtx( ctxStartLuma );
         m_CABACEstimator->resetBits();
         m_CABACEstimator->sao_offset_pars( testOffset[compIdx], compIdx, sliceEnabled[compIdx], bitDepths.recon[CHANNEL_TYPE_LUMA] );
-        double rate = FracBitsScale*(double)m_CABACEstimator->getEstFracBits();
+        double rate = FRAC_BITS_SCALE * m_CABACEstimator->getEstFracBits();
         cost = (double)dist[compIdx] + m_lambda[compIdx]*rate;
         if(cost < minCost)
         {
@@ -653,7 +662,7 @@ void EncSampleAdaptiveOffset::deriveModeNewRDO(const BitDepths &bitDepths, int c
     modeDist [component]         = 0;
     m_CABACEstimator->sao_offset_pars( modeParam[component], component, sliceEnabled[component], bitDepths.recon[CHANNEL_TYPE_CHROMA] );
     const uint64_t currentFracBits = m_CABACEstimator->getEstFracBits();
-    cost += m_lambda[component] * FracBitsScale * double( currentFracBits - previousFracBits );
+    cost += m_lambda[component] * FRAC_BITS_SCALE * (currentFracBits - previousFracBits);
     previousFracBits = currentFracBits;
   }
 
@@ -686,7 +695,7 @@ void EncSampleAdaptiveOffset::deriveModeNewRDO(const BitDepths &bitDepths, int c
       dist[component] = getDistortion(bitDepths.recon[CHANNEL_TYPE_CHROMA], typeIdc, testOffset[component].typeAuxInfo, invQuantOffset, blkStats[ctuRsAddr][component][typeIdc]);
       m_CABACEstimator->sao_offset_pars( testOffset[component], component, sliceEnabled[component], bitDepths.recon[CHANNEL_TYPE_CHROMA] );
       const uint64_t currentFracBits = m_CABACEstimator->getEstFracBits();
-      cost += dist[component] + (m_lambda[component] * FracBitsScale * double(currentFracBits - previousFracBits));
+      cost += dist[component] + (m_lambda[component] * FRAC_BITS_SCALE * (currentFracBits - previousFracBits));
       previousFracBits = currentFracBits;
     }
 
@@ -712,7 +721,7 @@ void EncSampleAdaptiveOffset::deriveModeNewRDO(const BitDepths &bitDepths, int c
   m_CABACEstimator->getCtx() = SAOCtx( ctxStartBlk );
   m_CABACEstimator->resetBits();
   m_CABACEstimator->sao_block_pars( modeParam, bitDepths, sliceEnabled, (mergeList[SAO_MERGE_LEFT]!= NULL), (mergeList[SAO_MERGE_ABOVE]!= NULL), false );
-  modeNormCost += FracBitsScale*(double)m_CABACEstimator->getEstFracBits();
+  modeNormCost += FRAC_BITS_SCALE * m_CABACEstimator->getEstFracBits();
 }
 
 void EncSampleAdaptiveOffset::deriveModeMergeRDO(const BitDepths &bitDepths, int ctuRsAddr, SAOBlkParam* mergeList[NUM_SAO_MERGE_TYPES], bool* sliceEnabled, std::vector<SAOStatData**>& blkStats, SAOBlkParam& modeParam, double& modeNormCost )
@@ -755,7 +764,7 @@ void EncSampleAdaptiveOffset::deriveModeMergeRDO(const BitDepths &bitDepths, int
     m_CABACEstimator->getCtx() = SAOCtx( ctxStart );
     m_CABACEstimator->resetBits();
     m_CABACEstimator->sao_block_pars( testBlkParam, bitDepths, sliceEnabled, (mergeList[SAO_MERGE_LEFT]!= NULL), (mergeList[SAO_MERGE_ABOVE]!= NULL), false );
-    double rate = FracBitsScale*(double)m_CABACEstimator->getEstFracBits();
+    double rate = FRAC_BITS_SCALE * m_CABACEstimator->getEstFracBits();
     cost = normDist+rate;
 
     if(cost < modeNormCost)
@@ -776,11 +785,7 @@ void EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn
 #if ENABLE_QPA
                                                const double chromaWeight,
 #endif
-#if K0238_SAO_GREEDY_MERGE_ENCODING
                                                const double saoEncodingRate, const double saoEncodingRateChroma, const bool isGreedymergeEncoding)
-#else
-                                               const double saoEncodingRate, const double saoEncodingRateChroma)
-#endif
 
 {
   const PreCalcValues& pcv = *cs.pcv;
@@ -799,7 +804,6 @@ void EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn
   SAOBlkParam modeParam;
   double minCost, modeCost;
 
-#if K0238_SAO_GREEDY_MERGE_ENCODING
   double minCost2 = 0;
   std::vector<SAOStatData**> groupBlkStat;
   if (isGreedymergeEncoding)
@@ -824,13 +828,12 @@ void EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn
   double  Cost[2] = { 0, 0 };
   TempCtx ctxBeforeMerge(m_CtxCache);
   TempCtx ctxAfterMerge(m_CtxCache);
-#endif
 
   double totalCost = 0; // Used if bTestSAODisableAtPictureLevel==true
 
   int ctuRsAddr = 0;
 #if ENABLE_QPA
-  CHECK ((chromaWeight > 0.0) && (cs.slice->getSliceCurStartCtuTsAddr() != 0), "incompatible start CTU address, must be 0");
+  CHECK ((chromaWeight > 0.0) && (cs.slice->getFirstCtuRsAddrInSlice() != 0), "incompatible start CTU address, must be 0");
 #endif
 
   for( uint32_t yPos = 0; yPos < pcv.lumaHeight; yPos += pcv.maxCUHeight )
@@ -850,12 +853,10 @@ void EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn
       const TempCtx  ctxStart ( m_CtxCache, SAOCtx( m_CABACEstimator->getCtx() ) );
       TempCtx        ctxBest  ( m_CtxCache );
 
-#if K0238_SAO_GREEDY_MERGE_ENCODING
       if (ctuRsAddr == (mergeCtuAddr - 1))
       {
         ctxBeforeMerge = SAOCtx(m_CABACEstimator->getCtx());
       }
-#endif
 
       //get merge list
       SAOBlkParam* mergeList[NUM_SAO_MERGE_TYPES] = { NULL };
@@ -903,14 +904,10 @@ void EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn
         }
       } //mode
 
-#if K0238_SAO_GREEDY_MERGE_ENCODING
       if (!isGreedymergeEncoding)
       {
-#endif
       totalCost += minCost;
-#if K0238_SAO_GREEDY_MERGE_ENCODING
       }
-#endif
 
 
       m_CABACEstimator->getCtx() = SAOCtx( ctxBest );
@@ -919,7 +916,6 @@ void EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn
       reconParams[ctuRsAddr] = codedParams[ctuRsAddr];
       reconstructBlkSAOParam(reconParams[ctuRsAddr], mergeList);
 
-#if K0238_SAO_GREEDY_MERGE_ENCODING
       if (isGreedymergeEncoding)
       {
         if (ctuRsAddr == (mergeCtuAddr - 1))
@@ -964,7 +960,7 @@ void EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn
           testBlkParam[COMPONENT_Y].typeIdc = SAO_MERGE_LEFT;
           m_CABACEstimator->resetBits();
           m_CABACEstimator->sao_block_pars(testBlkParam, cs.sps->getBitDepths(), sliceEnabled, true, false, true);
-          double rate = FracBitsScale * (double)m_CABACEstimator->getEstFracBits();
+          double rate = FRAC_BITS_SCALE * m_CABACEstimator->getEstFracBits();
           modeCost += rate * groupSize;
           if (modeCost < minCost2)
           {
@@ -1035,11 +1031,8 @@ void EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn
       }
       else
       {
-#endif
       offsetCTU(area, srcYuv, resYuv, reconParams[ctuRsAddr], cs);
-#if K0238_SAO_GREEDY_MERGE_ENCODING
       }
-#endif
 
       ctuRsAddr++;
     } //ctuRsAddr
@@ -1050,7 +1043,6 @@ void EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn
   if (chromaWeight > 0.0) memcpy (m_lambda, cs.slice->getLambdas(), sizeof (m_lambda));
 
 #endif
-#if K0238_SAO_GREEDY_MERGE_ENCODING
   //reconstruct
   if (isGreedymergeEncoding)
   {
@@ -1079,7 +1071,6 @@ void EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn
     }
     groupBlkStat.clear();
   }
-#endif
   if (!allBlksDisabled && (totalCost >= 0) && bTestSAODisableAtPictureLevel) //SAO has not beneficial in this case - disable it
   {
     for( ctuRsAddr = 0; ctuRsAddr < pcv.sizeInCtus; ctuRsAddr++)
@@ -1135,6 +1126,7 @@ void EncSampleAdaptiveOffset::getBlkStats(const ComponentID compIdx, const int c
                         , Pel* srcBlk, Pel* orgBlk, int srcStride, int orgStride, int width, int height
                         , bool isLeftAvail,  bool isRightAvail, bool isAboveAvail, bool isBelowAvail, bool isAboveLeftAvail, bool isAboveRightAvail
                         , bool isCalculatePreDeblockSamples
+                        , bool isCtuCrossedByVirtualBoundaries, int horVirBndryPos[], int verVirBndryPos[], int numHorVirBndry, int numVerVirBndry
                         )
 {
   int x,y, startX, startY, endX, endY, edgeType, firstLineStartX, firstLineEndX;
@@ -1172,6 +1164,11 @@ void EncSampleAdaptiveOffset::getBlkStats(const ComponentID compIdx, const int c
           for (x=startX; x<endX; x++)
           {
             signRight =  (int8_t)sgn(srcLine[x] - srcLine[x+1]);
+            if (isCtuCrossedByVirtualBoundaries && isProcessDisabled(x, y, numVerVirBndry, 0, verVirBndryPos, horVirBndryPos))
+            {
+              signLeft = -signRight;
+              continue;
+            }
             edgeType  =  signRight + signLeft;
             signLeft  = -signRight;
 
@@ -1194,6 +1191,11 @@ void EncSampleAdaptiveOffset::getBlkStats(const ComponentID compIdx, const int c
               for (x=startX; x<endX; x++)
               {
                 signRight =  (int8_t)sgn(srcLine[x] - srcLine[x+1]);
+                if (isCtuCrossedByVirtualBoundaries && isProcessDisabled(x, endY + y, numVerVirBndry, 0, verVirBndryPos, horVirBndryPos))
+                {
+                  signLeft = -signRight;
+                  continue;
+                }
                 edgeType  =  signRight + signLeft;
                 signLeft  = -signRight;
 
@@ -1241,6 +1243,11 @@ void EncSampleAdaptiveOffset::getBlkStats(const ComponentID compIdx, const int c
           for (x=startX; x<endX; x++)
           {
             signDown  = (int8_t)sgn(srcLine[x] - srcLineBelow[x]);
+            if (isCtuCrossedByVirtualBoundaries && isProcessDisabled(x, y, 0, numHorVirBndry, verVirBndryPos, horVirBndryPos))
+            {
+              signUpLine[x] = -signDown;
+              continue;
+            }
             edgeType  = signDown + signUpLine[x];
             signUpLine[x]= -signDown;
 
@@ -1264,6 +1271,10 @@ void EncSampleAdaptiveOffset::getBlkStats(const ComponentID compIdx, const int c
 
               for (x=startX; x<endX; x++)
               {
+                if (isCtuCrossedByVirtualBoundaries && isProcessDisabled(x, y + endY, 0, numHorVirBndry, verVirBndryPos, horVirBndryPos))
+                {
+                  continue;
+                }
                 edgeType = sgn(srcLine[x] - srcLineBelow[x]) + sgn(srcLine[x] - srcLineAbove[x]);
                 diff [edgeType] += (orgLine[x] - srcLine[x]);
                 count[edgeType] ++;
@@ -1307,6 +1318,10 @@ void EncSampleAdaptiveOffset::getBlkStats(const ComponentID compIdx, const int c
         firstLineEndX   = (!isCalculatePreDeblockSamples) ? (isAboveAvail     ? endX : 1) : endX;
         for(x=firstLineStartX; x<firstLineEndX; x++)
         {
+          if (isCtuCrossedByVirtualBoundaries && isProcessDisabled(x, 0, numVerVirBndry, numHorVirBndry, verVirBndryPos, horVirBndryPos))
+          {
+            continue;
+          }
           edgeType = sgn(srcLine[x] - srcLineAbove[x-1]) - signUpLine[x+1];
           diff [edgeType] += (orgLine[x] - srcLine[x]);
           count[edgeType] ++;
@@ -1323,6 +1338,11 @@ void EncSampleAdaptiveOffset::getBlkStats(const ComponentID compIdx, const int c
           for (x=startX; x<endX; x++)
           {
             signDown = (int8_t)sgn(srcLine[x] - srcLineBelow[x+1]);
+            if (isCtuCrossedByVirtualBoundaries && isProcessDisabled(x, y, numVerVirBndry, numHorVirBndry, verVirBndryPos, horVirBndryPos))
+            {
+              signDownLine[x + 1] = -signDown;
+              continue;
+            }
             edgeType = signDown + signUpLine[x];
             diff [edgeType] += (orgLine[x] - srcLine[x]);
             count[edgeType] ++;
@@ -1352,6 +1372,10 @@ void EncSampleAdaptiveOffset::getBlkStats(const ComponentID compIdx, const int c
 
               for (x=startX; x< endX; x++)
               {
+                if (isCtuCrossedByVirtualBoundaries && isProcessDisabled(x, y + endY, numVerVirBndry, numHorVirBndry, verVirBndryPos, horVirBndryPos))
+                {
+                  continue;
+                }
                 edgeType = sgn(srcLine[x] - srcLineBelow[x+1]) + sgn(srcLine[x] - srcLineAbove[x-1]);
                 diff [edgeType] += (orgLine[x] - srcLine[x]);
                 count[edgeType] ++;
@@ -1395,6 +1419,10 @@ void EncSampleAdaptiveOffset::getBlkStats(const ComponentID compIdx, const int c
                                                           ;
         for(x=firstLineStartX; x<firstLineEndX; x++)
         {
+          if (isCtuCrossedByVirtualBoundaries && isProcessDisabled(x, 0, numVerVirBndry, numHorVirBndry, verVirBndryPos, horVirBndryPos))
+          {
+            continue;
+          }
           edgeType = sgn(srcLine[x] - srcLineAbove[x+1]) - signUpLine[x-1];
           diff [edgeType] += (orgLine[x] - srcLine[x]);
           count[edgeType] ++;
@@ -1411,6 +1439,11 @@ void EncSampleAdaptiveOffset::getBlkStats(const ComponentID compIdx, const int c
           for(x=startX; x<endX; x++)
           {
             signDown = (int8_t)sgn(srcLine[x] - srcLineBelow[x-1]);
+            if (isCtuCrossedByVirtualBoundaries && isProcessDisabled(x, y, numVerVirBndry, numHorVirBndry, verVirBndryPos, horVirBndryPos))
+            {
+              signUpLine[x - 1] = -signDown;
+              continue;
+            }
             edgeType = signDown + signUpLine[x];
 
             diff [edgeType] += (orgLine[x] - srcLine[x]);
@@ -1436,6 +1469,10 @@ void EncSampleAdaptiveOffset::getBlkStats(const ComponentID compIdx, const int c
 
               for (x=startX; x<endX; x++)
               {
+                if (isCtuCrossedByVirtualBoundaries && isProcessDisabled(x, y + endY, numVerVirBndry, numHorVirBndry, verVirBndryPos, horVirBndryPos))
+                {
+                  continue;
+                }
                 edgeType = sgn(srcLine[x] - srcLineBelow[x-1]) + sgn(srcLine[x] - srcLineAbove[x+1]);
                 diff [edgeType] += (orgLine[x] - srcLine[x]);
                 count[edgeType] ++;
@@ -1503,9 +1540,8 @@ void EncSampleAdaptiveOffset::getBlkStats(const ComponentID compIdx, const int c
 
 void EncSampleAdaptiveOffset::deriveLoopFilterBoundaryAvailibility(CodingStructure& cs, const Position &pos, bool& isLeftAvail, bool& isAboveAvail, bool& isAboveLeftAvail) const
 {
-#if HEVC_TILES_WPP
+  bool isLoopFiltAcrossSlicePPS = cs.pps->getLoopFilterAcrossSlicesEnabledFlag();
   bool isLoopFiltAcrossTilePPS = cs.pps->getLoopFilterAcrossTilesEnabledFlag();
-#endif
 
   const int width = cs.pcv->maxCUWidth;
   const int height = cs.pcv->maxCUHeight;
@@ -1514,20 +1550,25 @@ void EncSampleAdaptiveOffset::deriveLoopFilterBoundaryAvailibility(CodingStructu
   const CodingUnit* cuAbove = cs.getCU(pos.offset(0, -height), CH_L);
   const CodingUnit* cuAboveLeft = cs.getCU(pos.offset(-width, -height), CH_L);
 
+  if (!isLoopFiltAcrossSlicePPS)
   {
-    isLeftAvail      = (cuLeft != NULL)      ? ( !CU::isSameSlice(*cuCurr, *cuLeft)      ? cuCurr->slice->getLFCrossSliceBoundaryFlag() : true ) : false;
-    isAboveAvail     = (cuAbove != NULL)     ? ( !CU::isSameSlice(*cuCurr, *cuAbove)     ? cuCurr->slice->getLFCrossSliceBoundaryFlag() : true ) : false;
-    isAboveLeftAvail = (cuAboveLeft != NULL) ? ( !CU::isSameSlice(*cuCurr, *cuAboveLeft) ? cuCurr->slice->getLFCrossSliceBoundaryFlag() : true ) : false;
+    isLeftAvail      = (cuLeft == NULL)      ? false : CU::isSameTile(*cuCurr, *cuLeft);
+    isAboveAvail     = (cuAbove == NULL)     ? false : CU::isSameTile(*cuCurr, *cuAbove);
+    isAboveLeftAvail = (cuAboveLeft == NULL) ? false : CU::isSameTile(*cuCurr, *cuAboveLeft);
+  }
+  else 
+  {
+    isLeftAvail      = (cuLeft != NULL);
+    isAboveAvail     = (cuAbove != NULL);
+    isAboveLeftAvail = (cuAboveLeft != NULL);
   }
 
-#if HEVC_TILES_WPP
   if (!isLoopFiltAcrossTilePPS)
   {
     isLeftAvail      = (!isLeftAvail)      ? false : CU::isSameTile(*cuCurr, *cuLeft);
     isAboveAvail     = (!isAboveAvail)     ? false : CU::isSameTile(*cuCurr, *cuAbove);
     isAboveLeftAvail = (!isAboveLeftAvail) ? false : CU::isSameTile(*cuCurr, *cuAboveLeft);
   }
-#endif
 }
 
 //! \}
diff --git a/source/Lib/EncoderLib/EncSampleAdaptiveOffset.h b/source/Lib/EncoderLib/EncSampleAdaptiveOffset.h
index 048f63db9e304a53d339d06a12201a94b219bd4f..8a0530ec791d63d711cbb65ac10b94b13e93cc42 100644
--- a/source/Lib/EncoderLib/EncSampleAdaptiveOffset.h
+++ b/source/Lib/EncoderLib/EncSampleAdaptiveOffset.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -93,11 +93,7 @@ public:
 #if ENABLE_QPA
                    const double lambdaChromaWeight,
 #endif
-#if K0238_SAO_GREEDY_MERGE_ENCODING
                    const bool bTestSAODisableAtPictureLevel, const double saoEncodingRate, const double saoEncodingRateChroma, const bool isPreDBFSamplesUsed, bool isGreedyMergeEncoding );
-#else
-                   const bool bTestSAODisableAtPictureLevel, const double saoEncodingRate, const double saoEncodingRateChroma, const bool isPreDBFSamplesUsed );
-#endif
 
   void disabledRate( CodingStructure& cs, SAOBlkParam* reconParams, const double saoEncodingRate, const double saoEncodingRateChroma );
   void getPreDBFStatistics(CodingStructure& cs);
@@ -110,12 +106,10 @@ private: //methods
 #if ENABLE_QPA
                         const double chromaWeight,
 #endif
-#if K0238_SAO_GREEDY_MERGE_ENCODING
                         const double saoEncodingRate, const double saoEncodingRateChroma, const bool isGreedymergeEncoding );
-#else
-                        const double saoEncodingRate, const double saoEncodingRateChroma );
-#endif
-  void getBlkStats(const ComponentID compIdx, const int channelBitDepth, SAOStatData* statsDataTypes, Pel* srcBlk, Pel* orgBlk, int srcStride, int orgStride, int width, int height, bool isLeftAvail,  bool isRightAvail, bool isAboveAvail, bool isBelowAvail, bool isAboveLeftAvail, bool isAboveRightAvail, bool isCalculatePreDeblockSamples);
+  void getBlkStats(const ComponentID compIdx, const int channelBitDepth, SAOStatData* statsDataTypes, Pel* srcBlk, Pel* orgBlk, int srcStride, int orgStride, int width, int height, bool isLeftAvail,  bool isRightAvail, bool isAboveAvail, bool isBelowAvail, bool isAboveLeftAvail, bool isAboveRightAvail, bool isCalculatePreDeblockSamples
+                 , bool isCtuCrossedByVirtualBoundaries, int horVirBndryPos[], int verVirBndryPos[], int numHorVirBndry, int numVerVirBndry
+    );
   void deriveModeNewRDO(const BitDepths &bitDepths, int ctuRsAddr, SAOBlkParam* mergeList[NUM_SAO_MERGE_TYPES], bool* sliceEnabled, std::vector<SAOStatData**>& blkStats, SAOBlkParam& modeParam, double& modeNormCost );
   void deriveModeMergeRDO(const BitDepths &bitDepths, int ctuRsAddr, SAOBlkParam* mergeList[NUM_SAO_MERGE_TYPES], bool* sliceEnabled, std::vector<SAOStatData**>& blkStats, SAOBlkParam& modeParam, double& modeNormCost );
   int64_t getDistortion(const int channelBitDepth, int typeIdc, int typeAuxInfo, int* offsetVal, SAOStatData& statData);
@@ -128,7 +122,6 @@ private: //members
   CABACWriter*           m_CABACEstimator;
   CtxCache*              m_CtxCache;
   double                 m_lambda[MAX_NUM_COMPONENT];
-  const double           FracBitsScale = 1.0 / double( 1 << SCALE_BITS );
 
   //statistics
   std::vector<SAOStatData**>         m_statData; //[ctu][comp][classes]
diff --git a/source/Lib/EncoderLib/EncSlice.cpp b/source/Lib/EncoderLib/EncSlice.cpp
index 82a08fa1206a03c52f5cfc86d14083ac13faa76d..ab194d148c76b8c169a871d9d2cf7335eb4fe844 100644
--- a/source/Lib/EncoderLib/EncSlice.cpp
+++ b/source/Lib/EncoderLib/EncSlice.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -44,10 +44,6 @@
 #include "CommonLib/dtrace_blockstatistics.h"
 #endif
 
-#if ENABLE_WPP_PARALLELISM
-#include <mutex>
-extern recursive_mutex g_cache_mutex;
-#endif
 
 #include <math.h>
 
@@ -107,6 +103,8 @@ void EncSlice::init( EncLib* pcEncLib, const SPS& sps )
 void
 EncSlice::setUpLambda( Slice* slice, const double dLambda, int iQP)
 {
+  m_pcRdCost->resetStore();
+  m_pcTrQuant->resetStore();
   // store lambda
   m_pcRdCost ->setLambda( dLambda, slice->getSPS()->getBitDepths() );
 
@@ -117,19 +115,17 @@ EncSlice::setUpLambda( Slice* slice, const double dLambda, int iQP)
   {
     const ComponentID compID = ComponentID( compIdx );
     int chromaQPOffset       = slice->getPPS()->getQpOffset( compID ) + slice->getSliceChromaQpDelta( compID );
-    int qpc                  = ( iQP + chromaQPOffset < 0 ) ? iQP : getScaledChromaQP( iQP + chromaQPOffset, m_pcCfg->getChromaFormatIdc() );
+    int qpc = slice->getSPS()->getMappedChromaQpValue(compID, iQP) + chromaQPOffset;
     double tmpWeight         = pow( 2.0, ( iQP - qpc ) / 3.0 );  // takes into account of the chroma qp mapping and chroma qp Offset
-    if( m_pcCfg->getDepQuantEnabledFlag() )
+#if JVET_Q0433_MODIFIED_CHROMA_DIST_WEIGHT
+    if( m_pcCfg->getDepQuantEnabledFlag()  )
+#else
+    if( m_pcCfg->getDepQuantEnabledFlag() && !( m_pcCfg->getLFNST() ) )
+#endif
     {
       tmpWeight *= ( m_pcCfg->getGOPSize() >= 8 ? pow( 2.0, 0.1/3.0 ) : pow( 2.0, 0.2/3.0 ) );  // increase chroma weight for dependent quantization (in order to reduce bit rate shift from chroma to luma)
     }
     m_pcRdCost->setDistortionWeight( compID, tmpWeight );
-#if ENABLE_WPP_PARALLELISM
-    for( int jId = 1; jId < ( m_pcLib->getNumWppThreads() + m_pcLib->getNumWppExtraLines() ); jId++ )
-    {
-      m_pcLib->getRdCost( slice->getPic()->scheduler.getWppDataId( jId ) )->setDistortionWeight( compID, tmpWeight );
-    }
-#endif
     dLambdas[compIdx] = dLambda / tmpWeight;
   }
 
@@ -199,7 +195,7 @@ static double getAveragePictureEnergy (const CPelBuf picOrig, const uint32_t uBi
 }
 #endif
 
-static int getGlaringColorQPOffset (Picture* const pcPic, const int ctuAddr, const uint32_t startAddr, const uint32_t boundingAddr,
+static int getGlaringColorQPOffset (Picture* const pcPic, const int ctuAddr, Slice* const pcSlice,
                                     const int bitDepth,   uint32_t &avgLumaValue)
 {
   const PreCalcValues& pcv  = *pcPic->cs->pcv;
@@ -208,25 +204,20 @@ static int getGlaringColorQPOffset (Picture* const pcPic, const int ctuAddr, con
   const uint32_t chrHeight  = pcv.maxCUHeight >> getChannelTypeScaleY (CH_C, chrFmt);
   const int      midLevel   = 1 << (bitDepth - 1);
   int chrValue = MAX_INT;
-  avgLumaValue = (startAddr < boundingAddr) ? 0 : (uint32_t)pcPic->getOrigBuf().Y().computeAvg();
+  avgLumaValue = (pcSlice != nullptr) ? 0 : (uint32_t)pcPic->getOrigBuf().Y().computeAvg();
 
   if (ctuAddr >= 0) // luma
   {
     avgLumaValue = (uint32_t)pcPic->m_iOffsetCtu[ctuAddr];
   }
-  else if (startAddr < boundingAddr)
+  else if (pcSlice != nullptr)
   {
-    for (uint32_t ctuTsAddr = startAddr; ctuTsAddr < boundingAddr; ctuTsAddr++)
+    for (uint32_t ctuIdx = 0; ctuIdx < pcSlice->getNumCtuInSlice(); ctuIdx++)
     {
-#if HEVC_TILES_WPP
-      const uint32_t ctuRsAddr = pcPic->tileMap->getCtuTsToRsAddrMap (ctuTsAddr);
-#else
-      const uint32_t ctuRsAddr = ctuTsAddr;
-#endif
-
+      uint32_t ctuRsAddr = pcSlice->getCtuAddrInSlice( ctuIdx );
       avgLumaValue += pcPic->m_iOffsetCtu[ctuRsAddr];
     }
-    avgLumaValue = (avgLumaValue + ((boundingAddr - startAddr) >> 1)) / (boundingAddr - startAddr);
+    avgLumaValue = (avgLumaValue + (pcSlice->getNumCtuInSlice() >> 1)) / pcSlice->getNumCtuInSlice();
   }
 
   for (uint32_t comp = COMPONENT_Cb; comp < MAX_NUM_COMPONENT; comp++)
@@ -280,7 +271,7 @@ static int applyQPAdaptationChroma (Picture* const pcPic, Slice* const pcSlice,
         int     averageAdaptedLumaQP = Clip3 (0, MAX_QP, sliceQP); // mean slice QP
 #endif
 
-        averageAdaptedLumaQP += getGlaringColorQPOffset (pcPic, -1 /*ctuRsAddr*/, 0 /*startAddr*/, 0 /*boundingAddr*/, bitDepth, meanLuma);
+        averageAdaptedLumaQP += getGlaringColorQPOffset (pcPic, -1 /*ctuRsAddr*/, nullptr /*pcSlice*/, bitDepth, meanLuma);
 
         if (averageAdaptedLumaQP > MAX_QP
 #if SHARP_LUMA_DELTA_QP
@@ -301,7 +292,7 @@ static int applyQPAdaptationChroma (Picture* const pcPic, Slice* const pcSlice,
         savedLumaQP = averageAdaptedLumaQP;
       } // savedLumaQP < 0
 
-      const int lumaChromaMappingDQP = savedLumaQP - getScaledChromaQP (savedLumaQP, pcEncCfg->getChromaFormatIdc());
+      const int lumaChromaMappingDQP = savedLumaQP - pcSlice->getSPS()->getMappedChromaQpValue(compID, savedLumaQP);
 
       optSliceChromaQpOffset[comp-1] = std::min (3 + lumaChromaMappingDQP, adaptChromaQPOffset + lumaChromaMappingDQP);
     }
@@ -334,25 +325,24 @@ void EncSlice::initEncSlice(Picture* pcPic, const int pocLast, const int pocCurr
 {
   double dQP;
   double dLambda;
+  PicHeader *picHeader = pcPic->cs->picHeader;
+  pcPic->cs->resetPrevPLT(pcPic->cs->prevPLT);
 
   rpcSlice = pcPic->slices[0];
   rpcSlice->setSliceBits(0);
   rpcSlice->setPic( pcPic );
+  rpcSlice->setPicHeader( picHeader );
   rpcSlice->initSlice();
   int multipleFactor = m_pcCfg->getUseCompositeRef() ? 2 : 1;
   if (m_pcCfg->getUseCompositeRef() && isEncodeLtRef)
   {
-    rpcSlice->setPicOutputFlag(false);
+    picHeader->setPicOutputFlag(false);
   }
   else
   {
-    rpcSlice->setPicOutputFlag(true);
+    picHeader->setPicOutputFlag(true);
   }
   rpcSlice->setPOC( pocCurr );
-  rpcSlice->setDepQuantEnabledFlag( m_pcCfg->getDepQuantEnabledFlag() );
-#if HEVC_USE_SIGN_HIDING
-  rpcSlice->setSignDataHidingEnabledFlag( m_pcCfg->getSignDataHidingEnabledFlag() );
-#endif
 
 #if SHARP_LUMA_DELTA_QP
   pcPic->fieldPic = isField;
@@ -420,22 +410,13 @@ void EncSlice::initEncSlice(Picture* pcPic, const int pocLast, const int pocCurr
     }
   }
 
+  rpcSlice->setDepth        ( depth );
   rpcSlice->setSliceType    ( eSliceType );
 
   // ------------------------------------------------------------------------------------------------------------------
   // Non-referenced frame marking
   // ------------------------------------------------------------------------------------------------------------------
 
-#if !JVET_M0101_HLS
-  if(pocLast == 0)
-  {
-    rpcSlice->setTemporalLayerNonReferenceFlag(false);
-  }
-  else
-  {
-    rpcSlice->setTemporalLayerNonReferenceFlag(!m_pcCfg->getGOPEntry(iGOPid).m_refPic);
-  }
-#endif
   pcPic->referenced = true;
 
   // ------------------------------------------------------------------------------------------------------------------
@@ -448,11 +429,6 @@ void EncSlice::initEncSlice(Picture* pcPic, const int pocLast, const int pocCurr
   dQP = m_pcCfg->getBaseQP();
   if(eSliceType!=I_SLICE)
   {
-#if SHARP_LUMA_DELTA_QP
-    if (!(( m_pcCfg->getMaxDeltaQP() == 0) && (!m_pcCfg->getLumaLevelToDeltaQPMapping().isEnabled()) && (dQP == -rpcSlice->getSPS()->getQpBDOffset(CHANNEL_TYPE_LUMA) ) && (rpcSlice->getPPS()->getTransquantBypassEnabledFlag())))
-#else
-    if (!(( m_pcCfg->getMaxDeltaQP() == 0 ) && (dQP == -rpcSlice->getSPS()->getQpBDOffset(CHANNEL_TYPE_LUMA) ) && (rpcSlice->getPPS()->getTransquantBypassEnabledFlag())))
-#endif
     {
       dQP += m_pcCfg->getGOPEntry(iGOPid).m_QPOffset;
     }
@@ -490,89 +466,12 @@ void EncSlice::initEncSlice(Picture* pcPic, const int pocLast, const int pocCurr
   {
     // compute QP value
     dQP = dOrigQP + ((iDQpIdx+1)>>1)*(iDQpIdx%2 ? -1 : 1);
-#if SHARP_LUMA_DELTA_QP
-    dLambda = calculateLambda(rpcSlice, iGOPid, depth, dQP, dQP, iQP );
-#else
     // compute lambda value
-    int    NumberBFrames = ( m_pcCfg->getGOPSize() - 1 );
-    int    SHIFT_QP = 12;
-
-    int    bitdepth_luma_qp_scale =
-      6
-      * (rpcSlice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA) - 8
-         - DISTORTION_PRECISION_ADJUSTMENT(rpcSlice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA)));
-    double qp_temp = (double) dQP + bitdepth_luma_qp_scale - SHIFT_QP;
-#if FULL_NBIT
-    double qp_temp_orig = (double) dQP - SHIFT_QP;
-#endif
-    // Case #1: I or P-slices (key-frame)
-    double dQPFactor = m_pcCfg->getGOPEntry(iGOPid).m_QPFactor;
-    if ( eSliceType==I_SLICE )
-    {
-      if (m_pcCfg->getIntraQpFactor()>=0.0 && m_pcCfg->getGOPEntry(iGOPid).m_sliceType != I_SLICE)
-      {
-        dQPFactor=m_pcCfg->getIntraQpFactor();
-      }
-      else
-      {
-#if X0038_LAMBDA_FROM_QP_CAPABILITY
-        if(m_pcCfg->getLambdaFromQPEnable())
-        {
-          dQPFactor=0.57;
-        }
-        else
-        {
-#endif
-        double dLambda_scale = 1.0 - Clip3( 0.0, 0.5, 0.05*(double)(isField ? NumberBFrames/2 : NumberBFrames) );
-
-        dQPFactor=0.57*dLambda_scale;
-#if X0038_LAMBDA_FROM_QP_CAPABILITY
-        }
-#endif
-      }
-    }
-#if X0038_LAMBDA_FROM_QP_CAPABILITY
-    else if( m_pcCfg->getLambdaFromQPEnable() )
-    {
-      dQPFactor=0.57;
-    }
-#endif
-
-    dLambda = dQPFactor*pow( 2.0, qp_temp/3.0 );
-
-#if X0038_LAMBDA_FROM_QP_CAPABILITY
-    if(!m_pcCfg->getLambdaFromQPEnable() && depth>0)
-#else
-    if ( depth>0 )
-#endif
-    {
-#if FULL_NBIT
-        dLambda *= Clip3( 2.00, 4.00, (qp_temp_orig / 6.0) ); // (j == B_SLICE && p_cur_frm->layer != 0 )
+#if SHARP_LUMA_DELTA_QP
+    dLambda = calculateLambda (rpcSlice, iGOPid, dQP, dQP, iQP);
 #else
-        dLambda *= Clip3( 2.00, 4.00, (qp_temp / 6.0) ); // (j == B_SLICE && p_cur_frm->layer != 0 )
-#endif
-    }
-
-    // if hadamard is used in ME process
-    if ( !m_pcCfg->getUseHADME() && rpcSlice->getSliceType( ) != I_SLICE )
-    {
-      dLambda *= 0.95;
-    }
-
-#if X0038_LAMBDA_FROM_QP_CAPABILITY
-    double lambdaModifier;
-    if( rpcSlice->getSliceType( ) != I_SLICE || intraLambdaModifiers.empty())
-    {
-      lambdaModifier = m_pcCfg->getLambdaModifier( temporalId );
-    }
-    else
-    {
-      lambdaModifier = intraLambdaModifiers[ (temporalId < intraLambdaModifiers.size()) ? temporalId : (intraLambdaModifiers.size()-1) ];
-    }
-    dLambda *= lambdaModifier;
-#endif
-
-    iQP = Clip3( -rpcSlice->getSPS()->getQpBDOffset( CHANNEL_TYPE_LUMA ), MAX_QP, (int) floor( dQP + 0.5 ) );
+    dLambda = initializeLambda (rpcSlice, iGOPid, int (dQP + 0.5), dQP);
+    iQP = Clip3 (-rpcSlice->getSPS()->getQpBDOffset (CHANNEL_TYPE_LUMA), MAX_QP, int (dQP + 0.5));
 #endif
 
     m_vdRdPicLambda[iDQpIdx] = dLambda;
@@ -605,6 +504,7 @@ void EncSlice::initEncSlice(Picture* pcPic, const int pocLast, const int pocCurr
     const bool bUseIntraOrPeriodicOffset = (rpcSlice->isIntra() && !rpcSlice->getSPS()->getIBCFlag()) || (m_pcCfg->getSliceChromaOffsetQpPeriodicity() > 0 && (rpcSlice->getPOC() % m_pcCfg->getSliceChromaOffsetQpPeriodicity()) == 0);
     int cbQP = bUseIntraOrPeriodicOffset ? m_pcCfg->getSliceChromaOffsetQpIntraOrPeriodic(false) : m_pcCfg->getGOPEntry(iGOPid).m_CbQPoffset;
     int crQP = bUseIntraOrPeriodicOffset ? m_pcCfg->getSliceChromaOffsetQpIntraOrPeriodic(true)  : m_pcCfg->getGOPEntry(iGOPid).m_CrQPoffset;
+    int cbCrQP = (cbQP + crQP) >> 1; // use floor of average chroma QP offset for joint-Cb/Cr coding
 
     cbQP = Clip3( -12, 12, cbQP + rpcSlice->getPPS()->getQpOffset(COMPONENT_Cb) ) - rpcSlice->getPPS()->getQpOffset(COMPONENT_Cb);
     crQP = Clip3( -12, 12, crQP + rpcSlice->getPPS()->getQpOffset(COMPONENT_Cr) ) - rpcSlice->getPPS()->getQpOffset(COMPONENT_Cr);
@@ -612,11 +512,17 @@ void EncSlice::initEncSlice(Picture* pcPic, const int pocLast, const int pocCurr
     CHECK(!(rpcSlice->getSliceChromaQpDelta(COMPONENT_Cb)+rpcSlice->getPPS()->getQpOffset(COMPONENT_Cb)<=12 && rpcSlice->getSliceChromaQpDelta(COMPONENT_Cb)+rpcSlice->getPPS()->getQpOffset(COMPONENT_Cb)>=-12), "Unspecified error");
     rpcSlice->setSliceChromaQpDelta(COMPONENT_Cr, Clip3( -12, 12, crQP));
     CHECK(!(rpcSlice->getSliceChromaQpDelta(COMPONENT_Cr)+rpcSlice->getPPS()->getQpOffset(COMPONENT_Cr)<=12 && rpcSlice->getSliceChromaQpDelta(COMPONENT_Cr)+rpcSlice->getPPS()->getQpOffset(COMPONENT_Cr)>=-12), "Unspecified error");
+    if (rpcSlice->getSPS()->getJointCbCrEnabledFlag())
+    {
+      cbCrQP = Clip3(-12, 12, cbCrQP + rpcSlice->getPPS()->getQpOffset(JOINT_CbCr)) - rpcSlice->getPPS()->getQpOffset(JOINT_CbCr);
+      rpcSlice->setSliceChromaQpDelta(JOINT_CbCr, Clip3( -12, 12, cbCrQP ));
+    }
   }
   else
   {
     rpcSlice->setSliceChromaQpDelta( COMPONENT_Cb, 0 );
     rpcSlice->setSliceChromaQpDelta( COMPONENT_Cr, 0 );
+    rpcSlice->setSliceChromaQpDelta( JOINT_CbCr, 0 );
   }
 #endif
 
@@ -634,6 +540,9 @@ void EncSlice::initEncSlice(Picture* pcPic, const int pocLast, const int pocCurr
   dLambda *= lambdaModifier;
 #endif
 
+#if RDOQ_CHROMA_LAMBDA
+  m_pcRdCost->setDistortionWeight (COMPONENT_Y, 1.0); // no chroma weighting for luma
+#endif
   setUpLambda(rpcSlice, dLambda, iQP);
 
 #if WCG_EXT
@@ -675,10 +584,11 @@ void EncSlice::initEncSlice(Picture* pcPic, const int pocLast, const int pocCurr
 #if !W0038_CQP_ADJ
   rpcSlice->setSliceChromaQpDelta( COMPONENT_Cb, 0 );
   rpcSlice->setSliceChromaQpDelta( COMPONENT_Cr, 0 );
+  rpcSlice->setSliceChromaQpDelta( JOINT_CbCr,   0 );
 #endif
-  rpcSlice->setUseChromaQpAdj( rpcSlice->getPPS()->getPpsRangeExtension().getChromaQpOffsetListEnabledFlag() );
-  rpcSlice->setNumRefIdx(REF_PIC_LIST_0,m_pcCfg->getGOPEntry(iGOPid).m_numRefPicsActive);
-  rpcSlice->setNumRefIdx(REF_PIC_LIST_1,m_pcCfg->getGOPEntry(iGOPid).m_numRefPicsActive);
+  rpcSlice->setUseChromaQpAdj( rpcSlice->getPPS()->getCuChromaQpOffsetEnabledFlag() );
+  rpcSlice->setNumRefIdx(REF_PIC_LIST_0, m_pcCfg->getRPLEntry(0, iGOPid).m_numRefPicsActive);
+  rpcSlice->setNumRefIdx(REF_PIC_LIST_1, m_pcCfg->getRPLEntry(1, iGOPid).m_numRefPicsActive);
 
   if ( m_pcCfg->getDeblockingFilterMetric() )
   {
@@ -713,8 +623,6 @@ void EncSlice::initEncSlice(Picture* pcPic, const int pocLast, const int pocCurr
     rpcSlice->setDeblockingFilterTcOffsetDiv2( 0 );
   }
 
-  rpcSlice->setDepth            ( depth );
-
   pcPic->layer =  temporalId;
   if(eSliceType==I_SLICE)
   {
@@ -722,114 +630,93 @@ void EncSlice::initEncSlice(Picture* pcPic, const int pocLast, const int pocCurr
   }
   rpcSlice->setTLayer( pcPic->layer );
 
-  rpcSlice->setSliceMode            ( m_pcCfg->getSliceMode()            );
-  rpcSlice->setSliceArgument        ( m_pcCfg->getSliceArgument()        );
-#if HEVC_DEPENDENT_SLICES
-  rpcSlice->setSliceSegmentMode     ( m_pcCfg->getSliceSegmentMode()     );
-  rpcSlice->setSliceSegmentArgument ( m_pcCfg->getSliceSegmentArgument() );
-#endif
-  rpcSlice->setMaxNumMergeCand      ( m_pcCfg->getMaxNumMergeCand()      );
-  rpcSlice->setMaxNumAffineMergeCand( m_pcCfg->getMaxNumAffineMergeCand() );
-  rpcSlice->setSplitConsOverrideFlag(false);
-  rpcSlice->setMinQTSize( rpcSlice->getSPS()->getMinQTSize(eSliceType));
-  rpcSlice->setMaxBTDepth( rpcSlice->isIntra() ? rpcSlice->getSPS()->getMaxBTDepthI() : rpcSlice->getSPS()->getMaxBTDepth() );
-  rpcSlice->setMaxBTSize( rpcSlice->isIntra() ? rpcSlice->getSPS()->getMaxBTSizeI() : rpcSlice->getSPS()->getMaxBTSize() );
-  rpcSlice->setMaxTTSize( rpcSlice->isIntra() ? rpcSlice->getSPS()->getMaxTTSizeI() : rpcSlice->getSPS()->getMaxTTSize() );
-  if ( eSliceType == I_SLICE && rpcSlice->getSPS()->getUseDualITree() )
+  rpcSlice->setDisableSATDForRD(false);
+
+  if( ( m_pcCfg->getIBCHashSearch() && m_pcCfg->getIBCMode() ) || m_pcCfg->getAllowDisFracMMVD() )
   {
-    rpcSlice->setMinQTSizeIChroma( rpcSlice->getSPS()->getMinQTSize(eSliceType, CHANNEL_TYPE_CHROMA) );
-    rpcSlice->setMaxBTDepthIChroma( rpcSlice->getSPS()->getMaxBTDepthIChroma() );
-    rpcSlice->setMaxBTSizeIChroma( rpcSlice->getSPS()->getMaxBTSizeIChroma() );
-    rpcSlice->setMaxTTSizeIChroma( rpcSlice->getSPS()->getMaxTTSizeIChroma() );
+    m_pcCuEncoder->getIbcHashMap().destroy();
+    m_pcCuEncoder->getIbcHashMap().init( pcPic->cs->pps->getPicWidthInLumaSamples(), pcPic->cs->pps->getPicHeightInLumaSamples() );
   }
 }
 
-
-#if SHARP_LUMA_DELTA_QP
-double EncSlice::calculateLambda( const Slice*     slice,
-                                  const int        GOPid, // entry in the GOP table
-                                  const int        depth, // slice GOP hierarchical depth.
-                                  const double     refQP, // initial slice-level QP
-                                  const double     dQP,   // initial double-precision QP
-                                        int       &iQP )  // returned integer QP.
+double EncSlice::initializeLambda(const Slice* slice, const int GOPid, const int refQP, const double dQP)
 {
-  enum   SliceType eSliceType    = slice->getSliceType();
-  const  bool      isField       = slice->getPic()->fieldPic;
-  const  int       NumberBFrames = ( m_pcCfg->getGOPSize() - 1 );
-  const  int       SHIFT_QP      = 12;
+  const int   bitDepthLuma  = slice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA);
+  const int   bitDepthShift = 6 * (bitDepthLuma - 8 - DISTORTION_PRECISION_ADJUSTMENT(bitDepthLuma)) - 12;
+  const int   numberBFrames = m_pcCfg->getGOPSize() - 1;
+  const SliceType sliceType = slice->getSliceType();
 #if X0038_LAMBDA_FROM_QP_CAPABILITY
-  const int temporalId=m_pcCfg->getGOPEntry(GOPid).m_temporalId;
-  const std::vector<double> &intraLambdaModifiers=m_pcCfg->getIntraLambdaModifier();
+  const int      temporalId = m_pcCfg->getGOPEntry(GOPid).m_temporalId;
+  const std::vector<double> &intraLambdaModifiers = m_pcCfg->getIntraLambdaModifier();
 #endif
-
-  int bitdepth_luma_qp_scale = 6
-                               * (slice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA) - 8
-                                  - DISTORTION_PRECISION_ADJUSTMENT(slice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA)));
-  double qp_temp = dQP + bitdepth_luma_qp_scale - SHIFT_QP;
-  // Case #1: I or P-slices (key-frame)
+  // case #1: I or P slices (key-frame)
   double dQPFactor = m_pcCfg->getGOPEntry(GOPid).m_QPFactor;
-  if ( eSliceType==I_SLICE )
+  double dLambda, lambdaModifier;
+
+  if (sliceType == I_SLICE)
   {
-    if (m_pcCfg->getIntraQpFactor()>=0.0 && m_pcCfg->getGOPEntry(GOPid).m_sliceType != I_SLICE)
+    if ((m_pcCfg->getIntraQpFactor() >= 0.0) && (m_pcCfg->getGOPEntry(GOPid).m_sliceType != I_SLICE))
     {
-      dQPFactor=m_pcCfg->getIntraQpFactor();
+      dQPFactor = m_pcCfg->getIntraQpFactor();
     }
     else
     {
 #if X0038_LAMBDA_FROM_QP_CAPABILITY
-      if(m_pcCfg->getLambdaFromQPEnable())
+      if (m_pcCfg->getLambdaFromQPEnable())
       {
-        dQPFactor=0.57;
+        dQPFactor = 0.57;
       }
       else
-      {
-#endif
-        double dLambda_scale = 1.0 - Clip3( 0.0, 0.5, 0.05*(double)(isField ? NumberBFrames/2 : NumberBFrames) );
-        dQPFactor=0.57*dLambda_scale;
-#if X0038_LAMBDA_FROM_QP_CAPABILITY
-      }
 #endif
+      dQPFactor = 0.57 * (1.0 - Clip3(0.0, 0.5, 0.05 * double (slice->getPic()->fieldPic ? numberBFrames >> 1 : numberBFrames)));
     }
   }
 #if X0038_LAMBDA_FROM_QP_CAPABILITY
-  else if( m_pcCfg->getLambdaFromQPEnable() )
+  else if (m_pcCfg->getLambdaFromQPEnable())
   {
-    dQPFactor=0.57;
+    dQPFactor = 0.57;
   }
 #endif
 
-  double dLambda = dQPFactor*pow( 2.0, qp_temp/3.0 );
+  dLambda = dQPFactor * pow(2.0, (dQP + bitDepthShift) / 3.0);
 
 #if X0038_LAMBDA_FROM_QP_CAPABILITY
-  if( !(m_pcCfg->getLambdaFromQPEnable()) && depth>0 )
+  if (slice->getDepth() > 0 && !m_pcCfg->getLambdaFromQPEnable())
 #else
-  if ( depth>0 )
+  if (slice->getDepth() > 0)
 #endif
   {
-    double qp_temp_ref = refQP + bitdepth_luma_qp_scale - SHIFT_QP;
-    dLambda *= Clip3(2.00, 4.00, (qp_temp_ref / 6.0));   // (j == B_SLICE && p_cur_frm->layer != 0 )
+    dLambda *= Clip3(2.0, 4.0, ((refQP + bitDepthShift) / 6.0));
   }
-
-  // if hadamard is used in ME process
-  if ( !m_pcCfg->getUseHADME() && slice->getSliceType( ) != I_SLICE )
+  // if Hadamard is used in motion estimation process
+  if (!m_pcCfg->getUseHADME() && (sliceType != I_SLICE))
   {
     dLambda *= 0.95;
   }
-
 #if X0038_LAMBDA_FROM_QP_CAPABILITY
-  double lambdaModifier;
-  if( eSliceType != I_SLICE || intraLambdaModifiers.empty())
+  if ((sliceType != I_SLICE) || intraLambdaModifiers.empty())
   {
-    lambdaModifier = m_pcCfg->getLambdaModifier( temporalId );
+    lambdaModifier = m_pcCfg->getLambdaModifier(temporalId);
   }
   else
   {
-    lambdaModifier = intraLambdaModifiers[ (temporalId < intraLambdaModifiers.size()) ? temporalId : (intraLambdaModifiers.size()-1) ];
+    lambdaModifier = intraLambdaModifiers[temporalId < intraLambdaModifiers.size() ? temporalId : intraLambdaModifiers.size() - 1];
   }
   dLambda *= lambdaModifier;
 #endif
 
-  iQP = Clip3( -slice->getSPS()->getQpBDOffset( CHANNEL_TYPE_LUMA ), MAX_QP, (int) floor( dQP + 0.5 ) );
+  return dLambda;
+}
+
+#if SHARP_LUMA_DELTA_QP || ENABLE_QPA_SUB_CTU
+double EncSlice::calculateLambda( const Slice*     slice,
+                                  const int        GOPid, // entry in the GOP table
+                                  const double     refQP, // initial slice-level QP
+                                  const double     dQP,   // initial double-precision QP
+                                        int       &iQP )  // returned integer QP.
+{
+  double dLambda = initializeLambda (slice, GOPid, int (refQP + 0.5), dQP);
+  iQP = Clip3 (-slice->getSPS()->getQpBDOffset (CHANNEL_TYPE_LUMA), MAX_QP, int (dQP + 0.5));
 
   if( m_pcCfg->getDepQuantEnabledFlag() )
   {
@@ -847,19 +734,22 @@ void EncSlice::resetQP( Picture* pic, int sliceQP, double lambda )
 
   // store lambda
   slice->setSliceQp( sliceQP );
+#if RDOQ_CHROMA_LAMBDA
+  m_pcRdCost->setDistortionWeight (COMPONENT_Y, 1.0); // no chroma weighting for luma
+#endif
   setUpLambda(slice, lambda, sliceQP);
+#if WCG_EXT
+  m_pcRdCost->saveUnadjustedLambda();
+#endif
 }
 
 #if ENABLE_QPA
 static bool applyQPAdaptation (Picture* const pcPic,       Slice* const pcSlice,        const PreCalcValues& pcv,
-                               const uint32_t startAddr,   const uint32_t boundingAddr, const bool useSharpLumaDQP,
+                               const bool useSharpLumaDQP,
                                const bool useFrameWiseQPA, const int previouslyAdaptedLumaQP = -1)
 {
   const int  bitDepth    = pcSlice->getSPS()->getBitDepth (CHANNEL_TYPE_LUMA);
   const int  iQPIndex    = pcSlice->getSliceQp(); // initial QP index for current slice, used in following loops
-#if HEVC_TILES_WPP
-  const TileMap& tileMap = *pcPic->tileMap;
-#endif
   bool   sliceQPModified = false;
   uint32_t   meanLuma    = MAX_UINT;
   double     hpEnerAvg   = 0.0;
@@ -868,13 +758,9 @@ static bool applyQPAdaptation (Picture* const pcPic,       Slice* const pcSlice,
   if (!useFrameWiseQPA || previouslyAdaptedLumaQP < 0)  // mean visual activity value and luma value in each CTU
 #endif
   {
-    for (uint32_t ctuTsAddr = startAddr; ctuTsAddr < boundingAddr; ctuTsAddr++)
+    for (uint32_t ctuIdx = 0; ctuIdx < pcSlice->getNumCtuInSlice(); ctuIdx++)
     {
-#if HEVC_TILES_WPP
-      const uint32_t ctuRsAddr  = tileMap.getCtuTsToRsAddrMap (ctuTsAddr);
-#else
-      const uint32_t ctuRsAddr  = ctuTsAddr;
-#endif
+      uint32_t ctuRsAddr = pcSlice->getCtuAddrInSlice( ctuIdx );
       const Position pos ((ctuRsAddr % pcv.widthInCtus) * pcv.maxCUWidth, (ctuRsAddr / pcv.widthInCtus) * pcv.maxCUHeight);
       const CompArea ctuArea    = clipArea (CompArea (COMPONENT_Y, pcPic->chromaFormat, Area (pos.x, pos.y, pcv.maxCUWidth, pcv.maxCUHeight)), pcPic->Y());
       const CompArea fltArea    = clipArea (CompArea (COMPONENT_Y, pcPic->chromaFormat, Area (pos.x > 0 ? pos.x - 1 : 0, pos.y > 0 ? pos.y - 1 : 0, pcv.maxCUWidth + (pos.x > 0 ? 2 : 1), pcv.maxCUHeight + (pos.y > 0 ? 2 : 1))), pcPic->Y());
@@ -888,7 +774,7 @@ static bool applyQPAdaptation (Picture* const pcPic,       Slice* const pcSlice,
       pcPic->m_iOffsetCtu[ctuRsAddr] = pcPic->getOrigBuf (ctuArea).computeAvg();
     }
 
-    hpEnerAvg /= double (boundingAddr - startAddr);
+    hpEnerAvg /= double (pcSlice->getNumCtuInSlice());
   }
 #if GLOBAL_AVERAGING
   const double hpEnerPic = 1.0 / getAveragePictureEnergy (pcPic->getOrigBuf().Y(), bitDepth);  // inverse, speed
@@ -902,7 +788,7 @@ static bool applyQPAdaptation (Picture* const pcPic,       Slice* const pcSlice,
 
     if (isChromaEnabled (pcPic->chromaFormat) && (iQPIndex < MAX_QP) && (previouslyAdaptedLumaQP < 0))
     {
-      iQPFixed += getGlaringColorQPOffset (pcPic, -1 /*ctuRsAddr*/, startAddr, boundingAddr, bitDepth, meanLuma);
+      iQPFixed += getGlaringColorQPOffset (pcPic, -1 /*ctuRsAddr*/, pcSlice, bitDepth, meanLuma);
 
       if (iQPFixed > MAX_QP
 #if SHARP_LUMA_DELTA_QP
@@ -919,17 +805,13 @@ static bool applyQPAdaptation (Picture* const pcPic,       Slice* const pcSlice,
       {
         meanLuma = 0;
 
-        for (uint32_t ctuTsAddr = startAddr; ctuTsAddr < boundingAddr; ctuTsAddr++)
+        for (uint32_t ctuIdx = 0; ctuIdx < pcSlice->getNumCtuInSlice(); ctuIdx++)
         {
- #if HEVC_TILES_WPP
-          const uint32_t ctuRsAddr = tileMap.getCtuTsToRsAddrMap (ctuTsAddr);
- #else
-          const uint32_t ctuRsAddr = ctuTsAddr;
- #endif
+          uint32_t ctuRsAddr = pcSlice->getCtuAddrInSlice( ctuIdx );
 
           meanLuma += pcPic->m_iOffsetCtu[ctuRsAddr];  // CTU mean
         }
-        meanLuma = (meanLuma + ((boundingAddr - startAddr) >> 1)) / (boundingAddr - startAddr);
+        meanLuma = (meanLuma + (pcSlice->getNumCtuInSlice() >> 1)) / pcSlice->getNumCtuInSlice();
       }
       iQPFixed = Clip3 (0, MAX_QP, iQPFixed + lumaDQPOffset (meanLuma, bitDepth));
     }
@@ -951,26 +833,18 @@ static bool applyQPAdaptation (Picture* const pcPic,       Slice* const pcSlice,
       sliceQPModified = true;
     }
 
-    for (uint32_t ctuTsAddr = startAddr; ctuTsAddr < boundingAddr; ctuTsAddr++)
+    for (uint32_t ctuIdx = 0; ctuIdx < pcSlice->getNumCtuInSlice(); ctuIdx++)
     {
-#if HEVC_TILES_WPP
-      const uint32_t ctuRsAddr = tileMap.getCtuTsToRsAddrMap (ctuTsAddr);
-#else
-      const uint32_t ctuRsAddr = ctuTsAddr;
-#endif
+      uint32_t ctuRsAddr = pcSlice->getCtuAddrInSlice( ctuIdx );
 
       pcPic->m_iOffsetCtu[ctuRsAddr] = (Pel)iQPFixed; // fixed QPs
     }
   }
   else // CTU-wise QPA
   {
-    for (uint32_t ctuTsAddr = startAddr; ctuTsAddr < boundingAddr; ctuTsAddr++)
+    for (uint32_t ctuIdx = 0; ctuIdx < pcSlice->getNumCtuInSlice(); ctuIdx++)
     {
-#if HEVC_TILES_WPP
-      const uint32_t ctuRsAddr = tileMap.getCtuTsToRsAddrMap (ctuTsAddr);
-#else
-      const uint32_t ctuRsAddr = ctuTsAddr;
-#endif
+      uint32_t ctuRsAddr = pcSlice->getCtuAddrInSlice( ctuIdx );
 
       int iQPAdapt = Clip3 (0, MAX_QP, iQPIndex + apprI3Log2 (pcPic->m_uEnerHpCtu[ctuRsAddr] * hpEnerPic));
 
@@ -980,7 +854,7 @@ static bool applyQPAdaptation (Picture* const pcPic,       Slice* const pcSlice,
 
         if (isChromaEnabled (pcPic->chromaFormat))
         {
-          iQPAdapt += getGlaringColorQPOffset (pcPic, (int)ctuRsAddr, startAddr, boundingAddr, bitDepth, meanLuma);
+          iQPAdapt += getGlaringColorQPOffset (pcPic, (int)ctuRsAddr, nullptr, bitDepth, meanLuma);
 
           if (iQPAdapt > MAX_QP
 #if SHARP_LUMA_DELTA_QP
@@ -1001,7 +875,7 @@ static bool applyQPAdaptation (Picture* const pcPic,       Slice* const pcSlice,
         }
 
 #endif
-        const uint32_t uRefScale  = g_invQuantScales[iQPAdapt % 6] << ((iQPAdapt / 6) + bitDepth - 4);
+        const uint32_t uRefScale  = g_invQuantScales[0][iQPAdapt % 6] << ((iQPAdapt / 6) + bitDepth - 4);
         const CompArea subArea    = clipArea (CompArea (COMPONENT_Y, pcPic->chromaFormat, Area ((ctuRsAddr % pcv.widthInCtus) * pcv.maxCUWidth, (ctuRsAddr / pcv.widthInCtus) * pcv.maxCUHeight, pcv.maxCUWidth, pcv.maxCUHeight)), pcPic->Y());
         const Pel*     pSrc       = pcPic->getOrigBuf (subArea).buf;
         const SizeType iSrcStride = pcPic->getOrigBuf (subArea).stride;
@@ -1041,7 +915,7 @@ static bool applyQPAdaptation (Picture* const pcPic,       Slice* const pcSlice,
       pcPic->m_iOffsetCtu[ctuRsAddr] = (Pel)iQPAdapt; // adapted QPs
 
 #if ENABLE_QPA_SUB_CTU
-      if (pcv.widthInCtus > 1 && pcSlice->getPPS()->getCuQpDeltaSubdiv() == 0)  // reduce local DQP rate peaks
+      if (pcv.widthInCtus > 1 && pcSlice->getCuQpDeltaSubdiv() == 0)  // reduce local DQP rate peaks
 #elif ENABLE_QPA_SUB_CTU
       if (pcv.widthInCtus > 1 && pcSlice->getPPS()->getMaxCuDQPDepth() == 0)  // reduce local DQP rate peaks
 #else
@@ -1065,7 +939,7 @@ static bool applyQPAdaptation (Picture* const pcPic,       Slice* const pcSlice,
         {
           pcPic->m_iOffsetCtu[ctuRsAddr - 1] = (Pel)iQPAdapt;
         }
-        if ((ctuTsAddr == boundingAddr - 1) && (ctuRsAddr > pcv.widthInCtus)) // last CTU in the given slice
+        if ((ctuIdx == pcSlice->getNumCtuInSlice() - 1) && (ctuRsAddr > pcv.widthInCtus)) // last CTU in the given slice
         {
           iQPAdapt = std::min (pcPic->m_iOffsetCtu[ctuRsAddr - 1], pcPic->m_iOffsetCtu[ctuRsAddr - pcv.widthInCtus]);
           if (pcPic->m_iOffsetCtu[ctuRsAddr] < (Pel)iQPAdapt)
@@ -1088,7 +962,7 @@ static int applyQPAdaptationSubCtu (CodingStructure &cs, const UnitArea ctuArea,
   const int       bitDepth = cs.slice->getSPS()->getBitDepth (CHANNEL_TYPE_LUMA); // overall image bit-depth
   const int   adaptedCtuQP = pcPic ? pcPic->m_iOffsetCtu[ctuAddr] : cs.slice->getSliceQpBase();
 
-  if (!pcPic || cs.pps->getCuQpDeltaSubdiv() == 0) return adaptedCtuQP;
+  if (!pcPic || cs.slice->getCuQpDeltaSubdiv() == 0) return adaptedCtuQP;
 
   for (unsigned addr = 0; addr < cs.picture->m_subCtuQP.size(); addr++)
   {
@@ -1099,12 +973,8 @@ static int applyQPAdaptationSubCtu (CodingStructure &cs, const UnitArea ctuArea,
 #if SHARP_LUMA_DELTA_QP
     const int   lumaCtuDQP = useSharpLumaDQP ? lumaDQPOffset ((uint32_t)pcPic->m_uEnerHpCtu[ctuAddr], bitDepth) : 0;
 #endif
-#if MAX_TB_SIZE_SIGNALLING
     const unsigned     mts = std::min (cs.sps->getMaxTbSize(), pcv.maxCUWidth);
-#else
-    const unsigned     mts = std::min<uint32_t> (MAX_TB_SIZEY, pcv.maxCUWidth);
-#endif
-    const unsigned mtsLog2 = (unsigned)g_aucLog2[mts];
+    const unsigned mtsLog2 = (unsigned)floorLog2(mts);
     const unsigned  stride = pcv.maxCUWidth >> mtsLog2;
     unsigned numAct = 0;    // number of block activities
     double   sumAct = 0.0; // sum of all block activities
@@ -1198,12 +1068,6 @@ void EncSlice::setSearchRange( Slice* pcSlice )
       iRefPOC = pcSlice->getRefPic(e, iRefIdx)->getPOC();
       int newSearchRange = Clip3(m_pcCfg->getMinSearchWindow(), iMaxSR, (iMaxSR*ADAPT_SR_SCALE*abs(iCurrPOC - iRefPOC)+iOffset)/iGOPSize);
       m_pcInterSearch->setAdaptiveSearchRange(iDir, iRefIdx, newSearchRange);
-#if ENABLE_WPP_PARALLELISM
-      for( int jId = 1; jId < m_pcLib->getNumCuEncStacks(); jId++ )
-      {
-        m_pcLib->getInterSearch( jId )->setAdaptiveSearchRange( iDir, iRefIdx, newSearchRange );
-      }
-#endif
     }
   }
 }
@@ -1228,20 +1092,7 @@ void EncSlice::precompressSlice( Picture* pcPic )
 
   Slice* pcSlice        = pcPic->slices[getSliceSegmentIdx()];
 
-#if HEVC_DEPENDENT_SLICES
-  if (pcSlice->getDependentSliceSegmentFlag())
-  {
-    // if this is a dependent slice segment, then it was optimised
-    // when analysing the entire slice.
-    return;
-  }
-#endif
 
-  if (pcSlice->getSliceMode()==FIXED_NUMBER_OF_BYTES)
-  {
-    // TODO: investigate use of average cost per CTU so that this Slice Mode can be used.
-    THROW( "Unable to optimise Slice-level QP if Slice Mode is set to FIXED_NUMBER_OF_BYTES\n" );
-  }
 
   double     dPicRdCostBest = MAX_DOUBLE;
   uint32_t       uiQpIdxBest = 0;
@@ -1298,31 +1149,15 @@ void EncSlice::calCostSliceI(Picture* pcPic) // TODO: this only analyses the fir
 {
   double         iSumHadSlice      = 0;
   Slice * const  pcSlice           = pcPic->slices[getSliceSegmentIdx()];
-#if HEVC_TILES_WPP
-  const TileMap &tileMap           = *pcPic->tileMap;
-#endif
   const PreCalcValues& pcv         = *pcPic->cs->pcv;
   const SPS     &sps               = *(pcSlice->getSPS());
   const int      shift             = sps.getBitDepth(CHANNEL_TYPE_LUMA)-8;
   const int      offset            = (shift>0)?(1<<(shift-1)):0;
 
-#if HEVC_DEPENDENT_SLICES
-  pcSlice->setSliceSegmentBits(0);
-#endif
-
-  uint32_t startCtuTsAddr, boundingCtuTsAddr;
-  xDetermineStartAndBoundingCtuTsAddr ( startCtuTsAddr, boundingCtuTsAddr, pcPic );
 
-#if HEVC_TILES_WPP
-  for( uint32_t ctuTsAddr = startCtuTsAddr, ctuRsAddr = tileMap.getCtuTsToRsAddrMap( startCtuTsAddr);
-       ctuTsAddr < boundingCtuTsAddr;
-       ctuRsAddr = tileMap.getCtuTsToRsAddrMap(++ctuTsAddr) )
-#else
-  for( uint32_t ctuTsAddr = startCtuTsAddr, ctuRsAddr = startCtuTsAddr;
-       ctuTsAddr < boundingCtuTsAddr;
-       ctuRsAddr = ++ctuTsAddr )
-#endif
+  for( uint32_t ctuIdx = 0; ctuIdx < pcSlice->getNumCtuInSlice(); ctuIdx++ )
   {
+    uint32_t ctuRsAddr = pcSlice->getCtuAddrInSlice( ctuIdx );
     Position pos( (ctuRsAddr % pcv.widthInCtus) * pcv.maxCUWidth, (ctuRsAddr / pcv.widthInCtus) * pcv.maxCUHeight);
 
     const int height  = std::min( pcv.maxCUHeight, pcv.lumaHeight - pos.y );
@@ -1345,20 +1180,6 @@ void EncSlice::compressSlice( Picture* pcPic, const bool bCompressEntireSlice, c
   //   effectively disabling the slice-segment-mode.
 
   Slice* const pcSlice    = pcPic->slices[getSliceSegmentIdx()];
-  uint32_t  startCtuTsAddr;
-  uint32_t  boundingCtuTsAddr;
-
-#if HEVC_DEPENDENT_SLICES
-  pcSlice->setSliceSegmentBits(0);
-#endif
-  xDetermineStartAndBoundingCtuTsAddr ( startCtuTsAddr, boundingCtuTsAddr, pcPic );
-  if (bCompressEntireSlice)
-  {
-    boundingCtuTsAddr = pcSlice->getSliceCurEndCtuTsAddr();
-#if HEVC_DEPENDENT_SLICES
-    pcSlice->setSliceSegmentCurEndCtuTsAddr(boundingCtuTsAddr);
-#endif
-  }
 
   // initialize cost values - these are used by precompressSlice (they should be parameters).
   m_uiPicTotalBits  = 0;
@@ -1368,7 +1189,7 @@ void EncSlice::compressSlice( Picture* pcPic, const bool bCompressEntireSlice, c
 
   m_CABACEstimator->initCtxModels( *pcSlice );
 
-#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM
+#if ENABLE_SPLIT_PARALLELISM
   for( int jId = 1; jId < m_pcLib->getNumCuEncStacks(); jId++ )
   {
     CABACWriter* cw = m_pcLib->getCABACEncoder( jId )->getCABACEstimator( pcSlice->getSPS() );
@@ -1392,17 +1213,6 @@ void EncSlice::compressSlice( Picture* pcPic, const bool bCompressEntireSlice, c
 
   if ( bWp_explicit )
   {
-    //------------------------------------------------------------------------------
-    //  Weighted Prediction implemented at Slice level. SliceMode=2 is not supported yet.
-    //------------------------------------------------------------------------------
-#if HEVC_DEPENDENT_SLICES
-    if ( pcSlice->getSliceMode()==FIXED_NUMBER_OF_BYTES || pcSlice->getSliceSegmentMode()==FIXED_NUMBER_OF_BYTES )
-#else
-    if(pcSlice->getSliceMode() == FIXED_NUMBER_OF_BYTES)
-#endif
-    {
-      EXIT("Weighted Prediction is not yet supported with slice mode determined by max number of bins.");
-    }
 
     xEstimateWPParamSlice( pcSlice, m_pcCfg->getWeightedPredictionMethod() );
     pcSlice->initWpScaling(pcSlice->getSPS());
@@ -1412,45 +1222,8 @@ void EncSlice::compressSlice( Picture* pcPic, const bool bCompressEntireSlice, c
   }
 
 
-#if HEVC_DEPENDENT_SLICES
-#if HEVC_TILES_WPP
-  // Adjust initial state if this is the start of a dependent slice.
-  {
-    const TileMap&  tileMap                 = *pcPic->tileMap;
-    const uint32_t      ctuRsAddr               = tileMap.getCtuTsToRsAddrMap( startCtuTsAddr);
-    const uint32_t      currentTileIdx          = tileMap.getTileIdxMap(ctuRsAddr);
-    const Tile&     currentTile             = tileMap.tiles[currentTileIdx];
-    const uint32_t      firstCtuRsAddrOfTile    = currentTile.getFirstCtuRsAddr();
-    if( pcSlice->getDependentSliceSegmentFlag() && ctuRsAddr != firstCtuRsAddrOfTile )
-    {
-      // This will only occur if dependent slice-segments (m_entropyCodingSyncContextState=true) are being used.
-      if( currentTile.getTileWidthInCtus() >= 2 || !m_pcCfg->getEntropyCodingSyncEnabledFlag() )
-      {
-        m_CABACEstimator->getCtx() = m_lastSliceSegmentEndContextState;
-        m_CABACEstimator->start();
-      }
-    }
-  }
-#else
-  // KJS: not sure if this works (but both dep slices and tiles shall be removed in VTM, so this code should not be used)
-  if( pcSlice->getDependentSliceSegmentFlag() && ctuRsAddr != startCtuTsAddr )
-  {
-    if( pcPic->cs->pcv->widthInCtus >= 2 || !m_pcCfg->getEntropyCodingSyncEnabledFlag() )
-    {
-      m_CABACEstimator->getCtx() = m_lastSliceSegmentEndContextState;
-      m_CABACEstimator->start();
-    }
-#endif
-#endif
 
-#if HEVC_DEPENDENT_SLICES
-  if( !pcSlice->getDependentSliceSegmentFlag() )
-  {
-#endif
     pcPic->m_prevQP[0] = pcPic->m_prevQP[1] = pcSlice->getSliceQp();
-#if HEVC_DEPENDENT_SLICES
-  }
-#endif
 
   CHECK( pcPic->m_prevQP[0] == std::numeric_limits<int>::max(), "Invalid previous QP" );
 
@@ -1459,34 +1232,27 @@ void EncSlice::compressSlice( Picture* pcPic, const bool bCompressEntireSlice, c
   cs.pcv      = pcSlice->getPPS()->pcv;
   cs.fracBits = 0;
 
-  if( startCtuTsAddr == 0 && ( pcSlice->getPOC() != m_pcCfg->getSwitchPOC() || -1 == m_pcCfg->getDebugCTU() ) )
+  if( pcSlice->getFirstCtuRsAddrInSlice() == 0 && ( pcSlice->getPOC() != m_pcCfg->getSwitchPOC() || -1 == m_pcCfg->getDebugCTU() ) )
   {
-    cs.initStructData (pcSlice->getSliceQp(), pcSlice->getPPS()->getTransquantBypassEnabledFlag());
+    cs.initStructData (pcSlice->getSliceQp());
   }
 
 #if ENABLE_QPA
-  if (m_pcCfg->getUsePerceptQPA() && !m_pcCfg->getUseRateCtrl() && (boundingCtuTsAddr > startCtuTsAddr))
+  if (m_pcCfg->getUsePerceptQPA() && !m_pcCfg->getUseRateCtrl())
   {
-    if (applyQPAdaptation (pcPic, pcSlice, *cs.pcv, startCtuTsAddr, boundingCtuTsAddr, m_pcCfg->getLumaLevelToDeltaQPMapping().mode == LUMALVL_TO_DQP_NUM_MODES,
+    if (applyQPAdaptation (pcPic, pcSlice, *cs.pcv, m_pcCfg->getLumaLevelToDeltaQPMapping().mode == LUMALVL_TO_DQP_NUM_MODES,
                            (m_pcCfg->getBaseQP() >= 38) || (m_pcCfg->getSourceWidth() <= 512 && m_pcCfg->getSourceHeight() <= 320), m_adaptedLumaQP))
     {
       m_CABACEstimator->initCtxModels (*pcSlice);
-#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM
+#if ENABLE_SPLIT_PARALLELISM
       for (int jId = 1; jId < m_pcLib->getNumCuEncStacks(); jId++)
       {
         CABACWriter* cw = m_pcLib->getCABACEncoder (jId)->getCABACEstimator (pcSlice->getSPS());
         cw->initCtxModels (*pcSlice);
       }
-#endif
-#if HEVC_DEPENDENT_SLICES
-      if (!pcSlice->getDependentSliceSegmentFlag())
-      {
 #endif
         pcPic->m_prevQP[0] = pcPic->m_prevQP[1] = pcSlice->getSliceQp();
-#if HEVC_DEPENDENT_SLICES
-      }
-#endif
-      if (startCtuTsAddr == 0)
+      if (pcSlice->getFirstCtuRsAddrInSlice() == 0)
       {
         cs.currQP[0] = cs.currQP[1] = pcSlice->getSliceQp(); // cf code above
       }
@@ -1494,46 +1260,27 @@ void EncSlice::compressSlice( Picture* pcPic, const bool bCompressEntireSlice, c
   }
 #endif // ENABLE_QPA
 
-#if ENABLE_WPP_PARALLELISM
-  bool bUseThreads = m_pcCfg->getNumWppThreads() > 1;
-  if( bUseThreads )
+  bool checkPLTRatio = m_pcCfg->getIntraPeriod() != 1 && pcSlice->isIRAP();
+  if (checkPLTRatio)
   {
-    CHECK( startCtuTsAddr != 0 || boundingCtuTsAddr != pcPic->cs->pcv->sizeInCtus, "not intended" );
-
-    pcPic->cs->allocateVectorsAtPicLevel();
-
-    omp_set_num_threads( m_pcCfg->getNumWppThreads() + m_pcCfg->getNumWppExtraLines() );
-
-    #pragma omp parallel for schedule(static,1) if(bUseThreads)
-    for( int ctuTsAddr = startCtuTsAddr; ctuTsAddr < boundingCtuTsAddr; ctuTsAddr += widthInCtus )
-    {
-      // wpp thread start
-      pcPic->scheduler.setWppThreadId();
-#if ENABLE_SPLIT_PARALLELISM
-      pcPic->scheduler.setSplitThreadId( 0 );
-#endif
-      encodeCtus( pcPic, bCompressEntireSlice, bFastDeltaQP, ctuTsAddr, ctuTsAddr + widthInCtus, m_pcLib );
-      // wpp thread stop
-    }
+    m_pcCuEncoder->getModeCtrl()->setPltEnc(true);
   }
   else
-#endif
+  {
+    bool doPlt = m_pcLib->getPltEnc();
+    m_pcCuEncoder->getModeCtrl()->setPltEnc(doPlt);
+  }
+
 #if K0149_BLOCK_STATISTICS
   const SPS *sps = pcSlice->getSPS();
   CHECK(sps == 0, "No SPS present");
   writeBlockStatisticsHeader(sps);
 #endif
   m_pcInterSearch->resetAffineMVList();
-  encodeCtus( pcPic, bCompressEntireSlice, bFastDeltaQP, startCtuTsAddr, boundingCtuTsAddr, m_pcLib );
-
-#if HEVC_DEPENDENT_SLICES
-  // store context state at the end of this slice-segment, in case the next slice is a dependent slice and continues using the CABAC contexts.
-  if( pcSlice->getPPS()->getDependentSliceSegmentsEnabledFlag() )
-  {
-    m_lastSliceSegmentEndContextState = m_CABACEstimator->getCtx();//ctx end of dep.slice
-  }
-#endif
-
+  m_pcInterSearch->resetUniMvList();
+  ::memset(g_isReusedUniMVsFilled, 0, sizeof(g_isReusedUniMVsFilled));
+  encodeCtus( pcPic, bCompressEntireSlice, bFastDeltaQP, m_pcLib );
+  if (checkPLTRatio) m_pcLib->checkPltStats( pcPic );
 }
 
 void EncSlice::checkDisFracMmvd( Picture* pcPic, uint32_t startCtuTsAddr, uint32_t boundingCtuTsAddr )
@@ -1542,25 +1289,18 @@ void EncSlice::checkDisFracMmvd( Picture* pcPic, uint32_t startCtuTsAddr, uint32
   Slice* pcSlice                  = cs.slice;
   const PreCalcValues& pcv        = *cs.pcv;
   const uint32_t    widthInCtus   = pcv.widthInCtus;
-#if HEVC_TILES_WPP
-  const TileMap&  tileMap         = *pcPic->tileMap;
-#endif
   const uint32_t hashThreshold    = 20;
   uint32_t totalCtu               = 0;
   uint32_t hashRatio              = 0;
 
-  if ( !pcSlice->getSPS()->getDisFracMmvdEnabledFlag() )
+  if ( !pcSlice->getSPS()->getFpelMmvdEnabledFlag() )
   {
     return;
   }
 
-  for ( uint32_t ctuTsAddr = startCtuTsAddr; ctuTsAddr < boundingCtuTsAddr; ctuTsAddr++ )
+  for ( uint32_t ctuIdx = 0; ctuIdx < pcSlice->getNumCtuInSlice(); ctuIdx++ )
   {
-#if HEVC_TILES_WPP
-    const uint32_t ctuRsAddr = tileMap.getCtuTsToRsAddrMap( ctuTsAddr );
-#else
-    const uint32_t ctuRsAddr = ctuTsAddr;
-#endif
+    const uint32_t ctuRsAddr = pcSlice->getCtuAddrInSlice( ctuIdx );
     const uint32_t ctuXPosInCtus        = ctuRsAddr % widthInCtus;
     const uint32_t ctuYPosInCtus        = ctuRsAddr / widthInCtus;
 
@@ -1573,32 +1313,64 @@ void EncSlice::checkDisFracMmvd( Picture* pcPic, uint32_t startCtuTsAddr, uint32
 
   if ( hashRatio > totalCtu * hashThreshold )
   {
-    pcSlice->setDisFracMMVD( true );
+    pcPic->cs->picHeader->setDisFracMMVD( true );
   }
-  if (!pcSlice->getDisFracMMVD()) {
+  if (!pcPic->cs->picHeader->getDisFracMMVD()) {
     bool useIntegerMVD = (pcPic->lwidth()*pcPic->lheight() > 1920 * 1080);
-    pcSlice->setDisFracMMVD( useIntegerMVD );
+    pcPic->cs->picHeader->setDisFracMMVD( useIntegerMVD );
   }
 }
 
-void EncSlice::encodeCtus( Picture* pcPic, const bool bCompressEntireSlice, const bool bFastDeltaQP, uint32_t startCtuTsAddr, uint32_t boundingCtuTsAddr, EncLib* pEncLib )
+
+void EncSlice::setJointCbCrModes( CodingStructure& cs, const Position topLeftLuma, const Size sizeLuma ) // derives the joint Cb-Cr sign flag from the original chroma samples of the given luma rectangle and stores it in cs.picHeader
+{
+  bool              sgnFlag = true; // value that is kept when chroma is disabled (the block below is then skipped)
+
+  if( isChromaEnabled( cs.picture->chromaFormat) )
+  {
+    const CompArea  cbArea  = CompArea( COMPONENT_Cb, cs.picture->chromaFormat, Area(topLeftLuma,sizeLuma), true ); // Cb area built from the luma rectangle; NOTE(review): the trailing 'true' presumably marks the Area as luma-based - confirm against CompArea
+    const CompArea  crArea  = CompArea( COMPONENT_Cr, cs.picture->chromaFormat, Area(topLeftLuma,sizeLuma), true ); // same rectangle for Cr
+    const CPelBuf   orgCb   = cs.picture->getOrigBuf( cbArea ); // original (source) Cb samples of that area
+    const CPelBuf   orgCr   = cs.picture->getOrigBuf( crArea ); // original (source) Cr samples of that area
+    const int       x0      = ( cbArea.x > 0 ? 0 : 1 ); // skip the first column when the area starts at x == 0, because the filter below reads x-1
+    const int       y0      = ( cbArea.y > 0 ? 0 : 1 ); // skip the first row when the area starts at y == 0, because the filter below reads the row above
+    const int       x1      = ( cbArea.x + cbArea.width  < cs.picture->Cb().width  ? cbArea.width  : cbArea.width  - 1 ); // drop the last column when the area reaches the right end of the Cb plane (filter reads x+1)
+    const int       y1      = ( cbArea.y + cbArea.height < cs.picture->Cb().height ? cbArea.height : cbArea.height - 1 ); // drop the last row when the area reaches the bottom end of the Cb plane (filter reads the row below)
+    const int       cbs     = orgCb.stride; // row stride used to address the rows above/below in Cb
+    const int       crs     = orgCr.stride; // row stride used to address the rows above/below in Cr
+    const Pel*      pCb     = orgCb.buf + y0 * cbs; // row pointer, starting at row y0
+    const Pel*      pCr     = orgCr.buf + y0 * crs; // row pointer, starting at row y0
+    int64_t         sumCbCr = 0; // 64-bit accumulator for the products of the filtered Cb and Cr samples
+
+    // determine inter-chroma transform sign from correlation between high-pass filtered (i.e., zero-mean) Cb and Cr planes; the 3x3 kernel weights (12 centre, -2 for the four edge neighbours, -1 for the four diagonal neighbours) sum to zero
+    for( int y = y0; y < y1; y++, pCb += cbs, pCr += crs )
+    {
+      for( int x = x0; x < x1; x++ )
+      {
+        int cb = ( 12*(int)pCb[x] - 2*((int)pCb[x-1] + (int)pCb[x+1] + (int)pCb[x-cbs] + (int)pCb[x+cbs]) - ((int)pCb[x-1-cbs] + (int)pCb[x+1-cbs] + (int)pCb[x-1+cbs] + (int)pCb[x+1+cbs]) );
+        int cr = ( 12*(int)pCr[x] - 2*((int)pCr[x-1] + (int)pCr[x+1] + (int)pCr[x-crs] + (int)pCr[x+crs]) - ((int)pCr[x-1-crs] + (int)pCr[x+1-crs] + (int)pCr[x-1+crs] + (int)pCr[x+1+crs]) );
+        sumCbCr += cb*cr;
+      }
+    }
+
+    sgnFlag = ( sumCbCr < 0 ); // flag is set when the filtered Cb and Cr samples are negatively correlated
+  }
+
+  cs.picHeader->setJointCbCrSignFlag( sgnFlag ); // the result is stored in the picture header
+}
+
+
+void EncSlice::encodeCtus( Picture* pcPic, const bool bCompressEntireSlice, const bool bFastDeltaQP, EncLib* pEncLib )
 {
-  //PROF_ACCUM_AND_START_NEW_SET( getProfilerCTU( pcPic, 0, 0 ), P_PIC_LEVEL );
-  //PROF_START( getProfilerCTU( cs.slice->isIntra(), pcPic->scheduler.getWppThreadId() ), P_PIC_LEVEL, toWSizeIdx( cs.pcv->maxCUWidth ), toHSizeIdx( cs.pcv->maxCUHeight ) );
   CodingStructure&  cs            = *pcPic->cs;
   Slice* pcSlice                  = cs.slice;
   const PreCalcValues& pcv        = *cs.pcv;
   const uint32_t        widthInCtus   = pcv.widthInCtus;
-#if HEVC_TILES_WPP
-  const TileMap&  tileMap         = *pcPic->tileMap;
-#endif
 #if ENABLE_QPA
   const int iQPIndex              = pcSlice->getSliceQpBase();
 #endif
 
-#if ENABLE_WPP_PARALLELISM
-  const int       dataId          = pcPic->scheduler.getWppDataId();
-#elif ENABLE_SPLIT_PARALLELISM
+#if ENABLE_SPLIT_PARALLELISM
   const int       dataId          = 0;
 #endif
   CABACWriter*    pCABACWriter    = pEncLib->getCABACEncoder( PARL_PARAM0( dataId ) )->getCABACEstimator( pcSlice->getSPS() );
@@ -1606,54 +1378,42 @@ void EncSlice::encodeCtus( Picture* pcPic, const bool bCompressEntireSlice, cons
   RdCost*         pRdCost         = pEncLib->getRdCost( PARL_PARAM0( dataId ) );
   EncCfg*         pCfg            = pEncLib;
   RateCtrl*       pRateCtrl       = pEncLib->getRateCtrl();
-#if ENABLE_WPP_PARALLELISM
-  // first version dont use ctx from above
-  pCABACWriter->initCtxModels( *pcSlice );
-#endif
 #if RDOQ_CHROMA_LAMBDA
   pTrQuant    ->setLambdas( pcSlice->getLambdas() );
 #else
   pTrQuant    ->setLambda ( pcSlice->getLambdas()[0] );
 #endif
   pRdCost     ->setLambda ( pcSlice->getLambdas()[0], pcSlice->getSPS()->getBitDepths() );
+#if WCG_EXT && ER_CHROMA_QP_WCG_PPS && ENABLE_QPA
+  if (!pCfg->getWCGChromaQPControl().isEnabled() && pCfg->getUsePerceptQPA() && !pCfg->getUseRateCtrl())
+  {
+    pRdCost->saveUnadjustedLambda();
+  }
+#endif
 
   int prevQP[2];
   int currQP[2];
   prevQP[0] = prevQP[1] = pcSlice->getSliceQp();
   currQP[0] = currQP[1] = pcSlice->getSliceQp();
 
-#if HEVC_DEPENDENT_SLICES
-  if( !pcSlice->getDependentSliceSegmentFlag() )
-  {
-#endif
     prevQP[0] = prevQP[1] = pcSlice->getSliceQp();
-#if HEVC_DEPENDENT_SLICES
-  }
-#endif
-  if ( pcSlice->getSPS()->getDisFracMmvdEnabledFlag() ||
+  if ( pcSlice->getSPS()->getFpelMmvdEnabledFlag() ||
       (pcSlice->getSPS()->getIBCFlag() && m_pcCuEncoder->getEncCfg()->getIBCHashSearch()))
   {
-    if (pcSlice->getSPS()->getUseReshaper() && m_pcLib->getReshaper()->getCTUFlag() && pcSlice->getSPS()->getIBCFlag())
-      cs.picture->getOrigBuf(COMPONENT_Y).rspSignal(m_pcLib->getReshaper()->getFwdLUT());
-    m_pcCuEncoder->getIbcHashMap().rebuildPicHashMap( cs.picture->getOrigBuf() );
-    if (pcSlice->getSPS()->getUseReshaper() && m_pcLib->getReshaper()->getCTUFlag() && pcSlice->getSPS()->getIBCFlag())
-      cs.picture->getOrigBuf().copyFrom(cs.picture->getTrueOrigBuf());
+    m_pcCuEncoder->getIbcHashMap().rebuildPicHashMap(cs.picture->getTrueOrigBuf());
+    if (m_pcCfg->getIntraPeriod() != -1)
+    {
+      int hashBlkHitPerc = m_pcCuEncoder->getIbcHashMap().calHashBlkMatchPerc(cs.area.Y());
+      cs.slice->setDisableSATDForRD(hashBlkHitPerc > 59);
+    }
   }
-  checkDisFracMmvd( pcPic, startCtuTsAddr, boundingCtuTsAddr );
-  // for every CTU in the slice segment (may terminate sooner if there is a byte limit on the slice-segment)
-  for( uint32_t ctuTsAddr = startCtuTsAddr; ctuTsAddr < boundingCtuTsAddr; ctuTsAddr++ )
+
+  // for every CTU in the slice
+  for( uint32_t ctuIdx = 0; ctuIdx < pcSlice->getNumCtuInSlice(); ctuIdx++ )
   {
-#if HEVC_TILES_WPP
-    const int32_t ctuRsAddr = tileMap.getCtuTsToRsAddrMap( ctuTsAddr );
-#else
-    const int32_t ctuRsAddr = ctuTsAddr;
-#endif
+    const int32_t ctuRsAddr = pcSlice->getCtuAddrInSlice( ctuIdx );
 
-#if HEVC_TILES_WPP
     // update CABAC state
-    const uint32_t firstCtuRsAddrOfTile = tileMap.tiles[tileMap.getTileIdxMap(ctuRsAddr)].getFirstCtuRsAddr();
-    const uint32_t tileXPosInCtus       = firstCtuRsAddrOfTile % widthInCtus;
-#endif
     const uint32_t ctuXPosInCtus        = ctuRsAddr % widthInCtus;
     const uint32_t ctuYPosInCtus        = ctuRsAddr / widthInCtus;
 
@@ -1662,44 +1422,32 @@ void EncSlice::encodeCtus( Picture* pcPic, const bool bCompressEntireSlice, cons
     DTRACE_UPDATE( g_trace_ctx, std::make_pair( "ctu", ctuRsAddr ) );
 
     if( pCfg->getSwitchPOC() != pcPic->poc || -1 == pCfg->getDebugCTU() )
-    if ((cs.slice->getSliceType() != I_SLICE || cs.sps->getIBCFlag()) && ctuXPosInCtus == 0)
+    if ((cs.slice->getSliceType() != I_SLICE || cs.sps->getIBCFlag()) && cs.pps->ctuIsTileColBd( ctuXPosInCtus ))
     {
       cs.motionLut.lut.resize(0);
       cs.motionLut.lutIbc.resize(0);
-      cs.motionLut.lutShare.resize(0);
-      cs.motionLut.lutShareIbc.resize(0);
     }
 
-#if ENABLE_WPP_PARALLELISM
-    pcPic->scheduler.wait( ctuXPosInCtus, ctuYPosInCtus );
-#endif
 
-#if HEVC_TILES_WPP
-    if (ctuRsAddr == firstCtuRsAddrOfTile)
+    if (cs.pps->ctuIsTileColBd( ctuXPosInCtus ) && cs.pps->ctuIsTileRowBd( ctuYPosInCtus ))
     {
       pCABACWriter->initCtxModels( *pcSlice );
+      cs.resetPrevPLT(cs.prevPLT);
       prevQP[0] = prevQP[1] = pcSlice->getSliceQp();
     }
-    else if (ctuXPosInCtus == tileXPosInCtus && pEncLib->getEntropyCodingSyncEnabledFlag())
+    else if (cs.pps->ctuIsTileColBd( ctuXPosInCtus ) && pEncLib->getEntropyCodingSyncEnabledFlag())
     {
-      // reset and then update contexts to the state at the end of the top-right CTU (if within current slice and tile).
+      // reset and then update contexts to the state at the end of the top CTU (if within current slice and tile).
       pCABACWriter->initCtxModels( *pcSlice );
-      if( cs.getCURestricted( pos.offset(pcv.maxCUWidth, -1), pcSlice->getIndependentSliceIdx(), tileMap.getTileIdxMap( pos ), CH_L ) )
+      cs.resetPrevPLT(cs.prevPLT);
+      if( cs.getCURestricted( pos.offset(0, -1), pos, pcSlice->getIndependentSliceIdx(), cs.pps->getTileIdx( pos ), CH_L ) )
       {
-        // Top-right is available, we use it.
+        // Top is available, we use it.
         pCABACWriter->getCtx() = pEncLib->m_entropyCodingSyncContextState;
       }
       prevQP[0] = prevQP[1] = pcSlice->getSliceQp();
     }
-#endif
 
-#if ENABLE_WPP_PARALLELISM
-    if( ctuXPosInCtus == 0 && ctuYPosInCtus > 0 && widthInCtus > 1 && ( pEncLib->getNumWppThreads() > 1 || pEncLib->getEnsureWppBitEqual() ) )
-    {
-      pCABACWriter->getCtx() = pEncLib->m_entropyCodingSyncContextStateVec[ctuYPosInCtus-1];  // last line
-    }
-#else
-#endif
 
 #if RDOQ_CHROMA_LAMBDA && ENABLE_QPA && !ENABLE_QPA_SUB_CTU
     double oldLambdaArray[MAX_NUM_COMPONENT] = {0.0};
@@ -1731,11 +1479,14 @@ void EncSlice::encodeCtus( Picture* pcPic, const bool bCompressEntireSlice, cons
         estQP     = Clip3( -pcSlice->getSPS()->getQpBDOffset(CHANNEL_TYPE_LUMA), MAX_QP, estQP );
 
         pRdCost->setLambda(estLambda, pcSlice->getSPS()->getBitDepths());
+#if WCG_EXT
+        pRdCost->saveUnadjustedLambda();
+#endif
 
 #if RDOQ_CHROMA_LAMBDA
-        // set lambda for RDOQ
-        const double chromaLambda = estLambda / pRdCost->getChromaWeight();
-        const double lambdaArray[MAX_NUM_COMPONENT] = { estLambda, chromaLambda, chromaLambda };
+        const double lambdaArray[MAX_NUM_COMPONENT] = {estLambda / m_pcRdCost->getDistortionWeight (COMPONENT_Y),
+                                                       estLambda / m_pcRdCost->getDistortionWeight (COMPONENT_Cb),
+                                                       estLambda / m_pcRdCost->getDistortionWeight (COMPONENT_Cr)};
         pTrQuant->setLambdas( lambdaArray );
 #else
         pTrQuant->setLambda( estLambda );
@@ -1757,8 +1508,9 @@ void EncSlice::encodeCtus( Picture* pcPic, const bool bCompressEntireSlice, cons
 #if !ENABLE_QPA_SUB_CTU
 #if RDOQ_CHROMA_LAMBDA
       pTrQuant->getLambdas (oldLambdaArray); // save the old lambdas
-      const double chromaLambda = newLambda / pRdCost->getChromaWeight();
-      const double lambdaArray[MAX_NUM_COMPONENT] = {newLambda, chromaLambda, chromaLambda};
+      const double lambdaArray[MAX_NUM_COMPONENT] = {newLambda / m_pcRdCost->getDistortionWeight (COMPONENT_Y),
+                                                     newLambda / m_pcRdCost->getDistortionWeight (COMPONENT_Cb),
+                                                     newLambda / m_pcRdCost->getDistortionWeight (COMPONENT_Cr)};
       pTrQuant->setLambdas (lambdaArray);
 #else
       pTrQuant->setLambda (newLambda);
@@ -1769,17 +1521,17 @@ void EncSlice::encodeCtus( Picture* pcPic, const bool bCompressEntireSlice, cons
     }
 #endif
 
-    bool updateGbiCodingOrder = cs.slice->getSliceType() == B_SLICE && ctuTsAddr == startCtuTsAddr;
-    if( updateGbiCodingOrder )
+    bool updateBcwCodingOrder = cs.slice->getSliceType() == B_SLICE && ctuIdx == 0;
+    if( updateBcwCodingOrder )
     {
-      resetGbiCodingOrder(false, cs);
+      resetBcwCodingOrder(false, cs);
       m_pcInterSearch->initWeightIdxBits();
     }
-    if (pcSlice->getSPS()->getUseReshaper())
+    if (pcSlice->getSPS()->getUseLmcs())
     {
       m_pcCuEncoder->setDecCuReshaperInEncCU(m_pcLib->getReshaper(), pcSlice->getSPS()->getChromaFormatIdc());
 
-#if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM
+#if ENABLE_SPLIT_PARALLELISM
       for (int jId = 1; jId < m_pcLib->getNumCuEncStacks(); jId++)
       {
         m_pcLib->getCuEncoder(jId)->setDecCuReshaperInEncCU(m_pcLib->getReshaper(jId), pcSlice->getSPS()->getChromaFormatIdc());
@@ -1792,79 +1544,34 @@ void EncSlice::encodeCtus( Picture* pcPic, const bool bCompressEntireSlice, cons
     }
 
   if (pCfg->getSwitchPOC() != pcPic->poc || ctuRsAddr >= pCfg->getDebugCTU())
-#if ENABLE_WPP_PARALLELISM
-    pEncLib->getCuEncoder( dataId )->compressCtu( cs, ctuArea, ctuRsAddr, prevQP, currQP );
-#else
     m_pcCuEncoder->compressCtu( cs, ctuArea, ctuRsAddr, prevQP, currQP );
-#endif
 
 #if K0149_BLOCK_STATISTICS
     getAndStoreBlockStatistics(cs, ctuArea);
 #endif
 
     pCABACWriter->resetBits();
-    pCABACWriter->coding_tree_unit( cs, ctuArea, prevQP, ctuRsAddr, true );
+    pCABACWriter->coding_tree_unit( cs, ctuArea, prevQP, ctuRsAddr, true, true );
     const int numberOfWrittenBits = int( pCABACWriter->getEstFracBits() >> SCALE_BITS );
 
-    // Calculate if this CTU puts us over slice bit size.
-    // cannot terminate if current slice/slice-segment would be 0 Ctu in size,
-    const uint32_t validEndOfSliceCtuTsAddr = ctuTsAddr + (ctuTsAddr == startCtuTsAddr ? 1 : 0);
-    // Set slice end parameter
-    if(pcSlice->getSliceMode()==FIXED_NUMBER_OF_BYTES && pcSlice->getSliceBits()+numberOfWrittenBits > (pcSlice->getSliceArgument()<<3))
-    {
-#if HEVC_DEPENDENT_SLICES
-      pcSlice->setSliceSegmentCurEndCtuTsAddr(validEndOfSliceCtuTsAddr);
-#endif
-      pcSlice->setSliceCurEndCtuTsAddr(validEndOfSliceCtuTsAddr);
-      boundingCtuTsAddr=validEndOfSliceCtuTsAddr;
-    }
-#if HEVC_DEPENDENT_SLICES
-    else if((!bCompressEntireSlice) && pcSlice->getSliceSegmentMode()==FIXED_NUMBER_OF_BYTES && pcSlice->getSliceSegmentBits()+numberOfWrittenBits > (pcSlice->getSliceSegmentArgument()<<3))
-    {
-      pcSlice->setSliceSegmentCurEndCtuTsAddr(validEndOfSliceCtuTsAddr);
-      boundingCtuTsAddr=validEndOfSliceCtuTsAddr;
-    }
-#endif
-    if (boundingCtuTsAddr <= ctuTsAddr)
-    {
-      break;
-    }
-
-#if ENABLE_WPP_PARALLELISM || ENABLE_SPLIT_PARALLELISM
+#if ENABLE_SPLIT_PARALLELISM
 #pragma omp critical
 #endif
     pcSlice->setSliceBits( ( uint32_t ) ( pcSlice->getSliceBits() + numberOfWrittenBits ) );
-#if ENABLE_WPP_PARALLELISM || ENABLE_SPLIT_PARALLELISM
+#if ENABLE_SPLIT_PARALLELISM
 #pragma omp critical
 #endif
-#if HEVC_DEPENDENT_SLICES
-    pcSlice->setSliceSegmentBits( pcSlice->getSliceSegmentBits() + numberOfWrittenBits );
-#endif
 
-#if HEVC_TILES_WPP
-    // Store probabilities of second CTU in line into buffer - used only if wavefront-parallel-processing is enabled.
-    if( ctuXPosInCtus == tileXPosInCtus + 1 && pEncLib->getEntropyCodingSyncEnabledFlag() )
+    // Store the probabilities of the first CTU in the line in a buffer; used only if wavefront parallel processing is enabled.
+    if( cs.pps->ctuIsTileColBd( ctuXPosInCtus ) && pEncLib->getEntropyCodingSyncEnabledFlag() )
     {
       pEncLib->m_entropyCodingSyncContextState = pCABACWriter->getCtx();
     }
-#endif
-#if ENABLE_WPP_PARALLELISM
-    if( ctuXPosInCtus == 1 && ( pEncLib->getNumWppThreads() > 1 || pEncLib->getEnsureWppBitEqual() ) )
-    {
-      pEncLib->m_entropyCodingSyncContextStateVec[ctuYPosInCtus] = pCABACWriter->getCtx();
-    }
-#endif
 
-#if !ENABLE_WPP_PARALLELISM
     int actualBits = int(cs.fracBits >> SCALE_BITS);
     actualBits    -= (int)m_uiPicTotalBits;
-#endif
     if ( pCfg->getUseRateCtrl() )
     {
-#if ENABLE_WPP_PARALLELISM
-      int actualBits      = int( cs.fracBits >> SCALE_BITS );
-      actualBits         -= (int)m_uiPicTotalBits;
-#endif
       int actualQP        = g_RCInvalidQPValue;
       double actualLambda = pRdCost->getLambda();
       int numberOfEffectivePixels    = 0;
@@ -1910,13 +1617,8 @@ void EncSlice::encodeCtus( Picture* pcPic, const bool bCompressEntireSlice, cons
     }
 #endif
 
-#if !ENABLE_WPP_PARALLELISM
     m_uiPicTotalBits += actualBits;
     m_uiPicDist       = cs.dist;
-#endif
-#if ENABLE_WPP_PARALLELISM
-    pcPic->scheduler.setReady( ctuXPosInCtus, ctuYPosInCtus );
-#endif
   }
 
   // this is wpp exclusive section
@@ -1930,20 +1632,7 @@ void EncSlice::encodeSlice   ( Picture* pcPic, OutputBitstream* pcSubstreams, ui
 {
 
   Slice *const pcSlice               = pcPic->slices[getSliceSegmentIdx()];
-#if HEVC_TILES_WPP
-  const TileMap& tileMap             = *pcPic->tileMap;
-#endif
-#if HEVC_DEPENDENT_SLICES
-  const uint32_t startCtuTsAddr          = pcSlice->getSliceSegmentCurStartCtuTsAddr();
-  const uint32_t boundingCtuTsAddr       = pcSlice->getSliceSegmentCurEndCtuTsAddr();
-  const bool depSliceSegmentsEnabled = pcSlice->getPPS()->getDependentSliceSegmentsEnabledFlag();
-#else
-  const uint32_t startCtuTsAddr          = pcSlice->getSliceCurStartCtuTsAddr();
-  const uint32_t boundingCtuTsAddr       = pcSlice->getSliceCurEndCtuTsAddr();
-#endif
-#if HEVC_TILES_WPP
   const bool wavefrontsEnabled       = pcSlice->getPPS()->getEntropyCodingSyncEnabledFlag();
-#endif
 
 
   // setup coding structure
@@ -1954,65 +1643,18 @@ void EncSlice::encodeSlice   ( Picture* pcPic, OutputBitstream* pcSubstreams, ui
 
   DTRACE( g_trace_ctx, D_HEADER, "=========== POC: %d ===========\n", pcSlice->getPOC() );
 
-#if HEVC_DEPENDENT_SLICES
-  if (depSliceSegmentsEnabled)
-  {
-#if HEVC_TILES_WPP
-    // modify initial contexts with previous slice segment if this is a dependent slice.
-    const uint32_t ctuRsAddr            = tileMap.getCtuTsToRsAddrMap( startCtuTsAddr );
-    const uint32_t currentTileIdx       = tileMap.getTileIdxMap(ctuRsAddr);
-    const Tile& currentTile         = tileMap.tiles[currentTileIdx];
-    const uint32_t firstCtuRsAddrOfTile = currentTile.getFirstCtuRsAddr();
-
-    if( pcSlice->getDependentSliceSegmentFlag() && ctuRsAddr != firstCtuRsAddrOfTile )
-    {
-      if( currentTile.getTileWidthInCtus() >= 2 || !wavefrontsEnabled )
-      {
-        m_CABACWriter->getCtx() = m_lastSliceSegmentEndContextState;
-      }
-    }
-#else
-  // KJS: not sure if this works (but both dep slices and tiles shall be removed in VTM, so this code should not be used)
-  if( pcSlice->getDependentSliceSegmentFlag() && ctuRsAddr != startCtuTsAddr )
-  {
-    if( pcPic->cs->pcv->widthInCtus >= 2 || !m_pcCfg->getEntropyCodingSyncEnabledFlag() )
-    {
-        m_CABACWriter->getCtx() = m_lastSliceSegmentEndContextState;
-    }
-#endif
-  }
-
-  if( !pcSlice->getDependentSliceSegmentFlag() )
-  {
-#endif
     pcPic->m_prevQP[0] = pcPic->m_prevQP[1] = pcSlice->getSliceQp();
-#if HEVC_DEPENDENT_SLICES
-  }
-#endif
 
   const PreCalcValues& pcv = *cs.pcv;
   const uint32_t widthInCtus   = pcv.widthInCtus;
+  uint32_t uiSubStrm = 0;
 
-  // for every CTU in the slice segment...
-
-  for( uint32_t ctuTsAddr = startCtuTsAddr; ctuTsAddr < boundingCtuTsAddr; ctuTsAddr++ )
+  // for every CTU in the slice...
+  for( uint32_t ctuIdx = 0; ctuIdx < pcSlice->getNumCtuInSlice(); ctuIdx++ )
   {
-#if HEVC_TILES_WPP
-    const uint32_t ctuRsAddr            = tileMap.getCtuTsToRsAddrMap(ctuTsAddr);
-    const Tile& currentTile         = tileMap.tiles[tileMap.getTileIdxMap(ctuRsAddr)];
-    const uint32_t firstCtuRsAddrOfTile = currentTile.getFirstCtuRsAddr();
-    const uint32_t tileXPosInCtus       = firstCtuRsAddrOfTile % widthInCtus;
-    const uint32_t tileYPosInCtus       = firstCtuRsAddrOfTile / widthInCtus;
-#else
-    const uint32_t ctuRsAddr            = ctuTsAddr;
-#endif
+    const uint32_t ctuRsAddr = pcSlice->getCtuAddrInSlice( ctuIdx );
     const uint32_t ctuXPosInCtus        = ctuRsAddr % widthInCtus;
     const uint32_t ctuYPosInCtus        = ctuRsAddr / widthInCtus;
-#if HEVC_TILES_WPP
-    const uint32_t uiSubStrm            = tileMap.getSubstreamForCtuAddr(ctuRsAddr, true, pcSlice);
-#else
-    const uint32_t uiSubStrm            = 0;
-#endif
 
     DTRACE_UPDATE( g_trace_ctx, std::make_pair( "ctu", ctuRsAddr ) );
 
@@ -2020,82 +1662,66 @@ void EncSlice::encodeSlice   ( Picture* pcPic, OutputBitstream* pcSubstreams, ui
     const UnitArea ctuArea (cs.area.chromaFormat, Area(pos.x, pos.y, pcv.maxCUWidth, pcv.maxCUHeight));
     m_CABACWriter->initBitstream( &pcSubstreams[uiSubStrm] );
 
-#if HEVC_TILES_WPP
     // set up CABAC contexts' state for this CTU
-    if (ctuRsAddr == firstCtuRsAddrOfTile)
+    if ( cs.pps->ctuIsTileColBd( ctuXPosInCtus ) && cs.pps->ctuIsTileRowBd( ctuYPosInCtus ) )
     {
-      if (ctuTsAddr != startCtuTsAddr) // if it is the first CTU, then the entropy coder has already been reset
+      if (ctuIdx != 0) // if it is the first CTU, then the entropy coder has already been reset
       {
         m_CABACWriter->initCtxModels( *pcSlice );
+        cs.resetPrevPLT(cs.prevPLT);
       }
     }
-    else if (ctuXPosInCtus == tileXPosInCtus && wavefrontsEnabled)
+    else if (cs.pps->ctuIsTileColBd( ctuXPosInCtus ) && wavefrontsEnabled)
     {
-      // Synchronize cabac probabilities with upper-right CTU if it's available and at the start of a line.
-      if (ctuTsAddr != startCtuTsAddr) // if it is the first CTU, then the entropy coder has already been reset
+      // Synchronize cabac probabilities with upper CTU if it's available and at the start of a line.
+      if (ctuIdx != 0) // if it is the first CTU, then the entropy coder has already been reset
       {
         m_CABACWriter->initCtxModels( *pcSlice );
+        cs.resetPrevPLT(cs.prevPLT);
       }
-      if( cs.getCURestricted( pos.offset( pcv.maxCUWidth, -1 ), pcSlice->getIndependentSliceIdx(), tileMap.getTileIdxMap( pos ), CH_L ) )
+      if( cs.getCURestricted( pos.offset( 0, -1 ), pos, pcSlice->getIndependentSliceIdx(), cs.pps->getTileIdx( pos ), CH_L ) )
       {
-        // Top-right is available, so use it.
+        // Top is available, so use it.
         m_CABACWriter->getCtx() = m_entropyCodingSyncContextState;
       }
     }
-#endif
 
-    bool updateGbiCodingOrder = cs.slice->getSliceType() == B_SLICE && ctuTsAddr == startCtuTsAddr;
-    if( updateGbiCodingOrder )
+    bool updateBcwCodingOrder = cs.slice->getSliceType() == B_SLICE && ctuIdx == 0;
+    if( updateBcwCodingOrder )
     {
-      resetGbiCodingOrder(false, cs);
+      resetBcwCodingOrder(false, cs);
     }
 
     m_CABACWriter->coding_tree_unit( cs, ctuArea, pcPic->m_prevQP, ctuRsAddr );
 
-#if HEVC_TILES_WPP
-    // store probabilities of second CTU in line into buffer
-    if( ctuXPosInCtus == tileXPosInCtus + 1 && wavefrontsEnabled )
+    // store probabilities of first CTU in line into buffer
+    if( cs.pps->ctuIsTileColBd( ctuXPosInCtus ) && wavefrontsEnabled )
     {
       m_entropyCodingSyncContextState = m_CABACWriter->getCtx();
     }
-#endif
 
     // terminate the sub-stream, if required (end of slice-segment, end of tile, end of wavefront-CTU-row):
-#if HEVC_TILES_WPP
-    if( ctuTsAddr + 1 == boundingCtuTsAddr ||
-         (  ctuXPosInCtus + 1 == tileXPosInCtus + currentTile.getTileWidthInCtus () &&
-          ( ctuYPosInCtus + 1 == tileYPosInCtus + currentTile.getTileHeightInCtus() || wavefrontsEnabled )
-         )
-       )
-#else
-    if( ctuTsAddr + 1 == boundingCtuTsAddr )
-#endif
+    bool isLastCTUsinSlice = ctuIdx == pcSlice->getNumCtuInSlice()-1;
+    bool isLastCTUinTile  = !isLastCTUsinSlice && cs.pps->getTileIdx( ctuRsAddr ) != cs.pps->getTileIdx( pcSlice->getCtuAddrInSlice( ctuIdx + 1 ) );
+    bool isLastCTUinWPP    = !isLastCTUsinSlice && !isLastCTUinTile && wavefrontsEnabled && cs.pps->ctuIsTileColBd( pcSlice->getCtuAddrInSlice( ctuIdx + 1 ) % cs.pps->getPicWidthInCtu() );
+    if (isLastCTUsinSlice || isLastCTUinTile || isLastCTUinWPP )         // this the the last CTU of the slice, tile, or WPP
     {
-      m_CABACWriter->end_of_slice();
+      m_CABACWriter->end_of_slice();  // end_of_slice_one_bit, end_of_tile_one_bit, or end_of_subset_one_bit
 
       // Byte-alignment in slice_data() when new tile
       pcSubstreams[uiSubStrm].writeByteAlignment();
 
-      // write sub-stream size
-      if( ctuTsAddr + 1 != boundingCtuTsAddr )
+      if (!isLastCTUsinSlice) //Byte alignment only when it is not the last substream in the slice
       {
-        pcSlice->addSubstreamSize( (pcSubstreams[uiSubStrm].getNumberOfWrittenBits() >> 3) + pcSubstreams[uiSubStrm].countStartCodeEmulations() );
+        // write sub-stream size
+        pcSlice->addSubstreamSize((pcSubstreams[uiSubStrm].getNumberOfWrittenBits() >> 3) + pcSubstreams[uiSubStrm].countStartCodeEmulations());
       }
+      uiSubStrm++;
     }
   } // CTU-loop
 
-#if HEVC_DEPENDENT_SLICES
-  if( depSliceSegmentsEnabled )
-  {
-    m_lastSliceSegmentEndContextState = m_CABACWriter->getCtx();//ctx end of dep.slice
-  }
-#endif
 
-#if HEVC_DEPENDENT_SLICES
-  if (pcSlice->getPPS()->getCabacInitPresentFlag() && !pcSlice->getPPS()->getDependentSliceSegmentsEnabledFlag())
-#else
   if(pcSlice->getPPS()->getCabacInitPresentFlag())
-#endif
   {
     m_encCABACTableIdx = m_CABACWriter->getCtxInitId( *pcSlice );
   }
@@ -2107,160 +1733,6 @@ void EncSlice::encodeSlice   ( Picture* pcPic, OutputBitstream* pcSubstreams, ui
 
 }
 
-#if HEVC_TILES_WPP
-void EncSlice::calculateBoundingCtuTsAddrForSlice(uint32_t &startCtuTSAddrSlice, uint32_t &boundingCtuTSAddrSlice, bool &haveReachedTileBoundary,
-                                                   Picture* pcPic, const int sliceMode, const int sliceArgument)
-#else
-void EncSlice::calculateBoundingCtuTsAddrForSlice(uint32_t &startCtuTSAddrSlice, uint32_t &boundingCtuTSAddrSlice,
-                                                   Picture* pcPic, const int sliceMode, const int sliceArgument)
-#endif
-{
-#if HEVC_TILES_WPP
-  Slice* pcSlice = pcPic->slices[getSliceSegmentIdx()];
-  const TileMap& tileMap = *( pcPic->tileMap );
-  const PPS &pps         = *( pcSlice->getPPS() );
-#endif
-  const uint32_t numberOfCtusInFrame = pcPic->cs->pcv->sizeInCtus;
-  boundingCtuTSAddrSlice=0;
-#if HEVC_TILES_WPP
-  haveReachedTileBoundary=false;
-#endif
-
-  switch (sliceMode)
-  {
-    case FIXED_NUMBER_OF_CTU:
-      {
-        uint32_t ctuAddrIncrement    = sliceArgument;
-        boundingCtuTSAddrSlice  = ((startCtuTSAddrSlice + ctuAddrIncrement) < numberOfCtusInFrame) ? (startCtuTSAddrSlice + ctuAddrIncrement) : numberOfCtusInFrame;
-      }
-      break;
-    case FIXED_NUMBER_OF_BYTES:
-      boundingCtuTSAddrSlice  = numberOfCtusInFrame; // This will be adjusted later if required.
-      break;
-#if HEVC_TILES_WPP
-    case FIXED_NUMBER_OF_TILES:
-      {
-        const uint32_t tileIdx        = tileMap.getTileIdxMap( tileMap.getCtuTsToRsAddrMap(startCtuTSAddrSlice) );
-        const uint32_t tileTotalCount = (pps.getNumTileColumnsMinus1()+1) * (pps.getNumTileRowsMinus1()+1);
-        uint32_t ctuAddrIncrement   = 0;
-
-        for(uint32_t tileIdxIncrement = 0; tileIdxIncrement < sliceArgument; tileIdxIncrement++)
-        {
-          if((tileIdx + tileIdxIncrement) < tileTotalCount)
-          {
-            uint32_t tileWidthInCtus    = tileMap.tiles[tileIdx + tileIdxIncrement].getTileWidthInCtus();
-            uint32_t tileHeightInCtus   = tileMap.tiles[tileIdx + tileIdxIncrement].getTileHeightInCtus();
-            ctuAddrIncrement       += (tileWidthInCtus * tileHeightInCtus);
-          }
-        }
-
-        boundingCtuTSAddrSlice  = ((startCtuTSAddrSlice + ctuAddrIncrement) < numberOfCtusInFrame) ? (startCtuTSAddrSlice + ctuAddrIncrement) : numberOfCtusInFrame;
-      }
-      break;
-#endif
-    default:
-      boundingCtuTSAddrSlice    = numberOfCtusInFrame;
-      break;
-  }
-
-#if HEVC_TILES_WPP
-  // Adjust for tiles and wavefronts.
-  const bool wavefrontsAreEnabled = pps.getEntropyCodingSyncEnabledFlag();
-
-  if ((sliceMode == FIXED_NUMBER_OF_CTU || sliceMode == FIXED_NUMBER_OF_BYTES) &&
-      (pps.getNumTileRowsMinus1() > 0 || pps.getNumTileColumnsMinus1() > 0))
-  {
-    const uint32_t ctuRsAddr                   = tileMap.getCtuTsToRsAddrMap(startCtuTSAddrSlice);
-    const uint32_t startTileIdx                = tileMap.getTileIdxMap(ctuRsAddr);
-    const Tile& startingTile               = tileMap.tiles[startTileIdx];
-    const uint32_t  tileStartTsAddr            = tileMap.getCtuRsToTsAddrMap(startingTile.getFirstCtuRsAddr());
-    const uint32_t  tileStartWidth             = startingTile.getTileWidthInCtus();
-    const uint32_t  tileStartHeight            = startingTile.getTileHeightInCtus();
-    const uint32_t tileLastTsAddr_excl        = tileStartTsAddr + tileStartWidth*tileStartHeight;
-    const uint32_t tileBoundingCtuTsAddrSlice = tileLastTsAddr_excl;
-    const uint32_t ctuColumnOfStartingTile     = ((startCtuTSAddrSlice-tileStartTsAddr)%tileStartWidth);
-    if (wavefrontsAreEnabled && ctuColumnOfStartingTile!=0)
-    {
-      // WPP: if a slice does not start at the beginning of a CTB row, it must end within the same CTB row
-      const uint32_t numberOfCTUsToEndOfRow            = tileStartWidth - ctuColumnOfStartingTile;
-      const uint32_t wavefrontTileBoundingCtuAddrSlice = startCtuTSAddrSlice + numberOfCTUsToEndOfRow;
-      if (wavefrontTileBoundingCtuAddrSlice < boundingCtuTSAddrSlice)
-      {
-        boundingCtuTSAddrSlice = wavefrontTileBoundingCtuAddrSlice;
-      }
-    }
-
-    if (tileBoundingCtuTsAddrSlice < boundingCtuTSAddrSlice)
-    {
-      boundingCtuTSAddrSlice = tileBoundingCtuTsAddrSlice;
-      haveReachedTileBoundary = true;
-    }
-  }
-  else if ((sliceMode == FIXED_NUMBER_OF_CTU || sliceMode == FIXED_NUMBER_OF_BYTES) && wavefrontsAreEnabled && ((startCtuTSAddrSlice % pcPic->cs->pcv->widthInCtus) != 0))
-  {
-    // Adjust for wavefronts (no tiles).
-    // WPP: if a slice does not start at the beginning of a CTB row, it must end within the same CTB row
-    boundingCtuTSAddrSlice = std::min(boundingCtuTSAddrSlice, startCtuTSAddrSlice - (startCtuTSAddrSlice % pcPic->cs->pcv->widthInCtus) + (pcPic->cs->pcv->widthInCtus));
-  }
-#endif
-}
-
-/** Determines the starting and bounding CTU address of current slice / dependent slice
- * \param [out] startCtuTsAddr
- * \param [out] boundingCtuTsAddr
- * \param [in]  pcPic
-
- * Updates startCtuTsAddr, boundingCtuTsAddr with appropriate CTU address
- */
-void EncSlice::xDetermineStartAndBoundingCtuTsAddr  ( uint32_t& startCtuTsAddr, uint32_t& boundingCtuTsAddr, Picture* pcPic )
-{
-  Slice* pcSlice                 = pcPic->slices[getSliceSegmentIdx()];
-
-  // Non-dependent slice
-  uint32_t startCtuTsAddrSlice           = pcSlice->getSliceCurStartCtuTsAddr();
-#if HEVC_TILES_WPP
-  bool haveReachedTileBoundarySlice  = false;
-#endif
-  uint32_t boundingCtuTsAddrSlice;
-#if HEVC_TILES_WPP
-  calculateBoundingCtuTsAddrForSlice(startCtuTsAddrSlice, boundingCtuTsAddrSlice, haveReachedTileBoundarySlice, pcPic,
-                                     m_pcCfg->getSliceMode(), m_pcCfg->getSliceArgument());
-#else
-  calculateBoundingCtuTsAddrForSlice(startCtuTsAddrSlice, boundingCtuTsAddrSlice, pcPic,
-                                     m_pcCfg->getSliceMode(), m_pcCfg->getSliceArgument());
-#endif
-  pcSlice->setSliceCurEndCtuTsAddr(   boundingCtuTsAddrSlice );
-  pcSlice->setSliceCurStartCtuTsAddr( startCtuTsAddrSlice    );
-
-#if HEVC_DEPENDENT_SLICES
-  // Dependent slice
-  uint32_t startCtuTsAddrSliceSegment          = pcSlice->getSliceSegmentCurStartCtuTsAddr();
-#if HEVC_TILES_WPP
-  bool haveReachedTileBoundarySliceSegment = false;
-#endif
-  uint32_t boundingCtuTsAddrSliceSegment;
-#if HEVC_TILES_WPP
-  calculateBoundingCtuTsAddrForSlice(startCtuTsAddrSliceSegment, boundingCtuTsAddrSliceSegment, haveReachedTileBoundarySliceSegment, pcPic,
-                                     m_pcCfg->getSliceSegmentMode(), m_pcCfg->getSliceSegmentArgument());
-#else
-  calculateBoundingCtuTsAddrForSlice(startCtuTsAddrSliceSegment, boundingCtuTsAddrSliceSegment, pcPic,
-                                     m_pcCfg->getSliceSegmentMode(), m_pcCfg->getSliceSegmentArgument());
-#endif
-  if (boundingCtuTsAddrSliceSegment>boundingCtuTsAddrSlice)
-  {
-    boundingCtuTsAddrSliceSegment = boundingCtuTsAddrSlice;
-  }
-  pcSlice->setSliceSegmentCurEndCtuTsAddr( boundingCtuTsAddrSliceSegment );
-  pcSlice->setSliceSegmentCurStartCtuTsAddr(startCtuTsAddrSliceSegment);
-
-  // Make a joint decision based on reconstruction and dependent slice bounds
-  startCtuTsAddr    = std::max(startCtuTsAddrSlice, startCtuTsAddrSliceSegment);
-  boundingCtuTsAddr = boundingCtuTsAddrSliceSegment;
-#else
-  startCtuTsAddr = startCtuTsAddrSlice;
-  boundingCtuTsAddr = boundingCtuTsAddrSlice;
-#endif
-}
 
 double EncSlice::xGetQPValueAccordingToLambda ( double lambda )
 {
diff --git a/source/Lib/EncoderLib/EncSlice.h b/source/Lib/EncoderLib/EncSlice.h
index 3b802180ef35b0b7902c35a3710e2374bfc1c54c..ed88068075c5a87db40fb86e6750f62e394f6937 100644
--- a/source/Lib/EncoderLib/EncSlice.h
+++ b/source/Lib/EncoderLib/EncSlice.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -90,33 +90,20 @@ private:
   std::vector<int>        m_viRdPicQp;                          ///< array of picture QP candidates (int-type)
   RateCtrl*               m_pcRateCtrl;                         ///< Rate control manager
   uint32_t                    m_uiSliceSegmentIdx;
-#if HEVC_DEPENDENT_SLICES
-  Ctx                     m_lastSliceSegmentEndContextState;    ///< context storage for state at the end of the previous slice-segment (used for dependent slices only).
-#endif
-#if HEVC_TILES_WPP
   Ctx                     m_entropyCodingSyncContextState;      ///< context storage for state of contexts at the wavefront/WPP/entropy-coding-sync second CTU of tile-row
-#endif
   SliceType               m_encCABACTableIdx;
-#if SHARP_LUMA_DELTA_QP
+#if SHARP_LUMA_DELTA_QP || ENABLE_QPA_SUB_CTU
   int                     m_gopID;
 #endif
 
-#if SHARP_LUMA_DELTA_QP
 public:
-  int getGopId()        const { return m_gopID; }
-  double  calculateLambda( const Slice* slice, const int GOPid, const int depth, const double refQP, const double dQP, int &iQP );
-  void    setUpLambda( Slice* slice, const double dLambda, int iQP );
-
-private:
+  double  initializeLambda(const Slice* slice, const int GOPid, const int refQP, const double dQP); // called by calculateLambda() and updateLambda()
+#if SHARP_LUMA_DELTA_QP || ENABLE_QPA_SUB_CTU
+  int     getGopId() const { return m_gopID; }
+  double  calculateLambda( const Slice* slice, const int GOPid, const double refQP, const double dQP, int &iQP );
 #endif
-#if HEVC_TILES_WPP
-  void    calculateBoundingCtuTsAddrForSlice( uint32_t &startCtuTSAddrSlice, uint32_t &boundingCtuTSAddrSlice, bool &haveReachedTileBoundary, Picture* pcPic, const int sliceMode, const int sliceArgument );
-#else
-  void    calculateBoundingCtuTsAddrForSlice( uint32_t &startCtuTSAddrSlice, uint32_t &boundingCtuTSAddrSlice, Picture* pcPic, const int sliceMode, const int sliceArgument );
-#endif
-
+  void    setUpLambda( Slice* slice, const double dLambda, int iQP );
 
-public:
 #if ENABLE_QPA
   int                     m_adaptedLumaQP;
 
@@ -142,17 +129,14 @@ public:
   void    calCostSliceI       ( Picture* pcPic );
 
   void    encodeSlice         ( Picture* pcPic, OutputBitstream* pcSubstreams, uint32_t &numBinsCoded );
-#if ENABLE_WPP_PARALLELISM
-  static
-#endif
-  void    encodeCtus          ( Picture* pcPic, const bool bCompressEntireSlice, const bool bFastDeltaQP, uint32_t startCtuTsAddr, uint32_t boundingCtuTsAddr, EncLib* pcEncLib );
+  void    encodeCtus          ( Picture* pcPic, const bool bCompressEntireSlice, const bool bFastDeltaQP, EncLib* pcEncLib );
   void    checkDisFracMmvd    ( Picture* pcPic, uint32_t startCtuTsAddr, uint32_t boundingCtuTsAddr );
+  void    setJointCbCrModes( CodingStructure& cs, const Position topLeftLuma, const Size sizeLuma );
 
   // misc. functions
   void    setSearchRange      ( Slice* pcSlice  );                                  ///< set ME range adaptively
 
   EncCu*  getCUEncoder        ()                    { return m_pcCuEncoder; }                        ///< CU encoder
-  void    xDetermineStartAndBoundingCtuTsAddr  ( uint32_t& startCtuTsAddr, uint32_t& boundingCtuTsAddr, Picture* pcPic );
   uint32_t    getSliceSegmentIdx  ()                    { return m_uiSliceSegmentIdx;       }
   void    setSliceSegmentIdx  (uint32_t i)              { m_uiSliceSegmentIdx = i;          }
 
diff --git a/source/Lib/EncoderLib/EncTemporalFilter.cpp b/source/Lib/EncoderLib/EncTemporalFilter.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..c9c74336ff102cc7074193b3fbf0262267fea1ab
--- /dev/null
+++ b/source/Lib/EncoderLib/EncTemporalFilter.cpp
@@ -0,0 +1,626 @@
+/* The copyright in this software is being made available under the BSD
+* License, included below. This software may be subject to other third party
+* and contributor rights, including patent rights, and no such rights are
+* granted under this license.
+*
+* Copyright (c) 2010-2020, ITU/ISO/IEC
+* All rights reserved.
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are met:
+*
+*  * Redistributions of source code must retain the above copyright notice,
+*    this list of conditions and the following disclaimer.
+*  * Redistributions in binary form must reproduce the above copyright notice,
+*    this list of conditions and the following disclaimer in the documentation
+*    and/or other materials provided with the distribution.
+*  * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
+*    be used to endorse or promote products derived from this software without
+*    specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
+* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+* THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/** \file     EncTemporalFilter.cpp
+\brief    EncTemporalFilter class
+*/
+
+#include "EncTemporalFilter.h"
+#include <math.h>
+
+
+// ====================================================================================================================
+// Constructor / destructor / initialization / destroy
+// ====================================================================================================================
+
+const int EncTemporalFilter::m_range = 2;                 // frames examined on each side of the filtered frame (see filter())
+const double EncTemporalFilter::m_chromaFactor = 0.55;    // NOTE(review): consumer not visible here; presumably scales the chroma filter strength - confirm
+const double EncTemporalFilter::m_sigmaMultiplier = 9.0;  // NOTE(review): consumer not visible here; presumably part of a QP-dependent sigma - confirm
+const double EncTemporalFilter::m_sigmaZeroPoint = 10.0;  // NOTE(review): consumer not visible here; presumably part of a QP-dependent sigma - confirm
+const int EncTemporalFilter::m_motionVectorFactor = 16;   // motion vectors are expressed in 1/16 sample units
+const int EncTemporalFilter::m_padding = 128;             // border width requested for every picture buffer and filled by extendBorderPel()
+const int EncTemporalFilter::m_interpolationFilter[16][8] = // one row per 1/16 phase, each row sums to 64; motionErrorLuma() reads taps 1..6 only
+{
+  {   0,   0,   0,  64,   0,   0,   0,   0 },   //0
+  {   0,   1,  -3,  64,   4,  -2,   0,   0 },   //1 -->-->
+  {   0,   1,  -6,  62,   9,  -3,   1,   0 },   //2 -->
+  {   0,   2,  -8,  60,  14,  -5,   1,   0 },   //3 -->-->
+  {   0,   2,  -9,  57,  19,  -7,   2,   0 },   //4
+  {   0,   3, -10,  53,  24,  -8,   2,   0 },   //5 -->-->
+  {   0,   3, -11,  50,  29,  -9,   2,   0 },   //6 -->
+  {   0,   3, -11,  44,  35, -10,   3,   0 },   //7 -->-->
+  {   0,   1,  -7,  38,  38,  -7,   1,   0 },   //8
+  {   0,   3, -10,  35,  44, -11,   3,   0 },   //9 -->-->
+  {   0,   2,  -9,  29,  50, -11,   3,   0 },   //10-->
+  {   0,   2,  -8,  24,  53, -10,   3,   0 },   //11-->-->
+  {   0,   2,  -7,  19,  57,  -9,   2,   0 },   //12
+  {   0,   1,  -5,  14,  60,  -8,   2,   0 },   //13-->-->
+  {   0,   1,  -3,   9,  62,  -6,   1,   0 },   //14-->
+  {   0,   0,  -2,   4,  64,  -3,   1,   0 }    //15-->-->
+};
+
+const double EncTemporalFilter::m_refStrengths[3][2] = // NOTE(review): consumer not visible here; rows appear to be chosen by the number of available reference frames - confirm
+{ // abs(POC offset)
+  //  1,    2
+  {0.85, 0.60},  // m_range * 2
+  {1.20, 1.00},  // m_range
+  {0.30, 0.30}   // otherwise
+};
+
+EncTemporalFilter::EncTemporalFilter()   // placeholder values only; the real configuration is supplied by init()
+  : m_FrameSkip{ 0 }
+  , m_chromaFormatIDC{ NUM_CHROMA_FORMAT }
+  , m_sourceWidth{ 0 }
+  , m_sourceHeight{ 0 }
+  , m_QP{ 0 }
+  , m_clipInputVideoToRec709Range{ false }
+  , m_inputColourSpaceConvert{ NUMBER_INPUT_COLOUR_SPACE_CONVERSIONS }
+{}
+
+/** Stores the settings that filter() works with; plain copies only, no file is opened here. */
+void EncTemporalFilter::init(const int frameSkip,
+  const int inputBitDepth[MAX_NUM_CHANNEL_TYPE],
+  const int msbExtendedBitDepth[MAX_NUM_CHANNEL_TYPE],
+  const int internalBitDepth[MAX_NUM_CHANNEL_TYPE],
+  const int width, const int height,
+  const int *pad,
+  const bool rec709,
+  const std::string &filename,
+  const ChromaFormat inputChromaFormatIDC,
+  const InputColourSpaceConversion colorSpaceConv,
+  const int qp,
+  const std::map<int, double> &temporalFilterStrengths,
+  const bool gopBasedTemporalFilterFutureReference)
+{
+  // description of the input file and of how its samples are read
+  m_inputFileName               = filename;
+  m_FrameSkip                   = frameSkip;
+  m_chromaFormatIDC             = inputChromaFormatIDC;
+  m_inputColourSpaceConvert     = colorSpaceConv;
+  m_clipInputVideoToRec709Range = rec709;
+  for (int ch = 0; ch < MAX_NUM_CHANNEL_TYPE; ++ch)
+  {
+    m_inputBitDepth[ch]       = inputBitDepth[ch];
+    m_MSBExtendedBitDepth[ch] = msbExtendedBitDepth[ch];
+    m_internalBitDepth[ch]    = internalBitDepth[ch];
+  }
+  // picture geometry; filter() subtracts pad[0] from the width and pad[1] from the height when skipping frames
+  m_sourceWidth  = width;
+  m_sourceHeight = height;
+  m_area         = Area(0, 0, width, height);
+  m_pad[0]       = pad[0];
+  m_pad[1]       = pad[1];
+  // filter control
+  m_QP                                    = qp;
+  m_temporalFilterStrengths               = temporalFilterStrengths;
+  m_gopBasedTemporalFilterFutureReference = gopBasedTemporalFilterFutureReference;
+}
+
+// ====================================================================================================================
+// Public member functions
+// ====================================================================================================================
+
+/** Temporally filters one source picture in place, using neighbouring frames read from the input file.
+ *  Steps: decide whether this POC is filtered, read up to m_range frames before (and optionally after)
+ *  it, estimate the motion of each of them towards the original and blend them in bilateralFilter().
+ *  The input file is opened again on every call that actually filters a picture.
+ *  \param orgPic       picture to filter; overwritten with the filtered result when true is returned
+ *  \param receivedPoc  POC of orgPic; m_FrameSkip is added to obtain its frame index in the input file
+ *  \return true if orgPic was filtered, false if the filter does not apply to this QP/POC or a
+ *          neighbouring frame could not be read
+ */
+bool EncTemporalFilter::filter(PelStorage *orgPic, int receivedPoc)
+{
+  // A POC is filtered when it is a multiple of one of the configured frame intervals. If several
+  // intervals match, the strength of the last matching map entry is used.
+  bool   isFilterThisFrame = false;
+  double overallStrength   = -1.0;
+  if (m_QP >= 17)  // disable filter for QP < 17
+  {
+    for (std::map<int, double>::const_iterator it = m_temporalFilterStrengths.begin(); it != m_temporalFilterStrengths.end(); ++it)
+    {
+      const int interval = it->first;
+      // an interval of 0 would make the modulo below undefined behaviour, so such an entry is ignored
+      if (interval != 0 && receivedPoc % interval == 0)
+      {
+        isFilterThisFrame = true;
+        overallStrength   = it->second;
+      }
+    }
+  }
+  if (!isFilterThisFrame)
+  {
+    return false;
+  }
+
+  const int offset      = m_FrameSkip;
+  const int centreFrame = receivedPoc + offset;  // frame index of orgPic in the input file
+  const int firstFrame  = centreFrame - m_range;
+  const int lastFrame   = m_gopBasedTemporalFilterFutureReference ? centreFrame + m_range : centreFrame - 1;
+
+  // open a separate handle on the input sequence and skip to the first usable neighbour
+  VideoIOYuv yuvFrames;
+  yuvFrames.open(m_inputFileName, false, m_inputBitDepth, m_MSBExtendedBitDepth, m_internalBitDepth);
+  yuvFrames.skipFrames(std::max(firstFrame, 0), m_sourceWidth - m_pad[0], m_sourceHeight - m_pad[1], m_chromaFormatIDC);
+
+  // subsample original picture so it only needs to be done once
+  PelStorage origPadded;
+  origPadded.create(m_chromaFormatIDC, m_area, 0, m_padding);
+  origPadded.copyFrom(*orgPic);
+  origPadded.extendBorderPel(m_padding, m_padding);
+
+  PelStorage origSubsampled2;
+  PelStorage origSubsampled4;
+  subsampleLuma(origPadded, origSubsampled2);
+  subsampleLuma(origSubsampled2, origSubsampled4);
+
+  // determine motion vectors
+  // (the loop variable is a frame index in the input file, i.e. it already includes m_FrameSkip)
+  std::deque<TemporalFilterSourcePicInfo> srcFrameInfo;
+  int origOffset = -m_range;  // signed distance, in frames, of the current neighbour to the filtered frame
+  for (int poc = firstFrame; poc <= lastFrame; poc++, origOffset++)
+  {
+    if (poc < 0)
+    {
+      continue; // frame not available
+    }
+    if (poc == centreFrame)
+    { // hop over frame that will be filtered
+      yuvFrames.skipFrames(1, m_sourceWidth - m_pad[0], m_sourceHeight - m_pad[1], m_chromaFormatIDC);
+      continue;
+    }
+    srcFrameInfo.push_back(TemporalFilterSourcePicInfo());
+    TemporalFilterSourcePicInfo &srcPic = srcFrameInfo.back();
+
+    PelStorage dummyPicBufferTO; // Only used temporary in yuvFrames.read
+    srcPic.picBuffer.create(m_chromaFormatIDC, m_area, 0, m_padding);
+    dummyPicBufferTO.create(m_chromaFormatIDC, m_area, 0, m_padding);
+    // NOTE(review): a failed read (e.g. a future reference beyond the end of the file) abandons the
+    // whole filter, so such a picture stays unfiltered even though earlier neighbours were read -
+    // confirm that this is intended.
+    if (!yuvFrames.read(srcPic.picBuffer, dummyPicBufferTO, m_inputColourSpaceConvert, m_pad, m_chromaFormatIDC, m_clipInputVideoToRec709Range))
+    {
+      return false; // eof or read fail
+    }
+    srcPic.picBuffer.extendBorderPel(m_padding, m_padding);
+    // NOTE(review): allocated at 4x4 granularity although motionEstimation() stores one vector per
+    // 8x8 block - larger than needed, confirm whether other code depends on this size
+    srcPic.mvs.allocate(m_sourceWidth / 4, m_sourceHeight / 4);
+
+    motionEstimation(srcPic.mvs, origPadded, srcPic.picBuffer, origSubsampled2, origSubsampled4);
+    srcPic.origOffset = origOffset;
+  }
+
+  // filter
+  // (overallStrength was selected together with the filter decision at the top)
+  PelStorage newOrgPic;
+  newOrgPic.create(m_chromaFormatIDC, m_area, 0, m_padding);
+  bilateralFilter(origPadded, srcFrameInfo, newOrgPic, overallStrength);
+
+  // move filtered to orgPic
+  orgPic->copyFrom(newOrgPic);
+
+  yuvFrames.close();
+  return true;
+}
+
+// ====================================================================================================================
+// Private member functions
+// ====================================================================================================================
+
+/** Creates output as a down-sampled copy of the luma plane of input (only the luma plane is written): each
+ *  output sample is the rounded mean of the 2x2 input samples at the top-left of its factor x factor cell. */
+void EncTemporalFilter::subsampleLuma(const PelStorage &input, PelStorage &output, const int factor) const
+{
+  const int newWidth  = input.Y().width / factor;
+  const int newHeight = input.Y().height / factor;
+  output.create(m_chromaFormatIDC, Area(0, 0, newWidth, newHeight), 0, m_padding);
+
+  const int  srcStride = input.Y().stride;
+  const int  dstStride = output.Y().stride;
+  const Pel *srcRow    = input.Y().buf;
+  Pel       *dstRow    = output.Y().buf;
+
+  for (int y = 0; y < newHeight; y++, srcRow += factor * srcStride, dstRow += dstStride)
+  {
+    const Pel *inRow      = srcRow;
+    const Pel *inRowBelow = srcRow + srcStride;
+    // step by 'factor' horizontally as well, matching the vertical step (was hard-coded to 2)
+    for (int x = 0; x < newWidth; x++, inRow += factor, inRowBelow += factor)
+    {
+      dstRow[x] = (inRow[0] + inRowBelow[0] + inRow[1] + inRowBelow[1] + 2) >> 2;
+    }
+  }
+  // fill the m_padding wide border of the new picture
+  output.extendBorderPel(m_padding, m_padding);
+}
+
+/** Sum of squared differences between a luma block of orig and a displaced block of buffer.
+ *  \param orig      picture that contains the block at (x, y)
+ *  \param buffer    picture the displaced block is taken from
+ *  \param x,y       top-left luma position of the block in orig
+ *  \param dx,dy     displacement in 1/16 sample units (the low four bits select the interpolation phase)
+ *  \param bs        block width and height; the full-sample path processes two columns per step
+ *  \param besterror early-out threshold, checked after every row
+ *  \return the error of the whole block, or the partial sum reached when it first exceeded besterror
+ */
+int EncTemporalFilter::motionErrorLuma(const PelStorage &orig, const PelStorage &buffer,
+  const int x, const int y, int dx, int dy,
+  const int bs, const int besterror = 8 * 8 * 1024 * 1024) const
+{
+  const Pel *origPlane  = orig.Y().buf;
+  const int  origStride = orig.Y().stride;
+  const Pel *refPlane   = buffer.Y().buf;
+  const int  refStride  = buffer.Y().stride;
+
+  int error = 0;
+  const bool isFullSample = ((dx | dy) & 0xF) == 0;
+  if (isFullSample)
+  {
+    // no interpolation needed: compare against the block shifted by whole samples
+    dx /= m_motionVectorFactor;
+    dy /= m_motionVectorFactor;
+    for (int row = 0; row < bs; row++)
+    {
+      const Pel *origRow = origPlane + (y + row) * origStride + x;
+      const Pel *refRow  = refPlane + (y + row + dy) * refStride + (x + dx);
+      for (int col = 0; col < bs; col += 2)
+      {
+        const int diffEven = origRow[col] - refRow[col];
+        const int diffOdd  = origRow[col + 1] - refRow[col + 1];
+        error += diffEven * diffEven;
+        error += diffOdd * diffOdd;
+      }
+      if (error > besterror)
+      {
+        return error;
+      }
+    }
+    return error;
+  }
+
+  // fractional displacement: separable interpolation, horizontal pass into tempArray first
+  const int *xFilter = m_interpolationFilter[dx & 0xF];
+  const int *yFilter = m_interpolationFilter[dy & 0xF];
+  // the indices used below (row < bs + 7, col < bs) fit as long as bs does not exceed 64
+  int tempArray[64 + 8][64];
+
+  // rows 1 .. bs+6 are produced: the six vertical taps of output row r read rows r+1 .. r+6
+  for (int row = 1; row < bs + 7; row++)
+  {
+    const Pel *refRow = refPlane + (y + row + (dy >> 4) - 3) * refStride;
+    for (int col = 0; col < bs; col++)
+    {
+      const Pel *window = refRow + (x + col + (dx >> 4) - 3);
+      int        sum    = 0;
+      for (int tap = 1; tap <= 6; tap++)
+      {
+        sum += xFilter[tap] * window[tap];
+      }
+      tempArray[row][col] = sum;
+    }
+  }
+
+  // vertical pass, rounding shift by 12 (two passes with a filter gain of 64 each), clipping, error
+  const Pel maxSampleValue = (1<<m_internalBitDepth[CHANNEL_TYPE_LUMA])-1;
+  for (int row = 0; row < bs; row++)
+  {
+    const Pel *origRow = origPlane + (y + row) * origStride;
+    for (int col = 0; col < bs; col++)
+    {
+      int sum = 0;
+      for (int tap = 1; tap <= 6; tap++)
+      {
+        sum += yFilter[tap] * tempArray[row + tap][col];
+      }
+      sum = (sum + (1 << 11)) >> 12;
+      const int clipped = (sum < 0) ? 0 : ((sum > maxSampleValue) ? maxSampleValue : sum);
+      const int diff    = clipped - origRow[x + col];
+      error += diff * diff;
+    }
+    if (error > besterror)
+    {
+      return error;
+    }
+  }
+  return error;
+}
+
+/** Block-based luma motion search of buffer against orig; all vectors are in 1/16 sample units.
+ *  Blocks are visited in raster order; only blocks that lie completely inside orig are searched.
+ *  \param mvs       receives the best vector of each block at (blockX / blockSize, blockY / blockSize)
+ *  \param orig      picture whose blocks are matched
+ *  \param buffer    picture that is searched
+ *  \param blockSize width and height of a block, also the distance between neighbouring blocks
+ *  \param previous  vectors of an earlier, coarser search used as start candidates, or NULL
+ *  \param factor    scale applied to the vectors taken from previous
+ *  \param doubleRes if true the full-sample result is refined in steps of 4/16 and then 1/16 sample
+ */
+void EncTemporalFilter::motionEstimationLuma(Array2D<MotionVector> &mvs, const PelStorage &orig, const PelStorage &buffer, const int blockSize,
+  const Array2D<MotionVector> *previous, const int factor, const bool doubleRes) const
+{
+  const int range    = (previous == NULL) ? 8 : 5;  // wider full-sample search when there is no predictor
+  const int stepSize = blockSize;
+
+  const int origWidth  = orig.Y().width;
+  const int origHeight = orig.Y().height;
+
+  // '<=': blocks that end exactly on the right/bottom picture edge must be searched as well. With '<'
+  // they were skipped whenever the picture size is a multiple of blockSize and never got a vector.
+  for (int blockY = 0; blockY + blockSize <= origHeight; blockY += stepSize)
+  {
+    for (int blockX = 0; blockX + blockSize <= origWidth; blockX += stepSize)
+    {
+      MotionVector best;
+
+      if (previous != NULL)
+      {
+        // start candidates: vectors of the 5x5 blocks around the co-located block of the coarser search
+        for (int py = -2; py <= 2; py++)
+        {
+          int testy = blockY / (2 * blockSize) + py;
+          for (int px = -2; px <= 2; px++)
+          {
+            int testx = blockX / (2 * blockSize) + px;
+            if ((testx >= 0) && (testx < origWidth / (2 * blockSize)) && (testy >= 0) && (testy < origHeight / (2 * blockSize)))
+            {
+              MotionVector old = previous->get(testx, testy);
+              int error = motionErrorLuma(orig, buffer, blockX, blockY, old.x * factor, old.y * factor, blockSize, best.error);
+              if (error < best.error)
+              {
+                best.set(old.x * factor, old.y * factor, error);
+              }
+            }
+          }
+        }
+      }
+
+      // full-sample search around the best candidate so far
+      MotionVector prevBest = best;
+      for (int y2 = prevBest.y / m_motionVectorFactor - range; y2 <= prevBest.y / m_motionVectorFactor + range; y2++)
+      {
+        for (int x2 = prevBest.x / m_motionVectorFactor - range; x2 <= prevBest.x / m_motionVectorFactor + range; x2++)
+        {
+          int error = motionErrorLuma(orig, buffer, blockX, blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor, blockSize, best.error);
+          if (error < best.error)
+          {
+            best.set(x2 * m_motionVectorFactor, y2 * m_motionVectorFactor, error);
+          }
+        }
+      }
+
+      if (doubleRes)
+      {
+        // two refinement passes around the current best: +-12 in steps of 4, then +-3 in steps of 1
+        for (int pass = 0; pass < 2; pass++)
+        {
+          const int step  = (pass == 0) ? 4 : 1;
+          const int reach = 3 * step;
+          prevBest = best;
+          for (int y2 = prevBest.y - reach; y2 <= prevBest.y + reach; y2 += step)
+          {
+            for (int x2 = prevBest.x - reach; x2 <= prevBest.x + reach; x2 += step)
+            {
+              int error = motionErrorLuma(orig, buffer, blockX, blockY, x2, y2, blockSize, best.error);
+              if (error < best.error)
+              {
+                best.set(x2, y2, error);
+              }
+            }
+          }
+        }
+      }
+      mvs.get(blockX / stepSize, blockY / stepSize) = best;
+    }
+  }
+}
+
+void EncTemporalFilter::motionEstimation(Array2D<MotionVector> &mv, const PelStorage &orgPic, const PelStorage &buffer, const PelStorage &origSubsampled2, const PelStorage &origSubsampled4) const
+{ // Coarse-to-fine motion search of orgPic against buffer: three 16x16-block passes seed a final 8x8-block pass that fills mv.
+  const int width = m_sourceWidth;
+  const int height = m_sourceHeight;
+  Array2D<MotionVector> mv_0(width / 16, height / 16); // output of the pass on origSubsampled4 / bufferSub4
+  Array2D<MotionVector> mv_1(width / 16, height / 16); // output of the pass on origSubsampled2 / bufferSub2
+  Array2D<MotionVector> mv_2(width / 16, height / 16); // output of the 16x16 pass on orgPic / buffer
+
+  PelStorage bufferSub2; // buffer after one subsampleLuma() call (default factor 2)
+  PelStorage bufferSub4; // bufferSub2 after a second subsampleLuma() call
+
+  subsampleLuma(buffer, bufferSub2);
+  subsampleLuma(bufferSub2, bufferSub4);
+
+  motionEstimationLuma(mv_0, origSubsampled4, bufferSub4, 16); // no predictor array: 'previous' keeps its default of 0
+  motionEstimationLuma(mv_1, origSubsampled2, bufferSub2, 16, &mv_0, 2); // mv_0 vectors are tried as candidates, multiplied by factor 2
+  motionEstimationLuma(mv_2, orgPic, buffer, 16, &mv_1, 2);
+
+  motionEstimationLuma(mv, orgPic, buffer, 8, &mv_2, 1, true); // 8x8 blocks with doubleRes refinement; mv is not allocated here, presumably sized by the caller - confirm
+}
+
+void EncTemporalFilter::applyMotion(const Array2D<MotionVector> &mvs, const PelStorage &input, PelStorage &output) const
+{ // Writes to output a copy of input displaced block by block according to mvs, interpolating with a separable filter.
+  static const int lumaBlockSize=8; // luma block edge; other components use this shifted right by their scale
+
+  for(int c=0; c< getNumberValidComponents(m_chromaFormatIDC); c++)
+  {
+    const ComponentID compID=(ComponentID)c;
+    const int csx=getComponentScaleX(compID, m_chromaFormatIDC);
+    const int csy=getComponentScaleY(compID, m_chromaFormatIDC);
+    const int blockSizeX = lumaBlockSize>>csx;
+    const int blockSizeY = lumaBlockSize>>csy;
+    const int height = input.bufs[c].height;
+    const int width  = input.bufs[c].width;
+
+    const Pel maxValue = (1<<m_internalBitDepth[toChannelType(compID)])-1; // upper clip bound for the output samples
+
+    const Pel *srcImage = input.bufs[c].buf;
+    const int srcStride  = input.bufs[c].stride;
+
+    Pel *dstImage = output.bufs[c].buf;
+    int dstStride  = output.bufs[c].stride;
+
+    for (int y = 0, blockNumY = 0; y + blockSizeY <= height; y += blockSizeY, blockNumY++) // only blocks lying fully inside the plane are written
+    {
+      for (int x = 0, blockNumX = 0; x + blockSizeX <= width; x += blockSizeX, blockNumX++)
+      {
+        const MotionVector &mv = mvs.get(blockNumX,blockNumY); // the same block index is used for every component
+        const int dx = mv.x >> csx ; // vector scaled to this component's resolution
+        const int dy = mv.y >> csy ;
+        const int xInt = mv.x >> (4+csx) ; // dx without its low 4 bits: the whole-sample displacement
+        const int yInt = mv.y >> (4+csy) ;
+
+        const int *xFilter = m_interpolationFilter[dx & 0xf]; // filter row selected by the low 4 bits of dx
+        const int *yFilter = m_interpolationFilter[dy & 0xf]; // original note: each filter pass will add 6 bits (removed together by the >> 12 below)
+        const int numFilterTaps=7;
+        const int centreTapOffset=3;
+
+        int tempArray[lumaBlockSize + numFilterTaps][lumaBlockSize]; // horizontally filtered rows; row 0 is never written or read
+
+        for (int by = 1; by < blockSizeY + numFilterTaps; by++) // horizontal pass over rows 1..blockSizeY+6, the rows the vertical pass reads
+        {
+          const int yOffset = y + by + yInt - centreTapOffset;
+          const Pel *sourceRow = srcImage+yOffset*srcStride; // NOTE(review): no bounds check on yOffset or base; presumably relies on padding around input - confirm
+          for (int bx = 0; bx < blockSizeX; bx++)
+          {
+            int base = x + bx + xInt - centreTapOffset;
+            const Pel *rowStart = sourceRow + base;
+
+            int sum = 0; // only entries 1..6 of the 8-entry filter row are applied
+            sum += xFilter[1] * rowStart[1];
+            sum += xFilter[2] * rowStart[2];
+            sum += xFilter[3] * rowStart[3];
+            sum += xFilter[4] * rowStart[4];
+            sum += xFilter[5] * rowStart[5];
+            sum += xFilter[6] * rowStart[6];
+
+            tempArray[by][bx] = sum; // kept at full precision until after the vertical pass
+          }
+        }
+
+        Pel *dstRow = dstImage+y*dstStride;
+        for (int by = 0; by < blockSizeY; by++, dstRow+=dstStride) // vertical pass producing the final block samples
+        {
+          Pel *dstPel=dstRow+x;
+          for (int bx = 0; bx < blockSizeX; bx++, dstPel++)
+          {
+            int sum = 0;
+
+            sum += yFilter[1] * tempArray[by + 1][bx];
+            sum += yFilter[2] * tempArray[by + 2][bx];
+            sum += yFilter[3] * tempArray[by + 3][bx];
+            sum += yFilter[4] * tempArray[by + 4][bx];
+            sum += yFilter[5] * tempArray[by + 5][bx];
+            sum += yFilter[6] * tempArray[by + 6][bx];
+
+            sum = (sum + (1 << 11)) >> 12; // rounding right shift by 12
+            sum = sum < 0 ? 0 : (sum > maxValue ? maxValue : sum); // clip to [0, maxValue]
+            *dstPel = sum;
+          }
+        }
+      }
+    }
+  }
+}
+
+void EncTemporalFilter::bilateralFilter(const PelStorage &orgPic, // picture being filtered (read only)
+  const std::deque<TemporalFilterSourcePicInfo> &srcFrameInfo, // reference pictures with their motion vectors and frame offsets
+  PelStorage &newOrgPic, // receives the filtered samples
+  double overallStrength) const // multiplies the weight of every reference sample
+{ // Replaces each sample by a weighted mean of itself (weight 1) and the co-located motion-compensated reference samples.
+  const int numRefs = int(srcFrameInfo.size());
+  std::vector<PelStorage> correctedPics(numRefs); // motion-compensated version of each reference
+  for (int i = 0; i < numRefs; i++)
+  {
+    correctedPics[i].create(m_chromaFormatIDC, m_area, 0, m_padding); // NOTE(review): no matching destroy() here; presumably released by PelStorage's destructor - confirm
+    applyMotion(srcFrameInfo[i].mvs, srcFrameInfo[i].picBuffer, correctedPics[i]); // NOTE(review): applyMotion writes whole blocks only, yet every sample is read below - confirm plane sizes are block multiples
+  }
+
+  int refStrengthRow = 2; // row of m_refStrengths: 0 for 2*m_range references, 1 for m_range references, 2 otherwise
+  if (numRefs == m_range*2)
+  {
+    refStrengthRow = 0;
+  }
+  else if (numRefs == m_range)
+  {
+    refStrengthRow = 1;
+  }
+
+  const double lumaSigmaSq = (m_QP - m_sigmaZeroPoint) * (m_QP - m_sigmaZeroPoint) * m_sigmaMultiplier; // NOTE(review): zero when m_QP equals m_sigmaZeroPoint, making the exp() argument below a division by zero - confirm this cannot occur
+  const double chromaSigmaSq = 30 * 30;
+
+  for(int c=0; c< getNumberValidComponents(m_chromaFormatIDC); c++)
+  {
+    const ComponentID compID=(ComponentID)c;
+    const int height = orgPic.bufs[c].height;
+    const int width  = orgPic.bufs[c].width;
+    const Pel *srcPelRow = orgPic.bufs[c].buf;
+    const int srcStride = orgPic.bufs[c].stride;
+    Pel *dstPelRow = newOrgPic.bufs[c].buf;
+    const int dstStride = newOrgPic.bufs[c].stride;
+    const double sigmaSq = isChroma(compID)? chromaSigmaSq : lumaSigmaSq;
+    const double weightScaling = overallStrength * (isChroma(compID) ? m_chromaFactor : 0.4); // luma uses the fixed factor 0.4
+    const Pel maxSampleValue = (1<<m_internalBitDepth[toChannelType(compID)])-1;
+    const double bitDepthDiffWeighting=1024.0 / (maxSampleValue+1); // rescales sample differences to a 10-bit range
+
+    for (int y = 0; y < height; y++, srcPelRow+=srcStride, dstPelRow+=dstStride)
+    {
+      const Pel *srcPel=srcPelRow;
+      Pel *dstPel=dstPelRow;
+      for (int x = 0; x < width; x++, srcPel++, dstPel++)
+      {
+        const int orgVal = (int) *srcPel;
+        double temporalWeightSum = 1.0; // the original sample contributes with weight 1
+        double newVal = (double) orgVal;
+        for (int i = 0; i < numRefs; i++)
+        {
+          const Pel *pCorrectedPelPtr=correctedPics[i].bufs[c].buf+(y*correctedPics[i].bufs[c].stride+x);
+          const int refVal = (int) *pCorrectedPelPtr;
+          double diff = (double)(refVal - orgVal);
+          diff *= bitDepthDiffWeighting;
+          double diffSq = diff * diff;
+          const int index = std::min(1, std::abs(srcFrameInfo[i].origOffset) - 1); // 0 for offset +1/-1, 1 for larger offsets; NOTE(review): -1 (out of range) if origOffset is 0 - confirm callers never pass 0
+          const double weight = weightScaling * m_refStrengths[refStrengthRow][index] * exp(-diffSq / (2 * sigmaSq)); // weight shrinks as the squared sample difference grows
+          newVal += weight * refVal;
+          temporalWeightSum += weight;
+        }
+        newVal /= temporalWeightSum; // normalise to a weighted mean
+        Pel sampleVal = (Pel)round(newVal);
+        sampleVal=(sampleVal<0?0 : (sampleVal>maxSampleValue ? maxSampleValue : sampleVal)); // clip to [0, maxSampleValue]
+        *dstPel = sampleVal;
+      }
+    }
+  }
+}
+
+//! \}
+
diff --git a/source/Lib/EncoderLib/EncTemporalFilter.h b/source/Lib/EncoderLib/EncTemporalFilter.h
new file mode 100644
index 0000000000000000000000000000000000000000..b46b265dc783b548290155278be0aa3708ab5f41
--- /dev/null
+++ b/source/Lib/EncoderLib/EncTemporalFilter.h
@@ -0,0 +1,165 @@
+/* The copyright in this software is being made available under the BSD
+* License, included below. This software may be subject to other third party
+* and contributor rights, including patent rights, and no such rights are
+* granted under this license.
+*
+* Copyright (c) 2010-2020, ITU/ISO/IEC
+* All rights reserved.
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are met:
+*
+*  * Redistributions of source code must retain the above copyright notice,
+*    this list of conditions and the following disclaimer.
+*  * Redistributions in binary form must reproduce the above copyright notice,
+*    this list of conditions and the following disclaimer in the documentation
+*    and/or other materials provided with the distribution.
+*  * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
+*    be used to endorse or promote products derived from this software without
+*    specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
+* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+* THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/** \file     EncTemporalFilter.h
+\brief    EncTemporalFilter class (header)
+*/
+
+#ifndef __TEMPORAL_FILTER__
+#define __TEMPORAL_FILTER__
+#include "EncLib.h"
+#include "CommonLib/Buffer.h"
+#include <sstream>
+#include <map>
+#include <deque>
+
+
+//! \ingroup EncoderLib
+//! \{
+
+struct MotionVector // Block displacement together with the matching error recorded for it
+{
+  int x, y; // displacement components; applyMotion uses the low 4 bits as a filter phase and the rest as a whole-sample offset
+  int error; // matching error stored with the vector; searches keep the candidate with the lowest value
+  MotionVector() : x(0), y(0), error(INT_LEAST32_MAX) {} // zero vector with the largest error, so the first lower measured error replaces it
+  void set(int vectorX, int vectorY, int errorValue) { x = vectorX; y = vectorY; error = errorValue; }
+};
+
+template <class T>
+struct Array2D // Minimal 2-D array stored row by row in a std::vector
+{
+private:
+  int m_width, m_height;
+  std::vector< T > v; // element (x, y) is stored at index y*m_width + x
+public:
+  Array2D() : m_width(0), m_height(0), v() { }
+  Array2D(int width, int height, const T& value=T()) : m_width(0), m_height(0), v() { allocate(width, height, value); }
+
+  void allocate(int width, int height, const T& value=T()) // sets the dimensions and resizes the storage to width*height elements
+  {
+    m_width=width;
+    m_height=height;
+    v.resize(std::size_t(m_width*m_height), value); // resize() gives 'value' only to newly appended elements; elements already present keep their contents
+  }
+
+  T& get(int x, int y) // mutable access to element (x, y)
+  {
+    assert(x<m_width && y<m_height); // NOTE(review): negative x or y is not rejected - confirm callers never pass one
+    return v[y*m_width+x];
+  }
+
+  const T& get(int x, int y) const // read-only access to element (x, y)
+  {
+    assert(x<m_width && y<m_height); // NOTE(review): same upper-bound-only check as the mutable overload
+    return v[y*m_width+x];
+  }
+};
+
+struct TemporalFilterSourcePicInfo // One reference picture as consumed by EncTemporalFilter::bilateralFilter
+{
+  TemporalFilterSourcePicInfo() : picBuffer(), mvs(), origOffset(0) { }
+  PelStorage            picBuffer; // reference samples; passed to applyMotion as its input
+  Array2D<MotionVector> mvs; // per-block vectors applied to picBuffer by applyMotion
+  int                   origOffset; // bilateralFilter uses min(1, |origOffset| - 1) as a strength column; defaults to 0
+};
+
+// ====================================================================================================================
+// Class definition
+// ====================================================================================================================
+
+class EncTemporalFilter // Encoder-side temporal filter: estimates block motion and blends motion-compensated references into a source picture
+{
+public:
+  EncTemporalFilter();
+  ~EncTemporalFilter() {}
+
+  void init(const int frameSkip, // definition not visible here; the parameters presumably populate the like-named m_ members - confirm
+    const int inputBitDepth[MAX_NUM_CHANNEL_TYPE],
+    const int msbExtendedBitDepth[MAX_NUM_CHANNEL_TYPE],
+    const int internalBitDepth[MAX_NUM_CHANNEL_TYPE],
+    const int width,
+    const int height,
+    const int *pad,
+    const bool rec709,
+    const std::string &filename,
+    const ChromaFormat inputChroma,
+    const InputColourSpaceConversion colorSpaceConv,
+    const int qp,
+    const std::map<int, double> &temporalFilterStrengths,
+    const bool gopBasedTemporalFilterFutureReference);
+
+  bool filter(PelStorage *orgPic, int frame); // definition not visible here
+
+private:
+  // Private static member variables
+  static const int m_range; // bilateralFilter compares the reference count with m_range and 2*m_range to pick a m_refStrengths row
+  static const double m_chromaFactor; // chroma weight factor in bilateralFilter (luma uses 0.4)
+  static const double m_sigmaMultiplier; // multiplies (m_QP - m_sigmaZeroPoint)^2 to give the luma sigma^2
+  static const double m_sigmaZeroPoint; // value of m_QP at which the luma sigma^2 becomes zero
+  static const int m_motionVectorFactor; // motionEstimationLuma's first search steps through vectors in multiples of this value
+  static const int m_padding; // padding argument given to PelStorage::create for the motion-compensated pictures
+  static const int m_interpolationFilter[16][8]; // one 8-entry filter row per value of the vector's low 4 bits
+  static const double m_refStrengths[3][2]; // indexed [refStrengthRow][min(1, |origOffset| - 1)] in bilateralFilter
+
+  // Private member variables
+  int m_FrameSkip;
+  std::string m_inputFileName;
+  int m_inputBitDepth[MAX_NUM_CHANNEL_TYPE];
+  int m_MSBExtendedBitDepth[MAX_NUM_CHANNEL_TYPE];
+  int m_internalBitDepth[MAX_NUM_CHANNEL_TYPE]; // determines the clip range of filtered and motion-compensated samples
+  ChromaFormat m_chromaFormatIDC; // selects the number of components and their scale factors
+  int m_sourceWidth; // with m_sourceHeight, sizes the intermediate vector arrays in motionEstimation
+  int m_sourceHeight;
+  int m_QP; // enters the luma sigma^2 in bilateralFilter
+  std::map<int, double> m_temporalFilterStrengths;
+  int m_pad[2];
+  bool m_clipInputVideoToRec709Range;
+  InputColourSpaceConversion m_inputColourSpaceConvert;
+  Area m_area; // area argument given to PelStorage::create in bilateralFilter
+  bool m_gopBasedTemporalFilterFutureReference;
+
+  // Private functions
+  void subsampleLuma(const PelStorage &input, PelStorage &output, const int factor = 2) const; // definition not visible here
+  int motionErrorLuma(const PelStorage &orig, const PelStorage &buffer, const int x, const int y, int dx, int dy, const int bs, const int besterror) const; // definition not visible here
+  void motionEstimationLuma(Array2D<MotionVector> &mvs, const PelStorage &orig, const PelStorage &buffer, const int bs,
+    const Array2D<MotionVector> *previous=0, const int factor = 1, const bool doubleRes = false) const; // previous: optional candidate vectors, multiplied by factor before testing
+  void motionEstimation(Array2D<MotionVector> &mvs, const PelStorage &orgPic, const PelStorage &buffer, const PelStorage &origSubsampled2, const PelStorage &origSubsampled4) const; // coarse-to-fine driver around motionEstimationLuma
+
+  void bilateralFilter(const PelStorage &orgPic, const std::deque<TemporalFilterSourcePicInfo> &srcFrameInfo, PelStorage &newOrgPic, double overallStrength) const; // weighted blend of orgPic with its motion-compensated references
+  void applyMotion(const Array2D<MotionVector> &mvs, const PelStorage &input, PelStorage &output) const; // block-wise motion compensation of input into output
+}; // END CLASS DEFINITION EncTemporalFilter
+
+   //! \}
+
+
+#endif // __TEMPORAL_FILTER__
diff --git a/source/Lib/EncoderLib/InterSearch.cpp b/source/Lib/EncoderLib/InterSearch.cpp
index dd85f3c41916f7b352698f6d45d73c6c28279c28..2a0f143d8303546ea456cb88082bb046efc50c11 100644
--- a/source/Lib/EncoderLib/InterSearch.cpp
+++ b/source/Lib/EncoderLib/InterSearch.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -112,8 +112,12 @@ InterSearch::InterSearch()
   m_affMVList = nullptr;
   m_affMVListSize = 0;
   m_affMVListIdx = 0;
+  m_uniMvList = nullptr;
+  m_uniMvListSize = 0;
+  m_uniMvListIdx = 0;
   m_histBestSbt    = MAX_UCHAR;
   m_histBestMtsIdx = MAX_UCHAR;
+
 }
 
 
@@ -156,6 +160,13 @@ void InterSearch::destroy()
   }
   m_affMVListIdx = 0;
   m_affMVListSize = 0;
+  if (m_uniMvList)
+  {
+    delete[] m_uniMvList;
+    m_uniMvList = nullptr;
+  }
+  m_uniMvListIdx = 0;
+  m_uniMvListSize = 0;
   m_isInitialized = false;
 }
 
@@ -198,7 +209,11 @@ void InterSearch::init( EncCfg*        pcEncCfg,
 {
   CHECK(m_isInitialized, "Already initialized");
   m_numBVs = 0;
-  m_numBV16s = 0;
+  for (int i = 0; i < IBC_NUM_CANDIDATES; i++)
+  {
+    m_defaultCachedBvs.m_bvCands[i].setZero();
+  }
+  m_defaultCachedBvs.currCnt = 0;
   m_pcEncCfg                     = pcEncCfg;
   m_pcTrQuant                    = pcTrQuant;
   m_iSearchRange                 = iSearchRange;
@@ -234,7 +249,7 @@ void InterSearch::init( EncCfg*        pcEncCfg,
   }
 
   const ChromaFormat cform = pcEncCfg->getChromaFormatIdc();
-  InterPrediction::init( pcRdCost, cform );
+  InterPrediction::init( pcRdCost, cform, maxCUHeight );
 
   for( uint32_t i = 0; i < NUM_REF_PIC_LIST_01; i++ )
   {
@@ -251,6 +266,13 @@ void InterSearch::init( EncCfg*        pcEncCfg,
     m_affMVList = new AffineMVInfo[m_affMVListMaxSize];
   m_affMVListIdx = 0;
   m_affMVListSize = 0;
+  m_uniMvListMaxSize = 15;
+  if (!m_uniMvList)
+  {
+    m_uniMvList = new BlkUniMvInfo[m_uniMvListMaxSize];
+  }
+  m_uniMvListIdx = 0;
+  m_uniMvListSize = 0;
   m_isInitialized = true;
 }
 
@@ -276,9 +298,9 @@ void InterSearch::resetSavedAffineMotion()
   m_affineMotion.affine6ParaAvail = false;
 }
 
-void InterSearch::storeAffineMotion( Mv acAffineMv[2][3], int16_t affineRefIdx[2], EAffineModel affineType, int gbiIdx )
+void InterSearch::storeAffineMotion( Mv acAffineMv[2][3], int16_t affineRefIdx[2], EAffineModel affineType, int bcwIdx )
 {
-  if ( ( gbiIdx == GBI_DEFAULT || !m_affineMotion.affine6ParaAvail ) && affineType == AFFINEMODEL_6PARAM )
+  if ( ( bcwIdx == BCW_DEFAULT || !m_affineMotion.affine6ParaAvail ) && affineType == AFFINEMODEL_6PARAM )
   {
     for ( int i = 0; i < 2; i++ )
     {
@@ -291,7 +313,7 @@ void InterSearch::storeAffineMotion( Mv acAffineMv[2][3], int16_t affineRefIdx[2
     m_affineMotion.affine6ParaAvail = true;
   }
 
-  if ( ( gbiIdx == GBI_DEFAULT || !m_affineMotion.affine4ParaAvail ) && affineType == AFFINEMODEL_4PARAM )
+  if ( ( bcwIdx == BCW_DEFAULT || !m_affineMotion.affine4ParaAvail ) && affineType == AFFINEMODEL_4PARAM )
   {
     for ( int i = 0; i < 2; i++ )
     {
@@ -747,7 +769,7 @@ Distortion InterSearch::xGetInterPredictionError( PredictionUnit& pu, PelUnitBuf
   DistParam cDistParam;
   cDistParam.applyWeight = false;
 
-  m_pcRdCost->setDistParam( cDistParam, origBuf.Y(), predBuf.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, m_pcEncCfg->getUseHADME() && !pu.cu->transQuantBypass );
+  m_pcRdCost->setDistParam(cDistParam, origBuf.Y(), predBuf.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, m_pcEncCfg->getUseHADME() && !pu.cu->slice->getDisableSATDForRD());
 
   return (Distortion)cDistParam.distFunc( cDistParam );
 }
@@ -799,12 +821,17 @@ int InterSearch::xIBCSearchMVChromaRefine(PredictionUnit& pu,
   int refStride, orgStride;
   int width, height;
 
-  int picWidth = pu.cs->slice->getSPS()->getPicWidthInLumaSamples();
-  int picHeight = pu.cs->slice->getSPS()->getPicHeightInLumaSamples();
+  int picWidth = pu.cs->slice->getPPS()->getPicWidthInLumaSamples();
+  int picHeight = pu.cs->slice->getPPS()->getPicHeightInLumaSamples();
 
   UnitArea allCompBlocks(pu.chromaFormat, (Area)pu.block(COMPONENT_Y));
   for (int cand = 0; cand < CHROMA_REFINEMENT_CANDIDATES; cand++)
   {
+    if (sadBestCand[cand] == std::numeric_limits<Distortion>::max())
+    {
+      continue;
+    }
+
     if ((!cMVCand[cand].getHor()) && (!cMVCand[cand].getVer()))
       continue;
 
@@ -869,10 +896,14 @@ int InterSearch::xIBCSearchMVChromaRefine(PredictionUnit& pu,
   return bestCandIdx;
 }
 
-static unsigned int xMergeCandLists(Mv *dst, unsigned int dn, Mv *src, unsigned int sn)
+static unsigned int xMergeCandLists(Mv *dst, unsigned int dn, unsigned int dstTotalLength, Mv *src, unsigned int sn)
 {
-  for (unsigned int cand = 0; cand < sn && dn<IBC_NUM_CANDIDATES; cand++)
+  for (unsigned int cand = 0; cand < sn && dn < dstTotalLength; cand++)
   {
+    if (src[cand] == Mv())
+    {
+      continue;
+    }
     bool found = false;
     for (int j = 0; j<dn; j++)
     {
@@ -931,9 +962,8 @@ void InterSearch::xIntraPatternSearch(PredictionUnit& pu, IntTZSearchStruct&  cS
   m_cDistParam.useMR = false;
   m_pcRdCost->setDistParam(m_cDistParam, *cStruct.pcPatternKey, cStruct.piRefY, cStruct.iRefStride, m_lumaClpRng.bd, COMPONENT_Y, cStruct.subShiftMode);
 
-
-  const int picWidth = pu.cs->slice->getSPS()->getPicWidthInLumaSamples();
-  const int picHeight = pu.cs->slice->getSPS()->getPicHeightInLumaSamples();
+  const int picWidth = pu.cs->slice->getPPS()->getPicWidthInLumaSamples();
+  const int picHeight = pu.cs->slice->getPPS()->getPicHeightInLumaSamples();
 
 
   {
@@ -942,20 +972,13 @@ void InterSearch::xIntraPatternSearch(PredictionUnit& pu, IntTZSearchStruct&  cS
     Distortion tempSadBest = 0;
 
     int srLeft = srchRngHorLeft, srRight = srchRngHorRight, srTop = srchRngVerTop, srBottom = srchRngVerBottom;
+    m_numBVs = 0;
+    m_numBVs = xMergeCandLists(m_acBVs, m_numBVs, (2 * IBC_NUM_CANDIDATES), m_defaultCachedBvs.m_bvCands, m_defaultCachedBvs.currCnt);
 
-    if (roiWidth>8 || roiHeight>8)
-    {
-      m_numBVs = 0;
-    }
-    else if (roiWidth + roiHeight == 16)
-    {
-      m_numBVs = m_numBV16s;
-    }
-
-    Mv cMvPredEncOnly[16];
+    Mv cMvPredEncOnly[IBC_NUM_CANDIDATES];
     int nbPreds = 0;
     PU::getIbcMVPsEncOnly(pu, cMvPredEncOnly, nbPreds);
-    m_numBVs = xMergeCandLists(m_acBVs, m_numBVs, cMvPredEncOnly, nbPreds);
+    m_numBVs = xMergeCandLists(m_acBVs, m_numBVs, (2 * IBC_NUM_CANDIDATES), cMvPredEncOnly, nbPreds);
 
     for (unsigned int cand = 0; cand < m_numBVs; cand++)
     {
@@ -966,7 +989,7 @@ void InterSearch::xIntraPatternSearch(PredictionUnit& pu, IntTZSearchStruct&  cS
         && !((yPred < srTop) || (yPred > srBottom))
         && !((xPred < srLeft) || (xPred > srRight)))
       {
-        bool validCand = PU::isBlockVectorValid(pu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, 0, 0, xPred, yPred, lcuWidth);
+        bool validCand = searchBv(pu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, xPred, yPred, lcuWidth);
 
         if (validCand)
         {
@@ -987,7 +1010,7 @@ void InterSearch::xIntraPatternSearch(PredictionUnit& pu, IntTZSearchStruct&  cS
     const int boundY = (0 - roiHeight - puPelOffsetY);
     for (int y = std::max(srchRngVerTop, 0 - cuPelY); y <= boundY; ++y)
     {
-      if (!PU::isBlockVectorValid(pu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, 0, 0, 0, y, lcuWidth))
+      if (!searchBv(pu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, 0, y, lcuWidth))
       {
         continue;
       }
@@ -1012,7 +1035,7 @@ void InterSearch::xIntraPatternSearch(PredictionUnit& pu, IntTZSearchStruct&  cS
     const int boundX = std::max(srchRngHorLeft, -cuPelX);
     for (int x = 0 - roiWidth - puPelOffsetX; x >= boundX; --x)
     {
-      if (!PU::isBlockVectorValid(pu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, 0, 0, x, 0, lcuWidth))
+      if (!searchBv(pu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, x, 0, lcuWidth))
       {
         continue;
       }
@@ -1063,7 +1086,7 @@ void InterSearch::xIntraPatternSearch(PredictionUnit& pu, IntTZSearchStruct&  cS
           if ((x == 0) || ((int)(cuPelX + x + roiWidth) >= picWidth))
             continue;
 
-          if (!PU::isBlockVectorValid(pu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, 0, 0, x, y, lcuWidth))
+          if (!searchBv(pu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, x, y, lcuWidth))
           {
             continue;
           }
@@ -1103,7 +1126,7 @@ void InterSearch::xIntraPatternSearch(PredictionUnit& pu, IntTZSearchStruct&  cS
           if ((x == 0) || ((int)(cuPelX + x + roiWidth) >= picWidth))
             continue;
 
-          if (!PU::isBlockVectorValid(pu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, 0, 0, x, y, lcuWidth))
+          if (!searchBv(pu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, x, y, lcuWidth))
           {
             continue;
           }
@@ -1160,7 +1183,7 @@ void InterSearch::xIntraPatternSearch(PredictionUnit& pu, IntTZSearchStruct&  cS
           if ((x == 0) || ((int)(cuPelX + x + roiWidth) >= picWidth))
             continue;
 
-          if (!PU::isBlockVectorValid(pu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, 0, 0, x, y, lcuWidth))
+          if (!searchBv(pu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, x, y, lcuWidth))
           {
             continue;
           }
@@ -1196,14 +1219,20 @@ void InterSearch::xIntraPatternSearch(PredictionUnit& pu, IntTZSearchStruct&  cS
   ruiCost = sadBest;
 
 end:
-  if (roiWidth + roiHeight > 8)
-  {
-    m_numBVs = xMergeCandLists(m_acBVs, m_numBVs, cMVCand, CHROMA_REFINEMENT_CANDIDATES);
+  m_numBVs = 0;
+  m_numBVs = xMergeCandLists(m_acBVs, m_numBVs, (2 * IBC_NUM_CANDIDATES), m_defaultCachedBvs.m_bvCands, m_defaultCachedBvs.currCnt);
 
-    if (roiWidth + roiHeight == 32)
+  m_defaultCachedBvs.currCnt = 0;
+  m_defaultCachedBvs.currCnt = xMergeCandLists(m_defaultCachedBvs.m_bvCands, m_defaultCachedBvs.currCnt, IBC_NUM_CANDIDATES, cMVCand, CHROMA_REFINEMENT_CANDIDATES);
+  m_defaultCachedBvs.currCnt = xMergeCandLists(m_defaultCachedBvs.m_bvCands, m_defaultCachedBvs.currCnt, IBC_NUM_CANDIDATES, m_acBVs, m_numBVs);
+
+  for (unsigned int cand = 0; cand < CHROMA_REFINEMENT_CANDIDATES; cand++)
+  {
+    if (cMVCand[cand].getHor() == 0 && cMVCand[cand].getVer() == 0)
     {
-      m_numBV16s = m_numBVs;
+      continue;
     }
+    m_ctuRecord[pu.lumaPos()][pu.lumaSize()].bvRecord[cMVCand[cand]] = sadBestCand[cand];
   }
 
   return;
@@ -1218,38 +1247,79 @@ void InterSearch::xIBCEstimation(PredictionUnit& pu, PelUnitBuf& origBuf,
   Distortion &ruiCost, const int localSearchRangeX, const int localSearchRangeY
 )
 {
+  const int iPicWidth = pu.cs->slice->getPPS()->getPicWidthInLumaSamples();
+  const int iPicHeight = pu.cs->slice->getPPS()->getPicHeightInLumaSamples();
+  const unsigned int  lcuWidth = pu.cs->slice->getSPS()->getMaxCUWidth();
+  const int           cuPelX = pu.Y().x;
+  const int           cuPelY = pu.Y().y;
+  int                 iRoiWidth = pu.lwidth();
+  int                 iRoiHeight = pu.lheight();
+
+  PelUnitBuf* pBuf = &origBuf;
+
+  //  Search key pattern initialization
+  CPelBuf  tmpPattern = pBuf->Y();
+  CPelBuf* pcPatternKey = &tmpPattern;
+  PelBuf tmpOrgLuma;
+
+  if ((pu.cs->picHeader->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag()))
+  {
+    const CompArea &area = pu.blocks[COMPONENT_Y];
+    CompArea    tmpArea(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size());
+    tmpOrgLuma = m_tmpStorageLCU.getBuf(tmpArea);
+    tmpOrgLuma.copyFrom(tmpPattern);
+    tmpOrgLuma.rspSignal(m_pcReshape->getFwdLUT());
+    pcPatternKey = (CPelBuf*)&tmpOrgLuma;
+  }
+
+  m_lumaClpRng = pu.cs->slice->clpRng(COMPONENT_Y);
+  Picture* refPic = pu.cu->slice->getPic();
+  const CPelBuf refBuf = refPic->getRecoBuf(pu.blocks[COMPONENT_Y]);
+
+  IntTZSearchStruct cStruct;
+  cStruct.pcPatternKey = pcPatternKey;
+  cStruct.iRefStride = refBuf.stride;
+  cStruct.piRefY = refBuf.buf;
+  CHECK(pu.cu->imv == IMV_HPEL, "IF_IBC");
+  cStruct.imvShift = pu.cu->imv << 1;
+  cStruct.subShiftMode = 0; // used by intra pattern search function
+
+  // disable weighted prediction
+  setWpScalingDistParam(-1, REF_PIC_LIST_X, pu.cs->slice);
+
+  m_pcRdCost->getMotionCost(0);
+  m_pcRdCost->setPredictors(pcMvPred);
+  m_pcRdCost->setCostScale(0);
+
+  m_cDistParam.useMR = false;
+  m_pcRdCost->setDistParam(m_cDistParam, *cStruct.pcPatternKey, cStruct.piRefY, cStruct.iRefStride, m_lumaClpRng.bd, COMPONENT_Y, cStruct.subShiftMode);
   bool buffered = false;
   if (m_pcEncCfg->getIBCFastMethod() & IBC_FAST_METHOD_BUFFERBV)
   {
     ruiCost = MAX_UINT;
-    const int iPicWidth = pu.cs->slice->getSPS()->getPicWidthInLumaSamples();
-    const int iPicHeight = pu.cs->slice->getSPS()->getPicHeightInLumaSamples();
-    const int   cuPelX = pu.Y().x;
-    const int   cuPelY = pu.Y().y;
-
-    int          iRoiWidth = pu.lwidth();
-    int          iRoiHeight = pu.lheight();
     std::unordered_map<Mv, Distortion>& history = m_ctuRecord[pu.lumaPos()][pu.lumaSize()].bvRecord;
-    const unsigned int  lcuWidth = pu.cs->slice->getSPS()->getMaxCUWidth();
     for (std::unordered_map<Mv, Distortion>::iterator p = history.begin(); p != history.end(); p++)
     {
       const Mv& bv = p->first;
 
       int xBv = bv.hor;
       int yBv = bv.ver;
-      if (PU::isBlockVectorValid(pu, cuPelX, cuPelY, iRoiWidth, iRoiHeight, iPicWidth, iPicHeight, 0, 0, xBv, yBv, lcuWidth))
-       {
-        if (p->second < ruiCost)
+      if (searchBv(pu, cuPelX, cuPelY, iRoiWidth, iRoiHeight, iPicWidth, iPicHeight, xBv, yBv, lcuWidth))
+      {
+        buffered = true;
+        Distortion sad = m_pcRdCost->getBvCostMultiplePreds(xBv, yBv, pu.cs->sps->getAMVREnabledFlag());
+        m_cDistParam.cur.buf = cStruct.piRefY + cStruct.iRefStride * yBv + xBv;
+        sad += m_cDistParam.distFunc(m_cDistParam);
+        if (sad < ruiCost)
         {
           rcMv = bv;
-          ruiCost = p->second;
-          buffered = true;
+          ruiCost = sad;
         }
-        else if (p->second == ruiCost)
+        else if (sad == ruiCost)
         {
           // stabilise the search through the unordered list
           if (bv.hor < rcMv.getHor()
-              || (bv.hor == rcMv.getHor() && bv.ver < rcMv.getVer()))
+            || (bv.hor == rcMv.getHor() && bv.ver < rcMv.getVer()))
           {
             // update the vector.
             rcMv = bv;
@@ -1257,56 +1327,54 @@ void InterSearch::xIBCEstimation(PredictionUnit& pu, PelUnitBuf& origBuf,
         }
       }
     }
-  }
-
-  if (!buffered)
-  {
-    Mv        cMvSrchRngLT;
-    Mv        cMvSrchRngRB;
 
-    //cMvSrchRngLT.highPrec = false;
-    //cMvSrchRngRB.highPrec = false;
+    if (buffered)
+    {
+      Mv cMvPredEncOnly[IBC_NUM_CANDIDATES];
+      int nbPreds = 0;
+      PU::getIbcMVPsEncOnly(pu, cMvPredEncOnly, nbPreds);
 
-    PelUnitBuf* pBuf = &origBuf;
+      for (unsigned int cand = 0; cand < nbPreds; cand++)
+      {
+        int xPred = cMvPredEncOnly[cand].getHor();
+        int yPred = cMvPredEncOnly[cand].getVer();
 
-    //  Search key pattern initialization
-    CPelBuf  tmpPattern = pBuf->Y();
-    CPelBuf* pcPatternKey = &tmpPattern;
+        if (searchBv(pu, cuPelX, cuPelY, iRoiWidth, iRoiHeight, iPicWidth, iPicHeight, xPred, yPred, lcuWidth))
+        {
+          Distortion sad = m_pcRdCost->getBvCostMultiplePreds(xPred, yPred, pu.cs->sps->getAMVREnabledFlag());
+          m_cDistParam.cur.buf = cStruct.piRefY + cStruct.iRefStride * yPred + xPred;
+          sad += m_cDistParam.distFunc(m_cDistParam);
+          if (sad < ruiCost)
+          {
+            rcMv.set(xPred, yPred);
+            ruiCost = sad;
+          }
+          else if (sad == ruiCost)
+          {
+            // stabilise the search through the unordered list
+            if (xPred < rcMv.getHor()
+              || (xPred == rcMv.getHor() && yPred < rcMv.getVer()))
+            {
+              // update the vector.
+              rcMv.set(xPred, yPred);
+            }
+          }
 
-    if ((pu.cs->slice->getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag()))
-    {
-      const CompArea &area = pu.blocks[COMPONENT_Y];
-      CompArea    tmpArea(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size());
-      PelBuf tmpOrgLuma = m_tmpStorageLCU.getBuf(tmpArea);
-      tmpOrgLuma.copyFrom(tmpPattern);
-      tmpOrgLuma.rspSignal(m_pcReshape->getFwdLUT());
-      pcPatternKey = (CPelBuf*)&tmpOrgLuma;
+          m_ctuRecord[pu.lumaPos()][pu.lumaSize()].bvRecord[Mv(xPred, yPred)] = sad;
+        }
+      }
     }
+  }
 
-    m_lumaClpRng = pu.cs->slice->clpRng(COMPONENT_Y);
-    Picture* refPic = pu.cu->slice->getPic();
-
-    const CPelBuf refBuf = refPic->getRecoBuf(pu.blocks[COMPONENT_Y]);
-
-    IntTZSearchStruct cStruct;
-    cStruct.pcPatternKey = pcPatternKey;
-    cStruct.iRefStride = refBuf.stride;
-    cStruct.piRefY = refBuf.buf;
-    cStruct.imvShift = pu.cu->imv << 1;
-    cStruct.subShiftMode = 0; // used by intra pattern search function
+  if (!buffered)
+  {
+    Mv        cMvSrchRngLT;
+    Mv        cMvSrchRngRB;
 
-                              // assume that intra BV is integer-pel precision
+    // assume that intra BV is integer-pel precision
     xSetIntraSearchRange(pu, pu.lwidth(), pu.lheight(), localSearchRangeX, localSearchRangeY, cMvSrchRngLT, cMvSrchRngRB);
 
-    // disable weighted prediction
-    setWpScalingDistParam(-1, REF_PIC_LIST_X, pu.cs->slice);
-
-    m_pcRdCost->getMotionCost(0, pu.cu->transQuantBypass);
-    m_pcRdCost->setPredictors(pcMvPred);
-    m_pcRdCost->setCostScale(0);
-
     //  Do integer search
-
     xIntraPatternSearch(pu, cStruct, rcMv, ruiCost, &cMvSrchRngLT, &cMvSrchRngRB, pcMvPred);
   }
 }
@@ -1321,14 +1389,15 @@ void InterSearch::xSetIntraSearchRange(PredictionUnit& pu, int iRoiWidth, int iR
   const int cuPelX = pu.Y().x;
   const int cuPelY = pu.Y().y;
 
-  const int iPicWidth = pu.cs->slice->getSPS()->getPicWidthInLumaSamples();
-  const int iPicHeight = pu.cs->slice->getSPS()->getPicHeightInLumaSamples();
+  const int lcuWidth = pu.cs->slice->getSPS()->getMaxCUWidth();
+  const int ctuSizeLog2 = floorLog2(lcuWidth);
+  int numLeftCTUs = (1 << ((7 - ctuSizeLog2) << 1)) - ((ctuSizeLog2 < 7) ? 1 : 0);
 
-  srLeft = -std::min(cuPelX, localSearchRangeX);
-  srTop = -std::min(cuPelY, localSearchRangeY);
+  srLeft = -(numLeftCTUs * lcuWidth + (cuPelX % lcuWidth));
+  srTop = -(cuPelY % lcuWidth);
 
-  srRight = std::min(iPicWidth - cuPelX - iRoiWidth, localSearchRangeX);
-  srBottom = std::min(iPicHeight - cuPelY - iRoiHeight, localSearchRangeY);
+  srRight = lcuWidth - (cuPelX % lcuWidth) - iRoiWidth;
+  srBottom = lcuWidth - (cuPelY % lcuWidth) - iRoiHeight;
 
   rcMvSrchRngLT.setHor(srLeft);
   rcMvSrchRngLT.setVer(srTop);
@@ -1339,10 +1408,14 @@ void InterSearch::xSetIntraSearchRange(PredictionUnit& pu, int iRoiWidth, int iR
   rcMvSrchRngRB <<= 2;
   xClipMv(rcMvSrchRngLT, pu.cu->lumaPos(),
          pu.cu->lumaSize(),
-         sps);
+         sps
+      , *pu.cs->pps
+  );
   xClipMv(rcMvSrchRngRB, pu.cu->lumaPos(),
          pu.cu->lumaSize(),
-         sps);
+         sps
+      , *pu.cs->pps
+  );
   rcMvSrchRngLT >>= 2;
   rcMvSrchRngRB >>= 2;
 }
@@ -1370,14 +1443,23 @@ bool InterSearch::predIBCSearch(CodingUnit& cu, Partitioner& partitioner, const
     Mv    cMv, cMvPred[2];
     AMVPInfo amvpInfo;
     PU::fillIBCMvpCand(pu, amvpInfo);
-    cMvPred[0].set(amvpInfo.mvCand[0].getHor() >> (2), amvpInfo.mvCand[0].getVer() >> (2)); // store in full pel accuracy, shift before use in search
-    cMvPred[1].set(amvpInfo.mvCand[1].getHor() >> (2), amvpInfo.mvCand[1].getVer() >> (2));
+    // store in full pel accuracy, shift before use in search
+    cMvPred[0] = amvpInfo.mvCand[0];
+    cMvPred[0].changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_INT);
+    cMvPred[1] = amvpInfo.mvCand[1];
+    cMvPred[1].changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_INT);
 
     int iBvpNum = 2;
     int bvpIdxBest = 0;
     cMv.setZero();
     Distortion cost = 0;
 
+    if ( pu.cu->slice->getPicHeader()->getMaxNumIBCMergeCand() == 1 )
+    {
+      iBvpNum = 1;
+      cMvPred[1] = cMvPred[0];
+    }
+
     if (m_pcEncCfg->getIBCHashSearch())
     {
       xxIBCHashSearch(pu, cMvPred, iBvpNum, cMv, bvpIdxBest, ibcHashMap);
@@ -1426,14 +1508,14 @@ bool InterSearch::predIBCSearch(CodingUnit& cu, Partitioner& partitioner, const
       {
         mvPredQuadPel = amvpInfo4Pel.mvCand[bvpIdxTemp];// cMvPred[bvpIdxTemp];
 
-        mvPredQuadPel >>= (4);
+        mvPredQuadPel.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_4PEL);
 
         m_pcRdCost->setPredictor(mvPredQuadPel);
 
         bitsBVPQP = m_pcRdCost->getBitsOfVectorWithPredictor(cMv.getHor() >> 2, cMv.getVer() >> 2, 0);
 
       }
-      mvPredQuadPel <<= (2);
+      mvPredQuadPel.changePrecision(MV_PRECISION_4PEL, MV_PRECISION_INT);
       if (bitsBVPQP < bitsBVPBest && cMv != mvPredQuadPel)
       {
         bitsBVPBest = bitsBVPQP;
@@ -1445,8 +1527,8 @@ bool InterSearch::predIBCSearch(CodingUnit& cu, Partitioner& partitioner, const
 
     }
 
-    pu.bv = cMv;
-    cMv <<= (2);
+    pu.bv = cMv; // bv is always at integer accuracy
+    cMv.changePrecision(MV_PRECISION_INT, MV_PRECISION_INTERNAL);
     pu.mv[REF_PIC_LIST_0] = cMv; // store in fractional pel accuracy
 
     pu.mvpIdx[REF_PIC_LIST_0] = bvpIdxBest;
@@ -1463,13 +1545,8 @@ bool InterSearch::predIBCSearch(CodingUnit& cu, Partitioner& partitioner, const
     if (cu.cs->sps->getAMVREnabledFlag())
       assert(pu.cu->imv>0 || pu.mvd[REF_PIC_LIST_0] == Mv());
 
-    if (!cu.cs->sps->getAMVREnabledFlag())
-      pu.mvd[REF_PIC_LIST_0] >>= (2);
-
     pu.refIdx[REF_PIC_LIST_0] = MAX_NUM_REF;
-    pu.mv[REF_PIC_LIST_0].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);
 
-    m_ctuRecord[cu.lumaPos()][cu.lumaSize()].bvRecord[pu.bv] = cost;
   }
 
   return true;
@@ -1488,21 +1565,21 @@ void InterSearch::xxIBCHashSearch(PredictionUnit& pu, Mv* mvPred, int numMvPred,
     const unsigned int  lcuWidth = pu.cs->slice->getSPS()->getMaxCUWidth();
     const int   cuPelX = pu.Y().x;
     const int   cuPelY = pu.Y().y;
-    const int   picWidth = pu.cs->slice->getSPS()->getPicWidthInLumaSamples();
-    const int   picHeight = pu.cs->slice->getSPS()->getPicHeightInLumaSamples();
+    const int   picWidth = pu.cs->slice->getPPS()->getPicWidthInLumaSamples();
+    const int   picHeight = pu.cs->slice->getPPS()->getPicHeightInLumaSamples();
     int         roiWidth = pu.lwidth();
     int         roiHeight = pu.lheight();
 
     for (std::vector<Position>::iterator pos = candPos.begin(); pos != candPos.end(); pos++)
     {
       Position bottomRight = pos->offset(pu.Y().width - 1, pu.Y().height - 1);
-      if (pu.cs->isDecomp(*pos, pu.cs->chType) && pu.cs->isDecomp(bottomRight, pu.cs->chType))
+      if (pu.cs->isDecomp(*pos, CHANNEL_TYPE_LUMA) && pu.cs->isDecomp(bottomRight, CHANNEL_TYPE_LUMA))
       {
         Position tmp = *pos - pu.Y().pos();
         Mv candMv;
         candMv.set(tmp.x, tmp.y);
 
-        if (!PU::isBlockVectorValid(pu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, 0, 0, candMv.getHor(), candMv.getVer(), lcuWidth))
+        if (!searchBv(pu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, candMv.getHor(), candMv.getVer(), lcuWidth))
         {
           continue;
         }
@@ -1527,7 +1604,9 @@ void InterSearch::xxIBCHashSearch(PredictionUnit& pu, Mv* mvPred, int numMvPred,
             int imvShift = 2;
             int offset = 1 << (imvShift - 1);
 
-            mvPredQuadPel.set(((mvPred[n].hor + offset) >> 2), ((mvPred[n].ver + offset) >> 2));
+            int x = (mvPred[n].hor + offset - (mvPred[n].hor >= 0)) >> 2;
+            int y = (mvPred[n].ver + offset - (mvPred[n].ver >= 0)) >> 2;
+            mvPredQuadPel.set(x, y);
 
             m_pcRdCost->setPredictor(mvPredQuadPel);
 
@@ -1602,83 +1681,152 @@ void InterSearch::selectMatchesInter(const MapIterator& itBegin, int count, std:
     }
   }
 }
-
-int InterSearch::xHashInterPredME(const PredictionUnit& pu, RefPicList currRefPicList, int currRefPicIndex, Mv bestMv[5])
+void InterSearch::selectRectangleMatchesInter(const MapIterator& itBegin, int count, std::list<BlockHash>& listBlockHash, const BlockHash& currBlockHash, int width, int height, int idxNonSimple, unsigned int* &hashValues, int baseNum, int picWidth, int picHeight, bool isHorizontal, uint16_t* curHashPic)
 {
-  int width = pu.cu->lumaSize().width;
-  int height = pu.cu->lumaSize().height;
-  int xPos = pu.cu->lumaPos().x;
-  int yPos = pu.cu->lumaPos().y;
+  const int maxReturnNumber = 5;
+  int baseSize = min(width, height);
+  unsigned int crcMask = 1 << 16;
+  crcMask -= 1;
 
-  uint32_t hashValue1;
-  uint32_t hashValue2;
+  listBlockHash.clear();
+  std::list<int> listCost;
+  listCost.clear();
 
-  if (!TComHash::getBlockHashValue((pu.cs->picture->getOrigBuf()), width, height, xPos, yPos, pu.cu->slice->getSPS()->getBitDepths(), hashValue1, hashValue2))
-  {
-    return 0;
-  }
-  BlockHash currBlockHash;
-  currBlockHash.x = xPos;
-  currBlockHash.y = yPos;
-  currBlockHash.hashValue2 = hashValue2;
+  MapIterator it = itBegin;
 
-  int count = static_cast<int>(pu.cu->slice->getRefPic(currRefPicList, currRefPicIndex)->getHashMap()->count(hashValue1));
-  if (count == 0)
+  for (int i = 0; i < count; i++, it++)
   {
-    return 0;
-  }
+    if ((*it).hashValue2 != currBlockHash.hashValue2)
+    {
+      continue;
+    }
+    int xRef = (*it).x;
+    int yRef = (*it).y;
+    if (isHorizontal)
+    {
+      xRef -= idxNonSimple * baseSize;
+    }
+    else
+    {
+      yRef -= idxNonSimple * baseSize;
+    }
+    if (xRef < 0 || yRef < 0 || xRef + width >= picWidth || yRef + height >= picHeight)
+    {
+      continue;
+    }
+    //check Other baseSize hash values
+    uint16_t* refHashValue = curHashPic + yRef * picWidth + xRef;
+    bool isSame = true;
 
-  list<BlockHash> listBlockHash;
-  selectMatchesInter(pu.cu->slice->getRefPic(currRefPicList, currRefPicIndex)->getHashMap()->getFirstIterator(hashValue1), count, listBlockHash, currBlockHash);
+    for (int k = 0; k < baseNum; k++)
+    {
+      if ((*refHashValue) != (uint16_t)(hashValues[k] & crcMask))
+      {
+        isSame = false;
+        break;
+      }
+      refHashValue += (isHorizontal ? baseSize : (baseSize*picWidth));
+    }
+    if (!isSame)
+    {
+      continue;
+    }
 
-  if (listBlockHash.empty())
-  {
-    return 0;
-  }
+    int currCost = RdCost::xGetExpGolombNumberOfBits(xRef - currBlockHash.x) +
+      RdCost::xGetExpGolombNumberOfBits(yRef - currBlockHash.y);
 
-  int totalSize = 0;
-  list<BlockHash>::iterator it = listBlockHash.begin();
-  for (int i = 0; i < 5 && i < listBlockHash.size(); i++, it++)
-  {
-    bestMv[i].set((*it).x - currBlockHash.x, (*it).y - currBlockHash.y);
-    totalSize++;
-  }
+    BlockHash refBlockHash;
+    refBlockHash.hashValue2 = (*it).hashValue2;
+    refBlockHash.x = xRef;
+    refBlockHash.y = yRef;
 
-  return totalSize;
+    if (listBlockHash.size() < maxReturnNumber)
+    {
+      addToSortList(listBlockHash, listCost, currCost, refBlockHash);
+    }
+    else if (!listCost.empty() && currCost < listCost.back())
+    {
+      listCost.pop_back();
+      listBlockHash.pop_back();
+      addToSortList(listBlockHash, listCost, currCost, refBlockHash);
+    }
+  }
 }
 
-bool InterSearch::xHashInterEstimation(PredictionUnit& pu, RefPicList& bestRefPicList, int& bestRefIndex, Mv& bestMv, Mv& bestMvd, int& bestMVPIndex, bool& isPerfectMatch)
+bool InterSearch::xRectHashInterEstimation(PredictionUnit& pu, RefPicList& bestRefPicList, int& bestRefIndex, Mv& bestMv, Mv& bestMvd, int& bestMVPIndex, bool& isPerfectMatch)
 {
   int width = pu.cu->lumaSize().width;
   int height = pu.cu->lumaSize().height;
+
+  int baseSize = min(width, height);
+  bool isHorizontal = true;;
+  int baseNum = 0;
+  if (height < width)
+  {
+    isHorizontal = true;
+    baseNum = 1 << (floorLog2(width) - floorLog2(height));
+  }
+  else
+  {
+    isHorizontal = false;
+    baseNum = 1 << (floorLog2(height) - floorLog2(width));
+  }
+
   int xPos = pu.cu->lumaPos().x;
   int yPos = pu.cu->lumaPos().y;
+  const int currStride = pu.cs->picture->getOrigBuf().get(COMPONENT_Y).stride;
+  const Pel* curPel = pu.cs->picture->getOrigBuf().get(COMPONENT_Y).buf + yPos * currStride + xPos;
+  int picWidth = pu.cu->slice->getPPS()->getPicWidthInLumaSamples();
+  int picHeight = pu.cu->slice->getPPS()->getPicHeightInLumaSamples();
 
-  uint32_t hashValue1;
-  uint32_t hashValue2;
-  Distortion bestCost = UINT64_MAX;
+  int xBase = xPos;
+  int yBase = yPos;
+  const Pel* basePel = curPel;
+  int idxNonSimple = -1;
+  unsigned int* hashValue1s = new unsigned int[baseNum];
+  unsigned int* hashValue2s = new unsigned int[baseNum];
 
-  if (!TComHash::getBlockHashValue((pu.cs->picture->getOrigBuf()), width, height, xPos, yPos, pu.cu->slice->getSPS()->getBitDepths(), hashValue1, hashValue2))
+  for (int k = 0; k < baseNum; k++)
   {
-    return false;
+    if (isHorizontal)
+    {
+      xBase = xPos + k * baseSize;
+      basePel = curPel + k * baseSize;
+    }
+    else
+    {
+      yBase = yPos + k * baseSize;
+      basePel = curPel + k * baseSize * currStride;
+    }
+
+    if (idxNonSimple == -1 && !TComHash::isHorizontalPerfectLuma(basePel, currStride, baseSize, baseSize) && !TComHash::isVerticalPerfectLuma(basePel, currStride, baseSize, baseSize))
+    {
+      idxNonSimple = k;
+    }
+    TComHash::getBlockHashValue((pu.cs->picture->getOrigBuf()), baseSize, baseSize, xBase, yBase, pu.cu->slice->getSPS()->getBitDepths(), hashValue1s[k], hashValue2s[k]);
+  }
+  if (idxNonSimple == -1)
+  {
+    idxNonSimple = 0;
   }
 
+  Distortion bestCost = UINT64_MAX;
+
   BlockHash currBlockHash;
-  currBlockHash.x = xPos;
+  currBlockHash.x = xPos;//still use the first base block location
   currBlockHash.y = yPos;
-  currBlockHash.hashValue2 = hashValue2;
+
+  currBlockHash.hashValue2 = hashValue2s[idxNonSimple];
 
   m_pcRdCost->setDistParam(m_cDistParam, pu.cs->getOrgBuf(pu).Y(), 0, 0, m_lumaClpRng.bd, COMPONENT_Y, 0, 1, false);
 
   int imvBest = 0;
-
   int numPredDir = pu.cu->slice->isInterP() ? 1 : 2;
   for (int refList = 0; refList < numPredDir; refList++)
   {
     RefPicList eRefPicList = (refList == 0) ? REF_PIC_LIST_0 : REF_PIC_LIST_1;
     int refPicNumber = pu.cu->slice->getNumRefIdx(eRefPicList);
 
-
     for (int refIdx = 0; refIdx < refPicNumber; refIdx++)
     {
       int bitsOnRefIdx = 1;
@@ -1690,47 +1838,63 @@ bool InterSearch::xHashInterEstimation(PredictionUnit& pu, RefPicList& bestRefPi
           bitsOnRefIdx--;
         }
       }
+      m_numHashMVStoreds[eRefPicList][refIdx] = 0;
+
+      const std::pair<int, int>& scaleRatio = pu.cu->slice->getScalingRatio( eRefPicList, refIdx );
+      if( scaleRatio != SCALE_1X )
+      {
+        continue;
+      }
+
+      CHECK( pu.cu->slice->getRefPic( eRefPicList, refIdx )->getHashMap() == nullptr, "Hash table is not initialized" );
 
       if (refList == 0 || pu.cu->slice->getList1IdxToList0Idx(refIdx) < 0)
       {
-        int count = static_cast<int>(pu.cu->slice->getRefPic(eRefPicList, refIdx)->getHashMap()->count(hashValue1));
+        int count = static_cast<int>(pu.cu->slice->getRefPic(eRefPicList, refIdx)->getHashMap()->count(hashValue1s[idxNonSimple]));
         if (count == 0)
         {
           continue;
         }
 
         list<BlockHash> listBlockHash;
-        selectMatchesInter(pu.cu->slice->getRefPic(eRefPicList, refIdx)->getHashMap()->getFirstIterator(hashValue1), count, listBlockHash, currBlockHash);
+        selectRectangleMatchesInter(pu.cu->slice->getRefPic(eRefPicList, refIdx)->getHashMap()->getFirstIterator(hashValue1s[idxNonSimple]), count, listBlockHash, currBlockHash, width, height, idxNonSimple, hashValue2s, baseNum, picWidth, picHeight, isHorizontal, pu.cu->slice->getRefPic(eRefPicList, refIdx)->getHashMap()->getHashPic(baseSize));
 
+        m_numHashMVStoreds[eRefPicList][refIdx] = int(listBlockHash.size());
         if (listBlockHash.empty())
         {
           continue;
         }
         AMVPInfo currAMVPInfoPel;
         AMVPInfo currAMVPInfo4Pel;
+        AMVPInfo currAMVPInfoQPel;
         pu.cu->imv = 2;
         PU::fillMvpCand(pu, eRefPicList, refIdx, currAMVPInfo4Pel);
         pu.cu->imv = 1;
         PU::fillMvpCand(pu, eRefPicList, refIdx, currAMVPInfoPel);
-        AMVPInfo currAMVPInfoQPel;
         pu.cu->imv = 0;
         PU::fillMvpCand(pu, eRefPicList, refIdx, currAMVPInfoQPel);
-        CHECK(currAMVPInfoPel.numCand <= 1, "Wrong")
-
-        const Pel* refBufStart = pu.cu->slice->getRefPic(eRefPicList, refIdx)->getRecoBuf().get(COMPONENT_Y).buf;
-        const int refStride = pu.cu->slice->getRefPic(eRefPicList, refIdx)->getRecoBuf().get(COMPONENT_Y).stride;
+        for (int mvpIdxTemp = 0; mvpIdxTemp < 2; mvpIdxTemp++)
+        {
+          currAMVPInfoQPel.mvCand[mvpIdxTemp].changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
+          currAMVPInfoPel.mvCand[mvpIdxTemp].changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
+          currAMVPInfo4Pel.mvCand[mvpIdxTemp].changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
+        }
 
+        const Pel* refBufStart = pu.cu->slice->getRefPic(eRefPicList, refIdx)->getRecoBuf(pu.cs->sps->getWrapAroundEnabledFlag()).get(COMPONENT_Y).buf;
+        const int refStride = pu.cu->slice->getRefPic(eRefPicList, refIdx)->getRecoBuf(pu.cs->sps->getWrapAroundEnabledFlag()).get(COMPONENT_Y).stride;
         m_cDistParam.cur.stride = refStride;
 
-        m_pcRdCost->selectMotionLambda(pu.cu->transQuantBypass);
+        m_pcRdCost->selectMotionLambda( );
         m_pcRdCost->setCostScale(0);
 
         list<BlockHash>::iterator it;
+        int countMV = 0;
         for (it = listBlockHash.begin(); it != listBlockHash.end(); ++it)
         {
           int curMVPIdx = 0;
           unsigned int curMVPbits = MAX_UINT;
           Mv cMv((*it).x - currBlockHash.x, (*it).y - currBlockHash.y);
+          m_hashMVStoreds[eRefPicList][refIdx][countMV++] = cMv;
           cMv.changePrecision(MV_PRECISION_INT, MV_PRECISION_QUARTER);
 
           for (int mvpIdxTemp = 0; mvpIdxTemp < 2; mvpIdxTemp++)
@@ -1775,7 +1939,6 @@ bool InterSearch::xHashInterEstimation(PredictionUnit& pu, RefPicList& bestRefPi
               }
             }
           }
-
           curMVPbits += bitsOnRefIdx;
 
           m_cDistParam.cur.buf = refBufStart + (*it).y*refStride + (*it).x;
@@ -1815,6 +1978,8 @@ bool InterSearch::xHashInterEstimation(PredictionUnit& pu, RefPicList& bestRefPi
       }
     }
   }
+  delete[] hashValue1s;
+  delete[] hashValue2s;
   pu.cu->imv = imvBest;
   if (bestMvd == Mv(0, 0))
   {
@@ -1824,74 +1989,272 @@ bool InterSearch::xHashInterEstimation(PredictionUnit& pu, RefPicList& bestRefPi
   return (bestCost < MAX_INT);
 }
 
-bool InterSearch::predInterHashSearch(CodingUnit& cu, Partitioner& partitioner, bool& isPerfectMatch)
+bool InterSearch::xHashInterEstimation(PredictionUnit& pu, RefPicList& bestRefPicList, int& bestRefIndex, Mv& bestMv, Mv& bestMvd, int& bestMVPIndex, bool& isPerfectMatch)
 {
-  Mv       bestMv, bestMvd;
-  RefPicList   bestRefPicList;
-  int          bestRefIndex;
-  int          bestMVPIndex;
-
-  auto &pu = *cu.firstPU;
-
-  Mv cMvZero;
-  pu.mv[REF_PIC_LIST_0] = Mv();
-  pu.mv[REF_PIC_LIST_1] = Mv();
-  pu.mvd[REF_PIC_LIST_0] = cMvZero;
-  pu.mvd[REF_PIC_LIST_1] = cMvZero;
-  pu.refIdx[REF_PIC_LIST_0] = NOT_VALID;
-  pu.refIdx[REF_PIC_LIST_1] = NOT_VALID;
-  pu.mvpIdx[REF_PIC_LIST_0] = NOT_VALID;
-  pu.mvpIdx[REF_PIC_LIST_1] = NOT_VALID;
-  pu.mvpNum[REF_PIC_LIST_0] = NOT_VALID;
-  pu.mvpNum[REF_PIC_LIST_1] = NOT_VALID;
-
-  if (xHashInterEstimation(pu, bestRefPicList, bestRefIndex, bestMv, bestMvd, bestMVPIndex, isPerfectMatch))
+  int width = pu.cu->lumaSize().width;
+  int height = pu.cu->lumaSize().height;
+  if (width != height)
   {
-    pu.interDir = static_cast<int>(bestRefPicList) + 1;
-    pu.mv[bestRefPicList] = bestMv;
-    pu.mv[bestRefPicList].hor <<= MV_FRACTIONAL_BITS_DIFF;
-    pu.mv[bestRefPicList].ver <<= MV_FRACTIONAL_BITS_DIFF;
-
-    pu.mvd[bestRefPicList] = bestMvd;
-    pu.refIdx[bestRefPicList] = bestRefIndex;
-    pu.mvpIdx[bestRefPicList] = bestMVPIndex;
+    return xRectHashInterEstimation(pu, bestRefPicList, bestRefIndex, bestMv, bestMvd, bestMVPIndex, isPerfectMatch);
+  }
+  int xPos = pu.cu->lumaPos().x;
+  int yPos = pu.cu->lumaPos().y;
 
-    pu.mvpNum[bestRefPicList] = 2;
+  uint32_t hashValue1;
+  uint32_t hashValue2;
+  Distortion bestCost = UINT64_MAX;
 
-    PU::spanMotionInfo(pu);
-    PelUnitBuf predBuf = pu.cs->getPredBuf(pu);
-    motionCompensation(pu, predBuf, REF_PIC_LIST_X);
-    return true;
-  }
-  else
+  if (!TComHash::getBlockHashValue((pu.cs->picture->getOrigBuf()), width, height, xPos, yPos, pu.cu->slice->getSPS()->getBitDepths(), hashValue1, hashValue2))
   {
     return false;
   }
 
-  return true;
-}
-
-
-//! search of the best candidate for inter prediction
-void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
-{
-  CodingStructure& cs = *cu.cs;
+  BlockHash currBlockHash;
+  currBlockHash.x = xPos;
+  currBlockHash.y = yPos;
+  currBlockHash.hashValue2 = hashValue2;
 
-  AMVPInfo     amvp[2];
-  Mv           cMvSrchRngLT;
-  Mv           cMvSrchRngRB;
+  m_pcRdCost->setDistParam(m_cDistParam, pu.cs->getOrgBuf(pu).Y(), 0, 0, m_lumaClpRng.bd, COMPONENT_Y, 0, 1, false);
 
-  Mv           cMvZero;
+  int imvBest = 0;
 
-  Mv           cMv[2];
-  Mv           cMvBi[2];
-  Mv           cMvTemp[2][33];
-  Mv           cMvHevcTemp[2][33];
-  int          iNumPredDir = cs.slice->isInterP() ? 1 : 2;
+  int numPredDir = pu.cu->slice->isInterP() ? 1 : 2;
+  for (int refList = 0; refList < numPredDir; refList++)
+  {
+    RefPicList eRefPicList = (refList == 0) ? REF_PIC_LIST_0 : REF_PIC_LIST_1;
+    int refPicNumber = pu.cu->slice->getNumRefIdx(eRefPicList);
 
-  Mv           cMvPred[2][33];
 
-  Mv           cMvPredBi[2][33];
+    for (int refIdx = 0; refIdx < refPicNumber; refIdx++)
+    {
+      int bitsOnRefIdx = 1;
+      if (refPicNumber > 1)
+      {
+        bitsOnRefIdx += refIdx + 1;
+        if (refIdx == refPicNumber - 1)
+        {
+          bitsOnRefIdx--;
+        }
+      }
+      m_numHashMVStoreds[eRefPicList][refIdx] = 0;
+
+      const std::pair<int, int>& scaleRatio = pu.cu->slice->getScalingRatio( eRefPicList, refIdx );
+      if( scaleRatio != SCALE_1X )
+      {
+        continue;
+      }
+
+      CHECK( pu.cu->slice->getRefPic( eRefPicList, refIdx )->getHashMap() == nullptr, "Hash table is not initialized" );
+
+      if (refList == 0 || pu.cu->slice->getList1IdxToList0Idx(refIdx) < 0)
+      {
+        int count = static_cast<int>(pu.cu->slice->getRefPic(eRefPicList, refIdx)->getHashMap()->count(hashValue1));
+        if (count == 0)
+        {
+          continue;
+        }
+
+        list<BlockHash> listBlockHash;
+        selectMatchesInter(pu.cu->slice->getRefPic(eRefPicList, refIdx)->getHashMap()->getFirstIterator(hashValue1), count, listBlockHash, currBlockHash);
+        m_numHashMVStoreds[eRefPicList][refIdx] = (int)listBlockHash.size();
+        if (listBlockHash.empty())
+        {
+          continue;
+        }
+        AMVPInfo currAMVPInfoPel;
+        AMVPInfo currAMVPInfo4Pel;
+        pu.cu->imv = 2;
+        PU::fillMvpCand(pu, eRefPicList, refIdx, currAMVPInfo4Pel);
+        pu.cu->imv = 1;
+        PU::fillMvpCand(pu, eRefPicList, refIdx, currAMVPInfoPel);
+        AMVPInfo currAMVPInfoQPel;
+        pu.cu->imv = 0;
+        PU::fillMvpCand(pu, eRefPicList, refIdx, currAMVPInfoQPel);
+        CHECK(currAMVPInfoPel.numCand <= 1, "Wrong")
+        for (int mvpIdxTemp = 0; mvpIdxTemp < 2; mvpIdxTemp++)
+        {
+          currAMVPInfoQPel.mvCand[mvpIdxTemp].changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
+          currAMVPInfoPel.mvCand[mvpIdxTemp].changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
+          currAMVPInfo4Pel.mvCand[mvpIdxTemp].changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
+        }
+
+        const Pel* refBufStart = pu.cu->slice->getRefPic(eRefPicList, refIdx)->getRecoBuf(pu.cs->sps->getWrapAroundEnabledFlag()).get(COMPONENT_Y).buf;
+        const int refStride = pu.cu->slice->getRefPic(eRefPicList, refIdx)->getRecoBuf(pu.cs->sps->getWrapAroundEnabledFlag()).get(COMPONENT_Y).stride;
+
+        m_cDistParam.cur.stride = refStride;
+
+        m_pcRdCost->selectMotionLambda( );
+        m_pcRdCost->setCostScale(0);
+
+        list<BlockHash>::iterator it;
+        int countMV = 0;
+        for (it = listBlockHash.begin(); it != listBlockHash.end(); ++it)
+        {
+          int curMVPIdx = 0;
+          unsigned int curMVPbits = MAX_UINT;
+          Mv cMv((*it).x - currBlockHash.x, (*it).y - currBlockHash.y);
+          m_hashMVStoreds[eRefPicList][refIdx][countMV++] = cMv;
+          cMv.changePrecision(MV_PRECISION_INT, MV_PRECISION_QUARTER);
+
+          for (int mvpIdxTemp = 0; mvpIdxTemp < 2; mvpIdxTemp++)
+          {
+            Mv cMvPredPel = currAMVPInfoQPel.mvCand[mvpIdxTemp];
+            m_pcRdCost->setPredictor(cMvPredPel);
+
+            unsigned int tempMVPbits = m_pcRdCost->getBitsOfVectorWithPredictor(cMv.getHor(), cMv.getVer(), 0);
+
+            if (tempMVPbits < curMVPbits)
+            {
+              curMVPbits = tempMVPbits;
+              curMVPIdx = mvpIdxTemp;
+              pu.cu->imv = 0;
+            }
+
+            if (pu.cu->slice->getSPS()->getAMVREnabledFlag())
+            {
+              unsigned int bitsMVP1Pel = MAX_UINT;
+              Mv mvPred1Pel = currAMVPInfoPel.mvCand[mvpIdxTemp];
+              m_pcRdCost->setPredictor(mvPred1Pel);
+              bitsMVP1Pel = m_pcRdCost->getBitsOfVectorWithPredictor(cMv.getHor(), cMv.getVer(), 2);
+              if (bitsMVP1Pel < curMVPbits)
+              {
+                curMVPbits = bitsMVP1Pel;
+                curMVPIdx = mvpIdxTemp;
+                pu.cu->imv = 1;
+              }
+
+              if ((cMv.getHor() % 16 == 0) && (cMv.getVer() % 16 == 0))
+              {
+                unsigned int bitsMVP4Pel = MAX_UINT;
+                Mv mvPred4Pel = currAMVPInfo4Pel.mvCand[mvpIdxTemp];
+                m_pcRdCost->setPredictor(mvPred4Pel);
+                bitsMVP4Pel = m_pcRdCost->getBitsOfVectorWithPredictor(cMv.getHor(), cMv.getVer(), 4);
+                if (bitsMVP4Pel < curMVPbits)
+                {
+                  curMVPbits = bitsMVP4Pel;
+                  curMVPIdx = mvpIdxTemp;
+                  pu.cu->imv = 2;
+                }
+              }
+            }
+          }
+
+          curMVPbits += bitsOnRefIdx;
+
+          m_cDistParam.cur.buf = refBufStart + (*it).y*refStride + (*it).x;
+          Distortion currSad = m_cDistParam.distFunc(m_cDistParam);
+          Distortion currCost = currSad + m_pcRdCost->getCost(curMVPbits);
+
+          if (!isPerfectMatch)
+          {
+            if (pu.cu->slice->getRefPic(eRefPicList, refIdx)->slices[0]->getSliceQp() <= pu.cu->slice->getSliceQp())
+            {
+              isPerfectMatch = true;
+            }
+          }
+
+          if (currCost < bestCost)
+          {
+            bestCost = currCost;
+            bestRefPicList = eRefPicList;
+            bestRefIndex = refIdx;
+            bestMv = cMv;
+            bestMVPIndex = curMVPIdx;
+            imvBest = pu.cu->imv;
+            if (pu.cu->imv == 2)
+            {
+              bestMvd = cMv - currAMVPInfo4Pel.mvCand[curMVPIdx];
+            }
+            else if (pu.cu->imv == 1)
+            {
+              bestMvd = cMv - currAMVPInfoPel.mvCand[curMVPIdx];
+            }
+            else
+            {
+              bestMvd = cMv - currAMVPInfoQPel.mvCand[curMVPIdx];
+            }
+          }
+        }
+      }
+    }
+  }
+  pu.cu->imv = imvBest;
+  if (bestMvd == Mv(0, 0))
+  {
+    pu.cu->imv = 0;
+    return false;
+  }
+  return (bestCost < MAX_INT);
+}
+
+bool InterSearch::predInterHashSearch(CodingUnit& cu, Partitioner& partitioner, bool& isPerfectMatch)
+{
+  Mv       bestMv, bestMvd;
+  RefPicList   bestRefPicList;
+  int          bestRefIndex;
+  int          bestMVPIndex;
+
+  auto &pu = *cu.firstPU;
+
+  Mv cMvZero;
+  pu.mv[REF_PIC_LIST_0] = Mv();
+  pu.mv[REF_PIC_LIST_1] = Mv();
+  pu.mvd[REF_PIC_LIST_0] = cMvZero;
+  pu.mvd[REF_PIC_LIST_1] = cMvZero;
+  pu.refIdx[REF_PIC_LIST_0] = NOT_VALID;
+  pu.refIdx[REF_PIC_LIST_1] = NOT_VALID;
+  pu.mvpIdx[REF_PIC_LIST_0] = NOT_VALID;
+  pu.mvpIdx[REF_PIC_LIST_1] = NOT_VALID;
+  pu.mvpNum[REF_PIC_LIST_0] = NOT_VALID;
+  pu.mvpNum[REF_PIC_LIST_1] = NOT_VALID;
+
+  if (xHashInterEstimation(pu, bestRefPicList, bestRefIndex, bestMv, bestMvd, bestMVPIndex, isPerfectMatch))
+  {
+    pu.interDir = static_cast<int>(bestRefPicList) + 1;
+    pu.mv[bestRefPicList] = bestMv;
+    pu.mv[bestRefPicList].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);
+
+    pu.mvd[bestRefPicList] = bestMvd;
+    pu.mvd[bestRefPicList].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);
+    pu.refIdx[bestRefPicList] = bestRefIndex;
+    pu.mvpIdx[bestRefPicList] = bestMVPIndex;
+
+    pu.mvpNum[bestRefPicList] = 2;
+
+    PU::spanMotionInfo(pu);
+    PelUnitBuf predBuf = pu.cs->getPredBuf(pu);
+    motionCompensation(pu, predBuf, REF_PIC_LIST_X);
+    return true;
+  }
+  else
+  {
+    return false;
+  }
+
+  return true;
+}
+
+
+//! search of the best candidate for inter prediction
+void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
+{
+  CodingStructure& cs = *cu.cs;
+
+  AMVPInfo     amvp[2];
+  Mv           cMvSrchRngLT;
+  Mv           cMvSrchRngRB;
+
+  Mv           cMvZero;
+
+  Mv           cMv[2];
+  Mv           cMvBi[2];
+  Mv           cMvTemp[2][33];
+  Mv           cMvHevcTemp[2][33];
+  int          iNumPredDir = cs.slice->isInterP() ? 1 : 2;
+
+  Mv           cMvPred[2][33];
+
+  Mv           cMvPredBi[2][33];
   int          aaiMvpIdxBi[2][33];
 
   int          aaiMvpIdx[2][33];
@@ -1914,8 +2277,8 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
   int          bestBiPMvpL1    = 0;
   Distortion   biPDistTemp     = std::numeric_limits<Distortion>::max();
 
-  uint8_t      gbiIdx          = (cu.cs->slice->isInterB() ? cu.GBiIdx : GBI_DEFAULT);
-  bool         enforceGBiPred = false;
+  uint8_t      bcwIdx          = (cu.cs->slice->isInterB() ? cu.BcwIdx : BCW_DEFAULT);
+  bool         enforceBcwPred = false;
   MergeCtx     mergeCtx;
 
   // Loop over Prediction Units
@@ -1925,9 +2288,9 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
   WPScalingParam *wp0;
   WPScalingParam *wp1;
   int tryBipred = 0;
-  bool checkAffine    = pu.cu->imv == 0 || pu.cu->slice->getSPS()->getAffineAmvrEnabledFlag();
-  bool checkNonAffine = pu.cu->imv == 0 || ( pu.cu->slice->getSPS()->getAMVREnabledFlag() &&
-                                             pu.cu->imv <= (pu.cu->slice->getSPS()->getAMVREnabledFlag() ? IMV_4PEL : 0));
+  bool checkAffine    = (pu.cu->imv == 0 || pu.cu->slice->getSPS()->getAffineAmvrEnabledFlag()) && pu.cu->imv != IMV_HPEL;
+  bool checkNonAffine = pu.cu->imv == 0 || pu.cu->imv == IMV_HPEL || (pu.cu->slice->getSPS()->getAMVREnabledFlag() &&
+                                            pu.cu->imv <= (pu.cu->slice->getSPS()->getAMVREnabledFlag() ? IMV_4PEL : 0));
   CodingUnit *bestCU  = pu.cu->cs->bestCS != nullptr ? pu.cu->cs->bestCS->getCU( CHANNEL_TYPE_LUMA ) : nullptr;
   bool trySmvd        = ( bestCU != nullptr && pu.cu->imv == 2 && checkAffine ) ? ( !bestCU->firstPU->mergeFlag && !bestCU->affine ) : true;
   if ( pu.cu->imv && bestCU != nullptr && checkAffine )
@@ -1941,6 +2304,11 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
   }
 
   {
+    if (pu.cu->cs->bestParent != nullptr && pu.cu->cs->bestParent->getCU(CHANNEL_TYPE_LUMA) != nullptr && pu.cu->cs->bestParent->getCU(CHANNEL_TYPE_LUMA)->affine == false)
+    {
+      m_skipPROF = true;
+    }
+    m_encOnly = true;
     // motion estimation only evaluates luma component
     m_maxCompIDToPred = MAX_NUM_COMPONENT;
 //    m_maxCompIDToPred = COMPONENT_Y;
@@ -1980,9 +2348,9 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
 
     xGetBlkBits( cs.slice->isInterP(), puIdx, uiLastMode, uiMbBits );
 
-    m_pcRdCost->selectMotionLambda( cu.transQuantBypass );
+    m_pcRdCost->selectMotionLambda( );
 
-    unsigned imvShift = pu.cu->imv << 1;
+    unsigned imvShift = pu.cu->imv == IMV_HPEL ? 1 : (pu.cu->imv << 1);
     if ( checkNonAffine )
     {
       //  Uni-directional prediction
@@ -2005,7 +2373,7 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
           aaiMvpIdx[iRefList][iRefIdxTemp] = pu.mvpIdx[eRefPicList];
           aaiMvpNum[iRefList][iRefIdxTemp] = pu.mvpNum[eRefPicList];
 
-          if(cs.slice->getMvdL1ZeroFlag() && iRefList==1 && biPDistTemp < bestBiPDist)
+          if(cs.picHeader->getMvdL1ZeroFlag() && iRefList==1 && biPDistTemp < bestBiPDist)
           {
             bestBiPDist = biPDistTemp;
             bestBiPMvpL1 = aaiMvpIdx[iRefList][iRefIdxTemp];
@@ -2024,7 +2392,7 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
               uiCostTemp -= m_pcRdCost->getCost( uiBitsTempL0[cs.slice->getList1IdxToList0Idx( iRefIdxTemp )] );
               /*correct the bit-rate part of the current ref*/
               m_pcRdCost->setPredictor  ( cMvPred[iRefList][iRefIdxTemp] );
-              uiBitsTemp += m_pcRdCost->getBitsOfVectorWithPredictor( cMvTemp[1][iRefIdxTemp].getHor(), cMvTemp[1][iRefIdxTemp].getVer(), imvShift );
+              uiBitsTemp += m_pcRdCost->getBitsOfVectorWithPredictor( cMvTemp[1][iRefIdxTemp].getHor(), cMvTemp[1][iRefIdxTemp].getVer(), imvShift + MV_FRACTIONAL_BITS_DIFF );
               /*calculate the correct cost*/
               uiCostTemp += m_pcRdCost->getCost( uiBitsTemp );
             }
@@ -2037,7 +2405,7 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
           {
             xMotionEstimation( pu, origBuf, eRefPicList, cMvPred[iRefList][iRefIdxTemp], iRefIdxTemp, cMvTemp[iRefList][iRefIdxTemp], aaiMvpIdx[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp, amvp[eRefPicList] );
           }
-          if( cu.cs->sps->getUseGBi() && cu.GBiIdx == GBI_DEFAULT && cu.cs->slice->isInterB() )
+          if( cu.cs->sps->getUseBcw() && cu.BcwIdx == BCW_DEFAULT && cu.cs->slice->isInterB() )
           {
             const bool checkIdentical = true;
             m_uniMotions.setReadMode(checkIdentical, (uint32_t)iRefList, (uint32_t)iRefIdxTemp);
@@ -2073,20 +2441,23 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
         }
       }
 
-      if (cu.Y().width > 8 && cu.Y().height > 8 && cu.slice->getSPS()->getUseAffine()
-        && checkAffine
-        && (gbiIdx == GBI_DEFAULT || m_affineModeSelected || !m_pcEncCfg->getUseGBiFast())
-        )
+      ::memcpy(cMvHevcTemp, cMvTemp, sizeof(cMvTemp));
+      if (cu.imv == 0 && (!cu.slice->getSPS()->getUseBcw() || bcwIdx == BCW_DEFAULT))
       {
-        ::memcpy( cMvHevcTemp, cMvTemp, sizeof( cMvTemp ) );
+        insertUniMvCands(pu.Y(), cMvTemp);
+
+        unsigned idx1, idx2, idx3, idx4;
+        getAreaIdx(cu.Y(), *cu.slice->getPPS()->pcv, idx1, idx2, idx3, idx4);
+        ::memcpy(&(g_reusedUniMVs[idx1][idx2][idx3][idx4][0][0]), cMvTemp, 2 * 33 * sizeof(Mv));
+        g_isReusedUniMVsFilled[idx1][idx2][idx3][idx4] = true;
       }
       //  Bi-predictive Motion estimation
       if( ( cs.slice->isInterB() ) && ( PU::isBipredRestriction( pu ) == false )
-        && (cu.slice->getCheckLDC() || gbiIdx == GBI_DEFAULT || !m_affineModeSelected || !m_pcEncCfg->getUseGBiFast())
+        && (cu.slice->getCheckLDC() || bcwIdx == BCW_DEFAULT || !m_affineModeSelected || !m_pcEncCfg->getUseBcwFast())
         )
       {
         bool doBiPred = true;
-		tryBipred = 1;
+        tryBipred = 1;
         cMvBi[0] = cMv[0];
         cMvBi[1] = cMv[1];
         iRefIdxBi[0] = iRefIdx[0];
@@ -2097,7 +2468,7 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
 
         uint32_t uiMotBits[2];
 
-        if(cs.slice->getMvdL1ZeroFlag())
+        if(cs.picHeader->getMvdL1ZeroFlag())
         {
           xCopyAMVPInfo(&aacAMVPInfo[1][bestBiPRefIdxL1], &amvp[REF_PIC_LIST_1]);
           aaiMvpIdxBi[1][bestBiPRefIdxL1] = bestBiPMvpL1;
@@ -2106,23 +2477,22 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
           cMvBi    [1] = cMvPredBi[1][bestBiPRefIdxL1];
           iRefIdxBi[1] = bestBiPRefIdxL1;
           pu.mv    [REF_PIC_LIST_1] = cMvBi[1];
-          pu.mv[REF_PIC_LIST_1].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);
           pu.refIdx[REF_PIC_LIST_1] = iRefIdxBi[1];
           pu.mvpIdx[REF_PIC_LIST_1] = bestBiPMvpL1;
 
-            if( m_pcEncCfg->getMCTSEncConstraint() )
+          if( m_pcEncCfg->getMCTSEncConstraint() )
+          {
+            Mv restrictedMv = pu.mv[REF_PIC_LIST_1];
+            Area curTileAreaRestricted;
+            curTileAreaRestricted = pu.cs->picture->mctsInfo.getTileAreaSubPelRestricted( pu );
+            MCTSHelper::clipMvToArea( restrictedMv, pu.cu->Y(), curTileAreaRestricted, *pu.cs->sps );
+            // If sub-pel filter samples are not inside of allowed area
+            if( restrictedMv != pu.mv[REF_PIC_LIST_1] )
             {
-              Mv restrictedMv = pu.mv[REF_PIC_LIST_1];
-              Area curTileAreaRestricted;
-              curTileAreaRestricted = pu.cs->picture->mctsInfo.getTileAreaSubPelRestricted( pu );
-              MCTSHelper::clipMvToArea( restrictedMv, pu.cu->Y(), curTileAreaRestricted, *pu.cs->sps );
-              // If sub-pel filter samples are not inside of allowed area
-              if( restrictedMv != pu.mv[REF_PIC_LIST_1] )
-              {
-                uiCostBi = std::numeric_limits<Distortion>::max();
-                doBiPred = false;
-              }
+              uiCostBi = std::numeric_limits<Distortion>::max();
+              doBiPred = false;
             }
+          }
           PelUnitBuf predBufTmp = m_tmpPredStorage[REF_PIC_LIST_1].getBuf( UnitAreaRelative(cu, pu) );
           motionCompensation( pu, predBufTmp, REF_PIC_LIST_1 );
 
@@ -2157,12 +2527,12 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
         int iNumIter = 4;
 
         // fast encoder setting: only one iteration
-        if ( m_pcEncCfg->getFastInterSearchMode()==FASTINTERSEARCH_MODE1 || m_pcEncCfg->getFastInterSearchMode()==FASTINTERSEARCH_MODE2 || cs.slice->getMvdL1ZeroFlag() )
+        if ( m_pcEncCfg->getFastInterSearchMode()==FASTINTERSEARCH_MODE1 || m_pcEncCfg->getFastInterSearchMode()==FASTINTERSEARCH_MODE2 || cs.picHeader->getMvdL1ZeroFlag() )
         {
           iNumIter = 1;
         }
 
-        enforceGBiPred = (gbiIdx != GBI_DEFAULT);
+        enforceBcwPred = (bcwIdx != BCW_DEFAULT);
         for ( int iIter = 0; iIter < iNumIter; iIter++ )
         {
           int         iRefList    = iIter % 2;
@@ -2177,19 +2547,18 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
             {
               iRefList = 0;
             }
-            if( gbiIdx != GBI_DEFAULT )
+            if( bcwIdx != BCW_DEFAULT )
             {
-              iRefList = ( abs( getGbiWeight(gbiIdx, REF_PIC_LIST_0 ) ) > abs( getGbiWeight(gbiIdx, REF_PIC_LIST_1 ) ) ? 1 : 0 );
+              iRefList = ( abs( getBcwWeight(bcwIdx, REF_PIC_LIST_0 ) ) > abs( getBcwWeight(bcwIdx, REF_PIC_LIST_1 ) ) ? 1 : 0 );
             }
           }
           else if ( iIter == 0 )
           {
             iRefList = 0;
           }
-          if ( iIter == 0 && !cs.slice->getMvdL1ZeroFlag())
+          if ( iIter == 0 && !cs.picHeader->getMvdL1ZeroFlag())
           {
             pu.mv    [1 - iRefList] = cMv    [1 - iRefList];
-            pu.mv[1 - iRefList].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);
             pu.refIdx[1 - iRefList] = iRefIdx[1 - iRefList];
 
             PelUnitBuf predBufTmp = m_tmpPredStorage[1 - iRefList].getBuf( UnitAreaRelative(cu, pu) );
@@ -2198,7 +2567,7 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
 
           RefPicList  eRefPicList = ( iRefList ? REF_PIC_LIST_1 : REF_PIC_LIST_0 );
 
-          if(cs.slice->getMvdL1ZeroFlag())
+          if(cs.picHeader->getMvdL1ZeroFlag())
           {
             iRefList = 0;
             eRefPicList = REF_PIC_LIST_0;
@@ -2210,14 +2579,14 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
           iRefEnd   = cs.slice->getNumRefIdx(eRefPicList)-1;
           for (int iRefIdxTemp = iRefStart; iRefIdxTemp <= iRefEnd; iRefIdxTemp++)
           {
-            if( m_pcEncCfg->getUseGBiFast() && (gbiIdx != GBI_DEFAULT)
+            if( m_pcEncCfg->getUseBcwFast() && (bcwIdx != BCW_DEFAULT)
               && (pu.cu->slice->getRefPic(eRefPicList, iRefIdxTemp)->getPOC() == pu.cu->slice->getRefPic(RefPicList(1 - iRefList), pu.refIdx[1 - iRefList])->getPOC())
               && (!pu.cu->imv && pu.cu->slice->getTLayer()>1))
             {
               continue;
             }
             uiBitsTemp = uiMbBits[2] + uiMotBits[1-iRefList];
-            uiBitsTemp += ((cs.slice->getSPS()->getUseGBi() == true) ? getWeightIdxBits(gbiIdx) : 0);
+            uiBitsTemp += ((cs.slice->getSPS()->getUseBcw() == true) ? getWeightIdxBits(bcwIdx) : 0);
             if ( cs.slice->getNumRefIdx(eRefPicList) > 1 )
             {
               uiBitsTemp += iRefIdxTemp+1;
@@ -2244,14 +2613,13 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
 
               uiCostBi            = uiCostTemp;
               uiMotBits[iRefList] = uiBitsTemp - uiMbBits[2] - uiMotBits[1-iRefList];
-              uiMotBits[iRefList] -= ((cs.slice->getSPS()->getUseGBi() == true) ? getWeightIdxBits(gbiIdx) : 0);
+              uiMotBits[iRefList] -= ((cs.slice->getSPS()->getUseBcw() == true) ? getWeightIdxBits(bcwIdx) : 0);
               uiBits[2]           = uiBitsTemp;
 
               if(iNumIter!=1)
               {
                 //  Set motion
                 pu.mv    [eRefPicList] = cMvBi    [iRefList];
-                pu.mv[eRefPicList].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);
                 pu.refIdx[eRefPicList] = iRefIdxBi[iRefList];
 
                 PelUnitBuf predBufTmp = m_tmpPredStorage[iRefList].getBuf( UnitAreaRelative(cu, pu) );
@@ -2262,14 +2630,14 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
 
           if ( !bChanged )
           {
-            if ((uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1]) || enforceGBiPred)
+            if ((uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1]) || enforceBcwPred)
             {
               xCopyAMVPInfo(&aacAMVPInfo[0][iRefIdxBi[0]], &amvp[REF_PIC_LIST_0]);
-              xCheckBestMVP( REF_PIC_LIST_0, cMvBi[0], cMvPredBi[0][iRefIdxBi[0]], aaiMvpIdxBi[0][iRefIdxBi[0]], amvp[eRefPicList], uiBits[2], uiCostBi, pu.cu->imv);
-              if(!cs.slice->getMvdL1ZeroFlag())
+              xCheckBestMVP( REF_PIC_LIST_0, cMvBi[0], cMvPredBi[0][iRefIdxBi[0]], aaiMvpIdxBi[0][iRefIdxBi[0]], amvp[REF_PIC_LIST_0], uiBits[2], uiCostBi, pu.cu->imv);
+              if(!cs.picHeader->getMvdL1ZeroFlag())
               {
                 xCopyAMVPInfo(&aacAMVPInfo[1][iRefIdxBi[1]], &amvp[REF_PIC_LIST_1]);
-                xCheckBestMVP( REF_PIC_LIST_1, cMvBi[1], cMvPredBi[1][iRefIdxBi[1]], aaiMvpIdxBi[1][iRefIdxBi[1]], amvp[eRefPicList], uiBits[2], uiCostBi, pu.cu->imv);
+                xCheckBestMVP( REF_PIC_LIST_1, cMvBi[1], cMvPredBi[1][iRefIdxBi[1]], aaiMvpIdxBi[1][iRefIdxBi[1]], amvp[REF_PIC_LIST_1], uiBits[2], uiCostBi, pu.cu->imv);
               }
             }
             break;
@@ -2291,6 +2659,11 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
           int refIdxCur = cs.slice->getSymRefIdx( curRefList );
           int refIdxTar = cs.slice->getSymRefIdx( tarRefList );
 
+          if ( aacAMVPInfo[curRefList][refIdxCur].mvCand[0] == aacAMVPInfo[curRefList][refIdxCur].mvCand[1] )
+            aacAMVPInfo[curRefList][refIdxCur].numCand = 1;
+          if ( aacAMVPInfo[tarRefList][refIdxTar].mvCand[0] == aacAMVPInfo[tarRefList][refIdxTar].mvCand[1] )
+            aacAMVPInfo[tarRefList][refIdxTar].numCand = 1;
+
           MvField cCurMvField, cTarMvField;
           Distortion costStart = std::numeric_limits<Distortion>::max();
           for ( int i = 0; i < aacAMVPInfo[curRefList][refIdxCur].numCand; i++ )
@@ -2299,7 +2672,7 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
             {
               cCurMvField.setMvField( aacAMVPInfo[curRefList][refIdxCur].mvCand[i], refIdxCur );
               cTarMvField.setMvField( aacAMVPInfo[tarRefList][refIdxTar].mvCand[j], refIdxTar );
-              Distortion cost = xGetSymmetricCost( pu, origBuf, eCurRefList, cCurMvField, cTarMvField, gbiIdx );
+              Distortion cost = xGetSymmetricCost( pu, origBuf, eCurRefList, cCurMvField, cTarMvField, bcwIdx );
               if ( cost < costStart )
               {
                 costStart = cost;
@@ -2314,17 +2687,52 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
           cTarMvField.mv = cMvPredSym[tarRefList];
 
           m_pcRdCost->setCostScale(0);
-          m_pcRdCost->setPredictor(cMvPredSym[curRefList]);
-          uint32_t bits = m_pcRdCost->getBitsOfVectorWithPredictor(cCurMvField.mv.hor, cCurMvField.mv.ver, (pu.cu->imv << 1));
+          Mv pred = cMvPredSym[curRefList];
+          pred.changeTransPrecInternal2Amvr(pu.cu->imv);
+          m_pcRdCost->setPredictor(pred);
+          Mv mv = cCurMvField.mv;
+          mv.changeTransPrecInternal2Amvr(pu.cu->imv);
+          uint32_t bits = m_pcRdCost->getBitsOfVectorWithPredictor(mv.hor, mv.ver, 0);
           bits += m_auiMVPIdxCost[mvpIdxSym[curRefList]][AMVP_MAX_NUM_CANDS];
           bits += m_auiMVPIdxCost[mvpIdxSym[tarRefList]][AMVP_MAX_NUM_CANDS];
           costStart += m_pcRdCost->getCost(bits);
 
           std::vector<Mv> symmvdCands;
-          symmvdCands.push_back(cMvTemp[curRefList][refIdxCur]);
-          if (iRefIdxBi[curRefList] == refIdxCur && cMvBi[curRefList] != cMvTemp[curRefList][refIdxCur])
+          auto smmvdCandsGen = [&](Mv mvCand, bool mvPrecAdj)
           {
-            symmvdCands.push_back(cMvBi[curRefList]);
+            if (mvPrecAdj && pu.cu->imv)
+            {
+              mvCand.roundTransPrecInternal2Amvr(pu.cu->imv);
+            }
+
+            bool toAddMvCand = true;
+            for (std::vector<Mv>::iterator pos = symmvdCands.begin(); pos != symmvdCands.end(); pos++)
+            {
+              if (*pos == mvCand)
+              {
+                toAddMvCand = false;
+                break;
+              }
+            }
+
+            if (toAddMvCand)
+            {
+              symmvdCands.push_back(mvCand);
+            }
+          };
+
+          smmvdCandsGen(cMvHevcTemp[curRefList][refIdxCur], false);
+          smmvdCandsGen(cMvTemp[curRefList][refIdxCur], false);
+          if (iRefIdxBi[curRefList] == refIdxCur)
+          {
+            smmvdCandsGen(cMvBi[curRefList], false);
+          }
+          for (int i = 0; i < m_uniMvListSize; i++)
+          {
+            if ( symmvdCands.size() >= 5 )
+              break;
+            BlkUniMvInfo* curMvInfo = m_uniMvList + ((m_uniMvListIdx - 1 - i + m_uniMvListMaxSize) % (m_uniMvListMaxSize));
+            smmvdCandsGen(curMvInfo->uniMvs[curRefList][refIdxCur], true);
           }
 
           for (auto mvStart : symmvdCands)
@@ -2335,10 +2743,12 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
               checked |= (mvStart == aacAMVPInfo[curRefList][refIdxCur].mvCand[i]);
             }
             if (checked)
-              break;
+            {
+              continue;
+            }
 
             Distortion bestCost = costStart;
-            symmvdCheckBestMvp(pu, origBuf, mvStart, (RefPicList)curRefList, aacAMVPInfo, gbiIdx, cMvPredSym, mvpIdxSym, costStart);
+            symmvdCheckBestMvp(pu, origBuf, mvStart, (RefPicList)curRefList, aacAMVPInfo, bcwIdx, cMvPredSym, mvpIdxSym, costStart);
             if (costStart < bestCost)
             {
               cCurMvField.setMvField(mvStart, refIdxCur);
@@ -2351,18 +2761,18 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
           symCost = costStart - mvpCost;
 
           // ME
-          xSymmetricMotionEstimation( pu, origBuf, cMvPredSym[curRefList], cMvPredSym[tarRefList], eCurRefList, cCurMvField, cTarMvField, symCost, gbiIdx );
+          xSymmetricMotionEstimation( pu, origBuf, cMvPredSym[curRefList], cMvPredSym[tarRefList], eCurRefList, cCurMvField, cTarMvField, symCost, bcwIdx );
 
           symCost += mvpCost;
 
           if (startPtMv != cCurMvField.mv)
           { // if ME change MV, run a final check for best MVP.
-            symmvdCheckBestMvp(pu, origBuf, cCurMvField.mv, (RefPicList)curRefList, aacAMVPInfo, gbiIdx, cMvPredSym, mvpIdxSym, symCost, true);
+            symmvdCheckBestMvp(pu, origBuf, cCurMvField.mv, (RefPicList)curRefList, aacAMVPInfo, bcwIdx, cMvPredSym, mvpIdxSym, symCost, true);
           }
 
           bits = uiMbBits[2];
           bits += 1; // add one bit for #symmetrical MVD mode
-          bits += ((cs.slice->getSPS()->getUseGBi() == true) ? getWeightIdxBits(gbiIdx) : 0);
+          bits += ((cs.slice->getSPS()->getUseBcw() == true) ? getWeightIdxBits(bcwIdx) : 0);
           symCost += m_pcRdCost->getCost(bits);
           cTarMvField.setMvField(cCurMvField.mv.getSymmvdMv(cMvPredSym[curRefList], cMvPredSym[tarRefList]), refIdxTar);
 
@@ -2411,20 +2821,20 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
     iRefIdx[1] = refIdxValidList1;
     uiBits [1] = bitsValidList1;
     uiCost [1] = costValidList1;
-	if (cu.cs->pps->getWPBiPred() == true && tryBipred && (gbiIdx != GBI_DEFAULT))
-	{
-		CHECK(iRefIdxBi[0]<0, "Invalid picture reference index");
-		CHECK(iRefIdxBi[1]<0, "Invalid picture reference index");
-		cu.cs->slice->getWpScaling(REF_PIC_LIST_0, iRefIdxBi[0], wp0);
-		cu.cs->slice->getWpScaling(REF_PIC_LIST_1, iRefIdxBi[1], wp1);	
-		if ((wp0[COMPONENT_Y].bPresentFlag || wp0[COMPONENT_Cb].bPresentFlag || wp0[COMPONENT_Cr].bPresentFlag
-			|| wp1[COMPONENT_Y].bPresentFlag || wp1[COMPONENT_Cb].bPresentFlag || wp1[COMPONENT_Cr].bPresentFlag))
-		{
-			uiCostBi = MAX_UINT;
-			enforceGBiPred = false;
-		}
-	}
-    if( enforceGBiPred )
+    if (cu.cs->pps->getWPBiPred() == true && tryBipred && (bcwIdx != BCW_DEFAULT))
+    {
+      CHECK(iRefIdxBi[0]<0, "Invalid picture reference index");
+      CHECK(iRefIdxBi[1]<0, "Invalid picture reference index");
+      cu.cs->slice->getWpScaling(REF_PIC_LIST_0, iRefIdxBi[0], wp0);
+      cu.cs->slice->getWpScaling(REF_PIC_LIST_1, iRefIdxBi[1], wp1);
+      if ((wp0[COMPONENT_Y].bPresentFlag || wp0[COMPONENT_Cb].bPresentFlag || wp0[COMPONENT_Cr].bPresentFlag
+        || wp1[COMPONENT_Y].bPresentFlag || wp1[COMPONENT_Cb].bPresentFlag || wp1[COMPONENT_Cr].bPresentFlag))
+      {
+        uiCostBi = MAX_UINT;
+        enforceBcwPred = false;
+      }
+    }
+    if( enforceBcwPred )
     {
       uiCost[0] = uiCost[1] = MAX_UINT;
     }
@@ -2435,8 +2845,6 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
         uiLastMode = 2;
         pu.mv    [REF_PIC_LIST_0] = cMvBi[0];
         pu.mv    [REF_PIC_LIST_1] = cMvBi[1];
-        pu.mv[REF_PIC_LIST_0].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);
-        pu.mv[REF_PIC_LIST_1].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);
         pu.mvd   [REF_PIC_LIST_0] = cMvBi[0] - cMvPredBi[0][iRefIdxBi[0]];
         pu.mvd   [REF_PIC_LIST_1] = cMvBi[1] - cMvPredBi[1][iRefIdxBi[1]];
         pu.refIdx[REF_PIC_LIST_0] = iRefIdxBi[0];
@@ -2453,7 +2861,6 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
       {
         uiLastMode = 0;
         pu.mv    [REF_PIC_LIST_0] = cMv[0];
-        pu.mv    [REF_PIC_LIST_0].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);
         pu.mvd   [REF_PIC_LIST_0] = cMv[0] - cMvPred[0][iRefIdx[0]];
         pu.refIdx[REF_PIC_LIST_0] = iRefIdx[0];
         pu.mvpIdx[REF_PIC_LIST_0] = aaiMvpIdx[0][iRefIdx[0]];
@@ -2464,7 +2871,6 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
       {
         uiLastMode = 1;
         pu.mv    [REF_PIC_LIST_1] = cMv[1];
-        pu.mv    [REF_PIC_LIST_1].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);
         pu.mvd   [REF_PIC_LIST_1] = cMv[1] - cMvPred[1][iRefIdx[1]];
         pu.refIdx[REF_PIC_LIST_1] = iRefIdx[1];
         pu.mvpIdx[REF_PIC_LIST_1] = aaiMvpIdx[1][iRefIdx[1]];
@@ -2472,16 +2878,16 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
         pu.interDir = 2;
       }
 
-      if( gbiIdx != GBI_DEFAULT )
+      if( bcwIdx != BCW_DEFAULT )
       {
-        cu.GBiIdx = GBI_DEFAULT; // Reset to default for the Non-NormalMC modes.
+        cu.BcwIdx = BCW_DEFAULT; // Reset to default for the Non-NormalMC modes.
       }
 
     uiHevcCost = ( uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1] ) ? uiCostBi : ( ( uiCost[0] <= uiCost[1] ) ? uiCost[0] : uiCost[1] );
     }
     if (cu.Y().width > 8 && cu.Y().height > 8 && cu.slice->getSPS()->getUseAffine()
       && checkAffine
-      && (gbiIdx == GBI_DEFAULT || m_affineModeSelected || !m_pcEncCfg->getUseGBiFast())
+      && (bcwIdx == BCW_DEFAULT || m_affineModeSelected || !m_pcEncCfg->getUseBcwFast())
       )
     {
       m_hevcCost = uiHevcCost;
@@ -2509,12 +2915,12 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
       Mv acMvAffine4Para[2][33][3];
       int refIdx4Para[2] = { -1, -1 };
 
-      xPredAffineInterSearch(pu, origBuf, puIdx, uiLastModeTemp, uiAffineCost, cMvHevcTemp, acMvAffine4Para, refIdx4Para, gbiIdx, enforceGBiPred,
-        ((cu.slice->getSPS()->getUseGBi() == true) ? getWeightIdxBits(gbiIdx) : 0));
+      xPredAffineInterSearch(pu, origBuf, puIdx, uiLastModeTemp, uiAffineCost, cMvHevcTemp, acMvAffine4Para, refIdx4Para, bcwIdx, enforceBcwPred,
+        ((cu.slice->getSPS()->getUseBcw() == true) ? getWeightIdxBits(bcwIdx) : 0));
 
       if ( pu.cu->imv == 0 )
       {
-        storeAffineMotion( pu.mvAffi, pu.refIdx, AFFINEMODEL_4PARAM, gbiIdx );
+        storeAffineMotion( pu.mvAffi, pu.refIdx, AFFINEMODEL_4PARAM, bcwIdx );
       }
 
       if ( cu.slice->getSPS()->getUseAffineType() )
@@ -2549,12 +2955,12 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
 
           Distortion uiAffine6Cost = std::numeric_limits<Distortion>::max();
           cu.affineType = AFFINEMODEL_6PARAM;
-          xPredAffineInterSearch(pu, origBuf, puIdx, uiLastModeTemp, uiAffine6Cost, cMvHevcTemp, acMvAffine4Para, refIdx4Para, gbiIdx, enforceGBiPred,
-            ((cu.slice->getSPS()->getUseGBi() == true) ? getWeightIdxBits(gbiIdx) : 0));
+          xPredAffineInterSearch(pu, origBuf, puIdx, uiLastModeTemp, uiAffine6Cost, cMvHevcTemp, acMvAffine4Para, refIdx4Para, bcwIdx, enforceBcwPred,
+            ((cu.slice->getSPS()->getUseBcw() == true) ? getWeightIdxBits(bcwIdx) : 0));
 
           if ( pu.cu->imv == 0 )
           {
-            storeAffineMotion( pu.mvAffi, pu.refIdx, AFFINEMODEL_6PARAM, gbiIdx );
+            storeAffineMotion( pu.mvAffi, pu.refIdx, AFFINEMODEL_6PARAM, bcwIdx );
           }
 
           // reset to 4 parameter affine inter mode
@@ -2575,12 +2981,8 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
               pu.mvdAffi[REF_PIC_LIST_1][verIdx] = bestMvd[1][verIdx];
             }
 
-            PU::setAllAffineMv( pu, bestMv[0][0], bestMv[0][1], bestMv[0][2], REF_PIC_LIST_0
-              , false
-            );
-            PU::setAllAffineMv( pu, bestMv[1][0], bestMv[1][1], bestMv[1][2], REF_PIC_LIST_1
-              , false
-            );
+            PU::setAllAffineMv( pu, bestMv[0][0], bestMv[0][1], bestMv[0][2], REF_PIC_LIST_0);
+            PU::setAllAffineMv( pu, bestMv[1][0], bestMv[1][1], bestMv[1][2], REF_PIC_LIST_1);
           }
           else
           {
@@ -2603,6 +3005,7 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
         // set hevc me result
         cu.affine = false;
         pu.mergeFlag = bMergeFlag;
+        pu.regularMergeFlag = false;
         pu.mergeIdx = uiMRGIndex;
         pu.interDir = uiInterDir;
         cu.smvdMode = iSymMode;
@@ -2627,9 +3030,9 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
 
     if( cu.firstPU->interDir == 3 && !cu.firstPU->mergeFlag )
     {
-      if (gbiIdx != GBI_DEFAULT)
+      if (bcwIdx != BCW_DEFAULT)
       {
-        cu.GBiIdx = gbiIdx;
+        cu.BcwIdx = bcwIdx;
       }
     }
     m_maxCompIDToPred = MAX_NUM_COMPONENT;
@@ -2638,9 +3041,11 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
       PU::spanMotionInfo( pu, mergeCtx );
     }
 
+    m_skipPROF = false;
+    m_encOnly = false;
     //  MC
     PelUnitBuf predBuf = pu.cs->getPredBuf(pu);
-    if ( gbiIdx == GBI_DEFAULT || !m_affineMotion.affine4ParaAvail || !m_affineMotion.affine6ParaAvail )
+    if ( bcwIdx == BCW_DEFAULT || !m_affineMotion.affine4ParaAvail || !m_affineMotion.affine6ParaAvail )
     {
       m_affineMotion.hevcCost[pu.cu->imv] = uiHevcCost;
     }
@@ -2653,33 +3058,21 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
   return;
 }
 
-uint32_t InterSearch::xCalcAffineMVBits( PredictionUnit& pu, Mv acMvTemp[3], Mv acMvPred[3], bool mvHighPrec )
+uint32_t InterSearch::xCalcAffineMVBits( PredictionUnit& pu, Mv acMvTemp[3], Mv acMvPred[3] )
 {
   int mvNum  = pu.cu->affineType ? 3 : 2;
-  Mv tempMv0 = acMvTemp[0];
-  const int shift = mvHighPrec ? MV_FRACTIONAL_BITS_DIFF : 0;
-  const unsigned int mvdShift = pu.cu->imv == 2 ? MV_FRACTIONAL_BITS_DIFF : 0;
-  Mv secondPred;
-
-  if ( mvHighPrec )
-  {
-    tempMv0.changePrecision( MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER );
-  }
-
   m_pcRdCost->setCostScale( 0 );
   uint32_t bitsTemp = 0;
 
   for ( int verIdx = 0; verIdx < mvNum; verIdx++ )
   {
-    m_pcRdCost->setPredictor( acMvPred[verIdx] );
-
-    if ( verIdx != 0 )
-    {
-      secondPred = acMvPred[verIdx] + ( tempMv0 - acMvPred[0] );
-      m_pcRdCost->setPredictor( secondPred );
-    }
+    Mv pred = verIdx == 0 ? acMvPred[verIdx] : acMvPred[verIdx] + acMvTemp[0] - acMvPred[0];
+    pred.changeAffinePrecInternal2Amvr(pu.cu->imv);
+    m_pcRdCost->setPredictor( pred );
+    Mv mv = acMvTemp[verIdx];
+    mv.changeAffinePrecInternal2Amvr(pu.cu->imv);
 
-    bitsTemp += m_pcRdCost->getBitsOfVectorWithPredictor( acMvTemp[verIdx].getHor() >> shift, acMvTemp[verIdx].getVer() >> shift, mvdShift );
+    bitsTemp += m_pcRdCost->getBitsOfVectorWithPredictor( mv.getHor(), mv.getVer(), 0 );
   }
 
   return bitsTemp;
@@ -2774,11 +3167,10 @@ void InterSearch::xCopyAMVPInfo (AMVPInfo* pSrc, AMVPInfo* pDst)
 
 void InterSearch::xCheckBestMVP ( RefPicList eRefPicList, Mv cMv, Mv& rcMvPred, int& riMVPIdx, AMVPInfo& amvpInfo, uint32_t& ruiBits, Distortion& ruiCost, const uint8_t imv )
 {
-  if( imv > 0 )
+  if ( imv > 0 && imv < 3 )
   {
     return;
   }
-  unsigned imvshift = imv << 1;
 
   AMVPInfo* pcAMVPInfo = &amvpInfo;
 
@@ -2793,8 +3185,12 @@ void InterSearch::xCheckBestMVP ( RefPicList eRefPicList, Mv cMv, Mv& rcMvPred,
 
   int iBestMVPIdx = riMVPIdx;
 
-  m_pcRdCost->setPredictor( rcMvPred );
-  int iOrgMvBits = m_pcRdCost->getBitsOfVectorWithPredictor(cMv.getHor(), cMv.getVer(), imvshift);
+  Mv pred = rcMvPred;
+  pred.changeTransPrecInternal2Amvr(imv);
+  m_pcRdCost->setPredictor( pred );
+  Mv mv = cMv;
+  mv.changeTransPrecInternal2Amvr(imv);
+  int iOrgMvBits = m_pcRdCost->getBitsOfVectorWithPredictor(mv.getHor(), mv.getVer(), 0);
   iOrgMvBits += m_auiMVPIdxCost[riMVPIdx][AMVP_MAX_NUM_CANDS];
   int iBestMvBits = iOrgMvBits;
 
@@ -2805,8 +3201,10 @@ void InterSearch::xCheckBestMVP ( RefPicList eRefPicList, Mv cMv, Mv& rcMvPred,
       continue;
     }
 
-    m_pcRdCost->setPredictor( pcAMVPInfo->mvCand[iMVPIdx] );
-    int iMvBits = m_pcRdCost->getBitsOfVectorWithPredictor(cMv.getHor(), cMv.getVer(), imvshift);
+    pred = pcAMVPInfo->mvCand[iMVPIdx];
+    pred.changeTransPrecInternal2Amvr(imv);
+    m_pcRdCost->setPredictor( pred );
+    int iMvBits = m_pcRdCost->getBitsOfVectorWithPredictor(mv.getHor(), mv.getVer(), 0);
     iMvBits += m_auiMVPIdxCost[iMVPIdx][AMVP_MAX_NUM_CANDS];
 
     if (iMvBits < iBestMvBits)
@@ -2841,12 +3239,7 @@ Distortion InterSearch::xGetTemplateCost( const PredictionUnit& pu,
   Distortion uiCost = std::numeric_limits<Distortion>::max();
 
   const Picture* picRef = pu.cu->slice->getRefPic( eRefPicList, iRefIdx );
-  cMvCand.changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);
-  clipMv( cMvCand, pu.cu->lumaPos(),
-          pu.cu->lumaSize(),
-          *pu.cs->sps );
-
-
+  clipMv( cMvCand, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps, *pu.cs->pps );
   // prediction pattern
   const bool bi = pu.cu->slice->testWeightPred() && pu.cu->slice->getSliceType()==P_SLICE;
 
@@ -2879,12 +3272,7 @@ Distortion InterSearch::xGetAffineTemplateCost( PredictionUnit& pu, PelUnitBuf&
   const bool bi = pu.cu->slice->testWeightPred() && pu.cu->slice->getSliceType()==P_SLICE;
   Mv mv[3];
   memcpy(mv, acMvCand, sizeof(mv));
-  if ( pu.cu->imv != 1 )
-  {
-    mv[0].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);
-    mv[1].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);
-    mv[2].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);
-  }
+  m_iRefListIdx = eRefPicList;
   xPredAffineBlk(COMPONENT_Y, pu, picRef, mv, predBuf, bi, pu.cu->slice->clpRng(COMPONENT_Y));
   if( bi )
   {
@@ -2892,9 +3280,9 @@ Distortion InterSearch::xGetAffineTemplateCost( PredictionUnit& pu, PelUnitBuf&
   }
 
   // calc distortion
-
+  enum DFunc distFunc = (pu.cs->slice->getDisableSATDForRD()) ? DF_SAD : DF_HAD;
   uiCost  = m_pcRdCost->getDistPart( origBuf.Y(), predBuf.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y
-    , DF_HAD
+    , distFunc
   );
   uiCost += m_pcRdCost->getCost( m_auiMVPIdxCost[iMVPIdx][iMVPNum] );
   DTRACE( g_trace_ctx, D_COMMON, " (%d) affineTemplateCost=%d\n", DTRACE_GET_COUNTER(g_trace_ctx,D_COMMON), uiCost );
@@ -2903,7 +3291,7 @@ Distortion InterSearch::xGetAffineTemplateCost( PredictionUnit& pu, PelUnitBuf&
 
 void InterSearch::xMotionEstimation(PredictionUnit& pu, PelUnitBuf& origBuf, RefPicList eRefPicList, Mv& rcMvPred, int iRefIdxPred, Mv& rcMv, int& riMVPIdx, uint32_t& ruiBits, Distortion& ruiCost, const AMVPInfo& amvpInfo, bool bBi)
 {
-  if( pu.cu->cs->sps->getUseGBi() && pu.cu->GBiIdx != GBI_DEFAULT && !bBi && xReadBufferedUniMv(pu, eRefPicList, iRefIdxPred, rcMvPred, rcMv, ruiBits, ruiCost) )
+  if( pu.cu->cs->sps->getUseBcw() && pu.cu->BcwIdx != BCW_DEFAULT && !bBi && xReadBufferedUniMv(pu, eRefPicList, iRefIdxPred, rcMvPred, rcMv, ruiBits, ruiCost) )
   {
     return;
   }
@@ -2925,11 +3313,11 @@ void InterSearch::xMotionEstimation(PredictionUnit& pu, PelUnitBuf& origBuf, Ref
     PelUnitBuf otherBuf = m_tmpPredStorage[1 - (int)eRefPicList].getBuf( UnitAreaRelative(*pu.cu, pu ));
     origBufTmp.copyFrom(origBuf);
     origBufTmp.removeHighFreq( otherBuf, m_pcEncCfg->getClipForBiPredMeEnabled(), pu.cu->slice->clpRngs()
-                              ,getGbiWeight( pu.cu->GBiIdx, eRefPicList )
+                              ,getBcwWeight( pu.cu->BcwIdx, eRefPicList )
                               );
     pBuf = &origBufTmp;
 
-    fWeight = xGetMEDistortionWeight( pu.cu->GBiIdx, eRefPicList );
+    fWeight = xGetMEDistortionWeight( pu.cu->BcwIdx, eRefPicList );
   }
   m_cDistParam.isBiPred = bBi;
 
@@ -2939,13 +3327,14 @@ void InterSearch::xMotionEstimation(PredictionUnit& pu, PelUnitBuf& origBuf, Ref
 
   m_lumaClpRng = pu.cs->slice->clpRng( COMPONENT_Y );
 
-  CPelBuf buf = pu.cu->slice->getRefPic(eRefPicList, iRefIdxPred)->getRecoBuf(pu.blocks[COMPONENT_Y]);
+  CPelBuf buf = pu.cu->slice->getRefPic(eRefPicList, iRefIdxPred)->getRecoBuf(pu.blocks[COMPONENT_Y], pu.cs->sps->getWrapAroundEnabledFlag());
 
   IntTZSearchStruct cStruct;
   cStruct.pcPatternKey  = pcPatternKey;
   cStruct.iRefStride    = buf.stride;
   cStruct.piRefY        = buf.buf;
-  cStruct.imvShift      = pu.cu->imv << 1;
+  cStruct.imvShift = pu.cu->imv == IMV_HPEL ? 1 : (pu.cu->imv << 1);
+  cStruct.useAltHpelIf = pu.cu->imv == IMV_HPEL;
   cStruct.inCtuSearch = false;
   cStruct.zeroMV = false;
   {
@@ -2966,12 +3355,13 @@ void InterSearch::xMotionEstimation(PredictionUnit& pu, PelUnitBuf& origBuf, Ref
     if( bValid )
     {
       bQTBTMV2 = true;
-      cIntMv <<= 2;
+      cIntMv.changePrecision( MV_PRECISION_INT, MV_PRECISION_INTERNAL);
     }
   }
 
-
-  m_pcRdCost->setPredictor( rcMvPred );
+  Mv predQuarter = rcMvPred;
+  predQuarter.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
+  m_pcRdCost->setPredictor( predQuarter );
 
   m_pcRdCost->setCostScale(2);
 
@@ -2984,13 +3374,54 @@ void InterSearch::xMotionEstimation(PredictionUnit& pu, PelUnitBuf& origBuf, Ref
   //  Do integer search
   if( ( m_motionEstimationSearchMethod == MESEARCH_FULL ) || bBi || bQTBTMV )
   {
+    cStruct.subShiftMode = m_pcEncCfg->getFastInterSearchMode() == FASTINTERSEARCH_MODE1 || m_pcEncCfg->getFastInterSearchMode() == FASTINTERSEARCH_MODE3 ? 2 : 0;
+    m_pcRdCost->setDistParam(m_cDistParam, *cStruct.pcPatternKey, cStruct.piRefY, cStruct.iRefStride, m_lumaClpRng.bd, COMPONENT_Y, cStruct.subShiftMode);
+
+    Mv bestInitMv = (bBi ? rcMv : rcMvPred);
+    Mv cTmpMv = bestInitMv;
+    clipMv( cTmpMv, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps, *pu.cs->pps );
+    cTmpMv.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_INT);
+    m_cDistParam.cur.buf = cStruct.piRefY + (cTmpMv.ver * cStruct.iRefStride) + cTmpMv.hor;
+    Distortion uiBestSad = m_cDistParam.distFunc(m_cDistParam);
+    uiBestSad += m_pcRdCost->getCostOfVectorWithPredictor(cTmpMv.hor, cTmpMv.ver, cStruct.imvShift);
+
+    for (int i = 0; i < m_uniMvListSize; i++)
+    {
+      BlkUniMvInfo* curMvInfo = m_uniMvList + ((m_uniMvListIdx - 1 - i + m_uniMvListMaxSize) % (m_uniMvListMaxSize));
+
+      int j = 0;
+      for (; j < i; j++)
+      {
+        BlkUniMvInfo *prevMvInfo = m_uniMvList + ((m_uniMvListIdx - 1 - j + m_uniMvListMaxSize) % (m_uniMvListMaxSize));
+        if (curMvInfo->uniMvs[eRefPicList][iRefIdxPred] == prevMvInfo->uniMvs[eRefPicList][iRefIdxPred])
+        {
+          break;
+        }
+      }
+      if (j < i)
+        continue;
+
+      cTmpMv = curMvInfo->uniMvs[eRefPicList][iRefIdxPred];
+      clipMv( cTmpMv, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps, *pu.cs->pps );
+      cTmpMv.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_INT);
+      m_cDistParam.cur.buf = cStruct.piRefY + (cTmpMv.ver * cStruct.iRefStride) + cTmpMv.hor;
+
+      Distortion uiSad = m_cDistParam.distFunc(m_cDistParam);
+      uiSad += m_pcRdCost->getCostOfVectorWithPredictor(cTmpMv.hor, cTmpMv.ver, cStruct.imvShift);
+      if (uiSad < uiBestSad)
+      {
+        uiBestSad = uiSad;
+        bestInitMv = curMvInfo->uniMvs[eRefPicList][iRefIdxPred];
+        m_cDistParam.maximumDistortionForEarlyExit = uiSad;
+      }
+    }
+
     if( !bQTBTMV )
     {
-      xSetSearchRange(pu, (bBi ? rcMv : rcMvPred), iSrchRng, cStruct.searchRange
+      xSetSearchRange(pu, bestInitMv, iSrchRng, cStruct.searchRange
         , cStruct
       );
     }
-    cStruct.subShiftMode = m_pcEncCfg->getFastInterSearchMode() == FASTINTERSEARCH_MODE1 || m_pcEncCfg->getFastInterSearchMode() == FASTINTERSEARCH_MODE3 ? 2 : 0;
     xPatternSearch( cStruct, rcMv, ruiCost);
   }
   else if( bQTBTMV2 )
@@ -2999,7 +3430,7 @@ void InterSearch::xMotionEstimation(PredictionUnit& pu, PelUnitBuf& origBuf, Ref
 
     cStruct.subShiftMode = ( !m_pcEncCfg->getRestrictMESampling() && m_pcEncCfg->getMotionEstimationSearchMethod() == MESEARCH_SELECTIVE ) ? 1 :
                             ( m_pcEncCfg->getFastInterSearchMode() == FASTINTERSEARCH_MODE1 || m_pcEncCfg->getFastInterSearchMode() == FASTINTERSEARCH_MODE3 ) ? 2 : 0;
-    xTZSearch( pu, cStruct, rcMv, ruiCost, NULL, false, true );
+    xTZSearch(pu, eRefPicList, iRefIdxPred, cStruct, rcMv, ruiCost, NULL, false, true);
   }
   else
   {
@@ -3007,7 +3438,7 @@ void InterSearch::xMotionEstimation(PredictionUnit& pu, PelUnitBuf& origBuf, Ref
                             ( m_pcEncCfg->getFastInterSearchMode() == FASTINTERSEARCH_MODE1 || m_pcEncCfg->getFastInterSearchMode() == FASTINTERSEARCH_MODE3 ) ? 2 : 0;
     rcMv = rcMvPred;
     const Mv *pIntegerMv2Nx2NPred = 0;
-    xPatternSearchFast( pu, cStruct, rcMv, ruiCost, pIntegerMv2Nx2NPred );
+    xPatternSearchFast(pu, eRefPicList, iRefIdxPred, cStruct, rcMv, ruiCost, pIntegerMv2Nx2NPred);
     if( blkCache )
     {
       blkCache->setMv( pu.cs->area, eRefPicList, iRefIdxPred, rcMv );
@@ -3020,7 +3451,7 @@ void InterSearch::xMotionEstimation(PredictionUnit& pu, PelUnitBuf& origBuf, Ref
 
   DTRACE( g_trace_ctx, D_ME, "%d %d %d :MECostFPel<L%d,%d>: %d,%d,%dx%d, %d", DTRACE_GET_COUNTER( g_trace_ctx, D_ME ), pu.cu->slice->getPOC(), 0, ( int ) eRefPicList, ( int ) bBi, pu.Y().x, pu.Y().y, pu.Y().width, pu.Y().height, ruiCost );
   // sub-pel refinement for sub-pel resolution
-  if( pu.cu->imv == 0 )
+  if ( pu.cu->imv == 0 || pu.cu->imv == IMV_HPEL )
   {
     if( m_pcEncCfg->getMCTSEncConstraint() )
     {
@@ -3043,9 +3474,11 @@ void InterSearch::xMotionEstimation(PredictionUnit& pu, PelUnitBuf& origBuf, Ref
     uint32_t uiMvBits = m_pcRdCost->getBitsOfVectorWithPredictor( rcMv.getHor(), rcMv.getVer(), cStruct.imvShift );
     ruiBits += uiMvBits;
     ruiCost = ( Distortion ) ( floor( fWeight * ( ( double ) ruiCost - ( double ) m_pcRdCost->getCost( uiMvBits ) ) ) + ( double ) m_pcRdCost->getCost( ruiBits ) );
+    rcMv.changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);
   }
   else // integer refinement for integer-pel and 4-pel resolution
   {
+    rcMv.changePrecision(MV_PRECISION_INT, MV_PRECISION_INTERNAL);
     xPatternSearchIntRefine( pu, cStruct, rcMv, rcMvPred, riMVPIdx, ruiBits, ruiCost, amvpInfo, fWeight);
   }
   DTRACE(g_trace_ctx, D_ME, "   MECost<L%d,%d>: %6d (%d)  MV:%d,%d\n", (int)eRefPicList, (int)bBi, ruiCost, ruiBits, rcMv.getHor() << 2, rcMv.getVer() << 2);
@@ -3062,10 +3495,7 @@ void InterSearch::xSetSearchRange ( const PredictionUnit& pu,
 {
   const int iMvShift = MV_FRACTIONAL_BITS_INTERNAL;
   Mv cFPMvPred = cMvPred;
-  cFPMvPred.changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);
-  clipMv( cFPMvPred, pu.cu->lumaPos(),
-          pu.cu->lumaSize(),
-          *pu.cs->sps );
+  clipMv( cFPMvPred, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps, *pu.cs->pps );
 
   Mv mvTL(cFPMvPred.getHor() - (iSrchRng << iMvShift), cFPMvPred.getVer() - (iSrchRng << iMvShift));
   Mv mvBR(cFPMvPred.getHor() + (iSrchRng << iMvShift), cFPMvPred.getVer() + (iSrchRng << iMvShift));
@@ -3079,10 +3509,14 @@ void InterSearch::xSetSearchRange ( const PredictionUnit& pu,
   {
     xClipMv( mvTL, pu.cu->lumaPos(),
             pu.cu->lumaSize(),
-            *pu.cs->sps );
+            *pu.cs->sps
+          , *pu.cs->pps
+    );
     xClipMv( mvBR, pu.cu->lumaPos(),
             pu.cu->lumaSize(),
-            *pu.cs->sps );
+            *pu.cs->sps
+          , *pu.cs->pps
+    );
   }
 
   mvTL.divideByPowerOf2( iMvShift );
@@ -3168,6 +3602,8 @@ void InterSearch::xPatternSearch( IntTZSearchStruct&    cStruct,
 
 
 void InterSearch::xPatternSearchFast( const PredictionUnit& pu,
+                                      RefPicList            eRefPicList,
+                                      int                   iRefIdxPred,
                                       IntTZSearchStruct&    cStruct,
                                       Mv&                   rcMv,
                                       Distortion&           ruiSAD,
@@ -3176,15 +3612,15 @@ void InterSearch::xPatternSearchFast( const PredictionUnit& pu,
   switch ( m_motionEstimationSearchMethod )
   {
   case MESEARCH_DIAMOND:
-    xTZSearch         ( pu, cStruct, rcMv, ruiSAD, pIntegerMv2Nx2NPred, false );
+    xTZSearch         ( pu, eRefPicList, iRefIdxPred, cStruct, rcMv, ruiSAD, pIntegerMv2Nx2NPred, false );
     break;
 
   case MESEARCH_SELECTIVE:
-    xTZSearchSelective( pu, cStruct, rcMv, ruiSAD, pIntegerMv2Nx2NPred );
+    xTZSearchSelective( pu, eRefPicList, iRefIdxPred, cStruct, rcMv, ruiSAD, pIntegerMv2Nx2NPred );
     break;
 
   case MESEARCH_DIAMOND_ENHANCED:
-    xTZSearch         ( pu, cStruct, rcMv, ruiSAD, pIntegerMv2Nx2NPred, true );
+    xTZSearch         ( pu, eRefPicList, iRefIdxPred, cStruct, rcMv, ruiSAD, pIntegerMv2Nx2NPred, true );
     break;
 
   case MESEARCH_FULL: // shouldn't get here.
@@ -3195,6 +3631,8 @@ void InterSearch::xPatternSearchFast( const PredictionUnit& pu,
 
 
 void InterSearch::xTZSearch( const PredictionUnit& pu,
+                             RefPicList            eRefPicList,
+                             int                   iRefIdxPred,
                              IntTZSearchStruct&    cStruct,
                              Mv&                   rcMv,
                              Distortion&           ruiSAD,
@@ -3226,15 +3664,14 @@ void InterSearch::xTZSearch( const PredictionUnit& pu,
   const bool bNewZeroNeighbourhoodTest               = bExtendedSettings;
 
   int iSearchRange = m_iSearchRange;
-  rcMv.changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);
   if( m_pcEncCfg->getMCTSEncConstraint() )
   {
     MCTSHelper::clipMvToArea( rcMv, pu.Y(), pu.cs->picture->mctsInfo.getTileArea(), *pu.cs->sps );
   }
   else
-  clipMv( rcMv, pu.cu->lumaPos(),
-          pu.cu->lumaSize(),
-          *pu.cs->sps );
+  {
+    clipMv( rcMv, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps, *pu.cs->pps );
+  }
   rcMv.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
   rcMv.divideByPowerOf2(2);
 
@@ -3273,9 +3710,9 @@ void InterSearch::xTZSearch( const PredictionUnit& pu,
       MCTSHelper::clipMvToArea( integerMv2Nx2NPred, pu.Y(), pu.cs->picture->mctsInfo.getTileArea(), *pu.cs->sps );
     }
     else
-    clipMv( integerMv2Nx2NPred, pu.cu->lumaPos(),
-            pu.cu->lumaSize(),
-            *pu.cs->sps );
+    {
+      clipMv( integerMv2Nx2NPred, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps, *pu.cs->pps );
+    }
     integerMv2Nx2NPred.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
     integerMv2Nx2NPred.divideByPowerOf2(2);
 
@@ -3286,26 +3723,56 @@ void InterSearch::xTZSearch( const PredictionUnit& pu,
       xTZSearchHelp( cStruct, integerMv2Nx2NPred.getHor(), integerMv2Nx2NPred.getVer(), 0, 0);
     }
   }
+
+  for (int i = 0; i < m_uniMvListSize; i++)
+  {
+    BlkUniMvInfo* curMvInfo = m_uniMvList + ((m_uniMvListIdx - 1 - i + m_uniMvListMaxSize) % (m_uniMvListMaxSize));
+
+    int j = 0;
+    for (; j < i; j++)
+    {
+      BlkUniMvInfo *prevMvInfo = m_uniMvList + ((m_uniMvListIdx - 1 - j + m_uniMvListMaxSize) % (m_uniMvListMaxSize));
+      if (curMvInfo->uniMvs[eRefPicList][iRefIdxPred] == prevMvInfo->uniMvs[eRefPicList][iRefIdxPred])
+      {
+        break;
+      }
+    }
+    if (j < i)
+      continue;
+
+    Mv cTmpMv = curMvInfo->uniMvs[eRefPicList][iRefIdxPred];
+    clipMv( cTmpMv, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps, *pu.cs->pps );
+    cTmpMv.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_INT);
+    m_cDistParam.cur.buf = cStruct.piRefY + (cTmpMv.ver * cStruct.iRefStride) + cTmpMv.hor;
+
+    Distortion uiSad = m_cDistParam.distFunc(m_cDistParam);
+    uiSad += m_pcRdCost->getCostOfVectorWithPredictor(cTmpMv.hor, cTmpMv.ver, cStruct.imvShift);
+    if (uiSad < cStruct.uiBestSad)
+    {
+      cStruct.uiBestSad = uiSad;
+      cStruct.iBestX = cTmpMv.hor;
+      cStruct.iBestY = cTmpMv.ver;
+      m_cDistParam.maximumDistortionForEarlyExit = uiSad;
+    }
+  }
+
   {
     // set search range
     Mv currBestMv(cStruct.iBestX, cStruct.iBestY );
-    currBestMv <<= 2;
+    currBestMv <<= MV_FRACTIONAL_BITS_INTERNAL;
     xSetSearchRange(pu, currBestMv, m_iSearchRange >> (bFastSettings ? 1 : 0), sr
       , cStruct
     );
   }
-  if (m_pcEncCfg->getUseHashME())
+  if (m_pcEncCfg->getUseHashME() && (m_currRefPicList == 0 || pu.cu->slice->getList1IdxToList0Idx(m_currRefPicIndex) < 0))
   {
-    int width = pu.cu->lumaSize().width;
-    int height = pu.cu->lumaSize().height;
-    if ((width == height && width <= 64 && width >= 4) || (width == 8 && height == 4) || (width == 4 && height == 8))
+    int minSize = min(pu.cu->lumaSize().width, pu.cu->lumaSize().height);
+    if (minSize < 128 && minSize >= 4)
     {
-      Mv otherMvps[5];
-      int numberOfOtherMvps;
-      numberOfOtherMvps = xHashInterPredME(pu, m_currRefPicList, m_currRefPicIndex, otherMvps);
+      int numberOfOtherMvps = m_numHashMVStoreds[m_currRefPicList][m_currRefPicIndex];
       for (int i = 0; i < numberOfOtherMvps; i++)
       {
-        xTZSearchHelp(cStruct, otherMvps[i].getHor(), otherMvps[i].getVer(), 0, 0);
+        xTZSearchHelp(cStruct, m_hashMVStoreds[m_currRefPicList][m_currRefPicIndex][i].getHor(), m_hashMVStoreds[m_currRefPicList][m_currRefPicIndex][i].getVer(), 0, 0);
       }
       if (numberOfOtherMvps > 0)
       {
@@ -3505,6 +3972,8 @@ void InterSearch::xTZSearch( const PredictionUnit& pu,
 
 
 void InterSearch::xTZSearchSelective( const PredictionUnit& pu,
+                                      RefPicList            eRefPicList,
+                                      int                   iRefIdxPred,
                                       IntTZSearchStruct&    cStruct,
                                       Mv                    &rcMv,
                                       Distortion            &ruiSAD,
@@ -3525,10 +3994,7 @@ void InterSearch::xTZSearchSelective( const PredictionUnit& pu,
   int   iStartX                 = 0;
   int   iStartY                 = 0;
   int   iDist                   = 0;
-  rcMv.changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);
-  clipMv( rcMv, pu.cu->lumaPos(),
-          pu.cu->lumaSize(),
-          *pu.cs->sps );
+  clipMv( rcMv, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps, *pu.cs->pps );
   rcMv.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
   rcMv.divideByPowerOf2(2);
 
@@ -3556,15 +4022,46 @@ void InterSearch::xTZSearchSelective( const PredictionUnit& pu,
   {
     Mv integerMv2Nx2NPred = *pIntegerMv2Nx2NPred;
     integerMv2Nx2NPred.changePrecision(MV_PRECISION_INT, MV_PRECISION_INTERNAL);
-    clipMv( integerMv2Nx2NPred, pu.cu->lumaPos(),
-            pu.cu->lumaSize(),
-            *pu.cs->sps );
+    clipMv( integerMv2Nx2NPred, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps, *pu.cs->pps );
     integerMv2Nx2NPred.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
     integerMv2Nx2NPred.divideByPowerOf2(2);
 
     xTZSearchHelp( cStruct, integerMv2Nx2NPred.getHor(), integerMv2Nx2NPred.getVer(), 0, 0);
 
   }
+
+  for (int i = 0; i < m_uniMvListSize; i++)
+  {
+    BlkUniMvInfo* curMvInfo = m_uniMvList + ((m_uniMvListIdx - 1 - i + m_uniMvListMaxSize) % (m_uniMvListMaxSize));
+
+    int j = 0;
+    for (; j < i; j++)
+    {
+      BlkUniMvInfo *prevMvInfo = m_uniMvList + ((m_uniMvListIdx - 1 - j + m_uniMvListMaxSize) % (m_uniMvListMaxSize));
+      if (curMvInfo->uniMvs[eRefPicList][iRefIdxPred] == prevMvInfo->uniMvs[eRefPicList][iRefIdxPred])
+      {
+        break;
+      }
+    }
+    if (j < i)
+      continue;
+
+    Mv cTmpMv = curMvInfo->uniMvs[eRefPicList][iRefIdxPred];
+    clipMv( cTmpMv, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps, *pu.cs->pps );
+    cTmpMv.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_INT);
+    m_cDistParam.cur.buf = cStruct.piRefY + (cTmpMv.ver * cStruct.iRefStride) + cTmpMv.hor;
+
+    Distortion uiSad = m_cDistParam.distFunc(m_cDistParam);
+    uiSad += m_pcRdCost->getCostOfVectorWithPredictor(cTmpMv.hor, cTmpMv.ver, cStruct.imvShift);
+    if (uiSad < cStruct.uiBestSad)
+    {
+      cStruct.uiBestSad = uiSad;
+      cStruct.iBestX = cTmpMv.hor;
+      cStruct.iBestY = cTmpMv.ver;
+      m_cDistParam.maximumDistortionForEarlyExit = uiSad;
+    }
+  }
+
   {
     // set search range
     Mv currBestMv(cStruct.iBestX, cStruct.iBestY );
@@ -3573,21 +4070,16 @@ void InterSearch::xTZSearchSelective( const PredictionUnit& pu,
       , cStruct
     );
   }
-
-  if (m_pcEncCfg->getUseHashME())
+  if (m_pcEncCfg->getUseHashME() && (m_currRefPicList == 0 || pu.cu->slice->getList1IdxToList0Idx(m_currRefPicIndex) < 0))
   {
-    int width = pu.cu->lumaSize().width;
-    int height = pu.cu->lumaSize().height;
-    if ((width == height && width <= 64 && width >= 4) || (width == 8 && height == 4) || (width == 4 && height == 8))
+    int minSize = min(pu.cu->lumaSize().width, pu.cu->lumaSize().height);
+    if (minSize < 128 && minSize >= 4)
     {
-      Mv otherMvps[5];
-      int numberOfOtherMvps;
-      numberOfOtherMvps = xHashInterPredME(pu, m_currRefPicList, m_currRefPicIndex, otherMvps);
+      int numberOfOtherMvps = m_numHashMVStoreds[m_currRefPicList][m_currRefPicIndex];
       for (int i = 0; i < numberOfOtherMvps; i++)
       {
-        xTZSearchHelp(cStruct, otherMvps[i].getHor(), otherMvps[i].getVer(), 0, 0);
+        xTZSearchHelp(cStruct, m_hashMVStoreds[m_currRefPicList][m_currRefPicIndex][i].getHor(), m_hashMVStoreds[m_currRefPicList][m_currRefPicIndex][i].getVer(), 0, 0);
       }
-
       if (numberOfOtherMvps > 0)
       {
         // write out best match
@@ -3676,15 +4168,12 @@ void InterSearch::xTZSearchSelective( const PredictionUnit& pu,
 void InterSearch::xPatternSearchIntRefine(PredictionUnit& pu, IntTZSearchStruct&  cStruct, Mv& rcMv, Mv& rcMvPred, int& riMVPIdx, uint32_t& ruiBits, Distortion& ruiCost, const AMVPInfo& amvpInfo, double fWeight)
 {
 
-  CHECK( pu.cu->imv == 0,                       "xPatternSearchIntRefine(): IMV not used.");
+  CHECK( pu.cu->imv == 0 || pu.cu->imv == IMV_HPEL , "xPatternSearchIntRefine(): Sub-pel MV used.");
   CHECK( amvpInfo.mvCand[riMVPIdx] != rcMvPred, "xPatternSearchIntRefine(): MvPred issue.");
 
   const SPS &sps = *pu.cs->sps;
-  m_pcRdCost->setDistParam( m_cDistParam, *cStruct.pcPatternKey, cStruct.piRefY, cStruct.iRefStride, m_lumaClpRng.bd, COMPONENT_Y, 0, 1, m_pcEncCfg->getUseHADME() && !pu.cu->transQuantBypass );
+  m_pcRdCost->setDistParam(m_cDistParam, *cStruct.pcPatternKey, cStruct.piRefY, cStruct.iRefStride, m_lumaClpRng.bd, COMPONENT_Y, 0, 1, m_pcEncCfg->getUseHADME() && !pu.cs->slice->getDisableSATDForRD());
 
-  // input MV rcMV has integer resolution
-  // -> shift it to QPEL
-  rcMv <<= 2;
   // -> set MV scale for cost calculation to QPEL (0)
   m_pcRdCost->setCostScale ( 0 );
 
@@ -3697,7 +4186,7 @@ void InterSearch::xPatternSearchIntRefine(PredictionUnit& pu, IntTZSearchStruct&
   Mv cBaseMvd[2];
   int iBestBits = 0;
   int iBestMVPIdx = riMVPIdx;
-  int testPos[9][2] = { { 0, 0}, { -1, -1},{ -1, 0},{ -1, 1},{ 0, -1},{ 0, 1},{ 1, -1},{ 1, 0},{ 1, 1} };
+  Mv testPos[9] = { { 0, 0}, { -1, -1},{ -1, 0},{ -1, 1},{ 0, -1},{ 0, 1},{ 1, -1},{ 1, 0},{ 1, 1} };
 
 
   cBaseMvd[0] = (rcMv - amvpInfo.mvCand[0]);
@@ -3705,10 +4194,8 @@ void InterSearch::xPatternSearchIntRefine(PredictionUnit& pu, IntTZSearchStruct&
   CHECK( (cBaseMvd[0].getHor() & 0x03) != 0 || (cBaseMvd[0].getVer() & 0x03) != 0 , "xPatternSearchIntRefine(): AMVP cand 0 Mvd issue.");
   CHECK( (cBaseMvd[1].getHor() & 0x03) != 0 || (cBaseMvd[1].getVer() & 0x03) != 0 , "xPatternSearchIntRefine(): AMVP cand 1 Mvd issue.");
 
-  cBaseMvd[0].roundToAmvrSignalPrecision(MV_PRECISION_QUARTER, pu.cu->imv);
-  cBaseMvd[1].roundToAmvrSignalPrecision(MV_PRECISION_QUARTER, pu.cu->imv);
-
-  int mvOffset = 1 << cStruct.imvShift;
+  cBaseMvd[0].roundTransPrecInternal2Amvr(pu.cu->imv);
+  cBaseMvd[1].roundTransPrecInternal2Amvr(pu.cu->imv);
 
   // test best integer position and all 8 neighboring positions
   for (int pos = 0; pos < 9; pos ++)
@@ -3717,7 +4204,8 @@ void InterSearch::xPatternSearchIntRefine(PredictionUnit& pu, IntTZSearchStruct&
     // test both AMVP candidates for each position
     for (int iMVPIdx = 0; iMVPIdx < amvpInfo.numCand; iMVPIdx++)
     {
-      cTestMv[iMVPIdx].set(testPos[pos][0]*mvOffset, testPos[pos][1]*mvOffset);
+      cTestMv[iMVPIdx] = testPos[pos];
+      cTestMv[iMVPIdx].changeTransPrecAmvr2Internal(pu.cu->imv);
       cTestMv[iMVPIdx] += cBaseMvd[iMVPIdx];
       cTestMv[iMVPIdx] += amvpInfo.mvCand[iMVPIdx];
 
@@ -3725,9 +4213,7 @@ void InterSearch::xPatternSearchIntRefine(PredictionUnit& pu, IntTZSearchStruct&
       if( m_pcEncCfg->getMCTSEncConstraint() )
       {
         Mv cTestMVRestr = cTestMv[iMVPIdx];
-        cTestMVRestr.changePrecision( MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL );
         MCTSHelper::clipMvToArea( cTestMVRestr, pu.cu->Y(), pu.cs->picture->mctsInfo.getTileAreaIntPelRestricted( pu ), *pu.cs->sps );
-        cTestMVRestr.changePrecision( MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER );
 
         if( cTestMVRestr != cTestMv[iMVPIdx] )
         {
@@ -3740,13 +4226,9 @@ void InterSearch::xPatternSearchIntRefine(PredictionUnit& pu, IntTZSearchStruct&
         Mv cTempMV = cTestMv[iMVPIdx];
         if( !m_pcEncCfg->getMCTSEncConstraint() )
         {
-        cTempMV.changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);
-        clipMv(cTempMV, pu.cu->lumaPos(),
-               pu.cu->lumaSize(),
-               sps);
-        cTempMV.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
+          clipMv( cTempMV, pu.cu->lumaPos(), pu.cu->lumaSize(), sps, *pu.cs->pps );
         }
-        m_cDistParam.cur.buf = cStruct.piRefY  + cStruct.iRefStride * (cTempMV.getVer() >>  2) + (cTempMV.getHor() >> 2);
+        m_cDistParam.cur.buf = cStruct.piRefY  + cStruct.iRefStride * (cTempMV.getVer() >>  MV_FRACTIONAL_BITS_INTERNAL) + (cTempMV.getHor() >> MV_FRACTIONAL_BITS_INTERNAL);
         uiDist = uiSATD = (Distortion) (m_cDistParam.distFunc( m_cDistParam ) * fWeight);
       }
       else
@@ -3755,9 +4237,13 @@ void InterSearch::xPatternSearchIntRefine(PredictionUnit& pu, IntTZSearchStruct&
       }
 
       int iMvBits = m_auiMVPIdxCost[iMVPIdx][AMVP_MAX_NUM_CANDS];
-      m_pcRdCost->setPredictor( amvpInfo.mvCand[iMVPIdx] );
-      iMvBits += m_pcRdCost->getBitsOfVectorWithPredictor( cTestMv[iMVPIdx].getHor(), cTestMv[iMVPIdx].getVer(), cStruct.imvShift );
-      uiDist += m_pcRdCost->getCostOfVectorWithPredictor( cTestMv[iMVPIdx].getHor(), cTestMv[iMVPIdx].getVer(), cStruct.imvShift );
+      Mv pred = amvpInfo.mvCand[iMVPIdx];
+      pred.changeTransPrecInternal2Amvr(pu.cu->imv);
+      m_pcRdCost->setPredictor( pred );
+      Mv mv = cTestMv[iMVPIdx];
+      mv.changeTransPrecInternal2Amvr(pu.cu->imv);
+      iMvBits += m_pcRdCost->getBitsOfVectorWithPredictor( mv.getHor(), mv.getVer(), 0 );
+      uiDist += m_pcRdCost->getCost(iMvBits);
 
       if (uiDist < uiBestDist)
       {
@@ -3787,7 +4273,6 @@ void InterSearch::xPatternSearchIntRefine(PredictionUnit& pu, IntTZSearchStruct&
   ruiCost = uiBestDist - m_pcRdCost->getCost(iBestBits) + m_pcRdCost->getCost(ruiBits);
   // taken from JEM 5.0
   // verify since it makes no sense to add rate for MVDs twicce
-  ruiBits += m_pcRdCost->getBitsOfVectorWithPredictor(rcMv.getHor(), rcMv.getVer(), cStruct.imvShift);
 
   return;
 }
@@ -3803,7 +4288,6 @@ void InterSearch::xPatternSearchFracDIF(
   Distortion&           ruiCost
 )
 {
-  const bool bIsLosslessCoded = pu.cu->transQuantBypass;
 
   //  Reference pattern initialization (integer scale)
   int         iOffset    = rcMvInt.getHor() + rcMvInt.getVer() * cStruct.iRefStride;
@@ -3813,16 +4297,16 @@ void InterSearch::xPatternSearchFracDIF(
     Mv baseRefMv(0, 0);
     rcMvHalf.setZero();
     m_pcRdCost->setCostScale(0);
-    xExtDIFUpSamplingH(&cPatternRoi);
+    xExtDIFUpSamplingH(&cPatternRoi, cStruct.useAltHpelIf);
     rcMvQter = rcMvInt;   rcMvQter <<= 2;    // for mv-cost
-    ruiCost = xPatternRefinement(cStruct.pcPatternKey, baseRefMv, 1, rcMvQter, !bIsLosslessCoded);
+    ruiCost = xPatternRefinement(cStruct.pcPatternKey, baseRefMv, 1, rcMvQter, !pu.cs->slice->getDisableSATDForRD());
     return;
   }
 
 
-  if (cStruct.imvShift || (m_useCompositeRef && cStruct.zeroMV))
+  if (cStruct.imvShift > IMV_FPEL || (m_useCompositeRef && cStruct.zeroMV))
   {
-    m_pcRdCost->setDistParam( m_cDistParam, *cStruct.pcPatternKey, cStruct.piRefY + iOffset, cStruct.iRefStride, m_lumaClpRng.bd, COMPONENT_Y, 0, 1, m_pcEncCfg->getUseHADME() && !bIsLosslessCoded );
+    m_pcRdCost->setDistParam(m_cDistParam, *cStruct.pcPatternKey, cStruct.piRefY + iOffset, cStruct.iRefStride, m_lumaClpRng.bd, COMPONENT_Y, 0, 1, m_pcEncCfg->getUseHADME() && !pu.cs->slice->getDisableSATDForRD());
     ruiCost = m_cDistParam.distFunc( m_cDistParam );
     ruiCost += m_pcRdCost->getCostOfVectorWithPredictor( rcMvInt.getHor(), rcMvInt.getVer(), cStruct.imvShift );
     return;
@@ -3830,13 +4314,15 @@ void InterSearch::xPatternSearchFracDIF(
 
   //  Half-pel refinement
   m_pcRdCost->setCostScale(1);
-  xExtDIFUpSamplingH ( &cPatternRoi );
+  xExtDIFUpSamplingH(&cPatternRoi, cStruct.useAltHpelIf);
 
   rcMvHalf = rcMvInt;   rcMvHalf <<= 1;    // for mv-cost
   Mv baseRefMv(0, 0);
-  ruiCost = xPatternRefinement(cStruct.pcPatternKey, baseRefMv, 2, rcMvHalf, !bIsLosslessCoded);
+  ruiCost = xPatternRefinement(cStruct.pcPatternKey, baseRefMv, 2, rcMvHalf, (!pu.cs->slice->getDisableSATDForRD()));
 
   //  quarter-pel refinement
+  if (cStruct.imvShift == IMV_OFF)
+  {
   m_pcRdCost->setCostScale( 0 );
   xExtDIFUpSamplingQ ( &cPatternRoi, rcMvHalf );
   baseRefMv = rcMvHalf;
@@ -3844,10 +4330,11 @@ void InterSearch::xPatternSearchFracDIF(
 
   rcMvQter = rcMvInt;    rcMvQter <<= 1;    // for mv-cost
   rcMvQter += rcMvHalf;  rcMvQter <<= 1;
-  ruiCost = xPatternRefinement( cStruct.pcPatternKey, baseRefMv, 1, rcMvQter, !bIsLosslessCoded );
+  ruiCost = xPatternRefinement(cStruct.pcPatternKey, baseRefMv, 1, rcMvQter, (!pu.cs->slice->getDisableSATDForRD()));
+  }
 }
 
-Distortion InterSearch::xGetSymmetricCost( PredictionUnit& pu, PelUnitBuf& origBuf, RefPicList eCurRefPicList, const MvField& cCurMvField, MvField& cTarMvField, int gbiIdx )
+Distortion InterSearch::xGetSymmetricCost( PredictionUnit& pu, PelUnitBuf& origBuf, RefPicList eCurRefPicList, const MvField& cCurMvField, MvField& cTarMvField, int bcwIdx )
 {
   Distortion cost = std::numeric_limits<Distortion>::max();
   RefPicList eTarRefPicList = (RefPicList)(1 - (int)eCurRefPicList);
@@ -3856,32 +4343,51 @@ Distortion InterSearch::xGetSymmetricCost( PredictionUnit& pu, PelUnitBuf& origB
   PelUnitBuf predBufA = m_tmpPredStorage[eCurRefPicList].getBuf( UnitAreaRelative( *pu.cu, pu ) );
   const Picture* picRefA = pu.cu->slice->getRefPic( eCurRefPicList, cCurMvField.refIdx );
   Mv mvA = cCurMvField.mv;
-  mvA.changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);
-  clipMv( mvA, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps );
-  xPredInterBlk( COMPONENT_Y, pu, picRefA, mvA, predBufA, true, pu.cu->slice->clpRng( COMPONENT_Y ), false, false );
+  clipMv( mvA, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps, *pu.cs->pps );
+  if ( (mvA.hor & 15) == 0 && (mvA.ver & 15) == 0 )
+  {
+    Position offset = pu.blocks[COMPONENT_Y].pos().offset( mvA.getHor() >> 4, mvA.getVer() >> 4 );
+    CPelBuf pelBufA = picRefA->getRecoBuf( CompArea( COMPONENT_Y, pu.chromaFormat, offset, pu.blocks[COMPONENT_Y].size() ), false );
+    predBufA.bufs[0].buf = const_cast<Pel *>(pelBufA.buf);
+    predBufA.bufs[0].stride = pelBufA.stride;
+    predBufA.bufs[0].width = pelBufA.width;
+    predBufA.bufs[0].height = pelBufA.height;
+  }
+  else
+  {
+    xPredInterBlk( COMPONENT_Y, pu, picRefA, mvA, predBufA, false, pu.cu->slice->clpRng( COMPONENT_Y ), false, false );
+  }
 
   // get prediction of eTarRefPicList
   PelUnitBuf predBufB = m_tmpPredStorage[eTarRefPicList].getBuf( UnitAreaRelative( *pu.cu, pu ) );
   const Picture* picRefB = pu.cu->slice->getRefPic( eTarRefPicList, cTarMvField.refIdx );
   Mv mvB = cTarMvField.mv;
-  mvB.changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);
-  clipMv( mvB, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps );
-  xPredInterBlk( COMPONENT_Y, pu, picRefB, mvB, predBufB, true, pu.cu->slice->clpRng( COMPONENT_Y ), false, false );
+  clipMv( mvB, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps, *pu.cs->pps );
+  if ( (mvB.hor & 15) == 0 && (mvB.ver & 15) == 0 )
+  {
+    Position offset = pu.blocks[COMPONENT_Y].pos().offset( mvB.getHor() >> 4, mvB.getVer() >> 4 );
+    CPelBuf pelBufB = picRefB->getRecoBuf( CompArea( COMPONENT_Y, pu.chromaFormat, offset, pu.blocks[COMPONENT_Y].size() ), false );
+    predBufB.bufs[0].buf = const_cast<Pel *>(pelBufB.buf);
+    predBufB.bufs[0].stride = pelBufB.stride;
+  }
+  else
+  {
+    xPredInterBlk( COMPONENT_Y, pu, picRefB, mvB, predBufB, false, pu.cu->slice->clpRng( COMPONENT_Y ), false, false );
+  }
 
   PelUnitBuf bufTmp = m_tmpStorageLCU.getBuf( UnitAreaRelative( *pu.cu, pu ) );
-  if (gbiIdx != GBI_DEFAULT)
-    bufTmp.Y().addWeightedAvg(predBufA.Y(), predBufB.Y(), pu.cu->slice->clpRng(COMPONENT_Y), gbiIdx);
-  else
-    bufTmp.Y().addAvg( predBufA.Y(), predBufB.Y(), pu.cu->slice->clpRng( COMPONENT_Y ) );
+  bufTmp.copyFrom( origBuf );
+  bufTmp.removeHighFreq( predBufA, m_pcEncCfg->getClipForBiPredMeEnabled(), pu.cu->slice->clpRngs(), getBcwWeight( pu.cu->BcwIdx, eTarRefPicList ) );
+  double fWeight = xGetMEDistortionWeight( pu.cu->BcwIdx, eTarRefPicList );
 
   // calc distortion
-  cost = m_pcRdCost->getDistPart(bufTmp.Y(), origBuf.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, DF_HAD);
-
+  DFunc distFunc = (!pu.cu->slice->getDisableSATDForRD()) ? DF_HAD : DF_SAD;
+  cost = (Distortion)floor( fWeight * (double)m_pcRdCost->getDistPart( bufTmp.Y(), predBufB.Y(), pu.cs->sps->getBitDepth( CHANNEL_TYPE_LUMA ), COMPONENT_Y, distFunc ) );
   return(cost);
 }
 
 Distortion InterSearch::xSymmeticRefineMvSearch( PredictionUnit &pu, PelUnitBuf& origBuf, Mv& rcMvCurPred, Mv& rcMvTarPred
-  , RefPicList eRefPicList, MvField& rCurMvField, MvField& rTarMvField, Distortion uiMinCost, int SearchPattern, int nSearchStepShift, uint32_t uiMaxSearchRounds, int gbiIdx )
+  , RefPicList eRefPicList, MvField& rCurMvField, MvField& rTarMvField, Distortion uiMinCost, int SearchPattern, int nSearchStepShift, uint32_t uiMaxSearchRounds, int bcwIdx )
 {
   const Mv mvSearchOffsetCross[4] = { Mv( 0 , 1 ) , Mv( 1 , 0 ) , Mv( 0 , -1 ) , Mv( -1 ,  0 ) };
   const Mv mvSearchOffsetSquare[8] = { Mv( -1 , 1 ) , Mv( 0 , 1 ) , Mv( 1 ,  1 ) , Mv( 1 ,  0 ) , Mv( 1 , -1 ) , Mv( 0 , -1 ) , Mv( -1 , -1 ) , Mv( -1 , 0 ) };
@@ -3949,9 +4455,13 @@ Distortion InterSearch::xSymmeticRefineMvSearch( PredictionUnit &pu, PelUnitBuf&
           continue; // Skip this this pos
       }
       // get MVD cost
-      m_pcRdCost->setPredictor( rcMvCurPred );
+      Mv pred = rcMvCurPred;
+      pred.changeTransPrecInternal2Amvr(pu.cu->imv);
+      m_pcRdCost->setPredictor( pred );
       m_pcRdCost->setCostScale( 0 );
-      uint32_t uiMvBits = m_pcRdCost->getBitsOfVectorWithPredictor( mvCand.mv.getHor(), mvCand.mv.getVer(), (pu.cu->imv << 1) );
+      Mv mv = mvCand.mv;
+      mv.changeTransPrecInternal2Amvr(pu.cu->imv);
+      uint32_t uiMvBits = m_pcRdCost->getBitsOfVectorWithPredictor( mv.getHor(), mv.getVer(), 0 );
       Distortion uiCost = m_pcRdCost->getCost( uiMvBits );
 
       // get MVD pair and set target MV
@@ -3962,7 +4472,7 @@ Distortion InterSearch::xSymmeticRefineMvSearch( PredictionUnit &pu, PelUnitBuf&
         if( !( MCTSHelper::checkMvForMCTSConstraint( pu, mvPair.mv ) ) )
           continue; // Skip this this pos
       }
-      uiCost += xGetSymmetricCost( pu, origBuf, eRefPicList, mvCand, mvPair, gbiIdx );
+      uiCost += xGetSymmetricCost( pu, origBuf, eRefPicList, mvCand, mvPair, bcwIdx );
       if ( uiCost < uiMinCost )
       {
         uiMinCost = uiCost;
@@ -3989,18 +4499,18 @@ Distortion InterSearch::xSymmeticRefineMvSearch( PredictionUnit &pu, PelUnitBuf&
 }
 
 
-void InterSearch::xSymmetricMotionEstimation( PredictionUnit& pu, PelUnitBuf& origBuf, Mv& rcMvCurPred, Mv& rcMvTarPred, RefPicList eRefPicList, MvField& rCurMvField, MvField& rTarMvField, Distortion& ruiCost, int gbiIdx )
+void InterSearch::xSymmetricMotionEstimation( PredictionUnit& pu, PelUnitBuf& origBuf, Mv& rcMvCurPred, Mv& rcMvTarPred, RefPicList eRefPicList, MvField& rCurMvField, MvField& rTarMvField, Distortion& ruiCost, int bcwIdx )
 {
   // Refine Search
-  int nSearchStepShift = 0;
+  int nSearchStepShift = MV_FRACTIONAL_BITS_DIFF;
   int nDiamondRound = 8;
   int nCrossRound = 1;
 
-  nSearchStepShift += (pu.cu->imv << 1);
+  nSearchStepShift += pu.cu->imv == IMV_HPEL ? 1 : (pu.cu->imv << 1);
   nDiamondRound >>= pu.cu->imv;
 
-  ruiCost = xSymmeticRefineMvSearch( pu, origBuf, rcMvCurPred, rcMvTarPred, eRefPicList, rCurMvField, rTarMvField, ruiCost, 2, nSearchStepShift, nDiamondRound, gbiIdx );
-  ruiCost = xSymmeticRefineMvSearch( pu, origBuf, rcMvCurPred, rcMvTarPred, eRefPicList, rCurMvField, rTarMvField, ruiCost, 0, nSearchStepShift, nCrossRound, gbiIdx );
+  ruiCost = xSymmeticRefineMvSearch( pu, origBuf, rcMvCurPred, rcMvTarPred, eRefPicList, rCurMvField, rTarMvField, ruiCost, 2, nSearchStepShift, nDiamondRound, bcwIdx );
+  ruiCost = xSymmeticRefineMvSearch( pu, origBuf, rcMvCurPred, rcMvTarPred, eRefPicList, rCurMvField, rTarMvField, ruiCost, 0, nSearchStepShift, nCrossRound, bcwIdx );
 }
 
 void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
@@ -4011,9 +4521,9 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
                                           Mv                    hevcMv[2][33]
                                         , Mv                    mvAffine4Para[2][33][3]
                                         , int                   refIdx4Para[2]
-                                        , uint8_t               gbiIdx
-                                        , bool                  enforceGBiPred
-                                        , uint32_t              gbiIdxBits
+                                        , uint8_t               bcwIdx
+                                        , bool                  enforceBcwPred
+                                        , uint32_t              bcwIdxBits
                                          )
 {
   const Slice &slice = *pu.cu->slice;
@@ -4071,7 +4581,6 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
   uint32_t          bitsValidList1 = MAX_UINT;
   Distortion costValidList1 = std::numeric_limits<Distortion>::max();
   Mv            mvHevc[3];
-  const bool changeToHighPrec  = pu.cu->imv != 1;
   const bool affineAmvrEnabled = pu.cu->slice->getSPS()->getAffineAmvrEnabledFlag();
   int tryBipred = 0;
   WPScalingParam *wp0;
@@ -4080,16 +4589,17 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
 
   pu.cu->affine = true;
   pu.mergeFlag = false;
-
-  if( gbiIdx != GBI_DEFAULT )
+  pu.regularMergeFlag = false;
+  if( bcwIdx != BCW_DEFAULT )
   {
-    pu.cu->GBiIdx = gbiIdx;
+    pu.cu->BcwIdx = bcwIdx;
   }
 
   // Uni-directional prediction
   for ( int iRefList = 0; iRefList < iNumPredDir; iRefList++ )
   {
     RefPicList  eRefPicList = ( iRefList ? REF_PIC_LIST_1 : REF_PIC_LIST_0 );
+    pu.interDir = ( iRefList ? 2 : 1 );
     for (int iRefIdxTemp = 0; iRefIdxTemp < slice.getNumRefIdx(eRefPicList); iRefIdxTemp++)
     {
       // Get RefIdx bits
@@ -4121,14 +4631,7 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
       for ( int i=0; i<3; i++ )
       {
         mvHevc[i] = hevcMv[iRefList][iRefIdxTemp];
-        if ( pu.cu->imv == 1 )
-        {
-          mvHevc[i].changePrecision( MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL );
-        }
-        else if ( pu.cu->imv == 2 )
-        {
-          mvHevc[i].roundToPrecision( MV_PRECISION_QUARTER, MV_PRECISION_INT );
-        }
+        mvHevc[i].roundAffinePrecInternal2Amvr(pu.cu->imv);
       }
       PelUnitBuf predBuf = m_tmpStorageLCU.getBuf( UnitAreaRelative(*pu.cu, pu) );
 
@@ -4151,11 +4654,7 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
         for ( int i = 0; i < mvNum; i++ )
         {
           mvFour[i] = affine4Para ? m_affineMotion.acMvAffine4Para[iRefList][i] : m_affineMotion.acMvAffine6Para[iRefList][i];
-          if ( pu.cu->imv != 1 )
-          {
-            mvFour[i].roundToPrecision( MV_PRECISION_INTERNAL, pu.cu->imv == 2 ? MV_PRECISION_INT : MV_PRECISION_QUARTER );
-            mvFour[i].changePrecision( MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER );
-          }
+          mvFour[i].roundAffinePrecInternal2Amvr(pu.cu->imv);
         }
 
         Distortion candCostInherit = xGetAffineTemplateCost( pu, origBuf, predBuf, mvFour, aaiMvpIdx[iRefList][iRefIdxTemp], AMVP_MAX_NUM_CANDS, eRefPicList, iRefIdxTemp );
@@ -4169,7 +4668,7 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
       }
 
       if (pu.cu->affineType == AFFINEMODEL_4PARAM && m_affMVListSize
-        && (!pu.cu->cs->sps->getUseGBi() || gbiIdx == GBI_DEFAULT)
+        && (!pu.cu->cs->sps->getUseBcw() || bcwIdx == BCW_DEFAULT)
         )
       {
         int shift = MAX_CU_DEPTH;
@@ -4199,11 +4698,8 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
           int mvScaleHor = nbMv[0].getHor() << shift;
           int mvScaleVer = nbMv[0].getVer() << shift;
           Mv dMv = nbMv[1] - nbMv[0];
-          mvScaleHor <<= MV_FRACTIONAL_BITS_DIFF;
-          mvScaleVer <<= MV_FRACTIONAL_BITS_DIFF;
-          dMv <<= MV_FRACTIONAL_BITS_DIFF;
-          dMvHorX = dMv.getHor() << (shift - g_aucLog2[mvInfo->w]);
-          dMvHorY = dMv.getVer() << (shift - g_aucLog2[mvInfo->w]);
+          dMvHorX = dMv.getHor() << (shift - floorLog2(mvInfo->w));
+          dMvHorY = dMv.getVer() << (shift - floorLog2(mvInfo->w));
           dMvVerX = -dMvHorY;
           dMvVerY = dMvHorX;
           vx = mvScaleHor + dMvHorX * (pu.Y().x - mvInfo->x) + dMvVerX * (pu.Y().y - mvInfo->y);
@@ -4211,29 +4707,16 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
           roundAffineMv(vx, vy, shift);
           mvTmp[0] = Mv(vx, vy);
           mvTmp[0].clipToStorageBitDepth();
-          clipMv(mvTmp[0], pu.cu->lumaPos(),
-                 pu.cu->lumaSize(),
-                 *pu.cs->sps);
-          if ( pu.cu->imv == 2 )
-          {
-            mvTmp[0].roundToPrecision( MV_PRECISION_INTERNAL, MV_PRECISION_INT );
-          }
-          else if ( pu.cu->imv == 0 )
-          mvTmp[0].roundToPrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
+          clipMv( mvTmp[0], pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps, *pu.cs->pps );
+          mvTmp[0].roundAffinePrecInternal2Amvr(pu.cu->imv);
           vx = mvScaleHor + dMvHorX * (pu.Y().x + pu.Y().width - mvInfo->x) + dMvVerX * (pu.Y().y - mvInfo->y);
           vy = mvScaleVer + dMvHorY * (pu.Y().x + pu.Y().width - mvInfo->x) + dMvVerY * (pu.Y().y - mvInfo->y);
           roundAffineMv(vx, vy, shift);
           mvTmp[1] = Mv(vx, vy);
           mvTmp[1].clipToStorageBitDepth();
-          clipMv(mvTmp[1], pu.cu->lumaPos(),
-                 pu.cu->lumaSize(),
-                 *pu.cs->sps);
-          if ( pu.cu->imv != 1 )
-          {
-            mvTmp[1].roundToPrecision( MV_PRECISION_INTERNAL, pu.cu->imv == 2 ? MV_PRECISION_INT : MV_PRECISION_QUARTER );
-            mvTmp[0].changePrecision( MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER );
-            mvTmp[1].changePrecision( MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER );
-          }
+          clipMv( mvTmp[1], pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps, *pu.cs->pps );
+          mvTmp[0].roundAffinePrecInternal2Amvr(pu.cu->imv);
+          mvTmp[1].roundAffinePrecInternal2Amvr(pu.cu->imv);
           Distortion tmpCost = xGetAffineTemplateCost(pu, origBuf, predBuf, mvTmp, aaiMvpIdx[iRefList][iRefIdxTemp], AMVP_MAX_NUM_CANDS, eRefPicList, iRefIdxTemp);
           if ( affineAmvrEnabled )
           {
@@ -4249,39 +4732,23 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
       if ( pu.cu->affineType == AFFINEMODEL_6PARAM )
       {
         Mv mvFour[3];
-        if ( pu.cu->imv != 1 )
-        {
-          mvAffine4Para[iRefList][iRefIdxTemp][0].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);
-          mvAffine4Para[iRefList][iRefIdxTemp][1].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);
-        }
         mvFour[0] = mvAffine4Para[iRefList][iRefIdxTemp][0];
         mvFour[1] = mvAffine4Para[iRefList][iRefIdxTemp][1];
-        if ( pu.cu->imv != 1 )
-        {
-          mvAffine4Para[iRefList][iRefIdxTemp][0].changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
-          mvAffine4Para[iRefList][iRefIdxTemp][1].changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
-        }
+        mvAffine4Para[iRefList][iRefIdxTemp][0].roundAffinePrecInternal2Amvr(pu.cu->imv);
+        mvAffine4Para[iRefList][iRefIdxTemp][1].roundAffinePrecInternal2Amvr(pu.cu->imv);
+
         int shift = MAX_CU_DEPTH;
-        int vx2 = (mvFour[0].getHor() << shift) - ((mvFour[1].getVer() - mvFour[0].getVer()) << (shift + g_aucLog2[pu.lheight()] - g_aucLog2[pu.lwidth()]));
-        int vy2 = (mvFour[0].getVer() << shift) + ((mvFour[1].getHor() - mvFour[0].getHor()) << (shift + g_aucLog2[pu.lheight()] - g_aucLog2[pu.lwidth()]));
-        vx2 >>= shift;
-        vy2 >>= shift;
+        int vx2 = (mvFour[0].getHor() << shift) - ((mvFour[1].getVer() - mvFour[0].getVer()) << (shift + floorLog2(pu.lheight()) - floorLog2(pu.lwidth())));
+        int vy2 = (mvFour[0].getVer() << shift) + ((mvFour[1].getHor() - mvFour[0].getHor()) << (shift + floorLog2(pu.lheight()) - floorLog2(pu.lwidth())));
+        int offset = (1 << (shift - 1));
+        vx2 = (vx2 + offset - (vx2 >= 0)) >> shift;
+        vy2 = (vy2 + offset - (vy2 >= 0)) >> shift;
         mvFour[2].hor = vx2;
         mvFour[2].ver = vy2;
         mvFour[2].clipToStorageBitDepth();
-        if ( pu.cu->imv != 1 )
-        {
-          mvFour[0].roundToPrecision( MV_PRECISION_INTERNAL, pu.cu->imv == 2 ? MV_PRECISION_INT : MV_PRECISION_QUARTER );
-          mvFour[1].roundToPrecision( MV_PRECISION_INTERNAL, pu.cu->imv == 2 ? MV_PRECISION_INT : MV_PRECISION_QUARTER );
-          mvFour[2].roundToPrecision( MV_PRECISION_INTERNAL, pu.cu->imv == 2 ? MV_PRECISION_INT : MV_PRECISION_QUARTER );
-        }
-        for (int i = 0; i < 3; i++)
-        {
-          if ( pu.cu->imv != 1 )
-          {
-            mvFour[i].changePrecision( MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER );
-          }
-        }
+        mvFour[0].roundAffinePrecInternal2Amvr(pu.cu->imv);
+        mvFour[1].roundAffinePrecInternal2Amvr(pu.cu->imv);
+        mvFour[2].roundAffinePrecInternal2Amvr(pu.cu->imv);
         Distortion uiCandCostInherit = xGetAffineTemplateCost( pu, origBuf, predBuf, mvFour, aaiMvpIdx[iRefList][iRefIdxTemp], AMVP_MAX_NUM_CANDS, eRefPicList, iRefIdxTemp );
         if ( affineAmvrEnabled )
         {
@@ -4307,7 +4774,7 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
       }
 
       // GPB list 1, save the best MvpIdx, RefIdx and Cost
-      if ( slice.getMvdL1ZeroFlag() && iRefList==1 && biPDistTemp < bestBiPDist )
+      if ( slice.getPicHeader()->getMvdL1ZeroFlag() && iRefList==1 && biPDistTemp < bestBiPDist )
       {
         bestBiPDist = biPDistTemp;
         bestBiPMvpL1 = aaiMvpIdx[iRefList][iRefIdxTemp];
@@ -4344,7 +4811,7 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
                                  , aaiMvpIdx[iRefList][iRefIdxTemp], affiAMVPInfoTemp[eRefPicList]
         );
       }
-      if(pu.cu->cs->sps->getUseGBi() && pu.cu->GBiIdx == GBI_DEFAULT && pu.cu->slice->isInterB())
+      if(pu.cu->cs->sps->getUseBcw() && pu.cu->BcwIdx == BCW_DEFAULT && pu.cu->slice->isInterB())
       {
         m_uniMotions.setReadModeAffine(true, (uint8_t)iRefList, (uint8_t)iRefIdxTemp, pu.cu->affineType);
         m_uniMotions.copyAffineMvFrom(cMvTemp[iRefList][iRefIdxTemp], uiCostTemp - m_pcRdCost->getCost(uiBitsTemp), (uint8_t)iRefList, (uint8_t)iRefIdxTemp, pu.cu->affineType
@@ -4387,7 +4854,7 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
   if ( pu.cu->affineType == AFFINEMODEL_4PARAM )
   {
     ::memcpy( mvAffine4Para, cMvTemp, sizeof( cMvTemp ) );
-    if ( pu.cu->imv == 0 && ( !pu.cu->cs->sps->getUseGBi() || gbiIdx == GBI_DEFAULT ) )
+    if ( pu.cu->imv == 0 && ( !pu.cu->cs->sps->getUseBcw() || bcwIdx == BCW_DEFAULT ) )
     {
       AffineMVInfo *affMVInfo = m_affMVList + m_affMVListIdx;
 
@@ -4421,7 +4888,9 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
   // Bi-directional prediction
   if ( slice.isInterB() && !PU::isBipredRestriction(pu) )
   {
-	  tryBipred = 1;
+    tryBipred = 1;
+    pu.interDir = 3;
+    m_isBi = true;
     // Set as best list0 and list1
     iRefIdxBi[0] = iRefIdx[0];
     iRefIdxBi[1] = iRefIdx[1];
@@ -4433,7 +4902,7 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
     uint32_t uiMotBits[2];
     bool doBiPred = true;
 
-    if ( slice.getMvdL1ZeroFlag() ) // GPB, list 1 only use Mvp
+    if ( slice.getPicHeader()->getMvdL1ZeroFlag() ) // GPB, list 1 only use Mvp
     {
       xCopyAffineAMVPInfo( aacAffineAMVPInfo[1][bestBiPRefIdxL1], affiAMVPInfoTemp[REF_PIC_LIST_1] );
       pu.mvpIdx[REF_PIC_LIST_1] = bestBiPMvpL1;
@@ -4455,9 +4924,7 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
         for( int i = 0; i < mvNum; i++ )
         {
           Mv restrictedMv = pcMvTemp[i];
-          restrictedMv.changePrecision( MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL );
           MCTSHelper::clipMvToArea( restrictedMv, pu.cu->Y(), curTileAreaRestricted, *pu.cs->sps );
-          restrictedMv.changePrecision( MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER );
 
           // If sub-pel filter samples are not inside of allowed area
           if( restrictedMv != pcMvTemp[i] )
@@ -4468,9 +4935,7 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
         }
       }
       // Get list1 prediction block
-      PU::setAllAffineMv( pu, cMvBi[1][0], cMvBi[1][1], cMvBi[1][2], REF_PIC_LIST_1
-        , changeToHighPrec
-      );
+      PU::setAllAffineMv( pu, cMvBi[1][0], cMvBi[1][1], cMvBi[1][2], REF_PIC_LIST_1);
       pu.refIdx[REF_PIC_LIST_1] = iRefIdxBi[1];
 
       PelUnitBuf predBufTmp = m_tmpPredStorage[REF_PIC_LIST_1].getBuf( UnitAreaRelative(*pu.cu, pu) );
@@ -4503,7 +4968,7 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
     // 4-times iteration (default)
     int iNumIter = 4;
     // fast encoder setting or GPB: only one iteration
-    if ( m_pcEncCfg->getFastInterSearchMode()==FASTINTERSEARCH_MODE1 || m_pcEncCfg->getFastInterSearchMode()==FASTINTERSEARCH_MODE2 || slice.getMvdL1ZeroFlag() )
+    if ( m_pcEncCfg->getFastInterSearchMode()==FASTINTERSEARCH_MODE1 || m_pcEncCfg->getFastInterSearchMode()==FASTINTERSEARCH_MODE2 || slice.getPicHeader()->getMvdL1ZeroFlag() )
     {
       iNumIter = 1;
     }
@@ -4522,9 +4987,9 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
         {
           iRefList = 0;
         }
-        if( gbiIdx != GBI_DEFAULT )
+        if( bcwIdx != BCW_DEFAULT )
         {
-          iRefList = ( abs( getGbiWeight( gbiIdx, REF_PIC_LIST_0 ) ) > abs( getGbiWeight( gbiIdx, REF_PIC_LIST_1 ) ) ? 1 : 0 );
+          iRefList = ( abs( getBcwWeight( bcwIdx, REF_PIC_LIST_0 ) ) > abs( getBcwWeight( bcwIdx, REF_PIC_LIST_1 ) ) ? 1 : 0 );
         }
       }
       else if ( iIter == 0 )
@@ -4533,11 +4998,9 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
       }
 
       // First iterate, get prediction block of opposite direction
-      if( iIter == 0 && !slice.getMvdL1ZeroFlag() )
+      if( iIter == 0 && !slice.getPicHeader()->getMvdL1ZeroFlag() )
       {
-        PU::setAllAffineMv( pu, aacMv[1-iRefList][0], aacMv[1-iRefList][1], aacMv[1-iRefList][2], RefPicList(1-iRefList)
-          , changeToHighPrec
-        );
+        PU::setAllAffineMv( pu, aacMv[1-iRefList][0], aacMv[1-iRefList][1], aacMv[1-iRefList][2], RefPicList(1-iRefList));
         pu.refIdx[1-iRefList] = iRefIdx[1-iRefList];
 
         PelUnitBuf predBufTmp = m_tmpPredStorage[1 - iRefList].getBuf( UnitAreaRelative(*pu.cu, pu) );
@@ -4546,7 +5009,7 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
 
       RefPicList eRefPicList = ( iRefList ? REF_PIC_LIST_1 : REF_PIC_LIST_0 );
 
-      if ( slice.getMvdL1ZeroFlag() ) // GPB, fix List 1, search List 0
+      if ( slice.getPicHeader()->getMvdL1ZeroFlag() ) // GPB, fix List 1, search List 0
       {
         iRefList = 0;
         eRefPicList = REF_PIC_LIST_0;
@@ -4562,7 +5025,7 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
         {
           continue;
         }
-        if(m_pcEncCfg->getUseGBiFast() && (gbiIdx != GBI_DEFAULT)
+        if(m_pcEncCfg->getUseBcwFast() && (bcwIdx != BCW_DEFAULT)
           && (pu.cu->slice->getRefPic(eRefPicList, iRefIdxTemp)->getPOC() == pu.cu->slice->getRefPic(RefPicList(1 - iRefList), pu.refIdx[1 - iRefList])->getPOC())
           && (pu.cu->affineType == AFFINEMODEL_4PARAM && pu.cu->slice->getTLayer()>1))
         {
@@ -4570,7 +5033,7 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
         }
         // update bits
         uiBitsTemp = uiMbBits[2] + uiMotBits[1-iRefList];
-        uiBitsTemp += ((pu.cu->slice->getSPS()->getUseGBi() == true) ? gbiIdxBits : 0);
+        uiBitsTemp += ((pu.cu->slice->getSPS()->getUseBcw() == true) ? bcwIdxBits : 0);
         if( slice.getNumRefIdx(eRefPicList) > 1 )
         {
           uiBitsTemp += iRefIdxTemp+1;
@@ -4597,15 +5060,13 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
 
           uiCostBi            = uiCostTemp;
           uiMotBits[iRefList] = uiBitsTemp - uiMbBits[2] - uiMotBits[1-iRefList];
-          uiMotBits[iRefList] -= ((pu.cu->slice->getSPS()->getUseGBi() == true) ? gbiIdxBits : 0);
+          uiMotBits[iRefList] -= ((pu.cu->slice->getSPS()->getUseBcw() == true) ? bcwIdxBits : 0);
           uiBits[2]           = uiBitsTemp;
 
           if ( iNumIter != 1 ) // MC for next iter
           {
             //  Set motion
-            PU::setAllAffineMv( pu, cMvBi[iRefList][0], cMvBi[iRefList][1], cMvBi[iRefList][2], eRefPicList
-              , changeToHighPrec
-            );
+            PU::setAllAffineMv( pu, cMvBi[iRefList][0], cMvBi[iRefList][1], cMvBi[iRefList][2], eRefPicList);
             pu.refIdx[eRefPicList] = iRefIdxBi[eRefPicList];
             PelUnitBuf predBufTmp = m_tmpPredStorage[iRefList].getBuf( UnitAreaRelative(*pu.cu, pu) );
             motionCompensation( pu, predBufTmp, eRefPicList );
@@ -4615,12 +5076,12 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
 
       if ( !bChanged )
       {
-        if ((uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1]) || enforceGBiPred)
+        if ((uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1]) || enforceBcwPred)
         {
           xCopyAffineAMVPInfo( aacAffineAMVPInfo[0][iRefIdxBi[0]], affiAMVPInfoTemp[REF_PIC_LIST_0] );
           xCheckBestAffineMVP( pu, affiAMVPInfoTemp[REF_PIC_LIST_0], REF_PIC_LIST_0, cMvBi[0], cMvPredBi[0][iRefIdxBi[0]], aaiMvpIdxBi[0][iRefIdxBi[0]], uiBits[2], uiCostBi );
 
-          if ( !slice.getMvdL1ZeroFlag() )
+          if ( !slice.getPicHeader()->getMvdL1ZeroFlag() )
           {
             xCopyAffineAMVPInfo( aacAffineAMVPInfo[1][iRefIdxBi[1]], affiAMVPInfoTemp[REF_PIC_LIST_1] );
             xCheckBestAffineMVP( pu, affiAMVPInfoTemp[REF_PIC_LIST_1], REF_PIC_LIST_1, cMvBi[1], cMvPredBi[1][iRefIdxBi[1]], aaiMvpIdxBi[1][iRefIdxBi[1]], uiBits[2], uiCostBi );
@@ -4630,6 +5091,7 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
       }
     } // for loop-iter
     }
+    m_isBi = false;
   } // if (B_SLICE)
 
   pu.mv    [REF_PIC_LIST_0] = Mv();
@@ -4654,20 +5116,20 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
   iRefIdx[1] = refIdxValidList1;
   uiBits[1]  = bitsValidList1;
   uiCost[1]  = costValidList1;
-  if (pu.cs->pps->getWPBiPred() == true && tryBipred && (gbiIdx != GBI_DEFAULT))
+  if (pu.cs->pps->getWPBiPred() == true && tryBipred && (bcwIdx != BCW_DEFAULT))
   {
-	  CHECK(iRefIdxBi[0]<0, "Invalid picture reference index");
-	  CHECK(iRefIdxBi[1]<0, "Invalid picture reference index");
-	  pu.cs->slice->getWpScaling(REF_PIC_LIST_0, iRefIdxBi[0], wp0);
-	  pu.cs->slice->getWpScaling(REF_PIC_LIST_1, iRefIdxBi[1], wp1);
-	  if ((wp0[COMPONENT_Y].bPresentFlag || wp0[COMPONENT_Cb].bPresentFlag || wp0[COMPONENT_Cr].bPresentFlag
-		  || wp1[COMPONENT_Y].bPresentFlag || wp1[COMPONENT_Cb].bPresentFlag || wp1[COMPONENT_Cr].bPresentFlag))
-	  {
-		  uiCostBi = MAX_UINT;
-		  enforceGBiPred = false;
-	  }
+    CHECK(iRefIdxBi[0]<0, "Invalid picture reference index");
+    CHECK(iRefIdxBi[1]<0, "Invalid picture reference index");
+    pu.cs->slice->getWpScaling(REF_PIC_LIST_0, iRefIdxBi[0], wp0);
+    pu.cs->slice->getWpScaling(REF_PIC_LIST_1, iRefIdxBi[1], wp1);
+    if ((wp0[COMPONENT_Y].bPresentFlag || wp0[COMPONENT_Cb].bPresentFlag || wp0[COMPONENT_Cr].bPresentFlag
+      || wp1[COMPONENT_Y].bPresentFlag || wp1[COMPONENT_Cb].bPresentFlag || wp1[COMPONENT_Cr].bPresentFlag))
+    {
+      uiCostBi = MAX_UINT;
+      enforceBcwPred = false;
+    }
   }
-  if( enforceGBiPred )
+  if( enforceBcwPred )
   {
     uiCost[0] = uiCost[1] = MAX_UINT;
   }
@@ -4677,13 +5139,9 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
   {
     lastMode = 2;
     affineCost = uiCostBi;
-
-    PU::setAllAffineMv( pu, cMvBi[0][0], cMvBi[0][1], cMvBi[0][2], REF_PIC_LIST_0
-      , changeToHighPrec
-    );
-    PU::setAllAffineMv( pu, cMvBi[1][0], cMvBi[1][1], cMvBi[1][2], REF_PIC_LIST_1
-      , changeToHighPrec
-    );
+    pu.interDir = 3;
+    PU::setAllAffineMv( pu, cMvBi[0][0], cMvBi[0][1], cMvBi[0][2], REF_PIC_LIST_0);
+    PU::setAllAffineMv( pu, cMvBi[1][0], cMvBi[1][1], cMvBi[1][2], REF_PIC_LIST_1);
     pu.refIdx[REF_PIC_LIST_0] = iRefIdxBi[0];
     pu.refIdx[REF_PIC_LIST_1] = iRefIdxBi[1];
 
@@ -4698,7 +5156,6 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
       }
     }
 
-    pu.interDir = 3;
 
     pu.mvpIdx[REF_PIC_LIST_0] = aaiMvpIdxBi[0][iRefIdxBi[0]];
     pu.mvpNum[REF_PIC_LIST_0] = aaiMvpNum[0][iRefIdxBi[0]];
@@ -4709,10 +5166,8 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
   {
     lastMode = 0;
     affineCost = uiCost[0];
-
-    PU::setAllAffineMv( pu, aacMv[0][0], aacMv[0][1], aacMv[0][2], REF_PIC_LIST_0
-      , changeToHighPrec
-    );
+    pu.interDir = 1;
+    PU::setAllAffineMv( pu, aacMv[0][0], aacMv[0][1], aacMv[0][2], REF_PIC_LIST_0);
     pu.refIdx[REF_PIC_LIST_0] = iRefIdx[0];
 
     for ( int verIdx = 0; verIdx < mvNum; verIdx++ )
@@ -4723,7 +5178,6 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
         pu.mvdAffi[0][verIdx] = pu.mvdAffi[0][verIdx] - pu.mvdAffi[0][0];
       }
     }
-    pu.interDir = 1;
 
     pu.mvpIdx[REF_PIC_LIST_0] = aaiMvpIdx[0][iRefIdx[0]];
     pu.mvpNum[REF_PIC_LIST_0] = aaiMvpNum[0][iRefIdx[0]];
@@ -4732,10 +5186,8 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
   {
     lastMode = 1;
     affineCost = uiCost[1];
-
-    PU::setAllAffineMv( pu, aacMv[1][0], aacMv[1][1], aacMv[1][2], REF_PIC_LIST_1
-      , changeToHighPrec
-    );
+    pu.interDir = 2;
+    PU::setAllAffineMv( pu, aacMv[1][0], aacMv[1][1], aacMv[1][2], REF_PIC_LIST_1);
     pu.refIdx[REF_PIC_LIST_1] = iRefIdx[1];
 
     for ( int verIdx = 0; verIdx < mvNum; verIdx++ )
@@ -4746,18 +5198,17 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
         pu.mvdAffi[1][verIdx] = pu.mvdAffi[1][verIdx] - pu.mvdAffi[1][0];
       }
     }
-    pu.interDir = 2;
 
     pu.mvpIdx[REF_PIC_LIST_1] = aaiMvpIdx[1][iRefIdx[1]];
     pu.mvpNum[REF_PIC_LIST_1] = aaiMvpNum[1][iRefIdx[1]];
   }
-  if( gbiIdx != GBI_DEFAULT )
+  if( bcwIdx != BCW_DEFAULT )
   {
-    pu.cu->GBiIdx = GBI_DEFAULT;
+    pu.cu->BcwIdx = BCW_DEFAULT;
   }
 }
 
-void solveEqual( double** dEqualCoeff, int iOrder, double* dAffinePara )
+void solveEqual(double dEqualCoeff[7][7], int iOrder, double *dAffinePara)
 {
   for ( int k = 0; k < iOrder; k++ )
   {
@@ -4837,7 +5288,7 @@ void InterSearch::xCheckBestAffineMVP( PredictionUnit &pu, AffineAMVPInfo &affin
 
   int mvNum = pu.cu->affineType ? 3 : 2;
 
-  m_pcRdCost->selectMotionLambda( pu.cu->transQuantBypass );
+  m_pcRdCost->selectMotionLambda( );
   m_pcRdCost->setCostScale ( 0 );
 
   int iBestMVPIdx = riMVPIdx;
@@ -4894,7 +5345,7 @@ void InterSearch::xAffineMotionEstimation( PredictionUnit& pu,
                                            const AffineAMVPInfo& aamvpi,
                                            bool            bBi)
 {
-  if( pu.cu->cs->sps->getUseGBi() && pu.cu->GBiIdx != GBI_DEFAULT && !bBi && xReadBufferedAffineUniMv(pu, eRefPicList, iRefIdxPred, acMvPred, acMv, ruiBits, ruiCost
+  if( pu.cu->cs->sps->getUseBcw() && pu.cu->BcwIdx != BCW_DEFAULT && !bBi && xReadBufferedAffineUniMv(pu, eRefPicList, iRefIdxPred, acMvPred, acMv, ruiBits, ruiCost
       , mvpIdx, aamvpi
   ) )
   {
@@ -4913,6 +5364,8 @@ void InterSearch::xAffineMotionEstimation( PredictionUnit& pu,
   double        fWeight       = 1.0;
 
   PelUnitBuf  origBufTmp = m_tmpStorageLCU.getBuf( UnitAreaRelative( *pu.cu, pu ) );
+  enum DFunc distFunc = (pu.cs->slice->getDisableSATDForRD()) ? DF_SAD : DF_HAD;
+  m_iRefListIdx = eRefPicList;
 
   // if Bi, set to ( 2 * Org - ListX )
   if ( bBi )
@@ -4921,11 +5374,11 @@ void InterSearch::xAffineMotionEstimation( PredictionUnit& pu,
     PelUnitBuf otherBuf = m_tmpPredStorage[1 - (int)eRefPicList].getBuf( UnitAreaRelative( *pu.cu, pu ) );
     origBufTmp.copyFrom(origBuf);
     origBufTmp.removeHighFreq(otherBuf, m_pcEncCfg->getClipForBiPredMeEnabled(), pu.cu->slice->clpRngs()
-                             ,getGbiWeight(pu.cu->GBiIdx, eRefPicList)
+                             ,getBcwWeight(pu.cu->BcwIdx, eRefPicList)
                              );
     pBuf = &origBufTmp;
 
-    fWeight = xGetMEDistortionWeight( pu.cu->GBiIdx, eRefPicList );
+    fWeight = xGetMEDistortionWeight( pu.cu->BcwIdx, eRefPicList );
   }
 
   // pred YUV
@@ -4934,23 +5387,12 @@ void InterSearch::xAffineMotionEstimation( PredictionUnit& pu,
   // Set start Mv position, use input mv as started search mv
   Mv acMvTemp[3];
   ::memcpy( acMvTemp, acMv, sizeof(Mv)*3 );
-  if ( pu.cu->imv != 1 )
-  {
-    acMvTemp[0].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);
-    acMvTemp[1].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);
-    acMvTemp[2].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);
-  }
   // Set delta mv
   // malloc buffer
   int iParaNum = pu.cu->affineType ? 7 : 5;
   int affineParaNum = iParaNum - 1;
   int mvNum = pu.cu->affineType ? 3 : 2;
-  double **pdEqualCoeff;
-  pdEqualCoeff = new double *[iParaNum];
-  for ( int i = 0; i < iParaNum; i++ )
-  {
-    pdEqualCoeff[i] = new double[iParaNum];
-  }
+  double pdEqualCoeff[7][7];
 
   int64_t  i64EqualCoeff[7][7];
   Pel    *piError = m_tmpAffiError;
@@ -4973,32 +5415,24 @@ void InterSearch::xAffineMotionEstimation( PredictionUnit& pu,
     }
   }
   else
-  clipMv( acMvTemp[0], pu.cu->lumaPos(),
-          pu.cu->lumaSize(),
-          *pu.cs->sps );
-  clipMv( acMvTemp[1], pu.cu->lumaPos(),
-          pu.cu->lumaSize(),
-          *pu.cs->sps );
-  if ( pu.cu->affineType == AFFINEMODEL_6PARAM )
   {
-    clipMv( acMvTemp[2], pu.cu->lumaPos(),
-            pu.cu->lumaSize(),
-            *pu.cs->sps );
-  }
-  int mvdPrecision = ( pu.cu->imv == 1 ) ? 2 : 0;
-  if ( pu.cu->imv == 2 )
-  {
-    acMvTemp[0].roundToPrecision( MV_PRECISION_INTERNAL, MV_PRECISION_INT );
-    acMvTemp[1].roundToPrecision( MV_PRECISION_INTERNAL, MV_PRECISION_INT );
-    if ( pu.cu->affineType == AFFINEMODEL_6PARAM )
+    clipMv( acMvTemp[0], pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps, *pu.cs->pps );
+    clipMv( acMvTemp[1], pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps, *pu.cs->pps );
+    if( pu.cu->affineType == AFFINEMODEL_6PARAM )
     {
-      acMvTemp[2].roundToPrecision( MV_PRECISION_INTERNAL, MV_PRECISION_INT );
+      clipMv( acMvTemp[2], pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps, *pu.cs->pps );
     }
   }
+  acMvTemp[0].roundAffinePrecInternal2Amvr(pu.cu->imv);
+  acMvTemp[1].roundAffinePrecInternal2Amvr(pu.cu->imv);
+  if (pu.cu->affineType == AFFINEMODEL_6PARAM)
+  {
+    acMvTemp[2].roundAffinePrecInternal2Amvr(pu.cu->imv);
+  }
   xPredAffineBlk( COMPONENT_Y, pu, refPic, acMvTemp, predBuf, false, pu.cs->slice->clpRng( COMPONENT_Y ) );
 
   // get error
-  uiCostBest = m_pcRdCost->getDistPart( predBuf.Y(), pBuf->Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, DF_HAD );
+  uiCostBest = m_pcRdCost->getDistPart(predBuf.Y(), pBuf->Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, distFunc);
 
   // get cost with mv
   m_pcRdCost->setCostScale(0);
@@ -5013,7 +5447,7 @@ void InterSearch::xAffineMotionEstimation( PredictionUnit& pu,
   else
   {
     DTRACE( g_trace_ctx, D_COMMON, " (%d) xx uiBitsBest=%d\n", DTRACE_GET_COUNTER(g_trace_ctx,D_COMMON), uiBitsBest );
-    uiBitsBest += xCalcAffineMVBits( pu, acMvTemp, acMvPred, pu.cu->imv != 1 );
+    uiBitsBest += xCalcAffineMVBits( pu, acMvTemp, acMvPred );
     DTRACE( g_trace_ctx, D_COMMON, " (%d) yy uiBitsBest=%d\n", DTRACE_GET_COUNTER(g_trace_ctx,D_COMMON), uiBitsBest );
   }
   uiCostBest = (Distortion)( floor( fWeight * (double)uiCostBest ) + (double)m_pcRdCost->getCost( uiBitsBest ) );
@@ -5090,7 +5524,7 @@ void InterSearch::xAffineMotionEstimation( PredictionUnit& pu,
     }
 
     double dAffinePara[6];
-    double dDeltaMv[6];
+    double dDeltaMv[6]={0.0, 0.0, 0.0, 0.0, 0.0, 0.0,};
     Mv acDeltaMv[3];
 
     solveEqual( pdEqualCoeff, affineParaNum, dAffinePara );
@@ -5110,12 +5544,13 @@ void InterSearch::xAffineMotionEstimation( PredictionUnit& pu,
       dDeltaMv[1] = dAffinePara[1] * width + dAffinePara[0];
       dDeltaMv[3] = -dAffinePara[3] * width + dAffinePara[2];
     }
-    int mvShift = MV_FRACTIONAL_BITS_DIFF - mvdPrecision;
-    int multiShift = 1 << ( MV_FRACTIONAL_BITS_DIFF + mvdPrecision );
 
+    const int normShiftTab[3] = { MV_PRECISION_QUARTER - MV_PRECISION_INT, MV_PRECISION_SIXTEENTH - MV_PRECISION_INT, MV_PRECISION_QUARTER - MV_PRECISION_INT };
+    const int stepShiftTab[3] = { MV_PRECISION_INTERNAL - MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL - MV_PRECISION_SIXTEENTH, MV_PRECISION_INTERNAL - MV_PRECISION_QUARTER };
+    const int multiShift = 1 << normShiftTab[pu.cu->imv];
+    const int mvShift = stepShiftTab[pu.cu->imv];
     acDeltaMv[0] = Mv( ( int ) ( dDeltaMv[0] * multiShift + SIGN( dDeltaMv[0] ) * 0.5 ) << mvShift, ( int ) ( dDeltaMv[2] * multiShift + SIGN( dDeltaMv[2] ) * 0.5 ) << mvShift );
     acDeltaMv[1] = Mv( ( int ) ( dDeltaMv[1] * multiShift + SIGN( dDeltaMv[1] ) * 0.5 ) << mvShift, ( int ) ( dDeltaMv[3] * multiShift + SIGN( dDeltaMv[3] ) * 0.5 ) << mvShift );
-
     if ( pu.cu->affineType == AFFINEMODEL_6PARAM )
     {
       acDeltaMv[2] = Mv( ( int ) ( dDeltaMv[4] * multiShift + SIGN( dDeltaMv[4] ) * 0.5 ) << mvShift, ( int ) ( dDeltaMv[5] * multiShift + SIGN( dDeltaMv[5] ) * 0.5 ) << mvShift );
@@ -5145,25 +5580,16 @@ void InterSearch::xAffineMotionEstimation( PredictionUnit& pu,
     for ( int i = 0; i < mvNum; i++ )
     {
       acMvTemp[i] += acDeltaMv[i];
-      acMvTemp[i].hor = Clip3( -131072, 131071, acMvTemp[i].hor );
-      acMvTemp[i].ver = Clip3( -131072, 131071, acMvTemp[i].ver );
-      if ( pu.cu->imv == 0 )
-      {
-        acMvTemp[i].roundToPrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
-      }
-      else if ( pu.cu->imv == 2 )
-      {
-        acMvTemp[i].roundToPrecision( MV_PRECISION_INTERNAL, MV_PRECISION_INT );
-      }
+      acMvTemp[i].hor = Clip3(MV_MIN, MV_MAX, acMvTemp[i].hor );
+      acMvTemp[i].ver = Clip3(MV_MIN, MV_MAX, acMvTemp[i].ver );
+      acMvTemp[i].roundAffinePrecInternal2Amvr(pu.cu->imv);
       if( m_pcEncCfg->getMCTSEncConstraint() )
       {
         MCTSHelper::clipMvToArea( acMvTemp[i], pu.cu->Y(), pu.cs->picture->mctsInfo.getTileAreaSubPelRestricted( pu ), *pu.cs->sps );
       }
       else
       {
-      clipMv(acMvTemp[i], pu.cu->lumaPos(),
-             pu.cu->lumaSize(),
-             *pu.cs->sps);
+        clipMv( acMvTemp[i], pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps, *pu.cs->pps );
       }
     }
 
@@ -5190,7 +5616,7 @@ void InterSearch::xAffineMotionEstimation( PredictionUnit& pu,
     xPredAffineBlk( COMPONENT_Y, pu, refPic, acMvTemp, predBuf, false, pu.cu->slice->clpRng( COMPONENT_Y ) );
 
     // get error
-    Distortion uiCostTemp = m_pcRdCost->getDistPart( predBuf.Y(), pBuf->Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, DF_HAD );
+    Distortion uiCostTemp = m_pcRdCost->getDistPart(predBuf.Y(), pBuf->Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, distFunc);
     DTRACE( g_trace_ctx, D_COMMON, " (%d) uiCostTemp=%d\n", DTRACE_GET_COUNTER(g_trace_ctx,D_COMMON), uiCostTemp );
 
     // get cost with mv
@@ -5205,7 +5631,7 @@ void InterSearch::xAffineMotionEstimation( PredictionUnit& pu,
     }
     else
     {
-      uiBitsTemp += xCalcAffineMVBits( pu, acMvTemp, acMvPred, pu.cu->imv != 1 );
+      uiBitsTemp += xCalcAffineMVBits( pu, acMvTemp, acMvPred );
     }
     uiCostTemp = (Distortion)( floor( fWeight * (double)uiCostTemp ) + (double)m_pcRdCost->getCost( uiBitsTemp ) );
 
@@ -5223,11 +5649,11 @@ void InterSearch::xAffineMotionEstimation( PredictionUnit& pu,
   {
     xPredAffineBlk(COMPONENT_Y, pu, refPic, ctrlPtMv, predBuf, false, pu.cu->slice->clpRng(COMPONENT_Y));
     // get error
-    Distortion costTemp = m_pcRdCost->getDistPart(predBuf.Y(), pBuf->Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, DF_HAD);
+    Distortion costTemp = m_pcRdCost->getDistPart(predBuf.Y(), pBuf->Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, distFunc);
     // get cost with mv
     m_pcRdCost->setCostScale(0);
     uint32_t bitsTemp = ruiBits;
-    bitsTemp += xCalcAffineMVBits( pu, ctrlPtMv, acMvPred, pu.cu->imv != 1 );
+    bitsTemp += xCalcAffineMVBits( pu, ctrlPtMv, acMvPred );
     costTemp = (Distortion)(floor(fWeight * (double)costTemp) + (double)m_pcRdCost->getCost(bitsTemp));
     // store best cost and mv
     if (costTemp < uiCostBest)
@@ -5238,63 +5664,12 @@ void InterSearch::xAffineMotionEstimation( PredictionUnit& pu,
     }
   };
 
+  const uint32_t mvShiftTable[3] = {MV_PRECISION_INTERNAL - MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL - MV_PRECISION_INTERNAL, MV_PRECISION_INTERNAL - MV_PRECISION_INT};
+  const uint32_t mvShift = mvShiftTable[pu.cu->imv];
   if (uiCostBest <= AFFINE_ME_LIST_MVP_TH*m_hevcCost)
   {
-    //search 8 nearest neighbors; integer distance
-    int testPos[8][2] = { { -1, 0 },{ 0, -1 },{ 0, 1 },{ 1, 0 },{ -1, -1 },{ -1, 1 },{ 1, 1 },{ 1, -1 } };
-    const uint32_t mvShift = pu.cu->imv == 1 ? 0 : ( pu.cu->imv == 2 ? ( MV_FRACTIONAL_BITS_DIFF << 1 ) : MV_FRACTIONAL_BITS_DIFF );
-    const int maxSearchRound = 3;
-
-    if ( m_pcEncCfg->getUseAffineAmvrEncOpt() && m_pcEncCfg->getIntraPeriod() != ( uint32_t ) -1 && pu.cu->imv )
-    {
-      for ( int rnd = 0; rnd < ( pu.cu->slice->getTLayer() <= 2 ? maxSearchRound : maxSearchRound - 1 ); rnd++ )
-      {
-        bool modelChange = false;
-        //search the model parameters with finear granularity;
-        for ( int j = 0; j < mvNum; j++ )
-        {
-          for ( int iter = 0; iter < 2; iter++ )
-          {
-            Mv centerMv[3];
-            memcpy( centerMv, acMv, sizeof( Mv ) * 3 );
-            memcpy( acMvTemp, acMv, sizeof( Mv ) * 3 );
-            for ( int i = ( iter ? 0: 4 ); i < ( iter ? 4 : 8 ); i++ )
-            {
-              acMvTemp[j].set( centerMv[j].getHor() + ( testPos[i][0] << mvShift ), centerMv[j].getVer() + ( testPos[i][1] << mvShift ) );
-
-              clipMv( acMvTemp[j], pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps );
-              xPredAffineBlk( COMPONENT_Y, pu, refPic, acMvTemp, predBuf, false, pu.cu->slice->clpRng( COMPONENT_Y ) );
-
-              Distortion costTemp = m_pcRdCost->getDistPart( predBuf.Y(), pBuf->Y(), pu.cs->sps->getBitDepth( CHANNEL_TYPE_LUMA ), COMPONENT_Y, DF_HAD );
-              uint32_t bitsTemp   = ruiBits;
-              bitsTemp += xCalcAffineMVBits( pu, acMvTemp, acMvPred, pu.cu->imv != 1 );
-              costTemp = ( Distortion ) ( floor( fWeight * ( double ) costTemp ) + ( double ) m_pcRdCost->getCost( bitsTemp ) );
-
-              if ( costTemp < uiCostBest )
-              {
-                uiCostBest = costTemp;
-                uiBitsBest = bitsTemp;
-                ::memcpy( acMv, acMvTemp, sizeof( Mv ) * 3 );
-                modelChange = true;
-              }
-            }
-          }
-        }
-
-        if ( !modelChange )
-        {
-          break;
-        }
-      }
-    }
 
     Mv mvPredTmp[3] = { acMvPred[0], acMvPred[1], acMvPred[2] };
-    if ( pu.cu->imv != 1 )
-    {
-      mvPredTmp[0].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);
-      mvPredTmp[1].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);
-      mvPredTmp[2].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);
-    }
     Mv mvME[3];
     ::memcpy(mvME, acMv, sizeof(Mv) * 3);
     Mv dMv = mvME[0] - mvPredTmp[0];
@@ -5337,42 +5712,63 @@ void InterSearch::xAffineMotionEstimation( PredictionUnit& pu,
       checkCPMVRdCost(acMvTemp);
     }
 
+    // 8 nearest neighbor search
+    int testPos[8][2] = { { -1, 0 },{ 0, -1 },{ 0, 1 },{ 1, 0 },{ -1, -1 },{ -1, 1 },{ 1, 1 },{ 1, -1 } };
+    const int maxSearchRound = (pu.cu->imv) ? 3 : ((m_pcEncCfg->getUseAffineAmvrEncOpt() && m_pcEncCfg->getIntraPeriod() == (uint32_t)-1) ? 2 : 3);
+
+    for (int rnd = 0; rnd < maxSearchRound; rnd++)
     {
-      dMv = acMv[1] - acMv[0];
-      if (pu.cu->affineType == AFFINEMODEL_4PARAM && (dMv.getAbsHor() > 4 || dMv.getAbsVer() > 4))
+      bool modelChange = false;
+      //search the model parameters with finer granularity;
+      for (int j = 0; j < mvNum; j++)
       {
-        int testPos[4][2] = { { -1, 0 },{ 0, -1 },{ 0, 1 },{ 1, 0 } };
-        Mv centerMv[3];
-        const uint32_t mvShift = pu.cu->imv == 1 ? 0 : ( pu.cu->imv == 2 ? ( MV_FRACTIONAL_BITS_DIFF << 1 ) : MV_FRACTIONAL_BITS_DIFF );
-        ::memcpy(centerMv, acMv, sizeof(Mv) * 3);
-        acMvTemp[0] = centerMv[0];
-        for (int i = 0; i < 4; i++)
+        bool loopChange = false;
+        for (int iter = 0; iter < 2; iter++)
         {
-          acMvTemp[1].set( centerMv[1].getHor() + ( testPos[i][0] << mvShift ), centerMv[1].getVer() + ( testPos[i][1] << mvShift ) );
-          checkCPMVRdCost(acMvTemp);
+          if (iter == 1 && !loopChange)
+          {
+            break;
+          }
+          Mv centerMv[3];
+          memcpy(centerMv, acMv, sizeof(Mv) * 3);
+          memcpy(acMvTemp, acMv, sizeof(Mv) * 3);
+
+          for (int i = ((iter == 0) ? 0 : 4); i < ((iter == 0) ? 4 : 8); i++)
+          {
+            acMvTemp[j].set(centerMv[j].getHor() + (testPos[i][0] << mvShift), centerMv[j].getVer() + (testPos[i][1] << mvShift));
+            clipMv( acMvTemp[j], pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps, *pu.cs->pps );
+            xPredAffineBlk(COMPONENT_Y, pu, refPic, acMvTemp, predBuf, false, pu.cu->slice->clpRng(COMPONENT_Y));
+
+            Distortion costTemp = m_pcRdCost->getDistPart(predBuf.Y(), pBuf->Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, distFunc);
+            uint32_t bitsTemp = ruiBits;
+            bitsTemp += xCalcAffineMVBits(pu, acMvTemp, acMvPred);
+            costTemp = (Distortion)(floor(fWeight * (double)costTemp) + (double)m_pcRdCost->getCost(bitsTemp));
+
+            if (costTemp < uiCostBest)
+            {
+              uiCostBest = costTemp;
+              uiBitsBest = bitsTemp;
+              ::memcpy(acMv, acMvTemp, sizeof(Mv) * 3);
+              modelChange = true;
+              loopChange = true;
+            }
+          }
         }
       }
+
+      if (!modelChange)
+      {
+        break;
+      }
     }
   }
-  if ( pu.cu->imv != 1 )
-  {
-    acMv[0].changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
-    acMv[1].changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
-    acMv[2].changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
-  }
   acMvPred[0] = aamvpi.mvCandLT[mvpIdx];
   acMvPred[1] = aamvpi.mvCandRT[mvpIdx];
   acMvPred[2] = aamvpi.mvCandLB[mvpIdx];
 
-  // free buffer
-  for (int i = 0; i<iParaNum; i++)
-    delete[]pdEqualCoeff[i];
-  delete[]pdEqualCoeff;
-
   ruiBits = uiBitsBest;
   ruiCost = uiCostBest;
   DTRACE( g_trace_ctx, D_COMMON, " (%d) uiBitsBest=%d, uiCostBest=%d\n", DTRACE_GET_COUNTER(g_trace_ctx,D_COMMON), uiBitsBest, uiCostBest );
-
 }
 
 void InterSearch::xEstimateAffineAMVP( PredictionUnit&  pu,
@@ -5438,7 +5834,7 @@ void InterSearch::xCopyAffineAMVPInfo (AffineAMVPInfo& src, AffineAMVPInfo& dst)
 * \param pattern Reference picture ROI
 * \param biPred    Flag indicating whether block is for biprediction
 */
-void InterSearch::xExtDIFUpSamplingH( CPelBuf* pattern )
+void InterSearch::xExtDIFUpSamplingH(CPelBuf* pattern, bool useAltHpelIf)
 {
   const ClpRng& clpRng = m_lumaClpRng;
   int width      = pattern->width;
@@ -5455,15 +5851,15 @@ void InterSearch::xExtDIFUpSamplingH( CPelBuf* pattern )
 
   const ChromaFormat chFmt = m_currChromaFormat;
 
-  m_if.filterHor(COMPONENT_Y, srcPtr, srcStride, m_filteredBlockTmp[0][0], intStride, width + 1, height + filterSize, 0 << MV_FRACTIONAL_BITS_DIFF, false, chFmt, clpRng);
+  m_if.filterHor(COMPONENT_Y, srcPtr, srcStride, m_filteredBlockTmp[0][0], intStride, width + 1, height + filterSize, 0 << MV_FRACTIONAL_BITS_DIFF, false, chFmt, clpRng, 0, false, useAltHpelIf);
   if (!m_skipFracME)
   {
-  m_if.filterHor(COMPONENT_Y, srcPtr, srcStride, m_filteredBlockTmp[2][0], intStride, width + 1, height + filterSize, 2 << MV_FRACTIONAL_BITS_DIFF, false, chFmt, clpRng);
+    m_if.filterHor(COMPONENT_Y, srcPtr, srcStride, m_filteredBlockTmp[2][0], intStride, width + 1, height + filterSize, 2 << MV_FRACTIONAL_BITS_DIFF, false, chFmt, clpRng, 0, false, useAltHpelIf);
   }
 
   intPtr = m_filteredBlockTmp[0][0] + halfFilterSize * intStride + 1;
   dstPtr = m_filteredBlock[0][0][0];
-  m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width + 0, height + 0, 0 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng);
+  m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width + 0, height + 0, 0 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, 0, false, useAltHpelIf);
   if (m_skipFracME)
   {
     return;
@@ -5471,15 +5867,15 @@ void InterSearch::xExtDIFUpSamplingH( CPelBuf* pattern )
 
   intPtr = m_filteredBlockTmp[0][0] + (halfFilterSize - 1) * intStride + 1;
   dstPtr = m_filteredBlock[2][0][0];
-  m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width + 0, height + 1, 2 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng);
+  m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width + 0, height + 1, 2 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, 0, false, useAltHpelIf);
 
   intPtr = m_filteredBlockTmp[2][0] + halfFilterSize * intStride;
   dstPtr = m_filteredBlock[0][2][0];
-  m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width + 1, height + 0, 0 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng);
+  m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width + 1, height + 0, 0 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, 0, false, useAltHpelIf);
 
   intPtr = m_filteredBlockTmp[2][0] + (halfFilterSize - 1) * intStride;
   dstPtr = m_filteredBlock[2][2][0];
-  m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width + 1, height + 1, 2 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng);
+  m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width + 1, height + 1, 2 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, 0, false, useAltHpelIf);
 }
 
 
@@ -5701,7 +6097,7 @@ void InterSearch::setWpScalingDistParam( int iRefIdx, RefPicList eRefPicListCur,
 void InterSearch::xEncodeInterResidualQT(CodingStructure &cs, Partitioner &partitioner, const ComponentID &compID)
 {
   const UnitArea& currArea    = partitioner.currArea();
-  const TransformUnit &currTU = *cs.getTU(currArea.lumaPos(), partitioner.chType);
+  const TransformUnit &currTU = *cs.getTU(isLuma(partitioner.chType) ? currArea.lumaPos() : currArea.chromaPos(), partitioner.chType);
   const CodingUnit &cu        = *currTU.cu;
   const unsigned currDepth    = partitioner.currTrDepth;
 
@@ -5724,26 +6120,27 @@ void InterSearch::xEncodeInterResidualQT(CodingStructure &cs, Partitioner &parti
 
     CHECK(CU::isIntra(cu), "Inter search provided with intra CU");
 
-    if( cu.chromaFormat != CHROMA_400 )
+    if( cu.chromaFormat != CHROMA_400
+      && (!cu.isSepTree() || isChroma(partitioner.chType))
+      )
     {
-      const bool firstCbfOfCU = ( currDepth == 0 );
       {
-        if( firstCbfOfCU || TU::getCbfAtDepth( currTU, COMPONENT_Cb, currDepth - 1 ) )
         {
           const bool  chroma_cbf = TU::getCbfAtDepth( currTU, COMPONENT_Cb, currDepth );
-          if( !( cu.sbtInfo && currDepth == 1 ) )
+          if (!(cu.sbtInfo && (currDepth == 0 || (currDepth == 1 && currTU.noResidual))))
           m_CABACEstimator->cbf_comp( cs, chroma_cbf, currArea.blocks[COMPONENT_Cb], currDepth );
         }
-        if( firstCbfOfCU || TU::getCbfAtDepth( currTU, COMPONENT_Cr, currDepth - 1 ) )
         {
           const bool  chroma_cbf = TU::getCbfAtDepth( currTU, COMPONENT_Cr, currDepth );
-          if( !( cu.sbtInfo && currDepth == 1 ) )
+          if (!(cu.sbtInfo && (currDepth == 0 || (currDepth == 1 && currTU.noResidual))))
           m_CABACEstimator->cbf_comp( cs, chroma_cbf, currArea.blocks[COMPONENT_Cr], currDepth, TU::getCbfAtDepth( currTU, COMPONENT_Cb, currDepth ) );
         }
       }
     }
 
-    if( !bSubdiv && !( cu.sbtInfo && currTU.noResidual ) )
+    if( !bSubdiv && !( cu.sbtInfo && currTU.noResidual )
+      && !isChroma(partitioner.chType)
+      )
     {
       m_CABACEstimator->cbf_comp( cs, TU::getCbfAtDepth( currTU, COMPONENT_Y, currDepth ), currArea.Y(), currDepth );
     }
@@ -5755,6 +6152,11 @@ void InterSearch::xEncodeInterResidualQT(CodingStructure &cs, Partitioner &parti
     {
       if( currArea.blocks[compID].valid() )
       {
+        if( compID == COMPONENT_Cr )
+        {
+          const int cbfMask = ( TU::getCbf( currTU, COMPONENT_Cb ) ? 2 : 0) + ( TU::getCbf( currTU, COMPONENT_Cr ) ? 1 : 0 );
+          m_CABACEstimator->joint_cb_cr( currTU, cbfMask );
+        }
         if( TU::hasCrossCompPredInfo( currTU, compID ) )
         {
           m_CABACEstimator->cross_comp_pred( currTU, compID );
@@ -6038,14 +6440,18 @@ uint8_t InterSearch::skipSbtByRDCost( int width, int height, int mtDepth, uint8_
 
 void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &partitioner, Distortion *puiZeroDist /*= NULL*/
   , const bool luma, const bool chroma
+  , PelUnitBuf* orgResi
 )
 {
   const UnitArea& currArea = partitioner.currArea();
   const SPS &sps           = *cs.sps;
+  m_pcRdCost->setChromaFormat(sps.getChromaFormatIdc());
+
   const uint32_t numValidComp  = getNumberValidComponents( sps.getChromaFormatIdc() );
   const uint32_t numTBlocks    = getNumberValidTBlocks   ( *cs.pcv );
   const CodingUnit &cu = *cs.getCU(partitioner.chType);
   const unsigned currDepth = partitioner.currTrDepth;
+  const bool colorTransFlag = cs.cus[0]->colorTransform;
 
   bool bCheckFull  = !partitioner.canSplit( TU_MAX_TR_SPLIT, cs );
   if( cu.sbtInfo && partitioner.canSplit( PartSplit( cu.getSbtTuSplit() ), cs ) )
@@ -6068,6 +6474,7 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par
 
   Distortion uiSingleDist         = 0;
   Distortion uiSingleDistComp [3] = { 0, 0, 0 };
+  uint64_t   uiSingleFracBits[3] = { 0, 0, 0 };
   TCoeff     uiAbsSum         [3] = { 0, 0, 0 };
 
   const TempCtx ctxStart  ( m_CtxCache, m_CABACEstimator->getCtx() );
@@ -6075,23 +6482,19 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par
 
   if (bCheckFull)
   {
-    TransformUnit &tu = csFull->addTU(CS::isDualITree(cs) ? cu : currArea, partitioner.chType);
+    TransformUnit &tu = csFull->addTU(CS::getArea(cs, currArea, partitioner.chType), partitioner.chType);
     tu.depth          = currDepth;
-    tu.mtsIdx         = 0;
+    for (int i = 0; i<MAX_NUM_TBLOCKS; i++) tu.mtsIdx[i] = MTS_DCT2_DCT2;
     tu.checkTuNoResidual( partitioner.currPartIdx() );
+    Position tuPos = tu.Y();
+    tuPos.relativeTo(cu.Y());
+    const UnitArea relativeUnitArea(tu.chromaFormat, Area(tuPos, tu.Y().size()));
 
     const Slice           &slice = *cs.slice;
-    if (slice.getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag() && slice.getReshapeInfo().getSliceReshapeChromaAdj())
+    if (slice.getPicHeader()->getLmcsEnabledFlag() && slice.getPicHeader()->getLmcsChromaResidualScaleFlag() && !(CS::isDualITree(cs) && slice.isIntra() && tu.cu->predMode==MODE_IBC ))
     {
       const CompArea      &areaY = tu.blocks[COMPONENT_Y];
-      PelBuf              piPredY = cs.getPredBuf(areaY);
-      CompArea      tmpArea(COMPONENT_Y, areaY.chromaFormat, Position(0, 0), areaY.size());
-      PelBuf tmpPred = m_tmpStorageLCU.getBuf(tmpArea);
-      tmpPred.copyFrom(piPredY);
-      if (!cu.firstPU->mhIntraFlag && !CU::isIBC(cu))
-        tmpPred.rspSignal(m_pcReshape->getFwdLUT());
-      const Pel           avgLuma = tmpPred.computeAvg();
-      int                    adj  = m_pcReshape->calculateChromaAdj(avgLuma);
+      int adj = m_pcReshape->calculateChromaAdjVpduNei(tu, areaY);
       tu.setChromaAdj(adj);
     }
 
@@ -6111,7 +6514,7 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par
     saveCS.picture = cs.picture;
     saveCS.area.repositionTo(currArea);
     saveCS.clearTUs();
-    TransformUnit & bestTU = saveCS.addTU(CS::isDualITree(cs) ? cu : currArea, partitioner.chType);
+    TransformUnit & bestTU = saveCS.addTU(CS::getArea(cs, currArea, partitioner.chType), partitioner.chType);
 
     for( uint32_t c = 0; c < numTBlocks; c++ )
     {
@@ -6140,8 +6543,9 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par
         preCalcAlpha = xCalcCrossComponentPredictionAlpha( tu, compID, m_pcEncCfg->getUseReconBasedCrossCPredictionEstimate() );
       }
 
-      const bool tsAllowed  = TU::isTSAllowed ( tu, compID );
-      const bool mtsAllowed = TU::isMTSAllowed( tu, compID );
+      const bool tsAllowed  = TU::isTSAllowed(tu, compID) && (isLuma(compID) || (isChroma(compID) && m_pcEncCfg->getUseChromaTS()));
+      const bool mtsAllowed = CU::isMTSAllowed( *tu.cu, compID );
+      
       uint8_t nNumTransformCands = 1 + ( tsAllowed ? 1 : 0 ) + ( mtsAllowed ? 4 : 0 ); // DCT + TS + 4 MTS = 6 tests
       std::vector<TrMode> trModes;
       trModes.push_back( TrMode( 0, true ) ); //DCT2
@@ -6174,6 +6578,13 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par
 #endif
         }
       }
+
+      if (colorTransFlag)
+      {
+        m_pcTrQuant->lambdaAdjustColorTrans(true);
+        m_pcRdCost->lambdaAdjustColorTrans(true, compID);
+      }
+
       const int crossCPredictionModesToTest = preCalcAlpha != 0 ? 2 : 1;
       const int numTransformCandidates = nNumTransformCands;
       const bool isOneMode                  = crossCPredictionModesToTest == 1 && numTransformCandidates == 1;
@@ -6193,9 +6604,12 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par
           m_CABACEstimator->getCtx() = ctxStart;
           m_CABACEstimator->resetBits();
 
-          if( isLuma( compID ) )
           {
-            if( bestTU.mtsIdx == 1 && m_pcEncCfg->getUseTransformSkipFast() )
+#if JVET_AHG14_LOSSLESS
+            if( !( m_pcEncCfg->getCostMode() == COST_LOSSLESS_CODING ) )
+            {
+#endif
+            if (bestTU.mtsIdx[compID] == MTS_SKIP && m_pcEncCfg->getUseTransformSkipFast())
             {
               continue;
             }
@@ -6203,20 +6617,37 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par
             {
               continue;
             }
-            tu.mtsIdx = trModes[transformMode].first;
+#if JVET_AHG14_LOSSLESS
+            }
+#endif
+            tu.mtsIdx[compID] = trModes[transformMode].first;
           }
           tu.compAlpha[compID]      = bUseCrossCPrediction ? preCalcAlpha : 0;
 
-          const QpParam cQP(tu, compID);  // note: uses tu.transformSkip[compID]
+          QpParam cQP(tu, compID);  // note: uses tu.transformSkip[compID]
+          if (colorTransFlag)
+          {
+            for (int qpIdx = 0; qpIdx < 2; qpIdx++)
+            {
+              cQP.Qps[qpIdx] = cQP.Qps[qpIdx] + (compID == COMPONENT_Cr ? DELTA_QP_FOR_Co : DELTA_QP_FOR_Y_Cg);
+              cQP.pers[qpIdx] = cQP.Qps[qpIdx] / 6;
+              cQP.rems[qpIdx] = cQP.Qps[qpIdx] % 6;
+            }
+          }
 
 #if RDOQ_CHROMA_LAMBDA
           m_pcTrQuant->selectLambda(compID);
 #endif
-          if (slice.getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag() && isChroma(compID) && slice.getReshapeInfo().getSliceReshapeChromaAdj())
+          if (slice.getPicHeader()->getLmcsEnabledFlag() && isChroma(compID) && slice.getPicHeader()->getLmcsChromaResidualScaleFlag())
           {
-            double cRescale = round((double)(1 << CSCALE_FP_PREC) / (double)(tu.getChromaAdj()));
+            double cRescale = (double)(1 << CSCALE_FP_PREC) / (double)(tu.getChromaAdj());
             m_pcTrQuant->setLambda(m_pcTrQuant->getLambda() / (cRescale*cRescale));
           }
+          if ( sps.getJointCbCrEnabledFlag() && isChroma( compID ) && ( tu.cu->cs->slice->getSliceQp() > 18 ) )
+          {
+            m_pcTrQuant->setLambda( 1.05 * m_pcTrQuant->getLambda() );
+          }
+
           TCoeff     currAbsSum = 0;
           uint64_t   currCompFracBits = 0;
           Distortion currCompDist = 0;
@@ -6230,7 +6661,7 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par
             PelBuf resiBuf = csFull->getResiBuf( compArea );
             crossComponentPrediction( tu, compID, lumaResi, resiBuf, resiBuf, false );
           }
-          if (slice.getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag() && isChroma(compID) && slice.getReshapeInfo().getSliceReshapeChromaAdj() && tu.blocks[compID].width*tu.blocks[compID].height > 4 )
+          if (slice.getPicHeader()->getLmcsEnabledFlag() && isChroma(compID) && slice.getPicHeader()->getLmcsChromaResidualScaleFlag() && tu.blocks[compID].width*tu.blocks[compID].height > 4)
           {
             PelBuf resiBuf = csFull->getResiBuf(compArea);
             resiBuf.scaleSignal(tu.getChromaAdj(), 1, tu.cu->cs->slice->clpRng(compID));
@@ -6239,10 +6670,17 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par
           {
             if( transformMode == 0 )
             {
-              m_pcTrQuant->transformNxN( tu, compID, cQP, &trModes, CU::isIntra( *tu.cu ) ? m_pcEncCfg->getIntraMTSMaxCand() : m_pcEncCfg->getInterMTSMaxCand() );
-              tu.mtsIdx = trModes[0].first;
+              m_pcTrQuant->transformNxN( tu, compID, cQP, &trModes, m_pcEncCfg->getMTSInterMaxCand() );
+              tu.mtsIdx[compID] = trModes[0].first;
+            }
+#if JVET_AHG14_LOSSLESS
+            if( !( m_pcEncCfg->getCostMode() == COST_LOSSLESS_CODING && tu.mtsIdx[compID] == 0 ) )
+            {
+              m_pcTrQuant->transformNxN( tu, compID, cQP, currAbsSum, m_CABACEstimator->getCtx(), true );
             }
+#else
             m_pcTrQuant->transformNxN( tu, compID, cQP, currAbsSum, m_CABACEstimator->getCtx(), true );
+#endif
           }
           else
           {
@@ -6284,7 +6722,14 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par
             }
             else
 #endif
-            nonCoeffCost     = m_pcRdCost->calcRdCost(nonCoeffFracBits, nonCoeffDist);
+              if (cs.slice->getSPS()->getUseColorTrans())
+              {
+                nonCoeffCost = m_pcRdCost->calcRdCost(nonCoeffFracBits, nonCoeffDist, false);
+              }
+              else
+              {
+                nonCoeffCost = m_pcRdCost->calcRdCost(nonCoeffFracBits, nonCoeffDist);
+              }
           }
 
           if ((puiZeroDist != NULL) && isFirstMode)
@@ -6292,6 +6737,13 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par
             *puiZeroDist += nonCoeffDist; // initialized with zero residual distortion
           }
 
+#if JVET_AHG14_LOSSLESS
+          if( m_pcEncCfg->getCostMode() == COST_LOSSLESS_CODING && tu.mtsIdx[compID] == 0 )
+          {
+            currAbsSum = 0;
+          }
+#endif
+
           if (currAbsSum > 0) //if non-zero coefficients are present, a residual needs to be derived for further prediction
           {
             if (isFirstMode)
@@ -6302,6 +6754,11 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par
 
             const bool prevCbf = ( compID == COMPONENT_Cr ? tu.cbf[COMPONENT_Cb] : false );
             m_CABACEstimator->cbf_comp( *csFull, true, compArea, currDepth, prevCbf );
+            if( compID == COMPONENT_Cr )
+            {
+              const int cbfMask = ( tu.cbf[COMPONENT_Cb] ? 2 : 0 ) + 1;
+              m_CABACEstimator->joint_cb_cr( tu, cbfMask );
+            }
 
             if( isCrossCPredictionAvailable )
             {
@@ -6315,7 +6772,7 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par
             CPelBuf orgResiBuf = csFull->getOrgResiBuf(compArea);
 
             m_pcTrQuant->invTransformNxN(tu, compID, resiBuf, cQP);
-            if (slice.getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag() && isChroma(compID) && slice.getReshapeInfo().getSliceReshapeChromaAdj() && tu.blocks[compID].width*tu.blocks[compID].height > 4 )
+            if (slice.getPicHeader()->getLmcsEnabledFlag() && isChroma(compID) && slice.getPicHeader()->getLmcsChromaResidualScaleFlag() && tu.blocks[compID].width*tu.blocks[compID].height > 4)
             {
               resiBuf.scaleSignal(tu.getChromaAdj(), 0, tu.cu->cs->slice->clpRng(compID));
             }
@@ -6332,11 +6789,6 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par
 #else
             currCompCost = m_pcRdCost->calcRdCost(currCompFracBits, currCompDist);
 #endif
-
-            if (csFull->isLossless)
-            {
-              nonCoeffCost = MAX_DOUBLE;
-            }
           }
           else if( transformMode > 0 && !bUseCrossCPrediction )
           {
@@ -6369,6 +6821,7 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par
 
             uiAbsSum[compID]         = currAbsSum;
             uiSingleDistComp[compID] = currCompDist;
+            uiSingleFracBits[compID] = currCompFracBits;
             minCost[compID]          = currCompCost;
 
             if (uiAbsSum[compID] == 0)
@@ -6382,7 +6835,7 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par
               }
             }
 
-            if( !isLastMode )
+            if( !isLastMode || (compID != COMPONENT_Y && !tu.noResidual) )
             {
               bestTU.copyComponentFrom( tu, compID );
               saveCS.getResiBuf( compArea ).copyFrom( csFull->getResiBuf( compArea ) );
@@ -6403,8 +6856,227 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par
         tu.copyComponentFrom( bestTU, compID );
         csFull->getResiBuf( compArea ).copyFrom( saveCS.getResiBuf( compArea ) );
       }
+
+      if (colorTransFlag)
+      {
+        m_pcTrQuant->lambdaAdjustColorTrans(false);
+        m_pcRdCost->lambdaAdjustColorTrans(false, compID);
+      }
+
     } // component loop
 
+    if (colorTransFlag)
+    {
+      PelUnitBuf     orgResidual = orgResi->subBuf(relativeUnitArea);
+      PelUnitBuf     invColorTransResidual = m_colorTransResiBuf[2].getBuf(relativeUnitArea);
+      csFull->getResiBuf(currArea).colorSpaceConvert(invColorTransResidual, false);
+
+      for (uint32_t c = 0; c < numTBlocks; c++)
+      {
+        const ComponentID compID = (ComponentID)c;
+        uiSingleDistComp[c] = m_pcRdCost->getDistPart(orgResidual.bufs[c], invColorTransResidual.bufs[c], sps.getBitDepth(toChannelType(compID)), compID, DF_SSE);
+        minCost[c] = m_pcRdCost->calcRdCost(uiSingleFracBits[c], uiSingleDistComp[c]);
+      }
+    }
+
+    if ( chroma && tu.blocks[COMPONENT_Cb].valid() )
+    {
+      const CompArea& cbArea = tu.blocks[COMPONENT_Cb];
+      const CompArea& crArea = tu.blocks[COMPONENT_Cr];
+      bool checkJointCbCr = (sps.getJointCbCrEnabledFlag()) && (!tu.noResidual) && (TU::getCbf(tu, COMPONENT_Cb) || TU::getCbf(tu, COMPONENT_Cr));
+      const int channelBitDepth = sps.getBitDepth(toChannelType(COMPONENT_Cb));
+      bool      reshape         = slice.getPicHeader()->getLmcsEnabledFlag() && slice.getPicHeader()->getLmcsChromaResidualScaleFlag()
+                               && tu.blocks[COMPONENT_Cb].width * tu.blocks[COMPONENT_Cb].height > 4;
+      double minCostCbCr = minCost[COMPONENT_Cb] + minCost[COMPONENT_Cr];
+      if (colorTransFlag)
+      {
+        minCostCbCr += minCost[COMPONENT_Y];  // ACT should consider three-component cost
+      }
+      bool   isLastBest  = false;
+
+      CompStorage      orgResiCb[4], orgResiCr[4];   // 0:std, 1-3:jointCbCr
+      std::vector<int> jointCbfMasksToTest;
+      if ( checkJointCbCr )
+      {
+        orgResiCb[0].create(cbArea);
+        orgResiCr[0].create(crArea);
+        orgResiCb[0].copyFrom(cs.getOrgResiBuf(cbArea));
+        orgResiCr[0].copyFrom(cs.getOrgResiBuf(crArea));
+        if (reshape)
+        {
+          orgResiCb[0].scaleSignal(tu.getChromaAdj(), 1, tu.cu->cs->slice->clpRng(COMPONENT_Cb));
+          orgResiCr[0].scaleSignal(tu.getChromaAdj(), 1, tu.cu->cs->slice->clpRng(COMPONENT_Cr));
+        }
+        jointCbfMasksToTest = m_pcTrQuant->selectICTCandidates(tu, orgResiCb, orgResiCr);
+      }
+
+      for (int cbfMask: jointCbfMasksToTest)
+      {
+        TCoeff     currAbsSum       = 0;
+        uint64_t   currCompFracBits = 0;
+        Distortion currCompDistCb   = 0;
+        Distortion currCompDistCr   = 0;
+        double     currCompCost     = 0;
+
+        tu.jointCbCr = (uint8_t) cbfMask;
+        tu.compAlpha[COMPONENT_Cb] = tu.compAlpha[COMPONENT_Cr] = 0;
+        // encoder bugfix: initialize mtsIdx for chroma under JointCbCrMode.
+        tu.mtsIdx[COMPONENT_Cb] = tu.mtsIdx[COMPONENT_Cr] = MTS_DCT2_DCT2;
+        int         codedCbfMask = 0;
+        ComponentID codeCompId = (tu.jointCbCr >> 1 ? COMPONENT_Cb : COMPONENT_Cr);
+        ComponentID otherCompId = (codeCompId == COMPONENT_Cr ? COMPONENT_Cb : COMPONENT_Cr);
+
+        if (colorTransFlag)
+        {
+          m_pcTrQuant->lambdaAdjustColorTrans(true);
+          m_pcTrQuant->selectLambda(codeCompId);
+        }
+        else
+        {
+          m_pcTrQuant->selectLambda(codeCompId);
+        }
+        // Lambda is loosened for the joint mode with respect to single modes as the same residual is used for both chroma blocks
+        const int    absIct = abs( TU::getICTMode(tu) );
+        const double lfact  = ( absIct == 1 || absIct == 3 ? 0.8 : 0.5 );
+        m_pcTrQuant->setLambda( lfact * m_pcTrQuant->getLambda() );
+        if ( checkJointCbCr && (tu.cu->cs->slice->getSliceQp() > 18))
+        {
+          m_pcTrQuant->setLambda( 1.05 * m_pcTrQuant->getLambda() );
+        }
+
+        m_CABACEstimator->getCtx() = ctxStart;
+        m_CABACEstimator->resetBits();
+
+        PelBuf cbResi = csFull->getResiBuf(cbArea);
+        PelBuf crResi = csFull->getResiBuf(crArea);
+        cbResi.copyFrom(orgResiCb[cbfMask]);
+        crResi.copyFrom(orgResiCr[cbfMask]);
+
+        if ( reshape )
+        {
+          double cRescale = (double)(1 << CSCALE_FP_PREC) / (double)(tu.getChromaAdj());
+          m_pcTrQuant->setLambda(m_pcTrQuant->getLambda() / (cRescale*cRescale));
+        }
+
+        Distortion currCompDistY = MAX_UINT64;
+        QpParam qpCbCr(tu, codeCompId);
+        if (colorTransFlag)
+        {
+          for (int qpIdx = 0; qpIdx < 2; qpIdx++)
+          {
+            qpCbCr.Qps[qpIdx] = qpCbCr.Qps[qpIdx] + (codeCompId == COMPONENT_Cr ? DELTA_QP_FOR_Co : DELTA_QP_FOR_Y_Cg);
+            qpCbCr.pers[qpIdx] = qpCbCr.Qps[qpIdx] / 6;
+            qpCbCr.rems[qpIdx] = qpCbCr.Qps[qpIdx] % 6;
+          }
+        }
+
+        tu.getCoeffs(otherCompId).fill(0);   // do we need that?
+        TU::setCbfAtDepth(tu, otherCompId, tu.depth, false);
+
+        PelBuf &codeResi   = (codeCompId == COMPONENT_Cr ? crResi : cbResi);
+        TCoeff  compAbsSum = 0;
+        m_pcTrQuant->transformNxN(tu, codeCompId, qpCbCr, compAbsSum, m_CABACEstimator->getCtx());
+        if (compAbsSum > 0)
+        {
+          m_pcTrQuant->invTransformNxN(tu, codeCompId, codeResi, qpCbCr);
+          codedCbfMask += (codeCompId == COMPONENT_Cb ? 2 : 1);
+        }
+        else
+        {
+          codeResi.fill(0);
+        }
+
+        if (tu.jointCbCr == 3 && codedCbfMask == 2)
+        {
+          codedCbfMask = 3;
+          TU::setCbfAtDepth(tu, COMPONENT_Cr, tu.depth, true);
+        }
+        if (codedCbfMask && tu.jointCbCr != codedCbfMask)
+        {
+          codedCbfMask = 0;
+        }
+        currAbsSum = codedCbfMask;
+
+        if (currAbsSum > 0)
+        {
+          m_CABACEstimator->cbf_comp(cs, codedCbfMask >> 1, cbArea, currDepth, false);
+          m_CABACEstimator->cbf_comp(cs, codedCbfMask & 1, crArea, currDepth, codedCbfMask >> 1);
+          m_CABACEstimator->joint_cb_cr(tu, codedCbfMask);
+          if (codedCbfMask >> 1)
+            m_CABACEstimator->residual_coding(tu, COMPONENT_Cb);
+          if (codedCbfMask & 1)
+            m_CABACEstimator->residual_coding(tu, COMPONENT_Cr);
+          currCompFracBits = m_CABACEstimator->getEstFracBits();
+
+          m_pcTrQuant->invTransformICT(tu, cbResi, crResi);
+          if (reshape)
+          {
+            cbResi.scaleSignal(tu.getChromaAdj(), 0, tu.cu->cs->slice->clpRng(COMPONENT_Cb));
+            crResi.scaleSignal(tu.getChromaAdj(), 0, tu.cu->cs->slice->clpRng(COMPONENT_Cr));
+          }
+
+          if (colorTransFlag)
+          {
+            PelUnitBuf     orgResidual = orgResi->subBuf(relativeUnitArea);
+            PelUnitBuf     invColorTransResidual = m_colorTransResiBuf[2].getBuf(relativeUnitArea);
+            csFull->getResiBuf(currArea).colorSpaceConvert(invColorTransResidual, false);
+
+            currCompDistY = m_pcRdCost->getDistPart(orgResidual.bufs[COMPONENT_Y], invColorTransResidual.bufs[COMPONENT_Y], sps.getBitDepth(toChannelType(COMPONENT_Y)), COMPONENT_Y, DF_SSE);
+            currCompDistCb = m_pcRdCost->getDistPart(orgResidual.bufs[COMPONENT_Cb], invColorTransResidual.bufs[COMPONENT_Cb], sps.getBitDepth(toChannelType(COMPONENT_Cb)), COMPONENT_Cb, DF_SSE);
+            currCompDistCr = m_pcRdCost->getDistPart(orgResidual.bufs[COMPONENT_Cr], invColorTransResidual.bufs[COMPONENT_Cr], sps.getBitDepth(toChannelType(COMPONENT_Cr)), COMPONENT_Cr, DF_SSE);
+            currCompCost = m_pcRdCost->calcRdCost(uiSingleFracBits[COMPONENT_Y] + currCompFracBits, currCompDistY + currCompDistCr + currCompDistCb, false);
+          }
+          else
+          {
+          currCompDistCb = m_pcRdCost->getDistPart(csFull->getOrgResiBuf(cbArea), cbResi, channelBitDepth, COMPONENT_Cb, DF_SSE);
+          currCompDistCr = m_pcRdCost->getDistPart(csFull->getOrgResiBuf(crArea), crResi, channelBitDepth, COMPONENT_Cr, DF_SSE);
+#if WCG_EXT
+          currCompCost   = m_pcRdCost->calcRdCost(currCompFracBits, currCompDistCr + currCompDistCb, false);
+#else
+          currCompCost   = m_pcRdCost->calcRdCost(currCompFracBits, currCompDistCr + currCompDistCb);
+#endif
+          }
+        }
+        else
+          currCompCost = MAX_DOUBLE;
+
+        // evaluate
+        if( currCompCost < minCostCbCr )
+        {
+          uiAbsSum[COMPONENT_Cb]         = currAbsSum;
+          uiAbsSum[COMPONENT_Cr]         = currAbsSum;
+          uiSingleDistComp[COMPONENT_Cb] = currCompDistCb;
+          uiSingleDistComp[COMPONENT_Cr] = currCompDistCr;
+          if (colorTransFlag)
+          {
+            uiSingleDistComp[COMPONENT_Y] = currCompDistY;
+          }
+          minCostCbCr                    = currCompCost;
+          isLastBest = (cbfMask == jointCbfMasksToTest.back());
+          if (!isLastBest)
+          {
+            bestTU.copyComponentFrom(tu, COMPONENT_Cb);
+            bestTU.copyComponentFrom(tu, COMPONENT_Cr);
+            saveCS.getResiBuf(cbArea).copyFrom(csFull->getResiBuf(cbArea));
+            saveCS.getResiBuf(crArea).copyFrom(csFull->getResiBuf(crArea));
+          }
+        }
+
+        if( !isLastBest )
+        {
+          // copy component
+          tu.copyComponentFrom( bestTU, COMPONENT_Cb );
+          tu.copyComponentFrom( bestTU, COMPONENT_Cr );
+          csFull->getResiBuf( cbArea ).copyFrom( saveCS.getResiBuf( cbArea ) );
+          csFull->getResiBuf( crArea ).copyFrom( saveCS.getResiBuf( crArea ) );
+        }
+        if (colorTransFlag)
+        {
+          m_pcTrQuant->lambdaAdjustColorTrans(false);
+        }
+      }
+    }
+
     m_CABACEstimator->getCtx() = ctxStart;
     m_CABACEstimator->resetBits();
     if( !tu.noResidual )
@@ -6434,6 +7106,11 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par
         continue;
       if (tu.blocks[compID].valid())
       {
+        if( compID == COMPONENT_Cr )
+        {
+          const int cbfMask = ( TU::getCbf( tu, COMPONENT_Cb ) ? 2 : 0 ) + ( TU::getCbf( tu, COMPONENT_Cr ) ? 1 : 0 );
+          m_CABACEstimator->joint_cb_cr(tu, cbfMask);
+        }
         if( cs.pps->getPpsRangeExtension().getCrossComponentPredictionEnabledFlag() && isChroma(compID) && uiAbsSum[COMPONENT_Y] )
         {
           m_CABACEstimator->cross_comp_pred( tu, compID );
@@ -6485,6 +7162,7 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par
     {
       xEstimateInterResidualQT(*csSplit, partitioner, bCheckFull ? nullptr : puiZeroDist
         , luma, chroma
+        , orgResi
       );
 
       csSplit->cost = m_pcRdCost->calcRdCost( csSplit->fracBits, csSplit->dist );
@@ -6563,22 +7241,33 @@ void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &pa
   , const bool luma, const bool chroma
 )
 {
+  m_pcRdCost->setChromaFormat(cs.sps->getChromaFormatIdc());
+
   CodingUnit &cu = *cs.getCU( partitioner.chType );
+  if( cu.predMode == MODE_INTER )
+    CHECK( cu.isSepTree(), "CU with Inter mode must be in single tree" );
 
   const ChromaFormat format     = cs.area.chromaFormat;;
   const int  numValidComponents = getNumberValidComponents(format);
   const SPS &sps                = *cs.sps;
-  const PPS &pps                = *cs.pps;
+
+  bool colorTransAllowed = cs.slice->getSPS()->getUseColorTrans() && luma && chroma;
+  if (cs.slice->getSPS()->getUseColorTrans())
+  {
+    CHECK(cu.treeType != TREE_D || partitioner.treeType != TREE_D, "localtree should not be applied when adaptive color transform is enabled");
+    CHECK(cu.modeType != MODE_TYPE_ALL || partitioner.modeType != MODE_TYPE_ALL, "localtree should not be applied when adaptive color transform is enabled");
+  }
 
   if( skipResidual ) //  No residual coding : SKIP mode
   {
     cu.skip    = true;
     cu.rootCbf = false;
+    cu.colorTransform = false;
     CHECK( cu.sbtInfo != 0, "sbtInfo shall be 0 if CU has no residual" );
     cs.getResiBuf().fill(0);
     {
       cs.getRecoBuf().copyFrom(cs.getPredBuf() );
-      if (m_pcEncCfg->getReshaper() && (cs.slice->getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag()) && !cu.firstPU->mhIntraFlag && !CU::isIBC(cu))
+      if (m_pcEncCfg->getLmcs() && (cs.picHeader->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag()) && !cu.firstPU->ciipFlag && !CU::isIBC(cu))
       {
         cs.getRecoBuf().Y().rspSignal(m_pcReshape->getFwdLUT());
       }
@@ -6586,7 +7275,7 @@ void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &pa
 
 
     // add an empty TU
-    cs.addTU(CS::isDualITree(cs) ? cu : cs.area, partitioner.chType);
+    cs.addTU(CS::getArea(cs, cs.area, partitioner.chType), partitioner.chType);
     Distortion distortion = 0;
 
     for (int comp = 0; comp < numValidComponents; comp++)
@@ -6600,7 +7289,7 @@ void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &pa
       CPelBuf org  = cs.getOrgBuf  (compID);
 #if WCG_EXT
       if (m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() || (
-        m_pcEncCfg->getReshaper() && (cs.slice->getReshapeInfo().getUseSliceReshaper()&& m_pcReshape->getCTUFlag())))
+        m_pcEncCfg->getLmcs() && (cs.picHeader->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag())) )
       {
         const CPelBuf orgLuma = cs.getOrgBuf( cs.area.blocks[COMPONENT_Y] );
         if (compID == COMPONENT_Y && !(m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled()))
@@ -6622,29 +7311,10 @@ void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &pa
 
     m_CABACEstimator->resetBits();
 
-    if( pps.getTransquantBypassEnabledFlag() )
-    {
-      m_CABACEstimator->cu_transquant_bypass_flag( cu );
-    }
-
     PredictionUnit &pu = *cs.getPU( partitioner.chType );
 
     m_CABACEstimator->cu_skip_flag  ( cu );
-    if (CU::isIBC(cu))
-    {
-      m_CABACEstimator->merge_idx(pu);
-    }
-    else
-    {
-    m_CABACEstimator->subblock_merge_flag( cu );
-    m_CABACEstimator->triangle_mode ( cu );
-    if (cu.mmvdSkip)
-    {
-      m_CABACEstimator->mmvd_merge_idx(pu);
-    }
-    else
-    m_CABACEstimator->merge_idx     ( pu );
-    }
+    m_CABACEstimator->merge_data(pu);
 
     cs.dist     = distortion;
     cs.fracBits = m_CABACEstimator->getEstFracBits();
@@ -6657,14 +7327,14 @@ void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &pa
   if (luma)
   {
     cs.getResiBuf().bufs[0].copyFrom(cs.getOrgBuf().bufs[0]);
-    if (cs.slice->getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag())
+    if (cs.picHeader->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag())
     {
       const CompArea &areaY = cu.Y();
       CompArea      tmpArea(COMPONENT_Y, areaY.chromaFormat, Position(0, 0), areaY.size());
       PelBuf tmpPred = m_tmpStorageLCU.getBuf(tmpArea);
       tmpPred.copyFrom(cs.getPredBuf(COMPONENT_Y));
 
-      if (!cu.firstPU->mhIntraFlag && !CU::isIBC(cu))
+      if (!cu.firstPU->ciipFlag && !CU::isIBC(cu))
         tmpPred.rspSignal(m_pcReshape->getFwdLUT());
       cs.getResiBuf(COMPONENT_Y).rspSignal(m_pcReshape->getFwdLUT());
       cs.getResiBuf(COMPONENT_Y).subtract(tmpPred);
@@ -6679,20 +7349,133 @@ void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &pa
     cs.getResiBuf().bufs[1].subtract(cs.getPredBuf().bufs[1]);
     cs.getResiBuf().bufs[2].subtract(cs.getPredBuf().bufs[2]);
   }
-  Distortion zeroDistortion = 0;
+  const UnitArea curUnitArea = partitioner.currArea();
+  CodingStructure &saveCS = *m_pSaveCS[1];
+  saveCS.pcv = cs.pcv;
+  saveCS.picture = cs.picture;
+  saveCS.area.repositionTo(curUnitArea);
+  saveCS.clearCUs();
+  saveCS.clearPUs();
+  saveCS.clearTUs();
+  for (const auto &ppcu : cs.cus)
+  {
+    CodingUnit &pcu = saveCS.addCU(*ppcu, ppcu->chType);
+    pcu = *ppcu;
+  }
+  for (const auto &ppu : cs.pus)
+  {
+    PredictionUnit &pu = saveCS.addPU(*ppu, ppu->chType);
+    pu = *ppu;
+  }
+
+  PelUnitBuf orgResidual, colorTransResidual;
+  const UnitArea localUnitArea(cs.area.chromaFormat, Area(0, 0, cu.Y().width, cu.Y().height));
+  orgResidual = m_colorTransResiBuf[0].getBuf(localUnitArea);
+  colorTransResidual = m_colorTransResiBuf[1].getBuf(localUnitArea);
+  orgResidual.copyFrom(cs.getResiBuf());
+  if (colorTransAllowed)
+  {
+    cs.getResiBuf().colorSpaceConvert(colorTransResidual, true);
+  }
+
+  const TempCtx ctxStart(m_CtxCache, m_CABACEstimator->getCtx());
+  int           numAllowedColorSpace = (colorTransAllowed ? 2 : 1);
+  Distortion    zeroDistortion = 0;
 
-  const TempCtx ctxStart( m_CtxCache, m_CABACEstimator->getCtx() );
+  double  bestCost = MAX_DOUBLE;
+  bool    bestColorTrans = false;
+  bool    bestRootCbf = false;
+  uint8_t bestsbtInfo = 0;
+  uint8_t orgSbtInfo = cu.sbtInfo;
+  int     bestIter = 0;
 
+  auto blkCache = dynamic_cast<CacheBlkInfoCtrl*>(m_modeCtrl);   // nullptr when m_modeCtrl is not a CacheBlkInfoCtrl
+  bool rootCbfFirstColorSpace = true;
+
+  for (int iter = 0; iter < numAllowedColorSpace; iter++)
+  {
+    if (colorTransAllowed && !m_pcEncCfg->getRGBFormatFlag() && iter)
+    {
+      continue;
+    }
+    char colorSpaceOption = blkCache ? blkCache->getSelectColorSpaceOption(cu) : 0;   // without a block cache fall back to 0 (no forced color space), so the heuristic path below decides
+    if (colorTransAllowed)
+    {
+      if (colorSpaceOption)
+      {
+        CHECK(colorSpaceOption > 2 || colorSpaceOption < 0, "invalid color space selection option");
+        if (colorSpaceOption == 1 && iter)
+        {
+          continue;
+        }
+        if (colorSpaceOption == 2 && !iter)
+        {
+          continue;
+        }
+      }
+    }
+    if (!colorSpaceOption)
+    {
+      if (iter && !rootCbfFirstColorSpace)
+      {
+        continue;
+      }
+      if (colorTransAllowed && cs.bestParent && cs.bestParent->tmpColorSpaceCost != MAX_DOUBLE)
+      {
+        if (cs.bestParent->firstColorSpaceSelected && iter)
+        {
+          continue;
+        }
+        if (m_pcEncCfg->getRGBFormatFlag())
+        {
+          if (!cs.bestParent->firstColorSpaceSelected && !iter)
+          {
+            continue;
+          }
+        }
+      }
+    }
+    bool colorTransFlag = (colorTransAllowed && m_pcEncCfg->getRGBFormatFlag()) ? (1 - iter) : iter;
+    cu.colorTransform = colorTransFlag;
+    cu.sbtInfo = orgSbtInfo;
+
+    m_CABACEstimator->resetBits();
+    m_CABACEstimator->getCtx() = ctxStart;
+    cs.clearTUs();
+    cs.fracBits = 0;
+    cs.dist = 0;
+    cs.cost = 0;
+
+  if (colorTransFlag)
+  {
+    cs.getOrgResiBuf().bufs[0].copyFrom(colorTransResidual.bufs[0]);
+    cs.getOrgResiBuf().bufs[1].copyFrom(colorTransResidual.bufs[1]);
+    cs.getOrgResiBuf().bufs[2].copyFrom(colorTransResidual.bufs[2]);
+
+    memset(m_pTempPel, 0, sizeof(Pel) * localUnitArea.blocks[0].area());
+    zeroDistortion = 0;
+    for (int compIdx = 0; compIdx < 3; compIdx++)
+    {
+      ComponentID componentID = (ComponentID)compIdx;
+      const CPelBuf zeroBuf(m_pTempPel, localUnitArea.blocks[compIdx]);
+      zeroDistortion += m_pcRdCost->getDistPart(zeroBuf, orgResidual.bufs[compIdx], sps.getBitDepth(toChannelType(componentID)), componentID, DF_SSE);
+    }
+    xEstimateInterResidualQT(cs, partitioner, NULL, luma, chroma, &orgResidual);
+  }
+  else
+  {
+    zeroDistortion = 0;
   if (luma)
   {
-    cs.getOrgResiBuf().bufs[0].copyFrom(cs.getResiBuf().bufs[0]);
+    cs.getOrgResiBuf().bufs[0].copyFrom(orgResidual.bufs[0]);
   }
   if (chroma)
   {
-    cs.getOrgResiBuf().bufs[1].copyFrom(cs.getResiBuf().bufs[1]);
-    cs.getOrgResiBuf().bufs[2].copyFrom(cs.getResiBuf().bufs[2]);
+    cs.getOrgResiBuf().bufs[1].copyFrom(orgResidual.bufs[1]);
+    cs.getOrgResiBuf().bufs[2].copyFrom(orgResidual.bufs[2]);
   }
   xEstimateInterResidualQT(cs, partitioner, &zeroDistortion, luma, chroma);
+  }
   TransformUnit &firstTU = *cs.getTU( partitioner.chType );
 
   cu.rootCbf = false;
@@ -6704,11 +7487,11 @@ void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &pa
 #if WCG_EXT
     if( m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() )
     {
-      zeroCost = cs.isLossless ? ( cs.cost + 1 ) : m_pcRdCost->calcRdCost( zeroFracBits, zeroDistortion, false );
+      zeroCost = m_pcRdCost->calcRdCost( zeroFracBits, zeroDistortion, false );
     }
     else
 #endif
-    zeroCost = cs.isLossless ? ( cs.cost + 1 ) : m_pcRdCost->calcRdCost( zeroFracBits, zeroDistortion );
+    zeroCost = m_pcRdCost->calcRdCost( zeroFracBits, zeroDistortion );
   }
 
   const int  numValidTBlocks   = ::getNumberValidTBlocks( *cs.pcv );
@@ -6723,6 +7506,8 @@ void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &pa
 
   if (zeroCost < cs.cost || !cu.rootCbf)
   {
+    cs.cost = zeroCost;
+    cu.colorTransform = false;
     cu.sbtInfo = 0;
     cu.rootCbf = false;
 
@@ -6737,7 +7522,50 @@ void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &pa
     }
     cu.firstTU = cu.lastTU = &tu;
   }
+  if (!iter)
+  {
+    rootCbfFirstColorSpace = cu.rootCbf;
+  }
+  if (cs.cost < bestCost)
+  {
+    bestIter = iter;
+    if (cu.rootCbf && cu.colorTransform)
+    {
+      cs.getResiBuf(curUnitArea).colorSpaceConvert(cs.getResiBuf(curUnitArea), false);
+    }
+
+    if (iter != (numAllowedColorSpace - 1))
+    {
+      bestCost = cs.cost;
+      bestColorTrans = cu.colorTransform;
+      bestRootCbf = cu.rootCbf;
+      bestsbtInfo = cu.sbtInfo;
 
+      saveCS.clearTUs();
+      for (const auto &ptu : cs.tus)
+      {
+        TransformUnit &tu = saveCS.addTU(*ptu, ptu->chType);
+        tu = *ptu;
+      }
+      saveCS.getResiBuf(curUnitArea).copyFrom(cs.getResiBuf(curUnitArea));
+    }
+  }
+  }
+
+  if (bestIter != (numAllowedColorSpace - 1))
+  {
+    cu.colorTransform = bestColorTrans;
+    cu.rootCbf = bestRootCbf;
+    cu.sbtInfo = bestsbtInfo;
+
+    cs.clearTUs();
+    for (const auto &ptu : saveCS.tus)
+    {
+      TransformUnit &tu = cs.addTU(*ptu, ptu->chType);
+      tu = *ptu;
+    }
+    cs.getResiBuf(curUnitArea).copyFrom(saveCS.getResiBuf(curUnitArea));
+  }
 
   // all decisions now made. Fully encode the CU, including the headers:
   m_CABACEstimator->getCtx() = ctxStart;
@@ -6759,14 +7587,14 @@ void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &pa
 
   if (luma)
   {
-    if (cu.rootCbf && cs.slice->getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag())
+    if (cu.rootCbf && cs.picHeader->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag())
     {
       const CompArea &areaY = cu.Y();
       CompArea      tmpArea(COMPONENT_Y, areaY.chromaFormat, Position(0, 0), areaY.size());
       PelBuf tmpPred = m_tmpStorageLCU.getBuf(tmpArea);
       tmpPred.copyFrom(cs.getPredBuf(COMPONENT_Y));
 
-      if (!cu.firstPU->mhIntraFlag && !CU::isIBC(cu))
+      if (!cu.firstPU->ciipFlag && !CU::isIBC(cu))
         tmpPred.rspSignal(m_pcReshape->getFwdLUT());
 
       cs.getRecoBuf(COMPONENT_Y).reconstruct(tmpPred, cs.getResiBuf(COMPONENT_Y), cs.slice->clpRng(COMPONENT_Y));
@@ -6774,7 +7602,7 @@ void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &pa
     else
     {
       cs.getRecoBuf().bufs[0].reconstruct(cs.getPredBuf().bufs[0], cs.getResiBuf().bufs[0], cs.slice->clpRngs().comp[0]);
-      if (cs.slice->getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag() && !cu.firstPU->mhIntraFlag && !CU::isIBC(cu))
+      if (cs.picHeader->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag() && !cu.firstPU->ciipFlag && !CU::isIBC(cu))
       {
         cs.getRecoBuf().bufs[0].rspSignal(m_pcReshape->getFwdLUT());
       }
@@ -6801,7 +7629,7 @@ void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &pa
 
 #if WCG_EXT
     if (m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() || (
-      m_pcEncCfg->getReshaper() && (cs.slice->getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag() ) ) )
+      m_pcEncCfg->getLmcs() && (cs.picHeader->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag())))
     {
       const CPelBuf orgLuma = cs.getOrgBuf( cs.area.blocks[COMPONENT_Y] );
       if (compID == COMPONENT_Y && !(m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled()) )
@@ -6826,6 +7654,21 @@ void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &pa
   cs.dist     = finalDistortion;
   cs.fracBits = finalFracBits;
   cs.cost     = m_pcRdCost->calcRdCost(cs.fracBits, cs.dist);
+  if (cs.slice->getSPS()->getUseColorTrans())
+  {
+    if (cs.cost < cs.tmpColorSpaceCost)
+    {
+      cs.tmpColorSpaceCost = cs.cost;
+      if (m_pcEncCfg->getRGBFormatFlag())
+      {
+        cs.firstColorSpaceSelected = cu.colorTransform || !cu.rootCbf;
+      }
+      else
+      {
+        cs.firstColorSpaceSelected = !cu.colorTransform || !cu.rootCbf;
+      }
+    }
+  }
 
   CHECK(cs.tus.size() == 0, "No TUs present");
 }
@@ -6840,31 +7683,22 @@ uint64_t InterSearch::xGetSymbolFracBitsInter(CodingStructure &cs, Partitioner &
   if( cu.firstPU->mergeFlag && !cu.rootCbf )
   {
     cu.skip = true;
-
-    if( cs.pps->getTransquantBypassEnabledFlag() )
-    {
-      m_CABACEstimator->cu_transquant_bypass_flag( cu );
-    }
-
+    CHECK(cu.colorTransform, "ACT should not be enabled for skip mode");
     m_CABACEstimator->cu_skip_flag  ( cu );
-    m_CABACEstimator->subblock_merge_flag( cu );
-    m_CABACEstimator->triangle_mode ( cu );
-    if (cu.mmvdSkip)
+    if (cu.firstPU->ciipFlag)
     {
-      m_CABACEstimator->mmvd_merge_idx(*cu.firstPU);
+      // CIIP shouldn't be skip, the upper level function will deal with it, i.e. setting the overall cost to MAX_DOUBLE
     }
     else
-    m_CABACEstimator->merge_idx     ( *cu.firstPU );
+    {
+      m_CABACEstimator->merge_data(*cu.firstPU);
+    }
     fracBits   += m_CABACEstimator->getEstFracBits();
   }
   else
   {
     CHECK( cu.skip, "Skip flag has to be off at this point!" );
 
-    if( cs.pps->getTransquantBypassEnabledFlag() )
-    {
-      m_CABACEstimator->cu_transquant_bypass_flag( cu );
-    }
     if (cu.Y().valid())
     m_CABACEstimator->cu_skip_flag( cu );
     m_CABACEstimator->pred_mode   ( cu );
@@ -6879,11 +7713,11 @@ uint64_t InterSearch::xGetSymbolFracBitsInter(CodingStructure &cs, Partitioner &
   return fracBits;
 }
 
-double InterSearch::xGetMEDistortionWeight(uint8_t gbiIdx, RefPicList eRefPicList)
+double InterSearch::xGetMEDistortionWeight(uint8_t bcwIdx, RefPicList eRefPicList)
 {
-  if( gbiIdx != GBI_DEFAULT )
+  if( bcwIdx != BCW_DEFAULT )
   {
-    return fabs((double)getGbiWeight(gbiIdx, eRefPicList) / (double)g_GbiWeightBase);
+    return fabs((double)getBcwWeight(bcwIdx, eRefPicList) / (double)g_BcwWeightBase);
   }
   else
   {
@@ -6896,13 +7730,16 @@ bool InterSearch::xReadBufferedUniMv(PredictionUnit& pu, RefPicList eRefPicList,
   {
     m_uniMotions.copyTo(rcMv, ruiCost, (uint32_t)eRefPicList, (uint32_t)iRefIdx);
 
-    m_pcRdCost->setPredictor(pcMvPred);
+    Mv pred = pcMvPred;
+    pred.changeTransPrecInternal2Amvr(pu.cu->imv);
+    m_pcRdCost->setPredictor(pred);
     m_pcRdCost->setCostScale(0);
 
-    unsigned imvShift = pu.cu->imv << 1;
-    uint32_t uiMvBits = m_pcRdCost->getBitsOfVectorWithPredictor(rcMv.getHor(), rcMv.getVer(), imvShift);
+    Mv mv = rcMv;
+    mv.changeTransPrecInternal2Amvr(pu.cu->imv);
+    uint32_t mvBits = m_pcRdCost->getBitsOfVectorWithPredictor(mv.getHor(), mv.getVer(), 0);
 
-    ruiBits += uiMvBits;
+    ruiBits += mvBits;
     ruiCost += m_pcRdCost->getCost(ruiBits);
     return true;
   }
@@ -6915,28 +7752,23 @@ bool InterSearch::xReadBufferedAffineUniMv(PredictionUnit& pu, RefPicList eRefPi
 {
   if (m_uniMotions.isReadModeAffine((uint32_t)eRefPicList, (uint32_t)iRefIdx, pu.cu->affineType))
   {
-    m_uniMotions.copyAffineMvTo(acMv, ruiCost, (uint32_t)eRefPicList, (uint32_t)iRefIdx, pu.cu->affineType
-                                , mvpIdx
-    );
+    m_uniMotions.copyAffineMvTo(acMv, ruiCost, (uint32_t)eRefPicList, (uint32_t)iRefIdx, pu.cu->affineType, mvpIdx);
     m_pcRdCost->setCostScale(0);
     acMvPred[0] = aamvpi.mvCandLT[mvpIdx];
     acMvPred[1] = aamvpi.mvCandRT[mvpIdx];
     acMvPred[2] = aamvpi.mvCandLB[mvpIdx];
 
-    uint32_t uiMvBits = 0;
-    for (int iVerIdx = 0; iVerIdx<(pu.cu->affineType ? 3 : 2); iVerIdx++)
+    uint32_t mvBits = 0;
+    for (int verIdx = 0; verIdx<(pu.cu->affineType ? 3 : 2); verIdx++)
     {
-      if (iVerIdx)
-      {
-        m_pcRdCost->setPredictor(acMvPred[iVerIdx] + acMv[0] - acMvPred[0]);
-      }
-      else
-      {
-        m_pcRdCost->setPredictor(acMvPred[iVerIdx]);
-      }
-      uiMvBits += m_pcRdCost->getBitsOfVectorWithPredictor(acMv[iVerIdx].getHor(), acMv[iVerIdx].getVer(), 0);
+      Mv pred = verIdx ? acMvPred[verIdx] + acMv[0] - acMvPred[0] : acMvPred[verIdx];
+      pred.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
+      m_pcRdCost->setPredictor(pred);
+      Mv mv = acMv[verIdx];
+      mv.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
+      mvBits += m_pcRdCost->getBitsOfVectorWithPredictor(mv.getHor(), mv.getVer(), 0);
     }
-    ruiBits += uiMvBits;
+    ruiBits += mvBits;
     ruiCost += m_pcRdCost->getCost(ruiBits);
     return true;
   }
@@ -6944,25 +7776,25 @@ bool InterSearch::xReadBufferedAffineUniMv(PredictionUnit& pu, RefPicList eRefPi
 }
 void InterSearch::initWeightIdxBits()
 {
-  for (int n = 0; n < GBI_NUM; ++n)
+  for (int n = 0; n < BCW_NUM; ++n)   // caches deriveWeightIdxBits(n) for each of the BCW_NUM indices
   {
     m_estWeightIdxBits[n] = deriveWeightIdxBits(n);
   }
 }
 
-void InterSearch::xClipMv( Mv& rcMv, const Position& pos, const struct Size& size, const SPS& sps )
+void InterSearch::xClipMv( Mv& rcMv, const Position& pos, const struct Size& size, const SPS& sps, const PPS& pps )
 {
   int mvShift = MV_FRACTIONAL_BITS_INTERNAL;
   int offset = 8;
-  int horMax = ( sps.getPicWidthInLumaSamples() + offset - ( int ) pos.x - 1 ) << mvShift;
+  int horMax = ( pps.getPicWidthInLumaSamples() + offset - (int)pos.x - 1 ) << mvShift;
   int horMin = ( -( int ) sps.getMaxCUWidth()   - offset - ( int ) pos.x + 1 ) << mvShift;
 
-  int verMax = ( sps.getPicHeightInLumaSamples() + offset - ( int ) pos.y - 1 ) << mvShift;
+  int verMax = ( pps.getPicHeightInLumaSamples() + offset - (int)pos.y - 1 ) << mvShift;
   int verMin = ( -( int ) sps.getMaxCUHeight()   - offset - ( int ) pos.y + 1 ) << mvShift;
 
   if( sps.getWrapAroundEnabledFlag() )
   {
-    int horMax = ( sps.getPicWidthInLumaSamples() + sps.getMaxCUWidth() - size.width + offset - ( int ) pos.x - 1 ) << mvShift;
+    int horMax = ( pps.getPicWidthInLumaSamples() + sps.getMaxCUWidth() - size.width + offset - (int)pos.x - 1 ) << mvShift;
     int horMin = ( -( int ) sps.getMaxCUWidth()                                      - offset - ( int ) pos.x + 1 ) << mvShift;
     rcMv.setHor( std::min( horMax, std::max( horMin, rcMv.getHor() ) ) );
     rcMv.setVer( std::min( verMax, std::max( verMin, rcMv.getVer() ) ) );
@@ -6981,7 +7813,7 @@ uint32_t InterSearch::xDetermineBestMvp( PredictionUnit& pu, Mv acMvTemp[3], int
   {
     Mv mvPred[3] = { aamvpi.mvCandLT[i], aamvpi.mvCandRT[i], aamvpi.mvCandLB[i] };
     uint32_t candBits = m_auiMVPIdxCost[i][aamvpi.numCand];
-    candBits += xCalcAffineMVBits( pu, acMvTemp, mvPred, pu.cu->imv != 1 );
+    candBits += xCalcAffineMVBits( pu, acMvTemp, mvPred );
 
     if ( candBits < minBits )
     {
@@ -7000,7 +7832,7 @@ void InterSearch::symmvdCheckBestMvp(
   Mv curMv,
   RefPicList curRefList,
   AMVPInfo amvpInfo[2][33],
-  int32_t gbiIdx,
+  int32_t bcwIdx,
   Mv cMvPredSym[2],
   int32_t mvpIdxSym[2],
   Distortion& bestCost,
@@ -7022,9 +7854,23 @@ void InterSearch::symmvdCheckBestMvp(
   PelUnitBuf predBufA = m_tmpPredStorage[curRefList].getBuf(UnitAreaRelative(*pu.cu, pu));
   const Picture* picRefA = pu.cu->slice->getRefPic(curRefList, cCurMvField.refIdx);
   Mv mvA = cCurMvField.mv;
-  mvA.changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);
-  clipMv(mvA, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps);
-  xPredInterBlk(COMPONENT_Y, pu, picRefA, mvA, predBufA, true, pu.cu->slice->clpRng(COMPONENT_Y), false, false);
+  clipMv( mvA, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps, *pu.cs->pps );
+  if ( (mvA.hor & 15) == 0 && (mvA.ver & 15) == 0 )
+  {
+    Position offset = pu.blocks[COMPONENT_Y].pos().offset( mvA.getHor() >> 4, mvA.getVer() >> 4 );
+    CPelBuf pelBufA = picRefA->getRecoBuf( CompArea( COMPONENT_Y, pu.chromaFormat, offset, pu.blocks[COMPONENT_Y].size() ), false );
+    predBufA.bufs[0].buf = const_cast<Pel *>(pelBufA.buf);
+    predBufA.bufs[0].stride = pelBufA.stride;
+  }
+  else
+  {
+    xPredInterBlk( COMPONENT_Y, pu, picRefA, mvA, predBufA, false, pu.cu->slice->clpRng( COMPONENT_Y ), false, false );
+  }
+  PelUnitBuf bufTmp = m_tmpStorageLCU.getBuf( UnitAreaRelative( *pu.cu, pu ) );
+  bufTmp.copyFrom( origBuf );
+  bufTmp.removeHighFreq( predBufA, m_pcEncCfg->getClipForBiPredMeEnabled(), pu.cu->slice->clpRngs(), getBcwWeight( pu.cu->BcwIdx, tarRefList ) );
+
+  double fWeight = xGetMEDistortionWeight( pu.cu->BcwIdx, tarRefList );
 
   int32_t skipMvpIdx[2];
   skipMvpIdx[0] = skip ? mvpIdxSym[0] : -1;
@@ -7043,21 +7889,28 @@ void InterSearch::symmvdCheckBestMvp(
       PelUnitBuf predBufB = m_tmpPredStorage[tarRefList].getBuf(UnitAreaRelative(*pu.cu, pu));
       const Picture* picRefB = pu.cu->slice->getRefPic(tarRefList, cTarMvField.refIdx);
       Mv mvB = cTarMvField.mv;
-      mvB.changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);
-      clipMv(mvB, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps);
-      xPredInterBlk(COMPONENT_Y, pu, picRefB, mvB, predBufB, true, pu.cu->slice->clpRng(COMPONENT_Y), false, false);
-
-      PelUnitBuf bufTmp = m_tmpStorageLCU.getBuf(UnitAreaRelative(*pu.cu, pu));
-      if (gbiIdx != GBI_DEFAULT)
-        bufTmp.Y().addWeightedAvg(predBufA.Y(), predBufB.Y(), pu.cu->slice->clpRng(COMPONENT_Y), gbiIdx);
+      clipMv( mvB, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps, *pu.cs->pps );
+      if ( (mvB.hor & 15) == 0 && (mvB.ver & 15) == 0 )
+      {
+        Position offset = pu.blocks[COMPONENT_Y].pos().offset( mvB.getHor() >> 4, mvB.getVer() >> 4 );
+        CPelBuf pelBufB = picRefB->getRecoBuf( CompArea( COMPONENT_Y, pu.chromaFormat, offset, pu.blocks[COMPONENT_Y].size() ), false );
+        predBufB.bufs[0].buf = const_cast<Pel *>(pelBufB.buf);
+        predBufB.bufs[0].stride = pelBufB.stride;
+      }
       else
-        bufTmp.Y().addAvg(predBufA.Y(), predBufB.Y(), pu.cu->slice->clpRng(COMPONENT_Y));
-
+      {
+        xPredInterBlk( COMPONENT_Y, pu, picRefB, mvB, predBufB, false, pu.cu->slice->clpRng( COMPONENT_Y ), false, false );
+      }
       // calc distortion
-      Distortion cost = m_pcRdCost->getDistPart(bufTmp.Y(), origBuf.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, DF_HAD);
-
-      m_pcRdCost->setPredictor(amvpCur.mvCand[i]);
-      uint32_t bits = m_pcRdCost->getBitsOfVectorWithPredictor(curMv.hor, curMv.ver, (pu.cu->imv << 1));
+      DFunc distFunc = (!pu.cu->slice->getDisableSATDForRD()) ? DF_HAD : DF_SAD;
+      Distortion cost = (Distortion)floor( fWeight * (double)m_pcRdCost->getDistPart( bufTmp.Y(), predBufB.Y(), pu.cs->sps->getBitDepth( CHANNEL_TYPE_LUMA ), COMPONENT_Y, distFunc ) );
+
+      Mv pred = amvpCur.mvCand[i];
+      pred.changeTransPrecInternal2Amvr(pu.cu->imv);
+      m_pcRdCost->setPredictor(pred);
+      Mv mv = curMv;
+      mv.changeTransPrecInternal2Amvr(pu.cu->imv);
+      uint32_t bits = m_pcRdCost->getBitsOfVectorWithPredictor(mv.hor, mv.ver, 0);
       bits += m_auiMVPIdxCost[i][AMVP_MAX_NUM_CANDS];
       bits += m_auiMVPIdxCost[j][AMVP_MAX_NUM_CANDS];
       cost += m_pcRdCost->getCost(bits);
@@ -7072,3 +7925,116 @@ void InterSearch::symmvdCheckBestMvp(
     }
   }
 }
+
+uint64_t InterSearch::xCalcPuMeBits(PredictionUnit& pu)   // returns the CABAC estimator's fractional-bit count for coding this PU's merge flag and merge data
+{
+  assert(pu.mergeFlag);          // precondition: only called for merge PUs
+  assert(!CU::isIBC(*pu.cu));    // precondition: the owning CU must not be IBC
+  m_CABACEstimator->resetBits(); // reset the estimator's bit counter so the value returned covers only the calls below
+  m_CABACEstimator->merge_flag(pu);
+  if (pu.mergeFlag)              // redundant while the asserts are active; still guards builds where assert() is compiled out
+  {
+    m_CABACEstimator->merge_data(pu);
+  }
+  return m_CABACEstimator->getEstFracBits();
+}
+
+bool InterSearch::searchBv(PredictionUnit& pu, int xPos, int yPos, int width, int height, int picWidth, int picHeight, int xBv, int yBv, int ctuSize)   // returns true only if the block at (xPos + xBv, yPos + yBv) of size width x height passes every check below
+{
+  const int ctuSizeLog2 = floorLog2(ctuSize);
+
+  int refRightX = xPos + xBv + width - 1;    // inclusive right edge of the reference block
+  int refBottomY = yPos + yBv + height - 1;  // inclusive bottom edge of the reference block
+
+  int refLeftX = xPos + xBv;
+  int refTopY = yPos + yBv;
+
+  if ((xPos + xBv) < 0)                      // reference block starts left of the picture
+  {
+    return false;
+  }
+  if (refRightX >= picWidth)                 // reference block extends past the right picture edge
+  {
+    return false;
+  }
+
+  if ((yPos + yBv) < 0)                      // reference block starts above the picture
+  {
+    return false;
+  }
+  if (refBottomY >= picHeight)               // reference block extends past the bottom picture edge
+  {
+    return false;
+  }
+  if ((xBv + width) > 0 && (yBv + height) > 0)   // rejected when the reference block's right edge is at/after xPos AND its bottom edge is at/after yPos
+  {
+    return false;
+  }
+
+  // Don't search the above CTU row
+  if (refTopY >> ctuSizeLog2 < yPos >> ctuSizeLog2)
+    return false;
+
+  // Don't search the below CTU row
+  if (refBottomY >> ctuSizeLog2 > yPos >> ctuSizeLog2)
+  {
+    return false;
+  }
+
+  unsigned curTileIdx = pu.cs->pps->getTileIdx(pu.lumaPos());   // each of the four reference-block corners must map to this same tile index
+  unsigned refTileIdx = pu.cs->pps->getTileIdx(Position(refLeftX, refTopY));
+  if (curTileIdx != refTileIdx)
+  {
+    return false;
+  }
+  refTileIdx = pu.cs->pps->getTileIdx(Position(refLeftX, refBottomY));
+  if (curTileIdx != refTileIdx)
+  {
+    return false;
+  }
+  refTileIdx = pu.cs->pps->getTileIdx(Position(refRightX, refTopY));
+  if (curTileIdx != refTileIdx)
+  {
+    return false;
+  }
+  refTileIdx = pu.cs->pps->getTileIdx(Position(refRightX, refBottomY));
+  if (curTileIdx != refTileIdx)
+  {
+    return false;
+  }
+
+  // in the same CTU line
+  int numLeftCTUs = (1 << ((7 - ctuSizeLog2) << 1)) - ((ctuSizeLog2 < 7) ? 1 : 0);   // evaluates to 1 for ctuSizeLog2 == 7, 3 for 6, 15 for 5
+  if ((refRightX >> ctuSizeLog2 <= xPos >> ctuSizeLog2) && (refLeftX >> ctuSizeLog2 >= (xPos >> ctuSizeLog2) - numLeftCTUs))   // reference may not reach right of the current CTU column nor start more than numLeftCTUs columns to its left
+  {
+
+    // in the same CTU, or left CTU
+    // if part of ref block is in the left CTU, some area can be referred from the not-yet updated local CTU buffer
+    if (((refLeftX >> ctuSizeLog2) == ((xPos >> ctuSizeLog2) - 1)) && (ctuSizeLog2 == 7))
+    {
+      // ref block's collocated block in current CTU
+      const Position refPosCol = pu.Y().topLeft().offset(xBv + ctuSize, yBv);
+      int offset64x = (refPosCol.x >> (ctuSizeLog2 - 1)) << (ctuSizeLog2 - 1);   // snap down to a multiple of ctuSize / 2 (64 here, since ctuSizeLog2 == 7)
+      int offset64y = (refPosCol.y >> (ctuSizeLog2 - 1)) << (ctuSizeLog2 - 1);
+      const Position refPosCol64x64 = {offset64x, offset64y};
+      if (pu.cs->isDecomp(refPosCol64x64, toChannelType(COMPONENT_Y)))   // NOTE(review): isDecomp presumably means "already coded" (see the comment before the final checks) — confirm
+        return false;
+      if (refPosCol64x64 == pu.Y().topLeft())   // collocated 64x64 origin coincides with the current PU's top-left
+        return false;
+    }
+  }
+  else
+    return false;
+
+  // in the same CTU, or valid area from left CTU. Check if the reference block is already coded
+  const Position refPosLT = pu.Y().topLeft().offset(xBv, yBv);
+  const Position refPosBR = pu.Y().bottomRight().offset(xBv, yBv);
+  const ChannelType      chType = toChannelType(COMPONENT_Y);
+  if (!pu.cs->isDecomp(refPosBR, chType))   // bottom-right corner is tested first, then top-left; both must pass
+    return false;
+  if (!pu.cs->isDecomp(refPosLT, chType))
+    return false;
+  return true;
+}
+
+//! \}
diff --git a/source/Lib/EncoderLib/InterSearch.h b/source/Lib/EncoderLib/InterSearch.h
index d2a4bccb923c67ab42365efd82543acc3aa8f095..27430d67c7660e084b8eec83208886117cfc6a41 100644
--- a/source/Lib/EncoderLib/InterSearch.h
+++ b/source/Lib/EncoderLib/InterSearch.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -78,9 +78,15 @@ struct AffineMVInfo
   int x, y, w, h;
 };
 
+struct BlkUniMvInfo // one cached motion-vector table together with the geometry of the block it belongs to
+{
+  Mv uniMvs[2][33]; // same [2][33] shape as the cMvTemp argument copied in by insertUniMvCands(); presumably [ref list][ref idx] -- TODO confirm
+  int x, y, w, h; // block position and size; filled from CompArea x/y/width/height and compared as the lookup key
+};
+
 typedef struct
 {
-  Mv acMvAffine4Para[2][2];
+  Mv acMvAffine4Para[2][3];
   Mv acMvAffine6Para[2][3];
   int16_t affine4ParaRefIdx[2];
   int16_t affine6ParaRefIdx[2];
@@ -108,16 +114,21 @@ private:
   CodingStructure **m_pSaveCS;
 
   ClpRng          m_lumaClpRng;
-  uint32_t        m_estWeightIdxBits[GBI_NUM];
-  GBiMotionParam  m_uniMotions;
+  uint32_t        m_estWeightIdxBits[BCW_NUM];
+  BcwMotionParam  m_uniMotions;
   bool            m_affineModeSelected;
   std::unordered_map< Position, std::unordered_map< Size, BlkRecord> > m_ctuRecord;
   AffineMVInfo       *m_affMVList;
   int             m_affMVListIdx;
   int             m_affMVListSize;
   int             m_affMVListMaxSize;
+  BlkUniMvInfo*   m_uniMvList;
+  int             m_uniMvListIdx;
+  int             m_uniMvListSize;
+  int             m_uniMvListMaxSize;
   Distortion      m_hevcCost;
   EncAffineMotion m_affineMotion;
+  PatentBvCand    m_defaultCachedBvs;
 protected:
   // interface to option
   EncCfg*         m_pcEncCfg;
@@ -140,6 +151,8 @@ protected:
   RefPicList      m_currRefPicList;
   int             m_currRefPicIndex;
   bool            m_skipFracME;
+  int             m_numHashMVStoreds[NUM_REF_PIC_LIST_01][MAX_NUM_REF];
+  Mv              m_hashMVStoreds[NUM_REF_PIC_LIST_01][MAX_NUM_REF][5];
 
   // Misc.
   Pel            *m_pTempPel;
@@ -150,8 +163,9 @@ protected:
   Mv              m_integerMv2Nx2N              [NUM_REF_PIC_LIST_01][MAX_NUM_REF];
 
   bool            m_isInitialized;
-  unsigned int    m_numBVs, m_numBV16s;
-  Mv              m_acBVs[IBC_NUM_CANDIDATES];
+
+  Mv              m_acBVs[2 * IBC_NUM_CANDIDATES];
+  unsigned int    m_numBVs;
   bool            m_useCompositeRef;
   Distortion      m_estMinDistSbt[NUMBER_SBT_MODE + 1]; // estimated minimum SSE value of the PU if using a SBT mode
   uint8_t         m_sbtRdoOrder[NUMBER_SBT_MODE];       // order of SBT mode in RDO
@@ -228,8 +242,81 @@ public:
       m_affMVListSize = std::min(m_affMVListSize + 1, m_affMVListMaxSize);
     }
   }
+  void resetUniMvList() { m_uniMvListIdx = 0; m_uniMvListSize = 0; } // logically empties the list: next write slot and entry count return to 0; stored entries are not cleared
+  void insertUniMvCands(CompArea blkArea, Mv cMvTemp[2][33]) // stores cMvTemp for blkArea: refreshes the MVs of an entry with the same position/size, otherwise appends a new entry
+  {
+    BlkUniMvInfo* curMvInfo = m_uniMvList + m_uniMvListIdx; // default destination: the next write slot
+    int j = 0;
+    for (; j < m_uniMvListSize; j++) // walk the valid entries backwards, starting at the slot written most recently
+    {
+      BlkUniMvInfo* prevMvInfo = m_uniMvList + ((m_uniMvListIdx - 1 - j + m_uniMvListMaxSize) % (m_uniMvListMaxSize)); // adding MaxSize keeps the index non-negative before the wrap-around; assumes m_uniMvListMaxSize > 0 (set outside this view)
+      if ((blkArea.x == prevMvInfo->x) && (blkArea.y == prevMvInfo->y) && (blkArea.width == prevMvInfo->w) && (blkArea.height == prevMvInfo->h))
+      {
+        break; // identical geometry: j now identifies the existing entry
+      }
+    }
+
+    if (j < m_uniMvListSize) // a matching entry was found
+    {
+      curMvInfo = m_uniMvList + ((m_uniMvListIdx - 1 - j + m_uniMvListMaxSize) % (m_uniMvListMaxSize)); // redirect the write to that entry instead of the next free slot
+    }
+
+    ::memcpy(curMvInfo->uniMvs, cMvTemp, 2 * 33 * sizeof(Mv)); // copies the complete [2][33] MV table in both cases
+    if (j == m_uniMvListSize)  // new element: no match, so record the geometry and advance the circular index
+    {
+      curMvInfo->x = blkArea.x;
+      curMvInfo->y = blkArea.y;
+      curMvInfo->w = blkArea.width;
+      curMvInfo->h = blkArea.height;
+      m_uniMvListSize = std::min(m_uniMvListSize + 1, m_uniMvListMaxSize); // entry count saturates at the capacity
+      m_uniMvListIdx = (m_uniMvListIdx + 1) % (m_uniMvListMaxSize); // wraps around, so once full the oldest slot is overwritten next
+    }
+  }
+  void savePrevUniMvInfo(CompArea blkArea, BlkUniMvInfo &tmpUniMvInfo, bool& isUniMvInfoSaved) // copies the stored entry matching blkArea (if any) into tmpUniMvInfo and flags the hit
+  {
+    int j = 0;
+    BlkUniMvInfo* curUniMvInfo = nullptr; // stays nullptr only when the list is empty; it is dereferenced solely on a hit
+    for (; j < m_uniMvListSize; j++) // walk the valid entries backwards from the most recently written slot
+    {
+      curUniMvInfo = m_uniMvList + ((m_uniMvListIdx - 1 - j + m_uniMvListMaxSize) % (m_uniMvListMaxSize)); // adding MaxSize keeps the index non-negative before the wrap-around
+      if ((blkArea.x == curUniMvInfo->x) && (blkArea.y == curUniMvInfo->y) && (blkArea.width == curUniMvInfo->w) && (blkArea.height == curUniMvInfo->h))
+      {
+        break; // identical position and size
+      }
+    }
+
+    if (j < m_uniMvListSize) // loop ended through the break, i.e. a match exists
+    {
+      isUniMvInfoSaved = true; // only ever set to true here; on a miss the flag is left untouched, so the caller must initialise it
+      tmpUniMvInfo = *curUniMvInfo; // copies the whole entry (MV table and geometry)
+    }
+  }
+  void addUniMvInfo(BlkUniMvInfo &tmpUniMVInfo) // writes tmpUniMVInfo into the list: replaces the entry with the same geometry, otherwise appends it
+  {
+    int j = 0;
+    BlkUniMvInfo* prevUniMvInfo = nullptr; // dereferenced only when a match was found
+    for (; j < m_uniMvListSize; j++) // walk the valid entries backwards from the most recently written slot
+    {
+      prevUniMvInfo = m_uniMvList + ((m_uniMvListIdx - 1 - j + m_uniMvListMaxSize) % (m_uniMvListMaxSize)); // adding MaxSize keeps the index non-negative before the wrap-around
+      if ((tmpUniMVInfo.x == prevUniMvInfo->x) && (tmpUniMVInfo.y == prevUniMvInfo->y) && (tmpUniMVInfo.w == prevUniMvInfo->w) && (tmpUniMVInfo.h == prevUniMvInfo->h))
+      {
+        break; // identical position and size
+      }
+    }
+    if (j < m_uniMvListSize) // match: overwrite in place, index and size stay unchanged
+    {
+      *prevUniMvInfo = tmpUniMVInfo;
+    }
+    else // no match: append at the write slot
+    {
+      m_uniMvList[m_uniMvListIdx] = tmpUniMVInfo;
+      m_uniMvListIdx = (m_uniMvListIdx + 1) % m_uniMvListMaxSize; // circular advance; assumes m_uniMvListMaxSize > 0 (set outside this view)
+      m_uniMvListSize = std::min(m_uniMvListSize + 1, m_uniMvListMaxSize); // entry count saturates at the capacity
+    }
+  }
   void resetSavedAffineMotion();
-  void storeAffineMotion( Mv acAffineMv[2][3], int16_t affineRefIdx[2], EAffineModel affineType, int gbiIdx );
+  void storeAffineMotion( Mv acAffineMv[2][3], int16_t affineRefIdx[2], EAffineModel affineType, int bcwIdx );
+  bool searchBv(PredictionUnit& pu, int xPos, int yPos, int width, int height, int picWidth, int picHeight, int xBv, int yBv, int ctuSize);
 protected:
 
   /// sub-function for motion vector refinement used in fractional-pel accuracy
@@ -257,6 +344,7 @@ protected:
     uint8_t       ucPointNr;
     int         subShiftMode;
     unsigned    imvShift;
+    bool        useAltHpelIf;
     bool        inCtuSearch;
     bool        zeroMV;
   } IntTZSearchStruct;
@@ -281,14 +369,22 @@ public:
   bool  predIBCSearch           ( CodingUnit& cu, Partitioner& partitioner, const int localSearchRangeX, const int localSearchRangeY, IbcHashMap& ibcHashMap);
   void  xIntraPatternSearch         ( PredictionUnit& pu, IntTZSearchStruct&  cStruct, Mv& rcMv, Distortion&  ruiCost, Mv* cMvSrchRngLT, Mv* cMvSrchRngRB, Mv* pcMvPred);
   void  xSetIntraSearchRange        ( PredictionUnit& pu, int iRoiWidth, int iRoiHeight, const int localSearchRangeX, const int localSearchRangeY, Mv& rcMvSrchRngLT, Mv& rcMvSrchRngRB);
-  void  resetIbcSearch() { m_numBVs = m_numBV16s = 0; }
+  void  resetIbcSearch() // clears the cached BV candidates held in m_defaultCachedBvs
+  {
+    for (int i = 0; i < IBC_NUM_CANDIDATES; i++) // assumes m_bvCands holds at least IBC_NUM_CANDIDATES entries -- declaration not visible here, confirm
+    {
+      m_defaultCachedBvs.m_bvCands[i].setZero();
+    }
+    m_defaultCachedBvs.currCnt = 0; // candidate count back to 0; m_numBVs is not touched by this function
+  }
   void  xIBCEstimation   ( PredictionUnit& pu, PelUnitBuf& origBuf, Mv     *pcMvPred, Mv     &rcMv, Distortion &ruiCost, const int localSearchRangeX, const int localSearchRangeY);
   void  xIBCSearchMVCandUpdate  ( Distortion  uiSad, int x, int y, Distortion* uiSadBestCand, Mv* cMVCand);
   int   xIBCSearchMVChromaRefine( PredictionUnit& pu, int iRoiWidth, int iRoiHeight, int cuPelX, int cuPelY, Distortion* uiSadBestCand, Mv*     cMVCand);
   void addToSortList(std::list<BlockHash>& listBlockHash, std::list<int>& listCost, int cost, const BlockHash& blockHash);
   bool predInterHashSearch(CodingUnit& cu, Partitioner& partitioner, bool& isPerfectMatch);
   bool xHashInterEstimation(PredictionUnit& pu, RefPicList& bestRefPicList, int& bestRefIndex, Mv& bestMv, Mv& bestMvd, int& bestMVPIndex, bool& isPerfectMatch);
-  int  xHashInterPredME(const PredictionUnit& pu, RefPicList currRefPicList, int currRefPicIndex, Mv bestMv[5]);
+  bool xRectHashInterEstimation(PredictionUnit& pu, RefPicList& bestRefPicList, int& bestRefIndex, Mv& bestMv, Mv& bestMvd, int& bestMVPIndex, bool& isPerfectMatch);
+  void selectRectangleMatchesInter(const MapIterator& itBegin, int count, std::list<BlockHash>& listBlockHash, const BlockHash& currBlockHash, int width, int height, int idxNonSimple, unsigned int* &hashValues, int baseNum, int picWidth, int picHeight, bool isHorizontal, uint16_t* curHashPic);
   void selectMatchesInter(const MapIterator& itBegin, int count, std::list<BlockHash>& vecBlockHash, const BlockHash& currBlockHash);
 protected:
 
@@ -326,7 +422,7 @@ protected:
                                     RefPicList            eRefPicList,
                                     int                   iRefIdx
                                   );
-  uint32_t xCalcAffineMVBits      ( PredictionUnit& pu, Mv mvCand[3], Mv mvPred[3], bool mvHighPrec = false );
+  uint32_t xCalcAffineMVBits      ( PredictionUnit& pu, Mv mvCand[3], Mv mvPred[3] );
 
   void xCopyAMVPInfo              ( AMVPInfo*   pSrc, AMVPInfo* pDst );
   uint32_t xGetMvpIdxBits             ( int iIdx, int iNum );
@@ -352,6 +448,8 @@ protected:
                                   );
 
   void xTZSearch                  ( const PredictionUnit& pu,
+                                    RefPicList            eRefPicList,
+                                    int                   iRefIdxPred,
                                     IntTZSearchStruct&    cStruct,
                                     Mv&                   rcMv,
                                     Distortion&           ruiSAD,
@@ -361,6 +459,8 @@ protected:
                                   );
 
   void xTZSearchSelective         ( const PredictionUnit& pu,
+                                    RefPicList            eRefPicList,
+                                    int                   iRefIdxPred,
                                     IntTZSearchStruct&    cStruct,
                                     Mv&                   rcMv,
                                     Distortion&           ruiSAD,
@@ -375,6 +475,8 @@ protected:
                                   );
 
   void xPatternSearchFast         ( const PredictionUnit& pu,
+                                    RefPicList            eRefPicList,
+                                    int                   iRefIdxPred,
                                     IntTZSearchStruct&    cStruct,
                                     Mv&                   rcMv,
                                     Distortion&           ruiSAD,
@@ -415,9 +517,9 @@ protected:
                                     Mv                    hevcMv[2][33]
                                   , Mv                    mvAffine4Para[2][33][3]
                                   , int                   refIdx4Para[2]
-                                  , uint8_t               gbiIdx = GBI_DEFAULT
-                                  , bool                  enforceGBiPred = false
-                                  , uint32_t              gbiIdxBits = 0
+                                  , uint8_t               bcwIdx = BCW_DEFAULT
+                                  , bool                  enforceBcwPred = false
+                                  , uint32_t              bcwIdxBits = 0
                                   );
 
   void xAffineMotionEstimation    ( PredictionUnit& pu,
@@ -447,24 +549,24 @@ protected:
   void xCopyAffineAMVPInfo        ( AffineAMVPInfo& src, AffineAMVPInfo& dst );
   void xCheckBestAffineMVP        ( PredictionUnit &pu, AffineAMVPInfo &affineAMVPInfo, RefPicList eRefPicList, Mv acMv[3], Mv acMvPred[3], int& riMVPIdx, uint32_t& ruiBits, Distortion& ruiCost );
 
-  Distortion xGetSymmetricCost( PredictionUnit& pu, PelUnitBuf& origBuf, RefPicList eCurRefPicList, const MvField& cCurMvField, MvField& cTarMvField , int gbiIdx );
+  Distortion xGetSymmetricCost( PredictionUnit& pu, PelUnitBuf& origBuf, RefPicList eCurRefPicList, const MvField& cCurMvField, MvField& cTarMvField , int bcwIdx );
 
   Distortion xSymmeticRefineMvSearch( PredictionUnit& pu, PelUnitBuf& origBuf, Mv& rcMvCurPred, Mv& rcMvTarPred
-    , RefPicList eRefPicList, MvField& rCurMvField, MvField& rTarMvField, Distortion uiMinCost, int searchPattern, int nSearchStepShift, uint32_t uiMaxSearchRounds , int gbiIdx );
+    , RefPicList eRefPicList, MvField& rCurMvField, MvField& rTarMvField, Distortion uiMinCost, int searchPattern, int nSearchStepShift, uint32_t uiMaxSearchRounds , int bcwIdx );
 
-  void xSymmetricMotionEstimation( PredictionUnit& pu, PelUnitBuf& origBuf, Mv& rcMvCurPred, Mv& rcMvTarPred, RefPicList eRefPicList, MvField& rCurMvField, MvField& rTarMvField, Distortion& ruiCost, int gbiIdx );
+  void xSymmetricMotionEstimation( PredictionUnit& pu, PelUnitBuf& origBuf, Mv& rcMvCurPred, Mv& rcMvTarPred, RefPicList eRefPicList, MvField& rCurMvField, MvField& rTarMvField, Distortion& ruiCost, int bcwIdx );
 
   bool xReadBufferedAffineUniMv   ( PredictionUnit& pu, RefPicList eRefPicList, int32_t iRefIdx, Mv acMvPred[3], Mv acMv[3], uint32_t& ruiBits, Distortion& ruiCost
                                     , int& mvpIdx, const AffineAMVPInfo& aamvpi
   );
-  double xGetMEDistortionWeight   ( uint8_t gbiIdx, RefPicList eRefPicList);
+  double xGetMEDistortionWeight   ( uint8_t bcwIdx, RefPicList eRefPicList);
   bool xReadBufferedUniMv         ( PredictionUnit& pu, RefPicList eRefPicList, int32_t iRefIdx, Mv& pcMvPred, Mv& rcMv, uint32_t& ruiBits, Distortion& ruiCost);
 
-  void xClipMv                    ( Mv& rcMv, const struct Position& pos, const struct Size& size, const class SPS& sps );
+  void xClipMv                    ( Mv& rcMv, const struct Position& pos, const struct Size& size, const class SPS& sps, const class PPS& pps );
 
 public:
   void resetBufferedUniMotions    () { m_uniMotions.reset(); }
-  uint32_t getWeightIdxBits       ( uint8_t gbiIdx ) { return m_estWeightIdxBits[gbiIdx]; }
+  uint32_t getWeightIdxBits       ( uint8_t bcwIdx ) { return m_estWeightIdxBits[bcwIdx]; }
   void initWeightIdxBits          ();
   void symmvdCheckBestMvp(
     PredictionUnit& pu,
@@ -472,7 +574,7 @@ public:
     Mv curMv,
     RefPicList curRefList,
     AMVPInfo amvpInfo[2][33],
-    int32_t gbiIdx,
+    int32_t bcwIdx,
     Mv cMvPredSym[2],
     int32_t mvpIdxSym[2],
     Distortion& bestCost,
@@ -480,7 +582,7 @@ public:
     );
 protected:
 
-  void xExtDIFUpSamplingH         ( CPelBuf* pcPattern );
+  void xExtDIFUpSamplingH(CPelBuf* pcPattern, bool useAltHpelIf);
   void xExtDIFUpSamplingQ         ( CPelBuf* pcPatternKey, Mv halfPelRef );
   uint32_t xDetermineBestMvp      ( PredictionUnit& pu, Mv acMvTemp[3], int& mvpIdx, const AffineAMVPInfo& aamvpi );
   // -------------------------------------------------------------------------------------------------------------------
@@ -498,8 +600,10 @@ public:
   void xEncodeInterResidualQT     (CodingStructure &cs, Partitioner &partitioner, const ComponentID &compID);
   void xEstimateInterResidualQT   (CodingStructure &cs, Partitioner &partitioner, Distortion *puiZeroDist = NULL
     , const bool luma = true, const bool chroma = true
+    , PelUnitBuf* orgResi = NULL
   );
   uint64_t xGetSymbolFracBitsInter  (CodingStructure &cs, Partitioner &partitioner);
+  uint64_t xCalcPuMeBits            (PredictionUnit& pu);
 
 };// END CLASS DEFINITION EncSearch
 
diff --git a/source/Lib/EncoderLib/IntraSearch.cpp b/source/Lib/EncoderLib/IntraSearch.cpp
index 4e2852e5fdb03188ddb80fea56aa73d2a2da0a73..ebc5ab44132e81daeafa53e611292b0ec49b2815 100644
--- a/source/Lib/EncoderLib/IntraSearch.cpp
+++ b/source/Lib/EncoderLib/IntraSearch.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -49,10 +49,9 @@
 
 #include <math.h>
 #include <limits>
-
  //! \ingroup EncoderLib
  //! \{
-
+#define PLTCtx(c) SubCtx( Ctx::Palette, c )
 IntraSearch::IntraSearch()
   : m_pSplitCS      (nullptr)
   , m_pFullCS       (nullptr)
@@ -69,6 +68,17 @@ IntraSearch::IntraSearch()
   {
     m_pSharedPredTransformSkip[ch] = nullptr;
   }
+  m_truncBinBits = nullptr;
+  m_escapeNumBins = nullptr;
+  m_minErrorIndexMap = nullptr;
+  for (unsigned i = 0; i < (MAXPLTSIZE + 1); i++)
+  {
+    m_indexError[i] = nullptr;
+  }
+  for (unsigned i = 0; i < NUM_TRELLIS_STATE; i++)
+  {
+    m_statePtRDOQ[i] = nullptr;
+  }
 }
 
 
@@ -151,7 +161,44 @@ void IntraSearch::destroy()
   }
 
   m_tmpStorageLCU.destroy();
+  m_colorTransResiBuf.destroy();
   m_isInitialized = false;
+  if (m_truncBinBits != nullptr)
+  {
+    for (unsigned i = 0; i < m_symbolSize; i++)
+    {
+      delete[] m_truncBinBits[i];
+      m_truncBinBits[i] = nullptr;
+    }
+    delete[] m_truncBinBits;
+    m_truncBinBits = nullptr;
+  }
+  if (m_escapeNumBins != nullptr)
+  {
+    delete[] m_escapeNumBins;
+    m_escapeNumBins = nullptr;
+  }
+  if (m_indexError[0] != nullptr)
+  {
+    for (unsigned i = 0; i < (MAXPLTSIZE + 1); i++)
+    {
+      delete[] m_indexError[i];
+      m_indexError[i] = nullptr;
+    }
+  }
+  if (m_minErrorIndexMap != nullptr)
+  {
+    delete[] m_minErrorIndexMap;
+    m_minErrorIndexMap = nullptr;
+  }
+  if (m_statePtRDOQ[0] != nullptr)
+  {
+    for (unsigned i = 0; i < NUM_TRELLIS_STATE; i++)
+    {
+      delete[] m_statePtRDOQ[i];
+      m_statePtRDOQ[i] = nullptr;
+    }
+  }
 }
 
 IntraSearch::~IntraSearch()
@@ -171,6 +218,7 @@ void IntraSearch::init( EncCfg*        pcEncCfg,
                         const uint32_t     maxCUHeight,
                         const uint32_t     maxTotalCUDepth
                        , EncReshape*   pcReshape
+                       , const unsigned bitDepthY
 )
 {
   CHECK(m_isInitialized, "Already initialized");
@@ -185,6 +233,7 @@ void IntraSearch::init( EncCfg*        pcEncCfg,
 
   IntraPrediction::init( cform, pcEncCfg->getBitDepth( CHANNEL_TYPE_LUMA ) );
   m_tmpStorageLCU.create(UnitArea(cform, Area(0, 0, MAX_CU_SIZE, MAX_CU_SIZE)));
+  m_colorTransResiBuf.create(UnitArea(cform, Area(0, 0, MAX_CU_SIZE, MAX_CU_SIZE)));
 
   for( uint32_t ch = 0; ch < MAX_NUM_TBLOCKS; ch++ )
   {
@@ -218,8 +267,9 @@ void IntraSearch::init( EncCfg*        pcEncCfg,
         m_pBestCS[width][height] = new CodingStructure( m_unitCache.cuCache, m_unitCache.puCache, m_unitCache.tuCache );
         m_pTempCS[width][height] = new CodingStructure( m_unitCache.cuCache, m_unitCache.puCache, m_unitCache.tuCache );
 
-        m_pBestCS[width][height]->create( m_pcEncCfg->getChromaFormatIdc(), Area( 0, 0, gp_sizeIdxInfo->sizeFrom( width ), gp_sizeIdxInfo->sizeFrom( height ) ), false );
-        m_pTempCS[width][height]->create( m_pcEncCfg->getChromaFormatIdc(), Area( 0, 0, gp_sizeIdxInfo->sizeFrom( width ), gp_sizeIdxInfo->sizeFrom( height ) ), false );
+        m_pBestCS[width][height]->create(m_pcEncCfg->getChromaFormatIdc(), Area(0, 0, gp_sizeIdxInfo->sizeFrom(width), gp_sizeIdxInfo->sizeFrom(height)), false, (bool)pcEncCfg->getPLTMode());
+        m_pTempCS[width][height]->create(m_pcEncCfg->getChromaFormatIdc(), Area(0, 0, gp_sizeIdxInfo->sizeFrom(width), gp_sizeIdxInfo->sizeFrom(height)), false, (bool)pcEncCfg->getPLTMode());
+
         m_pFullCS [width][height] = new CodingStructure*[uiNumLayersToAllocateFull];
         m_pSplitCS[width][height] = new CodingStructure*[uiNumLayersToAllocateSplit];
 
@@ -227,14 +277,13 @@ void IntraSearch::init( EncCfg*        pcEncCfg,
         {
           m_pFullCS [width][height][layer] = new CodingStructure( m_unitCache.cuCache, m_unitCache.puCache, m_unitCache.tuCache );
 
-          m_pFullCS [width][height][layer]->create( m_pcEncCfg->getChromaFormatIdc(), Area( 0, 0, gp_sizeIdxInfo->sizeFrom( width ), gp_sizeIdxInfo->sizeFrom( height ) ), false );
+          m_pFullCS[width][height][layer]->create(m_pcEncCfg->getChromaFormatIdc(), Area(0, 0, gp_sizeIdxInfo->sizeFrom(width), gp_sizeIdxInfo->sizeFrom(height)), false, (bool)pcEncCfg->getPLTMode());
         }
 
         for( uint32_t layer = 0; layer < uiNumLayersToAllocateSplit; layer++ )
         {
           m_pSplitCS[width][height][layer] = new CodingStructure( m_unitCache.cuCache, m_unitCache.puCache, m_unitCache.tuCache );
-
-          m_pSplitCS[width][height][layer]->create( m_pcEncCfg->getChromaFormatIdc(), Area( 0, 0, gp_sizeIdxInfo->sizeFrom( width ), gp_sizeIdxInfo->sizeFrom( height ) ), false );
+          m_pSplitCS[width][height][layer]->create(m_pcEncCfg->getChromaFormatIdc(), Area(0, 0, gp_sizeIdxInfo->sizeFrom(width), gp_sizeIdxInfo->sizeFrom(height)), false, (bool)pcEncCfg->getPLTMode());
         }
       }
       else
@@ -255,116 +304,172 @@ void IntraSearch::init( EncCfg*        pcEncCfg,
   for( uint32_t depth = 0; depth < uiNumSaveLayersToAllocate; depth++ )
   {
     m_pSaveCS[depth] = new CodingStructure( m_unitCache.cuCache, m_unitCache.puCache, m_unitCache.tuCache );
-    m_pSaveCS[depth]->create( UnitArea( cform, Area( 0, 0, maxCUWidth, maxCUHeight ) ), false );
+    m_pSaveCS[depth]->create(UnitArea(cform, Area(0, 0, maxCUWidth, maxCUHeight)), false, (bool)pcEncCfg->getPLTMode());
   }
 
   m_isInitialized = true;
+  if (pcEncCfg->getPLTMode())
+  {
+    m_symbolSize = (1 << bitDepthY); // pixel values are within [0, SymbolSize-1] with size SymbolSize
+    if (m_truncBinBits == nullptr)
+    {
+      m_truncBinBits = new uint16_t*[m_symbolSize];
+      for (unsigned i = 0; i < m_symbolSize; i++)
+      {
+        m_truncBinBits[i] = new uint16_t[m_symbolSize + 1];
+      }
+    }
+    if (m_escapeNumBins == nullptr)
+    {
+      m_escapeNumBins = new uint16_t[m_symbolSize];
+    }
+    initTBCTable(bitDepthY);
+    if (m_indexError[0] == nullptr)
+    {
+      for (unsigned i = 0; i < (MAXPLTSIZE + 1); i++)
+      {
+        m_indexError[i] = new double[MAX_CU_BLKSIZE_PLT*MAX_CU_BLKSIZE_PLT];
+      }
+    }
+    if (m_minErrorIndexMap == nullptr)
+    {
+      m_minErrorIndexMap = new uint8_t[MAX_CU_BLKSIZE_PLT*MAX_CU_BLKSIZE_PLT];
+    }
+    if (m_statePtRDOQ[0] == nullptr)
+    {
+      for (unsigned i = 0; i < NUM_TRELLIS_STATE; i++)
+      {
+        m_statePtRDOQ[i] = new uint8_t[MAX_CU_BLKSIZE_PLT*MAX_CU_BLKSIZE_PLT];
+      }
+    }
+  }
 }
 
 
 //////////////////////////////////////////////////////////////////////////
 // INTRA PREDICTION
 //////////////////////////////////////////////////////////////////////////
+static constexpr double COST_UNKNOWN = -65536.0; // sentinel returned by findInterCUCost() when no stored cost matches the CU
+
+double IntraSearch::findInterCUCost( CodingUnit &cu ) // returns the cost stored for an area identical to cu's luma area, or COST_UNKNOWN
+{
+  if( cu.isConsIntra() && !cu.slice->isIntra() ) // lookup is attempted only for a CU reporting isConsIntra() in a slice that is not intra
+  {
+    //search corresponding inter CU cost
+    for( int i = 0; i < m_numCuInSCIPU; i++ ) // linear scan over the m_numCuInSCIPU recorded areas
+    {
+      if( cu.lumaPos() == m_cuAreaInSCIPU[i].pos() && cu.lumaSize() == m_cuAreaInSCIPU[i].size() ) // both luma position and luma size must match
+      {
+        return m_cuCostInSCIPU[i]; // first match wins
+      }
+    }
+  }
+  return COST_UNKNOWN; // condition not met or no matching area recorded
+}
 
-void IntraSearch::estIntraPredLumaQT( CodingUnit &cu, Partitioner &partitioner, const double bestCostSoFar )
+bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, const double bestCostSoFar, bool mtsCheckRangeFlag, int mtsFirstCheckId, int mtsLastCheckId, bool moreProbMTSIdxFirst, CodingStructure* bestCS)
 {
   CodingStructure       &cs            = *cu.cs;
   const SPS             &sps           = *cs.sps;
-  const uint32_t             uiWidthBit    = g_aucLog2[partitioner.currArea().lwidth() ];
-  const uint32_t             uiHeightBit   =                   g_aucLog2[partitioner.currArea().lheight()];
+  const uint32_t             uiWidthBit    = floorLog2(partitioner.currArea().lwidth() );
+  const uint32_t             uiHeightBit   =                   floorLog2(partitioner.currArea().lheight());
 
   // Lambda calculation at equivalent Qp of 4 is recommended because at that Qp, the quantization divisor is 1.
-  const double sqrtLambdaForFirstPass = m_pcRdCost->getMotionLambda(cu.transQuantBypass) / double(1 << SCALE_BITS);
-
+  const double sqrtLambdaForFirstPass = m_pcRdCost->getMotionLambda( ) * FRAC_BITS_SCALE;
 
   //===== loop over partitions =====
 
   const TempCtx ctxStart          ( m_CtxCache, m_CABACEstimator->getCtx() );
+  const TempCtx ctxStartMipFlag    ( m_CtxCache, SubCtx( Ctx::MipFlag,          m_CABACEstimator->getCtx() ) );
+  const TempCtx ctxStartIspMode    ( m_CtxCache, SubCtx( Ctx::ISPMode,          m_CABACEstimator->getCtx() ) );
+  const TempCtx ctxStartPlanarFlag ( m_CtxCache, SubCtx( Ctx::IntraLumaPlanarFlag, m_CABACEstimator->getCtx() ) );
   const TempCtx ctxStartIntraMode(m_CtxCache, SubCtx(Ctx::IntraLumaMpmFlag, m_CABACEstimator->getCtx()));
-  const TempCtx ctxStartMHIntraMode ( m_CtxCache, SubCtx( Ctx::MHIntraPredMode,        m_CABACEstimator->getCtx() ) );
   const TempCtx ctxStartMrlIdx      ( m_CtxCache, SubCtx( Ctx::MultiRefLineIdx,        m_CABACEstimator->getCtx() ) );
 
   CHECK( !cu.firstPU, "CU has no PUs" );
   const bool keepResi   = cs.pps->getPpsRangeExtension().getCrossComponentPredictionEnabledFlag() || KEEP_PRED_AND_RESI_SIGNALS;
 
+  // variables for saving fast intra modes scan results across multiple LFNST passes
+  bool LFNSTLoadFlag = sps.getUseLFNST() && cu.lfnstIdx != 0;
+  bool LFNSTSaveFlag = sps.getUseLFNST() && cu.lfnstIdx == 0;
+
+  LFNSTSaveFlag &= sps.getUseIntraMTS() ? cu.mtsFlag == 0 : true;
+
+  const uint32_t lfnstIdx = cu.lfnstIdx;
+  double costInterCU = findInterCUCost( cu );
+
+  const int width  = partitioner.currArea().lwidth();
+  const int height = partitioner.currArea().lheight();
+
+  // Marking MTS usage for faster MTS
+  // 0: MTS is either not applicable for current CU (cuWidth > MTS_INTRA_MAX_CU_SIZE or cuHeight > MTS_INTRA_MAX_CU_SIZE), not active in the config file or the fast decision algorithm is not used in this case
+  // 1: MTS fast algorithm can be applied for the current CU, and the DCT2 is being checked
+  // 2: MTS is being checked for current CU. Stored results of DCT2 can be utilized for speedup
+  uint8_t mtsUsageFlag = 0;
+  const int maxSizeEMT = MTS_INTRA_MAX_CU_SIZE;
+  if( width <= maxSizeEMT && height <= maxSizeEMT && sps.getUseIntraMTS() )
+  {
+    mtsUsageFlag = ( sps.getUseLFNST() && cu.mtsFlag == 1 ) ? 2 : 1;
+  }
+
+  if( width * height < 64 && !m_pcEncCfg->getUseFastLFNST() )
+  {
+    mtsUsageFlag = 0;
+  }
 
-  uint32_t extraModes = 0; // add two extra modes, which would be used after uiMode <= DC_IDX is removed for cu.nsstIdx == 3
+  const bool colorTransformIsEnabled = sps.getUseColorTrans() && !CS::isDualITree(cs);
+  const bool isFirstColorSpace       = colorTransformIsEnabled && ((m_pcEncCfg->getRGBFormatFlag() && cu.colorTransform) || (!m_pcEncCfg->getRGBFormatFlag() && !cu.colorTransform));
+  const bool isSecondColorSpace      = colorTransformIsEnabled && ((m_pcEncCfg->getRGBFormatFlag() && !cu.colorTransform) || (!m_pcEncCfg->getRGBFormatFlag() && cu.colorTransform));
 
-  const int width   = partitioner.currArea().lwidth();
-  const int height  = partitioner.currArea().lheight();
-  int nOptionsForISP = NUM_INTRA_SUBPARTITIONS_MODES;
   double bestCurrentCost = bestCostSoFar;
+  bool ispCanBeUsed   = sps.getUseISP() && cu.mtsFlag == 0 && cu.lfnstIdx == 0 && CU::canUseISP(width, height, cu.cs->sps->getMaxTbSize());
+  bool saveDataForISP = ispCanBeUsed && (!colorTransformIsEnabled || isFirstColorSpace);
+  bool testISP        = ispCanBeUsed && (!colorTransformIsEnabled || !cu.colorTransform);
 
-  int ispOptions[NUM_INTRA_SUBPARTITIONS_MODES] = { 0 };
-  if( nOptionsForISP > 1 )
+  if ( saveDataForISP )
   {
-#if MAX_TB_SIZE_SIGNALLING
-    auto splitsThatCanBeUsedForISP = CU::canUseISPSplit( width, height, cu.cs->sps->getMaxTbSize() );
-#else
-    auto splitsThatCanBeUsedForISP = CU::canUseISPSplit( width, height, MAX_TB_SIZEY );
-#endif
-    if( splitsThatCanBeUsedForISP == CAN_USE_VER_AND_HORL_SPLITS )
-    {
-      const CodingUnit* cuLeft  = cu.ispMode != NOT_INTRA_SUBPARTITIONS ? cs.getCU( cs.area.blocks[partitioner.chType].pos().offset( -1, 0 ), partitioner.chType ) : nullptr;
-      const CodingUnit* cuAbove = cu.ispMode != NOT_INTRA_SUBPARTITIONS ? cs.getCU( cs.area.blocks[partitioner.chType].pos().offset( 0, -1 ), partitioner.chType ) : nullptr;
-      bool ispHorIsFirstTest = CU::firstTestISPHorSplit( width, height, COMPONENT_Y, cuLeft, cuAbove );
-      if( ispHorIsFirstTest )
-      {
-        ispOptions[1] = HOR_INTRA_SUBPARTITIONS;
-        ispOptions[2] = VER_INTRA_SUBPARTITIONS;
-      }
-      else
-      {
-        ispOptions[1] = VER_INTRA_SUBPARTITIONS;
-        ispOptions[2] = HOR_INTRA_SUBPARTITIONS;
-      }
-    }
-    else if( splitsThatCanBeUsedForISP == HOR_INTRA_SUBPARTITIONS )
-    {
-      nOptionsForISP = 2;
-      ispOptions[1] = HOR_INTRA_SUBPARTITIONS;
-    }
-    else if( splitsThatCanBeUsedForISP == VER_INTRA_SUBPARTITIONS )
-    {
-      nOptionsForISP = 2;
-      ispOptions[1] = VER_INTRA_SUBPARTITIONS;
-    }
-    else
-    {
-      nOptionsForISP = 1;
-    }
+    //reset the intra modes lists variables
+    m_ispCandListHor.clear();
+    m_ispCandListVer.clear();
   }
-  if( nOptionsForISP > 1 )
+  if( testISP )
   {
-    //variables for the full RD list without MRL modes
-    m_rdModeListWithoutMrl      .clear();
-    m_rdModeListWithoutMrlHor   .clear();
-    m_rdModeListWithoutMrlVer   .clear();
-    //variables with data from regular intra used to skip ISP splits
-    m_intraModeDiagRatio        .clear();
-    m_intraModeHorVerRatio      .clear();
-    m_intraModeTestedNormalIntra.clear();
+    //reset the variables used for the tests
+    m_regIntraRDListWithCosts.clear();
+    int numTotalPartsHor = (int)width  >> floorLog2(CU::getISPSplitDim(width, height, TU_1D_VERT_SPLIT));
+    int numTotalPartsVer = (int)height >> floorLog2(CU::getISPSplitDim(width, height, TU_1D_HORZ_SPLIT));
+    m_ispTestedModes[0].init( numTotalPartsHor, numTotalPartsVer );
+    //the total number of subpartitions is modified to take into account the cases where LFNST cannot be combined with ISP due to size restrictions
+    numTotalPartsHor = sps.getUseLFNST() && CU::canUseLfnstWithISP(cu.Y(), HOR_INTRA_SUBPARTITIONS) ? numTotalPartsHor : 0;
+    numTotalPartsVer = sps.getUseLFNST() && CU::canUseLfnstWithISP(cu.Y(), VER_INTRA_SUBPARTITIONS) ? numTotalPartsVer : 0;
+    for (int j = 1; j < NUM_LFNST_NUM_PER_SET; j++)
+    {
+      m_ispTestedModes[j].init(numTotalPartsHor, numTotalPartsVer);
+    }
   }
 
-  static_vector<uint32_t,   FAST_UDI_MAX_RDMODE_NUM> uiHadModeList;
+  const bool testBDPCM = (sps.getBDPCMEnabled()!=0) && CU::bdpcmAllowed(cu, ComponentID(partitioner.chType)) && cu.mtsFlag == 0 && cu.lfnstIdx == 0;
+  static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> uiHadModeList;
   static_vector<double, FAST_UDI_MAX_RDMODE_NUM> CandCostList;
   static_vector<double, FAST_UDI_MAX_RDMODE_NUM> CandHadList;
 
-  static_vector<int, FAST_UDI_MAX_RDMODE_NUM> extendRefList;
-  static_vector<int, FAST_UDI_MAX_RDMODE_NUM>* nullList = NULL;
-
   auto &pu = *cu.firstPU;
+  bool validReturn = false;
   {
     CandHadList.clear();
     CandCostList.clear();
     uiHadModeList.clear();
-    extendRefList.clear();
 
     CHECK(pu.cu != &cu, "PU is not contained in the CU");
 
     //===== determine set of modes to be tested (using prediction signal only) =====
     int numModesAvailable = NUM_LUMA_MODE; // total number of Intra modes
-    static_vector< uint32_t, FAST_UDI_MAX_RDMODE_NUM > uiRdModeList;
+    const bool fastMip    = sps.getUseMIP() && m_pcEncCfg->getUseFastMIP();
+    const bool mipAllowed = sps.getUseMIP() && isLuma(partitioner.chType) && ((cu.lfnstIdx == 0) || allowLfnstWithMip(cu.firstPU->lumaSize()));
+    const bool testMip = mipAllowed && !(cu.lwidth() > (8 * cu.lheight()) || cu.lheight() > (8 * cu.lwidth()));
+    const bool supportedMipBlkSize = pu.lwidth() <= MIP_MAX_WIDTH && pu.lheight() <= MIP_MAX_HEIGHT;
+
+    static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> uiRdModeList;
 
     int numModesForFullRD = 3;
     numModesForFullRD = g_aucIntraModeNumFast_UseMPM_2D[uiWidthBit - MIN_CU_LOG2][uiHeightBit - MIN_CU_LOG2];
@@ -373,383 +478,507 @@ void IntraSearch::estIntraPredLumaQT( CodingUnit &cu, Partitioner &partitioner,
     numModesForFullRD = numModesAvailable;
 #endif
 
+    if (isSecondColorSpace)
     {
-      // this should always be true
-      CHECK( !pu.Y().valid(), "PU is not valid" );
-#if ENABLE_JVET_L0283_MRL
-      bool isFirstLineOfCtu = (((pu.block(COMPONENT_Y).y)&((pu.cs->sps)->getMaxCUWidth() - 1)) == 0);
-      int numOfPassesExtendRef = (isFirstLineOfCtu ? 1 : MRL_NUM_REF_LINES);
-#endif
-      pu.multiRefIdx = 0;
-
-      //===== init pattern for luma prediction =====
-      initIntraPatternChType( cu, pu.Y(), IntraPrediction::useFilteredIntraRefSamples( COMPONENT_Y, pu, false, pu ) );
-      if( numModesForFullRD != numModesAvailable )
+      uiRdModeList.clear();
+      if (m_numSavedRdModeFirstColorSpace[m_savedRdModeIdx] > 0)
+      {
+        for (int i = 0; i < m_numSavedRdModeFirstColorSpace[m_savedRdModeIdx]; i++)
+        {
+          uiRdModeList.push_back(m_savedRdModeFirstColorSpace[m_savedRdModeIdx][i]);
+        }
+      }
+      else
       {
-        CHECK( numModesForFullRD >= numModesAvailable, "Too many modes for full RD search" );
+        return false;
+      }
+    }
+    else
+    {
+      if (mtsUsageFlag != 2)
+      {
+        // this should always be true
+        CHECK(!pu.Y().valid(), "PU is not valid");
+        bool isFirstLineOfCtu     = (((pu.block(COMPONENT_Y).y) & ((pu.cs->sps)->getMaxCUWidth() - 1)) == 0);
+        int  numOfPassesExtendRef = ((!sps.getUseMRL() || isFirstLineOfCtu) ? 1 : MRL_NUM_REF_LINES);
+        pu.multiRefIdx            = 0;
+
+        if (numModesForFullRD != numModesAvailable)
+        {
+          CHECK(numModesForFullRD >= numModesAvailable, "Too many modes for full RD search");
 
-        const CompArea &area = pu.Y();
+          const CompArea &area = pu.Y();
 
-        PelBuf piOrg         = cs.getOrgBuf(area);
-        PelBuf piPred        = cs.getPredBuf(area);
+          PelBuf piOrg  = cs.getOrgBuf(area);
+          PelBuf piPred = cs.getPredBuf(area);
 
-        DistParam distParam;
+          DistParam distParamSad;
+          DistParam distParamHad;
+          if (cu.slice->getPicHeader()->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag())
+          {
+            CompArea tmpArea(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size());
+            PelBuf   tmpOrg = m_tmpStorageLCU.getBuf(tmpArea);
+            tmpOrg.copyFrom(piOrg);
+            tmpOrg.rspSignal(m_pcReshape->getFwdLUT());
+            m_pcRdCost->setDistParam(distParamSad, tmpOrg, piPred, sps.getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y,
+                                     false);   // Use SAD cost
+            m_pcRdCost->setDistParam(distParamHad, tmpOrg, piPred, sps.getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y,
+                                     true);   // Use HAD (SATD) cost
+          }
+          else
+          {
+            m_pcRdCost->setDistParam(distParamSad, piOrg, piPred, sps.getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y,
+                                     false);   // Use SAD cost
+            m_pcRdCost->setDistParam(distParamHad, piOrg, piPred, sps.getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y,
+                                     true);   // Use HAD (SATD) cost
+          }
 
-        const bool bUseHadamard = cu.transQuantBypass == 0;
+          distParamSad.applyWeight = false;
+          distParamHad.applyWeight = false;
 
-        if (cu.slice->getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag())
-        {
-          CompArea      tmpArea(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size());
-          PelBuf tmpOrg = m_tmpStorageLCU.getBuf(tmpArea);
-          tmpOrg.copyFrom(piOrg);
-          tmpOrg.rspSignal(m_pcReshape->getFwdLUT());
-          m_pcRdCost->setDistParam(distParam, tmpOrg, piPred, sps.getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, bUseHadamard);
-        }
-        else
-        m_pcRdCost->setDistParam(distParam, piOrg, piPred, sps.getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, bUseHadamard);
+          if (testMip && supportedMipBlkSize)
+          {
+            numModesForFullRD += fastMip
+                                   ? std::max(numModesForFullRD, floorLog2(std::min(pu.lwidth(), pu.lheight())) - 1)
+                                   : numModesForFullRD;
+          }
+          const int numHadCand = (testMip ? 2 : 1) * 3;
 
-        distParam.applyWeight = false;
+          //*** Derive (regular) candidates using Hadamard
+          cu.mipFlag = false;
 
-        bool bSatdChecked[NUM_INTRA_MODE];
-        memset( bSatdChecked, 0, sizeof( bSatdChecked ) );
+          //===== init pattern for luma prediction =====
+          initIntraPatternChType(cu, pu.Y(), true);
+          bool bSatdChecked[NUM_INTRA_MODE];
+          memset(bSatdChecked, 0, sizeof(bSatdChecked));
 
-        {
-          for( int modeIdx = 0; modeIdx < numModesAvailable; modeIdx++ )
+          if (!LFNSTLoadFlag)
           {
-            uint32_t       uiMode = modeIdx;
-            Distortion uiSad  = 0;
-
-            // Skip checking extended Angular modes in the first round of SATD
-            if( uiMode > DC_IDX && ( uiMode & 1 ) )
+            for (int modeIdx = 0; modeIdx < numModesAvailable; modeIdx++)
             {
-              continue;
-            }
-
-            bSatdChecked[uiMode] = true;
+              uint32_t   uiMode    = modeIdx;
+              Distortion minSadHad = 0;
 
-            pu.intraDir[0] = modeIdx;
+              // Skip checking extended Angular modes in the first round of SATD
+              if (uiMode > DC_IDX && (uiMode & 1))
+              {
+                continue;
+              }
 
-            if( useDPCMForFirstPassIntraEstimation( pu, uiMode ) )
-            {
-              encPredIntraDPCM( COMPONENT_Y, piOrg, piPred, uiMode );
-            }
-            else
-            {
-              predIntraAng( COMPONENT_Y, piPred, pu, IntraPrediction::useFilteredIntraRefSamples( COMPONENT_Y, pu, true, pu ) );
-            }
-            // use Hadamard transform here
-            uiSad += distParam.distFunc(distParam);
+              bSatdChecked[uiMode] = true;
 
-            // NB xFracModeBitsIntra will not affect the mode for chroma that may have already been pre-estimated.
-            m_CABACEstimator->getCtx() = SubCtx(Ctx::IntraLumaMpmFlag, ctxStartIntraMode);
-            m_CABACEstimator->getCtx() = SubCtx( Ctx::MHIntraPredMode, ctxStartMHIntraMode );
-            m_CABACEstimator->getCtx() = SubCtx( Ctx::MultiRefLineIdx, ctxStartMrlIdx );
+              pu.intraDir[0] = modeIdx;
 
-            uint64_t fracModeBits = xFracModeBitsIntra(pu, uiMode, CHANNEL_TYPE_LUMA);
+              initPredIntraParams(pu, pu.Y(), sps);
+              predIntraAng(COMPONENT_Y, piPred, pu);
+              // Use the min between SAD and HAD as the cost criterion
+              // SAD is scaled by 2 to align with the scaling of HAD
+              minSadHad += std::min(distParamSad.distFunc(distParamSad) * 2, distParamHad.distFunc(distParamHad));
 
-            double cost = ( double ) uiSad + ( double ) fracModeBits * sqrtLambdaForFirstPass;
+              // NB xFracModeBitsIntra will not affect the mode for chroma that may have already been pre-estimated.
+              m_CABACEstimator->getCtx() = SubCtx( Ctx::MipFlag, ctxStartMipFlag );
+              m_CABACEstimator->getCtx() = SubCtx( Ctx::ISPMode, ctxStartIspMode );
+              m_CABACEstimator->getCtx() = SubCtx(Ctx::IntraLumaPlanarFlag, ctxStartPlanarFlag);
+              m_CABACEstimator->getCtx() = SubCtx(Ctx::IntraLumaMpmFlag, ctxStartIntraMode);
+              m_CABACEstimator->getCtx() = SubCtx( Ctx::MultiRefLineIdx, ctxStartMrlIdx );
 
-            DTRACE( g_trace_ctx, D_INTRA_COST, "IntraHAD: %u, %llu, %f (%d)\n", uiSad, fracModeBits, cost, uiMode );
+              uint64_t fracModeBits = xFracModeBitsIntra(pu, uiMode, CHANNEL_TYPE_LUMA);
 
-            updateCandList( uiMode, cost,  uiRdModeList, CandCostList
-              , extendRefList, 0
-              , numModesForFullRD + extraModes );
-            updateCandList(uiMode, (double) uiSad, uiHadModeList, CandHadList
-              , *nullList, -1
-              , 3 + extraModes);
-          }
-        } // NSSTFlag
+              double cost = (double) minSadHad + (double) fracModeBits * sqrtLambdaForFirstPass;
 
-        // forget the extra modes
-        uiRdModeList.resize( numModesForFullRD );
-        CandCostList.resize(numModesForFullRD);
-        extendRefList.resize(numModesForFullRD);
-        static_vector<unsigned, FAST_UDI_MAX_RDMODE_NUM> parentCandList(FAST_UDI_MAX_RDMODE_NUM);
-        std::copy_n(uiRdModeList.begin(), numModesForFullRD, parentCandList.begin());
+              DTRACE(g_trace_ctx, D_INTRA_COST, "IntraHAD: %u, %llu, %f (%d)\n", minSadHad, fracModeBits, cost, uiMode);
 
-        // Second round of SATD for extended Angular modes
-        for (int modeIdx = 0; modeIdx < numModesForFullRD; modeIdx++)
-        {
-          unsigned parentMode = parentCandList[modeIdx];
-          if (parentMode > (DC_IDX + 1) && parentMode < (NUM_LUMA_MODE - 1))
+              updateCandList(ModeInfo(false, false, 0, NOT_INTRA_SUBPARTITIONS, uiMode), cost, uiRdModeList,
+                             CandCostList, numModesForFullRD);
+              updateCandList(ModeInfo(false, false, 0, NOT_INTRA_SUBPARTITIONS, uiMode), double(minSadHad),
+                             uiHadModeList, CandHadList, numHadCand);
+            }
+            if (!sps.getUseMIP() && LFNSTSaveFlag)
+            {
+              // save found best modes
+              m_uiSavedNumRdModesLFNST = numModesForFullRD;
+              m_uiSavedRdModeListLFNST = uiRdModeList;
+              m_dSavedModeCostLFNST    = CandCostList;
+              // PBINTRA fast
+              m_uiSavedHadModeListLFNST = uiHadModeList;
+              m_dSavedHadListLFNST      = CandHadList;
+              LFNSTSaveFlag             = false;
+            }
+          }   // !LFNSTLoadFlag
+          if (!sps.getUseMIP() && LFNSTLoadFlag)
+          {
+            // restore saved modes
+            numModesForFullRD = m_uiSavedNumRdModesLFNST;
+            uiRdModeList      = m_uiSavedRdModeListLFNST;
+            CandCostList      = m_dSavedModeCostLFNST;
+            // PBINTRA fast
+            uiHadModeList = m_uiSavedHadModeListLFNST;
+            CandHadList   = m_dSavedHadListLFNST;
+          }   // !sps.getUseMIP() && LFNSTLoadFlag
+
+          if (!(sps.getUseMIP() && LFNSTLoadFlag))
           {
-            for (int subModeIdx = -1; subModeIdx <= 1; subModeIdx += 2)
+            static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> parentCandList = uiRdModeList;
+
+            // Second round of SATD for extended Angular modes
+            for (int modeIdx = 0; modeIdx < numModesForFullRD; modeIdx++)
             {
-              unsigned mode = parentMode + subModeIdx;
+              unsigned parentMode = parentCandList[modeIdx].modeId;
+              if (parentMode > (DC_IDX + 1) && parentMode < (NUM_LUMA_MODE - 1))
+              {
+                for (int subModeIdx = -1; subModeIdx <= 1; subModeIdx += 2)
+                {
+                  unsigned mode = parentMode + subModeIdx;
 
+                  if (!bSatdChecked[mode])
+                  {
+                    pu.intraDir[0] = mode;
 
-              if (!bSatdChecked[mode])
-              {
-                pu.intraDir[0] = mode;
+                    initPredIntraParams(pu, pu.Y(), sps);
+                    predIntraAng(COMPONENT_Y, piPred, pu);
 
-                if (useDPCMForFirstPassIntraEstimation(pu, mode))
-                {
-                  encPredIntraDPCM(COMPONENT_Y, piOrg, piPred, mode);
-                }
-                else
-                {
-                  predIntraAng(COMPONENT_Y, piPred, pu,
-                               IntraPrediction::useFilteredIntraRefSamples(COMPONENT_Y, pu, true, pu));
-                }
-                // use Hadamard transform here
-                Distortion sad = distParam.distFunc(distParam);
+                    // Use the min between SAD and SATD as the cost criterion
+                    // SAD is scaled by 2 to align with the scaling of HAD
+                    Distortion minSadHad =
+                      std::min(distParamSad.distFunc(distParamSad) * 2, distParamHad.distFunc(distParamHad));
 
-                // NB xFracModeBitsIntra will not affect the mode for chroma that may have already been pre-estimated.
-                m_CABACEstimator->getCtx() = SubCtx(Ctx::IntraLumaMpmFlag, ctxStartIntraMode);
-                m_CABACEstimator->getCtx() = SubCtx( Ctx::MHIntraPredMode, ctxStartMHIntraMode );
-                m_CABACEstimator->getCtx() = SubCtx( Ctx::MultiRefLineIdx, ctxStartMrlIdx );
+                    // NB xFracModeBitsIntra will not affect the mode for chroma that may have already been
+                    // pre-estimated.
+                    m_CABACEstimator->getCtx() = SubCtx(Ctx::MipFlag, ctxStartMipFlag);
+                    m_CABACEstimator->getCtx() = SubCtx(Ctx::ISPMode, ctxStartIspMode);
+                    m_CABACEstimator->getCtx() = SubCtx(Ctx::IntraLumaPlanarFlag, ctxStartPlanarFlag);
+                    m_CABACEstimator->getCtx() = SubCtx(Ctx::IntraLumaMpmFlag, ctxStartIntraMode);
+                    m_CABACEstimator->getCtx() = SubCtx(Ctx::MultiRefLineIdx, ctxStartMrlIdx);
 
-                uint64_t fracModeBits = xFracModeBitsIntra(pu, mode, CHANNEL_TYPE_LUMA);
+                    uint64_t fracModeBits = xFracModeBitsIntra(pu, mode, CHANNEL_TYPE_LUMA);
 
-                double cost = (double) sad + (double) fracModeBits * sqrtLambdaForFirstPass;
+                    double cost = (double) minSadHad + (double) fracModeBits * sqrtLambdaForFirstPass;
 
-                updateCandList(mode, cost, uiRdModeList, CandCostList
-                  , extendRefList, 0
-                  , numModesForFullRD);
-                updateCandList(mode, (double)sad, uiHadModeList, CandHadList
-                  , *nullList, -1
-                  , 3);
+                    updateCandList(ModeInfo(false, false, 0, NOT_INTRA_SUBPARTITIONS, mode), cost, uiRdModeList,
+                                   CandCostList, numModesForFullRD);
+                    updateCandList(ModeInfo(false, false, 0, NOT_INTRA_SUBPARTITIONS, mode), double(minSadHad),
+                                   uiHadModeList, CandHadList, numHadCand);
 
-                bSatdChecked[mode] = true;
+                    bSatdChecked[mode] = true;
+                  }
+                }
               }
             }
-          }
-        }
-        if( nOptionsForISP > 1 )
-        {
-          //we save the list with no mrl modes to keep only the Hadamard selected modes (no mpms)
-          m_rdModeListWithoutMrl.resize( numModesForFullRD );
-          std::copy_n( uiRdModeList.begin(), numModesForFullRD, m_rdModeListWithoutMrl.begin() );
-        }
-#if ENABLE_JVET_L0283_MRL
-        pu.multiRefIdx = 1;
-        const int  numMPMs = NUM_MOST_PROBABLE_MODES;
-        unsigned  multiRefMPM [numMPMs];
-        PU::getIntraMPMs(pu, multiRefMPM);
-        for (int mRefNum = 1; mRefNum < numOfPassesExtendRef; mRefNum++)
-        {
-          int multiRefIdx = MULTI_REF_LINE_IDX[mRefNum];
-
-          pu.multiRefIdx = multiRefIdx;
-          {
-            initIntraPatternChType(cu, pu.Y(), IntraPrediction::useFilteredIntraRefSamples(COMPONENT_Y, pu, false, pu));
-          }
-          for (int x = 0; x < numMPMs; x++)
-          {
-            uint32_t mode = multiRefMPM[x];
+            if (saveDataForISP)
             {
-              pu.intraDir[0] = mode;
+              // we save the regular intra modes list
+              m_ispCandListHor = uiRdModeList;
+            }
+            pu.multiRefIdx    = 1;
+            const int numMPMs = NUM_MOST_PROBABLE_MODES;
+            unsigned  multiRefMPM[numMPMs];
+            PU::getIntraMPMs(pu, multiRefMPM);
+            for (int mRefNum = 1; mRefNum < numOfPassesExtendRef; mRefNum++)
+            {
+              int multiRefIdx = MULTI_REF_LINE_IDX[mRefNum];
 
-              if (useDPCMForFirstPassIntraEstimation(pu, mode))
+              pu.multiRefIdx = multiRefIdx;
               {
-                encPredIntraDPCM(COMPONENT_Y, piOrg, piPred, mode);
+                initIntraPatternChType(cu, pu.Y(), true);
               }
-              else
+              for (int x = 1; x < numMPMs; x++)
               {
-                predIntraAng(COMPONENT_Y, piPred, pu, IntraPrediction::useFilteredIntraRefSamples(COMPONENT_Y, pu, true, pu));
+                uint32_t mode = multiRefMPM[x];
+                {
+                  pu.intraDir[0] = mode;
+                  initPredIntraParams(pu, pu.Y(), sps);
+
+                  predIntraAng(COMPONENT_Y, piPred, pu);
+
+                  // Use the min between SAD and SATD as the cost criterion
+                  // SAD is scaled by 2 to align with the scaling of HAD
+                  Distortion minSadHad =
+                    std::min(distParamSad.distFunc(distParamSad) * 2, distParamHad.distFunc(distParamHad));
+
+                  // NB xFracModeBitsIntra will not affect the mode for chroma that may have already been pre-estimated.
+                  m_CABACEstimator->getCtx() = SubCtx(Ctx::MipFlag, ctxStartMipFlag);
+                  m_CABACEstimator->getCtx() = SubCtx(Ctx::ISPMode, ctxStartIspMode);
+                  m_CABACEstimator->getCtx() = SubCtx(Ctx::IntraLumaPlanarFlag, ctxStartPlanarFlag);
+                  m_CABACEstimator->getCtx() = SubCtx(Ctx::IntraLumaMpmFlag, ctxStartIntraMode);
+                  m_CABACEstimator->getCtx() = SubCtx(Ctx::MultiRefLineIdx, ctxStartMrlIdx);
+
+                  uint64_t fracModeBits = xFracModeBitsIntra(pu, mode, CHANNEL_TYPE_LUMA);
+
+                  double cost = (double) minSadHad + (double) fracModeBits * sqrtLambdaForFirstPass;
+                  updateCandList(ModeInfo(false, false, multiRefIdx, NOT_INTRA_SUBPARTITIONS, mode), cost, uiRdModeList,
+                                 CandCostList, numModesForFullRD);
+                  updateCandList(ModeInfo(false, false, multiRefIdx, NOT_INTRA_SUBPARTITIONS, mode), double(minSadHad),
+                                 uiHadModeList, CandHadList, numHadCand);
+                }
               }
+            }
+            CHECKD(uiRdModeList.size() != numModesForFullRD, "Error: RD mode list size");
 
-              // use Hadamard transform here
-              Distortion sad = distParam.distFunc(distParam);
+            if (LFNSTSaveFlag && testMip
+                && !allowLfnstWithMip(cu.firstPU->lumaSize()))   // save a different set for the next run
+            {
+              // save found best modes
+              m_uiSavedRdModeListLFNST = uiRdModeList;
+              m_dSavedModeCostLFNST    = CandCostList;
+              // PBINTRA fast
+              m_uiSavedHadModeListLFNST = uiHadModeList;
+              m_dSavedHadListLFNST      = CandHadList;
+              m_uiSavedNumRdModesLFNST =
+                g_aucIntraModeNumFast_UseMPM_2D[uiWidthBit - MIN_CU_LOG2][uiHeightBit - MIN_CU_LOG2];
+              m_uiSavedRdModeListLFNST.resize(m_uiSavedNumRdModesLFNST);
+              m_dSavedModeCostLFNST.resize(m_uiSavedNumRdModesLFNST);
+              // PBINTRA fast
+              m_uiSavedHadModeListLFNST.resize(3);
+              m_dSavedHadListLFNST.resize(3);
+              LFNSTSaveFlag = false;
+            }
+            //*** Derive MIP candidates using Hadamard
+            if (testMip && !supportedMipBlkSize)
+            {
+              // avoid estimation for unsupported blk sizes
+              const int transpOff    = getNumModesMip(pu.Y());
+              const int numModesFull = (transpOff << 1);
+              for (uint32_t uiModeFull = 0; uiModeFull < numModesFull; uiModeFull++)
+              {
+                const bool     isTransposed = (uiModeFull >= transpOff ? true : false);
+                const uint32_t uiMode       = (isTransposed ? uiModeFull - transpOff : uiModeFull);
 
-              // NB xFracModeBitsIntra will not affect the mode for chroma that may have already been pre-estimated.
-              m_CABACEstimator->getCtx() = SubCtx(Ctx::IntraLumaMpmFlag, ctxStartIntraMode);
-              m_CABACEstimator->getCtx() = SubCtx( Ctx::MHIntraPredMode, ctxStartMHIntraMode );
-              m_CABACEstimator->getCtx() = SubCtx( Ctx::MultiRefLineIdx, ctxStartMrlIdx );
+                numModesForFullRD++;
+                uiRdModeList.push_back(ModeInfo(true, isTransposed, 0, NOT_INTRA_SUBPARTITIONS, uiMode));
+                CandCostList.push_back(0);
+              }
+            }
+            else if (testMip)
+            {
+              cu.mipFlag     = true;
+              pu.multiRefIdx = 0;
 
-              uint64_t fracModeBits = xFracModeBitsIntra(pu, mode, CHANNEL_TYPE_LUMA);
+              double mipHadCost[MAX_NUM_MIP_MODE] = { MAX_DOUBLE };
 
-              double cost = (double)sad + (double)fracModeBits * sqrtLambdaForFirstPass;
-              updateCandList(mode, cost, uiRdModeList, CandCostList, extendRefList, multiRefIdx, numModesForFullRD);
-            }
-          }
-        }
-#endif
-        CandCostList.resize(numModesForFullRD);
-        extendRefList.resize(numModesForFullRD);
-        if( m_pcEncCfg->getFastUDIUseMPMEnabled() )
-        {
-          const int numMPMs = NUM_MOST_PROBABLE_MODES;
-          unsigned  uiPreds[numMPMs];
+              initIntraPatternChType(cu, pu.Y());
+              initIntraMip(pu, pu.Y());
 
-          pu.multiRefIdx = 0;
+              const int transpOff    = getNumModesMip(pu.Y());
+              const int numModesFull = (transpOff << 1);
+              for (uint32_t uiModeFull = 0; uiModeFull < numModesFull; uiModeFull++)
+              {
+                const bool     isTransposed = (uiModeFull >= transpOff ? true : false);
+                const uint32_t uiMode       = (isTransposed ? uiModeFull - transpOff : uiModeFull);
 
-          const int numCand = PU::getIntraMPMs( pu, uiPreds );
+                pu.mipTransposedFlag           = isTransposed;
+                pu.intraDir[CHANNEL_TYPE_LUMA] = uiMode;
+                predIntraMip(COMPONENT_Y, piPred, pu);
 
-          for( int j = 0; j < numCand; j++ )
-          {
-            bool mostProbableModeIncluded = false;
-            int  mostProbableMode         = uiPreds[j];
+                // Use the min between SAD and HAD as the cost criterion
+                // SAD is scaled by 2 to align with the scaling of HAD
+                Distortion minSadHad =
+                  std::min(distParamSad.distFunc(distParamSad) * 2, distParamHad.distFunc(distParamHad));
 
+                m_CABACEstimator->getCtx() = SubCtx(Ctx::MipFlag, ctxStartMipFlag);
 
-            for( int i = 0; i < numModesForFullRD; i++ )
-            {
-              mostProbableModeIncluded |= (mostProbableMode == uiRdModeList[i] && extendRefList[i] == 0);
+                uint64_t fracModeBits = xFracModeBitsIntra(pu, uiMode, CHANNEL_TYPE_LUMA);
+
+                double cost            = double(minSadHad) + double(fracModeBits) * sqrtLambdaForFirstPass;
+                mipHadCost[uiModeFull] = cost;
+                DTRACE(g_trace_ctx, D_INTRA_COST, "IntraMIP: %u, %llu, %f (%d)\n", minSadHad, fracModeBits, cost,
+                       uiModeFull);
+
+                updateCandList(ModeInfo(true, isTransposed, 0, NOT_INTRA_SUBPARTITIONS, uiMode), cost, uiRdModeList,
+                               CandCostList, numModesForFullRD + 1);
+                updateCandList(ModeInfo(true, isTransposed, 0, NOT_INTRA_SUBPARTITIONS, uiMode),
+                               0.8 * double(minSadHad), uiHadModeList, CandHadList, numHadCand);
+              }
+
+              const double thresholdHadCost = 1.0 + 1.4 / sqrt((double) (pu.lwidth() * pu.lheight()));
+              reduceHadCandList(uiRdModeList, CandCostList, numModesForFullRD, thresholdHadCost, mipHadCost, pu,
+                                fastMip);
             }
-            if( !mostProbableModeIncluded )
+            if (sps.getUseMIP() && LFNSTSaveFlag)
             {
-              extendRefList.push_back(0);
-              numModesForFullRD++;
-              uiRdModeList.push_back( mostProbableMode );
+              // save found best modes
+              m_uiSavedNumRdModesLFNST = numModesForFullRD;
+              m_uiSavedRdModeListLFNST = uiRdModeList;
+              m_dSavedModeCostLFNST    = CandCostList;
+              // PBINTRA fast
+              m_uiSavedHadModeListLFNST = uiHadModeList;
+              m_dSavedHadListLFNST      = CandHadList;
+              LFNSTSaveFlag             = false;
             }
           }
-          if( nOptionsForISP > 1 )
+          else   // if( sps.getUseMIP() && LFNSTLoadFlag)
           {
-            //we add the ISP MPMs to the list without mrl modes
-            m_rdModeListWithoutMrlHor = m_rdModeListWithoutMrl;
-            m_rdModeListWithoutMrlVer = m_rdModeListWithoutMrl;
-            static_vector<uint32_t, FAST_UDI_MAX_RDMODE_NUM>* listPointer;
-            for( int k = 1; k < nOptionsForISP; k++ )
+            // restore saved modes
+            numModesForFullRD = m_uiSavedNumRdModesLFNST;
+            uiRdModeList      = m_uiSavedRdModeListLFNST;
+            CandCostList      = m_dSavedModeCostLFNST;
+            // PBINTRA fast
+            uiHadModeList = m_uiSavedHadModeListLFNST;
+            CandHadList   = m_dSavedHadListLFNST;
+          }
+
+          if (m_pcEncCfg->getFastUDIUseMPMEnabled())
+          {
+            const int numMPMs = NUM_MOST_PROBABLE_MODES;
+            unsigned  uiPreds[numMPMs];
+
+            pu.multiRefIdx = 0;
+
+            const int numCand = PU::getIntraMPMs(pu, uiPreds);
+
+            for (int j = 0; j < numCand; j++)
+            {
+              bool     mostProbableModeIncluded = false;
+              ModeInfo mostProbableMode( false, false, 0, NOT_INTRA_SUBPARTITIONS, uiPreds[j] );
+
+              for (int i = 0; i < numModesForFullRD; i++)
+              {
+                mostProbableModeIncluded |= (mostProbableMode == uiRdModeList[i]);
+              }
+              if (!mostProbableModeIncluded)
+              {
+                numModesForFullRD++;
+                uiRdModeList.push_back(mostProbableMode);
+                CandCostList.push_back(0);
+              }
+            }
+            if (saveDataForISP)
             {
-              cu.ispMode = ispOptions[k];
-              listPointer = &( cu.ispMode == HOR_INTRA_SUBPARTITIONS ? m_rdModeListWithoutMrlHor : m_rdModeListWithoutMrlVer );
-              const int numCandISP = PU::getIntraMPMs( pu, uiPreds );
-              for( int j = 0; j < numCandISP; j++ )
+              // we add the MPMs to the list that contains only regular intra modes
+              for (int j = 0; j < numCand; j++)
               {
-                bool mostProbableModeIncluded = false;
-                int  mostProbableMode = uiPreds[j];
+                bool     mostProbableModeIncluded = false;
+                ModeInfo mostProbableMode(false, false, 0, NOT_INTRA_SUBPARTITIONS, uiPreds[j]);
 
-                for( int i = 0; i < listPointer->size(); i++ )
+                for (int i = 0; i < m_ispCandListHor.size(); i++)
                 {
-                  mostProbableModeIncluded |= ( mostProbableMode == listPointer->at( i ) );
+                  mostProbableModeIncluded |= (mostProbableMode == m_ispCandListHor[i]);
                 }
-                if( !mostProbableModeIncluded )
+                if (!mostProbableModeIncluded)
                 {
-                  listPointer->push_back( mostProbableMode );
+                  m_ispCandListHor.push_back(mostProbableMode);
                 }
               }
             }
-            cu.ispMode = NOT_INTRA_SUBPARTITIONS;
           }
         }
-      }
-      else
-      {
-        for( int i = 0; i < numModesForFullRD; i++ )
+        else
+        {
+          THROW("Full search not supported for MIP");
+        }
+        if (sps.getUseLFNST() && mtsUsageFlag == 1)
         {
-          uiRdModeList.push_back( i );
+          // Store the modes to be checked with RD
+          m_savedNumRdModes[lfnstIdx] = numModesForFullRD;
+          std::copy_n(uiRdModeList.begin(), numModesForFullRD, m_savedRdModeList[lfnstIdx]);
         }
       }
-    }
-
-    if( nOptionsForISP > 1 ) // we remove the non-MPMs from the ISP lists
-    {
-      static_vector< uint32_t, FAST_UDI_MAX_RDMODE_NUM > uiRdModeListCopyHor = m_rdModeListWithoutMrlHor;
-      m_rdModeListWithoutMrlHor.clear();
-      static_vector< uint32_t, FAST_UDI_MAX_RDMODE_NUM > uiRdModeListCopyVer = m_rdModeListWithoutMrlVer;
-      m_rdModeListWithoutMrlVer.clear();
-      static_vector< uint32_t, FAST_UDI_MAX_RDMODE_NUM > *listPointerCopy, *listPointer;
-      for( int ispOptionIdx = 1; ispOptionIdx < nOptionsForISP; ispOptionIdx++ )
+      else   // mtsUsageFlag == 2 (here we potentially reduce the number of modes that will be full-RD checked)
       {
-        cu.ispMode = ispOptions[ispOptionIdx];
-        //we get the mpm cand list
-        const int numMPMs = NUM_MOST_PROBABLE_MODES;
-        unsigned  uiPreds[numMPMs];
-
-        pu.multiRefIdx = 0;
+        if ((m_pcEncCfg->getUseFastLFNST() || !cu.slice->isIntra()) && m_bestModeCostValid[lfnstIdx])
+        {
+          numModesForFullRD = 0;
 
-        PU::getIntraMPMs( pu, uiPreds );
+          double thresholdSkipMode = 1.0 + ((cu.lfnstIdx > 0) ? 0.1 : 1.0) * (1.4 / sqrt((double) (width * height)));
 
-        //we copy only the ISP MPMs
-        listPointerCopy = &( cu.ispMode == HOR_INTRA_SUBPARTITIONS ? uiRdModeListCopyHor : uiRdModeListCopyVer );
-        listPointer     = &( cu.ispMode == HOR_INTRA_SUBPARTITIONS ? m_rdModeListWithoutMrlHor : m_rdModeListWithoutMrlVer );
-        for( int k = 0; k < listPointerCopy->size(); k++ )
-        {
-          for( int q = 0; q < numMPMs; q++ )
+          // Skip checking the modes with much larger R-D cost than the best mode
+          for (int i = 0; i < m_savedNumRdModes[lfnstIdx]; i++)
           {
-            if( listPointerCopy->at( k ) == uiPreds[q] )
+            if (m_modeCostStore[lfnstIdx][i] <= thresholdSkipMode * m_bestModeCostStore[lfnstIdx])
             {
-              listPointer->push_back( listPointerCopy->at( k ) );
-              break;
+              uiRdModeList.push_back(m_savedRdModeList[lfnstIdx][i]);
+              numModesForFullRD++;
             }
           }
         }
+        else   // this is necessary because we skip the candidates list calculation, since it was already obtained for
+               // the DCT-II. Now we load it
+        {
+          // Restore the modes to be checked with RD
+          numModesForFullRD = m_savedNumRdModes[lfnstIdx];
+          uiRdModeList.resize(numModesForFullRD);
+          std::copy_n(m_savedRdModeList[lfnstIdx], m_savedNumRdModes[lfnstIdx], uiRdModeList.begin());
+          CandCostList.resize(numModesForFullRD);
+        }
       }
-      cu.ispMode = NOT_INTRA_SUBPARTITIONS;
-    }
 
+      CHECK(numModesForFullRD != uiRdModeList.size(), "Inconsistent state!");
 
-    CHECK( numModesForFullRD != uiRdModeList.size(), "Inconsistent state!" );
+      // after this point, don't use numModesForFullRD
 
-    // after this point, don't use numModesForFullRD
-
-    // PBINTRA fast
-    if( m_pcEncCfg->getUsePbIntraFast() && !cs.slice->isIntra() && uiRdModeList.size() < numModesAvailable )
-    {
-      if( CandHadList.size() < 3 || CandHadList[2] > cs.interHad * PBINTRA_RATIO )
+      // PBINTRA fast
+      if (m_pcEncCfg->getUsePbIntraFast() && !cs.slice->isIntra() && uiRdModeList.size() < numModesAvailable
+          && !cs.slice->getDisableSATDForRD() && (mtsUsageFlag != 2 || lfnstIdx > 0))
       {
-        uiRdModeList.resize( std::min<size_t>( uiRdModeList.size(), 2 ) );
-        extendRefList.resize( std::min<size_t>( extendRefList.size(), 2 ) );
-        if( nOptionsForISP > 1 )
+        double   pbintraRatio = (lfnstIdx > 0) ? 1.25 : PBINTRA_RATIO;
+        int      maxSize      = -1;
+        ModeInfo bestMipMode;
+        int      bestMipIdx = -1;
+        for (int idx = 0; idx < uiRdModeList.size(); idx++)
         {
-          m_rdModeListWithoutMrlHor.resize( std::min<size_t>( m_rdModeListWithoutMrlHor.size(), 2 ) );
-          m_rdModeListWithoutMrlVer.resize( std::min<size_t>( m_rdModeListWithoutMrlVer.size(), 2 ) );
+          if (uiRdModeList[idx].mipFlg)
+          {
+            bestMipMode = uiRdModeList[idx];
+            bestMipIdx  = idx;
+            break;
+          }
         }
-      }
-      if( CandHadList.size() < 2 || CandHadList[1] > cs.interHad * PBINTRA_RATIO )
-      {
-        uiRdModeList.resize( std::min<size_t>( uiRdModeList.size(), 1 ) );
-        extendRefList.resize( std::min<size_t>( extendRefList.size(), 1 ) );
-        if( nOptionsForISP > 1 )
+        const int numHadCand = 3;
+        for (int k = numHadCand - 1; k >= 0; k--)
         {
-          m_rdModeListWithoutMrlHor.resize( std::min<size_t>( m_rdModeListWithoutMrlHor.size(), 1 ) );
-          m_rdModeListWithoutMrlVer.resize( std::min<size_t>( m_rdModeListWithoutMrlVer.size(), 1 ) );
+          if (CandHadList.size() < (k + 1) || CandHadList[k] > cs.interHad * pbintraRatio)
+          {
+            maxSize = k;
+          }
         }
-      }
-      if( CandHadList.size() < 1 || CandHadList[0] > cs.interHad * PBINTRA_RATIO )
-      {
-        cs.dist = std::numeric_limits<Distortion>::max();
-        cs.interHad = 0;
+        if (maxSize > 0)
+        {
+          uiRdModeList.resize(std::min<size_t>(uiRdModeList.size(), maxSize));
+          if (bestMipIdx >= 0)
+          {
+            if (uiRdModeList.size() <= bestMipIdx)
+            {
+              uiRdModeList.push_back(bestMipMode);
+            }
+          }
+          if (saveDataForISP)
+          {
+            m_ispCandListHor.resize(std::min<size_t>(m_ispCandListHor.size(), maxSize));
+          }
+        }
+        if (maxSize == 0)
+        {
+          cs.dist     = std::numeric_limits<Distortion>::max();
+          cs.interHad = 0;
 
-        //===== reset context models =====
-        m_CABACEstimator->getCtx() = SubCtx(Ctx::IntraLumaMpmFlag, ctxStartIntraMode);
-        m_CABACEstimator->getCtx() = SubCtx( Ctx::MHIntraPredMode, ctxStartMHIntraMode );
-        m_CABACEstimator->getCtx() = SubCtx( Ctx::MultiRefLineIdx, ctxStartMrlIdx );
+          //===== reset context models =====
+          m_CABACEstimator->getCtx() = SubCtx(Ctx::MipFlag, ctxStartMipFlag);
+          m_CABACEstimator->getCtx() = SubCtx(Ctx::ISPMode, ctxStartIspMode);
+          m_CABACEstimator->getCtx() = SubCtx(Ctx::IntraLumaPlanarFlag, ctxStartPlanarFlag);
+          m_CABACEstimator->getCtx() = SubCtx(Ctx::IntraLumaMpmFlag, ctxStartIntraMode);
+          m_CABACEstimator->getCtx() = SubCtx(Ctx::MultiRefLineIdx, ctxStartMrlIdx);
 
-        return;
+          return false;
+        }
       }
     }
 
-    if ( nOptionsForISP > 1 )
-    {
-      //we create a single full RD list that includes all intra modes using regular intra, MRL and ISP
-      auto* firstIspList  = ispOptions[1] == HOR_INTRA_SUBPARTITIONS ? &m_rdModeListWithoutMrlHor : &m_rdModeListWithoutMrlVer;
-      auto* secondIspList = ispOptions[1] == HOR_INTRA_SUBPARTITIONS ? &m_rdModeListWithoutMrlVer : &m_rdModeListWithoutMrlHor;
-
-      if ( m_pcEncCfg->getUseFastISP() )
-      {
-        // find the first non-MRL mode
-        size_t indexFirstMode = std::find( extendRefList.begin(), extendRefList.end(), 0 ) - extendRefList.begin();
-        // if not found, just take the last mode
-        if( indexFirstMode >= extendRefList.size() ) indexFirstMode = extendRefList.size() - 1;
-        // move the mode indicated by indexFirstMode to the beginning
-        for( int idx = ((int)indexFirstMode) - 1; idx >= 0; idx-- )
-        {
-          std::swap( extendRefList[idx], extendRefList[idx + 1] );
-          std::swap( uiRdModeList [idx], uiRdModeList [idx + 1] );
-        }
-        //insert all ISP modes after the first non-mrl mode
-        uiRdModeList.insert( uiRdModeList.begin() + 1, secondIspList->begin(), secondIspList->end() );
-        uiRdModeList.insert( uiRdModeList.begin() + 1, firstIspList->begin() , firstIspList->end()  );
+    int numNonISPModes = (int)uiRdModeList.size();
 
-        extendRefList.insert( extendRefList.begin() + 1, secondIspList->size(), MRL_NUM_REF_LINES + ispOptions[2] );
-        extendRefList.insert( extendRefList.begin() + 1, firstIspList->size() , MRL_NUM_REF_LINES + ispOptions[1] );
-      }
-      else
+    if ( testISP )
+    {
+      // we reserve positions for ISP in the common full RD list
+      const int maxNumRDModesISP = sps.getUseLFNST() ? 16 * NUM_LFNST_NUM_PER_SET : 16;
+      m_curIspLfnstIdx = 0;
+      for (int i = 0; i < maxNumRDModesISP; i++)
       {
-        //insert all ISP modes at the end of the current list
-        uiRdModeList.insert( uiRdModeList.end(), secondIspList->begin(), secondIspList->end() );
-        uiRdModeList.insert( uiRdModeList.end(), firstIspList->begin() , firstIspList->end()  );
-
-        extendRefList.insert( extendRefList.end(), secondIspList->size(), MRL_NUM_REF_LINES + ispOptions[2] );
-        extendRefList.insert( extendRefList.end(), firstIspList->size() , MRL_NUM_REF_LINES + ispOptions[1] );
+        uiRdModeList.push_back( ModeInfo( false, false, 0, INTRA_SUBPARTITIONS_RESERVED, 0 ) );
       }
     }
-    CHECKD(uiRdModeList.size() != extendRefList.size(),"uiRdModeList and extendRefList do not have the same size!");
 
     //===== check modes (using r-d costs) =====
-    uint32_t       uiBestPUMode  = 0;
-    int            bestExtendRef = 0;
+    ModeInfo       uiBestPUMode;
+    int            bestBDPCMMode = 0;
+    double         bestCostNonBDPCM = MAX_DOUBLE;
 
     CodingStructure *csTemp = m_pTempCS[gp_sizeIdxInfo->idxFrom( cu.lwidth() )][gp_sizeIdxInfo->idxFrom( cu.lheight() )];
     CodingStructure *csBest = m_pBestCS[gp_sizeIdxInfo->idxFrom( cu.lwidth() )][gp_sizeIdxInfo->idxFrom( cu.lheight() )];
@@ -758,49 +987,72 @@ void IntraSearch::estIntraPredLumaQT( CodingUnit &cu, Partitioner &partitioner,
     csBest->slice = cs.slice;
     csTemp->initStructData();
     csBest->initStructData();
+    csTemp->picture = cs.picture;   // both scratch structures reference the picture of the caller's CS
+    csBest->picture = cs.picture;
 
     // just to be sure
     numModesForFullRD = ( int ) uiRdModeList.size();
-    PartSplit intraSubPartitionsProcOrder = TU_NO_ISP;
-    int       bestNormalIntraModeIndex    = -1;
-    uint8_t   bestIspOption               = NOT_INTRA_SUBPARTITIONS;
     TUIntraSubPartitioner subTuPartitioner( partitioner );
-    bool      ispHorAllZeroCbfs = false, ispVerAllZeroCbfs = false;
-
-    for (uint32_t uiMode = 0; uiMode < numModesForFullRD; uiMode++)
+    if ( testISP )   // ISP will be tried: start both tracked costs at MAX_DOUBLE
     {
-      // set luma prediction mode
-      uint32_t uiOrgMode = uiRdModeList[uiMode];
+      m_modeCtrl->setIspCost( MAX_DOUBLE );
+      m_modeCtrl->setMtsFirstPassNoIspCost( MAX_DOUBLE );
+    }
+    int bestLfnstIdx = cu.lfnstIdx;   // written back to cu.lfnstIdx after the mode loop; replaced only when an ISP candidate becomes the best
 
-      cu.ispMode = extendRefList[uiMode] > MRL_NUM_REF_LINES ? extendRefList[uiMode] - MRL_NUM_REF_LINES : NOT_INTRA_SUBPARTITIONS;
-        pu.intraDir[0] = uiOrgMode;
+    for (int mode = isSecondColorSpace ? 0 : -2 * int(testBDPCM); mode < (int)uiRdModeList.size(); mode++)   // indices -2 and -1 are the BDPCM candidates; they exist only when testBDPCM is set and this is not the second colour space
+    {
+      // set CU/PU to luma prediction mode
+      ModeInfo uiOrgMode;
+      if (sps.getUseColorTrans() && !m_pcEncCfg->getRGBFormatFlag() && isSecondColorSpace && mode)   // under these conditions only list entry 0 is evaluated
+      {
+        continue;
+      }
 
-        int multiRefIdx = 0;
-        pu.multiRefIdx = multiRefIdx;
-        if( cu.ispMode )
+      if (mode < 0 || (isSecondColorSpace && m_savedBDPCMModeFirstColorSpace[m_savedRdModeIdx][mode]))   // BDPCM candidate: negative index, or a non-zero BDPCM mode saved for this entry in the first colour space
+      {
+        cu.bdpcmMode = mode < 0 ? -mode : m_savedBDPCMModeFirstColorSpace[m_savedRdModeIdx][mode];   // index -1 -> bdpcmMode 1, index -2 -> bdpcmMode 2
+        uiOrgMode = ModeInfo( false, false, 0, NOT_INTRA_SUBPARTITIONS, cu.bdpcmMode == 2 ? VER_IDX : HOR_IDX );   // bdpcmMode 2 uses VER_IDX, any other value HOR_IDX
+      }
+      else
+      {
+        cu.bdpcmMode = 0;
+        uiOrgMode = uiRdModeList[mode];
+      }
+      if (!cu.bdpcmMode && uiRdModeList[mode].ispMod == INTRA_SUBPARTITIONS_RESERVED)   // placeholder entry reserved for ISP; the bdpcmMode test short-circuits before uiRdModeList is indexed with a negative mode
+      {
+        if (mode == numNonISPModes)   // the list needs to be sorted only once
         {
-          intraSubPartitionsProcOrder = CU::getISPType( cu, COMPONENT_Y );
-          bool tuIsDividedInRows = CU::divideTuInRows( cu );
-          if ( ( tuIsDividedInRows && ispHorAllZeroCbfs ) || ( !tuIsDividedInRows && ispVerAllZeroCbfs ) )
-          {
-            continue;
-          }
-          if( m_intraModeDiagRatio.at( bestNormalIntraModeIndex ) > 1.25 )
+          if (m_pcEncCfg->getUseFastISP())
           {
-            continue;
+            m_modeCtrl->setBestPredModeDCT2(uiBestPUMode.modeId);   // fast ISP: pass the best mode id found so far to the mode controller
           }
-          if( ( m_intraModeHorVerRatio.at( bestNormalIntraModeIndex ) > 1.25 && tuIsDividedInRows ) || ( m_intraModeHorVerRatio.at( bestNormalIntraModeIndex ) < 0.8 && !tuIsDividedInRows ) )
+          if (!xSortISPCandList(bestCurrentCost, csBest->cost, uiBestPUMode))   // a false return ends the whole mode loop
           {
-            continue;
+            break;
           }
         }
-        else
+        xGetNextISPMode(uiRdModeList[mode], (mode > 0 ? &uiRdModeList[mode - 1] : nullptr), Size(width, height));   // presumably fills this placeholder in place (its ispMod is re-checked next) - confirm in xGetNextISPMode
+        if (uiRdModeList[mode].ispMod == INTRA_SUBPARTITIONS_RESERVED)   // still a placeholder: nothing to test at this position
         {
-          multiRefIdx = extendRefList[uiMode];
-          pu.multiRefIdx = multiRefIdx;
-          CHECK( pu.multiRefIdx && ( pu.intraDir[0] == DC_IDX || pu.intraDir[0] == PLANAR_IDX ), "ERL" );
+          continue;
         }
+        cu.lfnstIdx = m_curIspLfnstIdx;
+        uiOrgMode   = uiRdModeList[mode];
+      }
+      cu.mipFlag                     = uiOrgMode.mipFlg;   // copy the selected candidate's fields into the CU/PU
+      pu.mipTransposedFlag           = uiOrgMode.mipTrFlg;
+      cu.ispMode                     = uiOrgMode.ispMod;
+      pu.multiRefIdx                 = uiOrgMode.mRefId;
+      pu.intraDir[CHANNEL_TYPE_LUMA] = uiOrgMode.modeId;
+
+      CHECK(cu.mipFlag && pu.multiRefIdx, "Error: combination of MIP and MRL not supported");
+      CHECK(pu.multiRefIdx && (pu.intraDir[0] == PLANAR_IDX), "Error: combination of MRL and Planar mode not supported");
+      CHECK(cu.ispMode && cu.mipFlag, "Error: combination of ISP and MIP not supported");
+      CHECK(cu.ispMode && pu.multiRefIdx, "Error: combination of ISP and MRL not supported");
+      CHECK(cu.ispMode&& cu.colorTransform, "Error: combination of ISP and ACT not supported");
 
+      pu.intraDir[CHANNEL_TYPE_CHROMA] = cu.colorTransform ? DM_CHROMA_IDX : pu.intraDir[CHANNEL_TYPE_CHROMA];   // with colour transform the chroma direction is set to DM_CHROMA_IDX; otherwise it is left unchanged
 
       // set context models
       m_CABACEstimator->getCtx() = ctxStart;
@@ -808,64 +1060,166 @@ void IntraSearch::estIntraPredLumaQT( CodingUnit &cu, Partitioner &partitioner,
       // determine residual for partition
       cs.initSubStructure( *csTemp, partitioner.chType, cs.area, true );
 
+      bool tmpValidReturn = false;   // result flag of the coding attempt for this candidate only
       if( cu.ispMode )
       {
-        xRecurIntraCodingLumaQT( *csTemp, subTuPartitioner, bestCurrentCost, 0, intraSubPartitionsProcOrder );
+        if ( m_pcEncCfg->getUseFastISP() )
+        {
+          m_modeCtrl->setISPWasTested(true);
+        }
+        tmpValidReturn = xIntraCodingLumaISP(*csTemp, subTuPartitioner, bestCurrentCost);
+        if (csTemp->tus.size() == 0)
+        {
+          // no TUs were coded
+          csTemp->cost = MAX_DOUBLE;
+          continue;
+        }
+        // we save the data for future tests
+        m_ispTestedModes[m_curIspLfnstIdx].setModeResults((ISPType)cu.ispMode, (int)uiOrgMode.modeId, (int)csTemp->tus.size(), csTemp->cus[0]->firstTU->cbf[COMPONENT_Y] ? csTemp->cost : MAX_DOUBLE, csBest->cost);   // the stored cost is MAX_DOUBLE when the first TU has no luma cbf
+        csTemp->cost = !tmpValidReturn ? MAX_DOUBLE : csTemp->cost;   // an invalid ISP attempt gets MAX_DOUBLE as its cost
       }
       else
       {
-        xRecurIntraCodingLumaQT( *csTemp, partitioner, bestIspOption ? bestCurrentCost : MAX_DOUBLE, -1, TU_NO_ISP, bestIspOption );
-      }
-
-      if( cu.ispMode && !csTemp->cus[0]->firstTU->cbf[COMPONENT_Y] )
-      {
-        if ( cu.ispMode == HOR_INTRA_SUBPARTITIONS )
+        if (cu.colorTransform)   // colour transform on: the ACT coding routine is used instead of the luma-only one
         {
-          ispHorAllZeroCbfs |= ( m_pcEncCfg->getUseFastISP() && csTemp->tus[0]->lheight() > 2 && csTemp->cost >= bestCurrentCost );
+          tmpValidReturn = xRecurIntraCodingACTQT(*csTemp, partitioner, mtsCheckRangeFlag, mtsFirstCheckId, mtsLastCheckId, moreProbMTSIdxFirst);
         }
         else
         {
-          ispVerAllZeroCbfs |= ( m_pcEncCfg->getUseFastISP() && csTemp->tus[0]->lwidth() > 2 && csTemp->cost >= bestCurrentCost );
+          tmpValidReturn = xRecurIntraCodingLumaQT(
+            *csTemp, partitioner, uiBestPUMode.ispMod ? bestCurrentCost : MAX_DOUBLE, -1, TU_NO_ISP,
+            uiBestPUMode.ispMod, mtsCheckRangeFlag, mtsFirstCheckId, mtsLastCheckId, moreProbMTSIdxFirst);   // the cost argument is bestCurrentCost only while the current best mode is an ISP mode, otherwise MAX_DOUBLE
         }
+      }
+
+      if (!cu.ispMode && !cu.mtsFlag && !cu.lfnstIdx && !cu.bdpcmMode && !pu.multiRefIdx && !cu.mipFlag && testISP)   // candidate without ISP/MTS/LFNST/BDPCM/MRL/MIP while ISP is being tested: record mode and cost
+      {
+        m_regIntraRDListWithCosts.push_back( ModeInfoWithCost( cu.mipFlag, pu.mipTransposedFlag, pu.multiRefIdx, cu.ispMode, uiOrgMode.modeId, csTemp->cost ) );
+      }
+
+      if( cu.ispMode && !csTemp->cus[0]->firstTU->cbf[COMPONENT_Y] )
+      {
         csTemp->cost = MAX_DOUBLE;
         csTemp->costDbOffset = 0;
+        tmpValidReturn = false;   // an ISP candidate whose first TU has no luma cbf is treated as invalid
       }
+      validReturn |= tmpValidReturn;   // accumulates over the loop: set once any candidate was valid
 
+      if( sps.getUseLFNST() && mtsUsageFlag == 1 && !cu.ispMode && mode >= 0 )   // mode >= 0 keeps the negative BDPCM indices out of the store
+      {
+        m_modeCostStore[lfnstIdx][mode] = tmpValidReturn ? csTemp->cost : (MAX_DOUBLE / 2.0); //(MAX_DOUBLE / 2.0) ??
+      }
 
+      DTRACE(g_trace_ctx, D_INTRA_COST, "IntraCost T [x=%d,y=%d,w=%d,h=%d] %f (%d,%d,%d,%d,%d,%d) \n", cu.blocks[0].x,
+             cu.blocks[0].y, (int) width, (int) height, csTemp->cost, uiOrgMode.modeId, uiOrgMode.ispMod,
+             pu.multiRefIdx, cu.mipFlag, cu.lfnstIdx, cu.mtsFlag);
 
-      DTRACE( g_trace_ctx, D_INTRA_COST, "IntraCost T %f (%d) \n", csTemp->cost, uiOrgMode );
-
-      // check r-d cost
-      if( csTemp->cost < csBest->cost )
+      if( tmpValidReturn )   // only valid candidates take part in the bookkeeping and comparison below
       {
-        std::swap( csTemp, csBest );
-
-        uiBestPUMode  = uiOrgMode;
-        bestExtendRef = multiRefIdx;
-        bestIspOption = cu.ispMode;
-        if( csBest->cost < bestCurrentCost )
+        if (isFirstColorSpace)
+        {
+          if (m_pcEncCfg->getRGBFormatFlag() || !cu.ispMode)   // ISP candidates are passed on only when the RGB format flag is set
+          {
+            sortRdModeListFirstColorSpace(uiOrgMode, csTemp->cost, cu.bdpcmMode, m_savedRdModeFirstColorSpace[m_savedRdModeIdx], m_savedRdCostFirstColorSpace[m_savedRdModeIdx], m_savedBDPCMModeFirstColorSpace[m_savedRdModeIdx], m_numSavedRdModeFirstColorSpace[m_savedRdModeIdx]);
+          }
+        }
+        // check r-d cost
+        if( csTemp->cost < csBest->cost )
         {
-          bestCurrentCost = csBest->cost;
+          std::swap( csTemp, csBest );   // after the swap csBest holds the candidate just coded
+
+          uiBestPUMode  = uiOrgMode;
+          bestBDPCMMode = cu.bdpcmMode;
+          if( sps.getUseLFNST() && mtsUsageFlag == 1 && !cu.ispMode )
+          {
+            m_bestModeCostStore[ lfnstIdx ] = csBest->cost; //cs.cost;
+            m_bestModeCostValid[ lfnstIdx ] = true;
+          }
+          if( csBest->cost < bestCurrentCost )
+          {
+            bestCurrentCost = csBest->cost;
+          }
+          if ( cu.ispMode )
+          {
+            m_modeCtrl->setIspCost(csBest->cost);
+            bestLfnstIdx = cu.lfnstIdx;   // remember the LFNST index that was set for this ISP candidate
+          }
+          else if ( testISP )
+          {
+            m_modeCtrl->setMtsFirstPassNoIspCost(csBest->cost);
+          }
         }
-        if( !cu.ispMode )
+        if( !cu.ispMode && !cu.bdpcmMode && csBest->cost < bestCostNonBDPCM )   // NOTE(review): compares csBest->cost, which may belong to an earlier (possibly BDPCM) candidate - confirm this is intended
         {
-          bestNormalIntraModeIndex = uiMode;
+          bestCostNonBDPCM = csBest->cost;
         }
       }
 
       csTemp->releaseIntermediateData();
+      if( m_pcEncCfg->getFastLocalDualTreeMode() )
+      {
+        if( cu.isConsIntra() && !cu.slice->isIntra() && csBest->cost != MAX_DOUBLE && costInterCU != COST_UNKNOWN && mode >= 0 )   // early exit is considered only after a list candidate (mode >= 0) and once a best cost exists
+        {
+          if( m_pcEncCfg->getFastLocalDualTreeMode() == 2 )
+          {
+            //Note: only try one intra mode, which is especially useful to reduce EncT for LDB case (around 4%)
+            break;
+          }
+          else
+          {
+            if( csBest->cost > costInterCU * 1.5 )   // best intra cost exceeds 1.5 times the inter cost: stop testing modes
+            {
+              break;
+            }
+          }
+        }
+      }
+      if (sps.getUseColorTrans() && !CS::isDualITree(cs))
+      {
+        if ((m_pcEncCfg->getRGBFormatFlag() && !cu.colorTransform) && csBest->cost != MAX_DOUBLE && bestCS->cost != MAX_DOUBLE && mode >= 0)
+        {
+          if (csBest->cost > bestCS->cost)   // best cost here is already above bestCS->cost: stop testing modes
+          {
+            break;
+          }
+        }
+      }
     } // Mode loop
-    cu.ispMode = bestIspOption;
+    cu.ispMode = uiBestPUMode.ispMod;
+    cu.lfnstIdx = bestLfnstIdx;
 
-    cs.useSubStructure(*csBest, partitioner.chType, pu.singleChan(CHANNEL_TYPE_LUMA), true, true, keepResi, keepResi);
+    if( validReturn )   // csBest is handed to cs only when at least one candidate was valid
+    {
+      if (cu.colorTransform)
+      {
+        cs.useSubStructure(*csBest, partitioner.chType, pu, true, true, keepResi, keepResi);   // passes the whole pu rather than only its luma channel
+      }
+      else
+      {
+        cs.useSubStructure(*csBest, partitioner.chType, pu.singleChan(CHANNEL_TYPE_LUMA), true, true, keepResi,
+                           keepResi);
+      }
+    }
     csBest->releaseIntermediateData();
-    //=== update PU data ====
-    pu.intraDir[0] = uiBestPUMode;
-    pu.multiRefIdx = bestExtendRef;
+    if( validReturn )
+    {
+      //=== update PU data ====
+      cu.mipFlag = uiBestPUMode.mipFlg;
+      pu.mipTransposedFlag             = uiBestPUMode.mipTrFlg;
+      pu.multiRefIdx = uiBestPUMode.mRefId;
+      pu.intraDir[ CHANNEL_TYPE_LUMA ] = uiBestPUMode.modeId;
+      cu.bdpcmMode = bestBDPCMMode;
+      if (cu.colorTransform)
+      {
+        CHECK(pu.intraDir[CHANNEL_TYPE_CHROMA] != DM_CHROMA_IDX, "chroma should use DM mode for adaptive color transform");
+      }
+    }
   }
 
   //===== reset context models =====
   m_CABACEstimator->getCtx() = ctxStart;
+
+  return validReturn;   // validReturn is declared outside this hunk; within it, it is only ever OR-ed with tmpValidReturn
 }
 
 void IntraSearch::estIntraPredChromaQT( CodingUnit &cu, Partitioner &partitioner, const double maxCostAllowed )
@@ -878,7 +1232,7 @@ void IntraSearch::estIntraPredChromaQT( CodingUnit &cu, Partitioner &partitioner
   cs.setDecomp( cs.area.Cb(), false );
 
   double    bestCostSoFar = maxCostAllowed;
-  bool      lumaUsesISP   = !CS::isDualITree( *cu.cs ) && cu.ispMode;
+  bool      lumaUsesISP   = !cu.isSepTree() && cu.ispMode;   // true only when cu.isSepTree() is false and the CU has a non-zero ISP mode
   PartSplit ispType       = lumaUsesISP ? CU::getISPType( cu, COMPONENT_Y ) : TU_NO_ISP;
   CHECK( cu.ispMode && bestCostSoFar < 0, "bestCostSoFar must be positive!" );
 
@@ -888,12 +1242,12 @@ void IntraSearch::estIntraPredChromaQT( CodingUnit &cu, Partitioner &partitioner
     uint32_t       uiBestMode = 0;
     Distortion uiBestDist = 0;
     double     dBestCost = MAX_DOUBLE;
+    int32_t bestBDPCMMode = 0;   // cu.bdpcmModeChroma of the best candidate found so far
 
     //----- init mode list ----
     {
-      uint32_t  uiMinMode = 0;
-      uint32_t  uiMaxMode = NUM_CHROMA_MODE;
-
+      int32_t  uiMinMode = 0;   // signed: the mode loop further down starts below uiMinMode when chroma BDPCM is tested
+      int32_t  uiMaxMode = NUM_CHROMA_MODE;
       //----- check chroma modes -----
       uint32_t chromaCandModes[ NUM_CHROMA_MODE ];
       PU::getIntraChromaCandModes( pu, chromaCandModes );
@@ -905,13 +1259,13 @@ void IntraSearch::estIntraPredChromaQT( CodingUnit &cu, Partitioner &partitioner
       saveCS.area.repositionTo( cs.area );
       saveCS.clearTUs();
 
-      if( !CS::isDualITree( cs ) && cu.ispMode )
+      if( !cu.isSepTree() && cu.ispMode )   // cu.isSepTree() false and luma uses ISP: CUs and PUs of saveCS are cleared in addition to its TUs
       {
         saveCS.clearCUs();
         saveCS.clearPUs();
       }
 
-      if( CS::isDualITree( cs ) )
+      if( cu.isSepTree() )   // the separate-tree decision is now read from the CU (previously CS::isDualITree( cs ))
       {
         if( partitioner.canSplit( TU_MAX_TR_SPLIT, cs ) )
         {
@@ -966,9 +1320,8 @@ void IntraSearch::estIntraPredChromaQT( CodingUnit &cu, Partitioner &partitioner
       {
         modeIsEnable[i] = 1;
       }
-
-      DistParam distParam;
-      const bool useHadamard = true;
+      DistParam distParamSad;    // two distortion parameter sets replace the single Hadamard-only distParam
+      DistParam distParamSatd;
       pu.intraDir[1] = MDLM_L_IDX; // temporary assigned, just to indicate this is a MDLM mode. for luma down-sampling operation.
 
       initIntraPatternChType(cu, pu.Cb());
@@ -990,42 +1343,50 @@ void IntraSearch::estIntraPredChromaQT( CodingUnit &cu, Partitioner &partitioner
         pu.intraDir[1] = mode; // temporary assigned, for SATD checking.
 
         int64_t sad = 0;
+        int64_t sadCb = 0;    // per-component measures; sad above accumulates the value used for sorting
+        int64_t satdCb = 0;
+        int64_t sadCr = 0;
+        int64_t satdCr = 0;
         CodingStructure& cs = *(pu.cs);
 
         CompArea areaCb = pu.Cb();
         PelBuf orgCb = cs.getOrgBuf(areaCb);
         PelBuf predCb = cs.getPredBuf(areaCb);
-
-        m_pcRdCost->setDistParam(distParam, orgCb, predCb, pu.cs->sps->getBitDepth(CHANNEL_TYPE_CHROMA), COMPONENT_Cb, useHadamard);
-        distParam.applyWeight = false;
-
+        m_pcRdCost->setDistParam(distParamSad, orgCb, predCb, pu.cs->sps->getBitDepth(CHANNEL_TYPE_CHROMA), COMPONENT_Cb, false);   // last argument (formerly useHadamard) false
+        m_pcRdCost->setDistParam(distParamSatd, orgCb, predCb, pu.cs->sps->getBitDepth(CHANNEL_TYPE_CHROMA), COMPONENT_Cb, true);   // last argument true, as in the removed code
+        distParamSad.applyWeight = false;
+        distParamSatd.applyWeight = false;
         if (PU::isLMCMode(mode))
         {
           predIntraChromaLM(COMPONENT_Cb, predCb, pu, areaCb, mode);
         }
         else
         {
-          predIntraAng(COMPONENT_Cb, predCb, pu, false);
+          initPredIntraParams(pu, pu.Cb(), *pu.cs->sps);   // called for the Cb block right before the prediction
+          predIntraAng(COMPONENT_Cb, predCb, pu);
         }
-
-        sad += distParam.distFunc(distParam);
-
+        sadCb = distParamSad.distFunc(distParamSad) * 2;   // the non-Hadamard measure is doubled before the comparison
+        satdCb = distParamSatd.distFunc(distParamSatd);
+        sad += std::min(sadCb, satdCb);   // Cb contributes the smaller of the two measures
         CompArea areaCr = pu.Cr();
         PelBuf orgCr = cs.getOrgBuf(areaCr);
         PelBuf predCr = cs.getPredBuf(areaCr);
-
-        m_pcRdCost->setDistParam(distParam, orgCr, predCr, pu.cs->sps->getBitDepth(CHANNEL_TYPE_CHROMA), COMPONENT_Cr, useHadamard);
-        distParam.applyWeight = false;
-
+        m_pcRdCost->setDistParam(distParamSad, orgCr, predCr, pu.cs->sps->getBitDepth(CHANNEL_TYPE_CHROMA), COMPONENT_Cr, false);   // same two-measure setup for Cr
+        m_pcRdCost->setDistParam(distParamSatd, orgCr, predCr, pu.cs->sps->getBitDepth(CHANNEL_TYPE_CHROMA), COMPONENT_Cr, true);
+        distParamSad.applyWeight = false;
+        distParamSatd.applyWeight = false;
         if (PU::isLMCMode(mode))
         {
           predIntraChromaLM(COMPONENT_Cr, predCr, pu, areaCr, mode);
         }
         else
         {
-          predIntraAng(COMPONENT_Cr, predCr, pu, false);
+          initPredIntraParams(pu, pu.Cr(), *pu.cs->sps);   // called for the Cr block right before the prediction
+          predIntraAng(COMPONENT_Cr, predCr, pu);
         }
-        sad += distParam.distFunc(distParam);
+        sadCr = distParamSad.distFunc(distParamSad) * 2;
+        satdCr = distParamSatd.distFunc(distParamSatd);
+        sad += std::min(sadCr, satdCr);   // Cr contributes the smaller of the two measures
         satdSortedCost[idx] = sad;
       }
       // sort the mode based on the cost from small to large.
@@ -1056,10 +1417,20 @@ void IntraSearch::estIntraPredChromaQT( CodingUnit &cu, Partitioner &partitioner
 
       // save the dist
       Distortion baseDist = cs.dist;
-
-      for (uint32_t uiMode = uiMinMode; uiMode < uiMaxMode; uiMode++)
+      bool testBDPCM = true;
+      testBDPCM = testBDPCM && CU::bdpcmAllowed(cu, COMPONENT_Cb) && cu.ispMode == 0 && cu.mtsFlag == 0 && cu.lfnstIdx == 0;   // chroma BDPCM is tried only when allowed for Cb and ISP, MTS and LFNST are all off
+      for (int32_t uiMode = uiMinMode - (2 * int(testBDPCM)); uiMode < uiMaxMode; uiMode++)   // indices -2 and -1 are the two chroma BDPCM candidates
       {
-        const int chromaIntraMode = chromaCandModes[uiMode];
+        int chromaIntraMode = chromaCandModes[uiMode < 0 ? 0 : uiMode];   // index clamped: uiMode is -2/-1 for BDPCM candidates and must not index the array (was an out-of-bounds read)
+
+        if (uiMode < 0)
+        {
+            cu.bdpcmModeChroma = -uiMode;   // -1 -> 1, -2 -> 2
+            chromaIntraMode = chromaCandModes[0];
+        }
+        else
+        {
+            cu.bdpcmModeChroma = 0;
         if( PU::isLMCMode( chromaIntraMode ) && ! PU::isLMCModeEnabled( pu, chromaIntraMode ) )
         {
           continue;
@@ -1068,6 +1439,7 @@ void IntraSearch::estIntraPredChromaQT( CodingUnit &cu, Partitioner &partitioner
         {
           continue;
         }
+        }   // closes the else branch (uiMode >= 0) opened in the previous hunk
         cs.setDecomp( pu.Cb(), false );
         cs.dist = baseDist;
         //----- restore context models -----
@@ -1082,7 +1454,7 @@ void IntraSearch::estIntraPredChromaQT( CodingUnit &cu, Partitioner &partitioner
           continue;
         }
 
-        if (cs.pps->getUseTransformSkip())
+        if (cs.sps->getTransformSkipEnabledFlag())   // the transform-skip switch is now read from the SPS instead of the PPS
         {
           m_CABACEstimator->getCtx() = ctxStart;
         }
@@ -1120,6 +1492,7 @@ void IntraSearch::estIntraPredChromaQT( CodingUnit &cu, Partitioner &partitioner
           dBestCost  = dCost;
           uiBestDist = uiDist;
           uiBestMode = chromaIntraMode;
+          bestBDPCMMode = cu.bdpcmModeChroma;   // remember the chroma BDPCM mode together with the best mode
         }
       }
 
@@ -1146,6 +1519,7 @@ void IntraSearch::estIntraPredChromaQT( CodingUnit &cu, Partitioner &partitioner
 
     pu.intraDir[1] = uiBestMode;
     cs.dist        = uiBestDist;
+    cu.bdpcmModeChroma = bestBDPCMMode;   // restore the chroma BDPCM mode that belongs to uiBestMode
   }
 
   //----- restore context models -----
@@ -1156,1184 +1530,4127 @@ void IntraSearch::estIntraPredChromaQT( CodingUnit &cu, Partitioner &partitioner
   }
 }
 
-void IntraSearch::IPCMSearch(CodingStructure &cs, Partitioner& partitioner)
+
+void IntraSearch::saveCuAreaCostInSCIPU( Area area, double cost )   // appends one (area, cost) pair to m_cuAreaInSCIPU / m_cuCostInSCIPU
 {
-  ComponentID compStr = (CS::isDualITree(cs) && !isLuma(partitioner.chType)) ? COMPONENT_Cb: COMPONENT_Y;
-  ComponentID compEnd = (CS::isDualITree(cs) && isLuma(partitioner.chType)) ? COMPONENT_Y : COMPONENT_Cr;
-  for( ComponentID compID = compStr; compID <= compEnd; compID = ComponentID(compID+1) )
+  if( m_numCuInSCIPU < NUM_INTER_CU_INFO_SAVE )   // once NUM_INTER_CU_INFO_SAVE entries are stored, further calls do nothing
   {
-
-    xEncPCM(cs, partitioner, compID);
+    m_cuAreaInSCIPU[m_numCuInSCIPU] = area;
+    m_cuCostInSCIPU[m_numCuInSCIPU] = cost;
+    m_numCuInSCIPU++;   // number of stored entries, also the next free slot
   }
-
-  cs.getPredBuf().fill(0);
-  cs.getResiBuf().fill(0);
-  cs.getOrgResiBuf().fill(0);
-
-  cs.dist     = 0;
-  cs.fracBits = 0;
-  cs.cost     = 0;
-
-  cs.setDecomp(cs.area);
-  cs.picture->getPredBuf(cs.area).copyFrom(cs.getPredBuf());
 }
 
-void IntraSearch::xEncPCM(CodingStructure &cs, Partitioner& partitioner, const ComponentID &compID)
+void IntraSearch::initCuAreaCostInSCIPU()   // clears all NUM_INTER_CU_INFO_SAVE slots and resets the entry counter
 {
-  TransformUnit &tu = *cs.getTU( partitioner.chType );
-
-  const int  channelBitDepth = cs.sps->getBitDepth(toChannelType(compID));
-  const uint32_t uiPCMBitDepth = cs.sps->getPCMBitDepth(toChannelType(compID));
-
-  const int pcmShiftRight = (channelBitDepth - int(uiPCMBitDepth));
-
-  CompArea  area    = tu.blocks[compID];
-  PelBuf    pcmBuf  = tu.getPcmbuf  (compID);
-  PelBuf    recBuf  = cs.getRecoBuf ( area );
-  CPelBuf   orgBuf  = cs.getOrgBuf  ( area );
-
-  CHECK(pcmShiftRight < 0, "Negative shift");
-  CompArea      tmpArea(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size());
-  PelBuf tempOrgBuf = m_tmpStorageLCU.getBuf(tmpArea);
-  tempOrgBuf.copyFrom(orgBuf);
-  if (cs.slice->getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag() && compID == COMPONENT_Y)
+  for( int i = 0; i < NUM_INTER_CU_INFO_SAVE; i++ )
   {
-    tempOrgBuf.rspSignal(m_pcReshape->getFwdLUT());
-  }
-  for (uint32_t uiY = 0; uiY < pcmBuf.height; uiY++)
-  {
-    for (uint32_t uiX = 0; uiX < pcmBuf.width; uiX++)
-    {
-      // Encode
-      pcmBuf.at(uiX, uiY) = tempOrgBuf.at(uiX, uiY) >> pcmShiftRight;
-      // Reconstruction
-      recBuf.at(uiX, uiY) = pcmBuf.at(uiX, uiY) << pcmShiftRight;
-    }
+    m_cuAreaInSCIPU[i] = Area();   // default-constructed Area
+    m_cuCostInSCIPU[i] = 0;
   }
+  m_numCuInSCIPU = 0;   // no entries stored
 }
-
-// -------------------------------------------------------------------------------------------------------------------
-// Intra search
-// -------------------------------------------------------------------------------------------------------------------
-
-void IntraSearch::xEncIntraHeader( CodingStructure &cs, Partitioner &partitioner, const bool &bLuma, const bool &bChroma, const int subTuIdx )
+void IntraSearch::PLTSearch(CodingStructure &cs, Partitioner& partitioner, ComponentID compBegin, uint32_t numComp)
 {
-  CodingUnit &cu = *cs.getCU( partitioner.chType );
+  CodingUnit    &cu = *cs.getCU(partitioner.chType);
+  TransformUnit &tu = *cs.getTU(partitioner.chType);
+  uint32_t height = cu.block(compBegin).height;
+  uint32_t width = cu.block(compBegin).width;
 
-  if (bLuma)
+  if (m_pcEncCfg->getLmcs() && (cs.picHeader->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag()))
   {
-    bool isFirst = cu.ispMode ? subTuIdx == 0 : partitioner.currArea().lumaPos() == cs.area.lumaPos();
-
-    // CU header
-    if( isFirst )
+    cs.getPredBuf().copyFrom(cs.getOrgBuf());
+    cs.getPredBuf().Y().rspSignal(m_pcReshape->getFwdLUT());
+  }
+  cu.lastPLTSize[compBegin] = cs.prevPLT.curPLTSize[compBegin];
+  //derive palette
+  derivePLTLossy(cs, partitioner, compBegin, numComp);
+  reorderPLT(cs, partitioner, compBegin, numComp);
+
+  preCalcPLTIndexRD(cs, partitioner, compBegin, numComp); // Pre-calculate distortions for each pixel 
+  double rdCost = MAX_DOUBLE;
+  deriveIndexMap(cs, partitioner, compBegin, numComp, PLT_SCAN_HORTRAV, rdCost); // Optimize palette index map (horizontal scan)
+  if ((cu.curPLTSize[compBegin] + cu.useEscape[compBegin]) > 1)
+  {
+    deriveIndexMap(cs, partitioner, compBegin, numComp, PLT_SCAN_VERTRAV, rdCost); // Optimize palette index map (vertical scan)
+  }
+  cu.useRotation[compBegin] = m_bestScanRotationMode;
+  int indexMaxSize = cu.useEscape[compBegin] ? (cu.curPLTSize[compBegin] + 1) : cu.curPLTSize[compBegin];
+  if (indexMaxSize <= 1)
+  {
+    cu.useRotation[compBegin] = false;
+  }
+  //reconstruct pixel
+  PelBuf    curPLTIdx = tu.getcurPLTIdx(compBegin);
+  for (uint32_t y = 0; y < height; y++)
+  {
+    for (uint32_t x = 0; x < width; x++)
     {
-      if ((!cs.slice->isIntra() || cs.slice->getSPS()->getIBCFlag())
-        && cu.Y().valid()
-        )
+      if (curPLTIdx.at(x, y) == cu.curPLTSize[compBegin])
       {
-        if( cs.pps->getTransquantBypassEnabledFlag() )
-        {
-          m_CABACEstimator->cu_transquant_bypass_flag( cu );
-        }
-        m_CABACEstimator->cu_skip_flag( cu );
-        m_CABACEstimator->pred_mode   ( cu );
+        calcPixelPred(cs, partitioner, y, x, compBegin, numComp);
       }
-      if( CU::isIntra(cu) )
+      else
       {
-        m_CABACEstimator->pcm_data( cu, partitioner );
-        if( cu.ipcm )
+        for (uint32_t compID = compBegin; compID < (compBegin + numComp); compID++)
         {
-          return;
+          CompArea area = cu.blocks[compID];
+          PelBuf   recBuf = cs.getRecoBuf(area);
+          uint32_t scaleX = getComponentScaleX((ComponentID)COMPONENT_Cb, cs.sps->getChromaFormatIdc());
+          uint32_t scaleY = getComponentScaleY((ComponentID)COMPONENT_Cb, cs.sps->getChromaFormatIdc());
+          if (compBegin != COMPONENT_Y || compID == COMPONENT_Y)
+          {
+            recBuf.at(x, y) = cu.curPLT[compID][curPLTIdx.at(x, y)];
+          }
+          else if (compBegin == COMPONENT_Y && compID != COMPONENT_Y && y % (1 << scaleY) == 0 && x % (1 << scaleX) == 0)
+          {
+            recBuf.at(x >> scaleX, y >> scaleY) = cu.curPLT[compID][curPLTIdx.at(x, y)];
+          }
         }
       }
-      m_CABACEstimator->extend_ref_line(cu);
-      m_CABACEstimator->isp_mode      ( cu );
-    }
-
-    PredictionUnit &pu = *cs.getPU(partitioner.currArea().lumaPos(), partitioner.chType);
-
-    // luma prediction mode
-    if (isFirst)
-    {
-      if ( !cu.Y().valid())
-        m_CABACEstimator->pred_mode( cu );
-      m_CABACEstimator->intra_luma_pred_mode( pu );
     }
   }
 
-  if (bChroma)
-  {
-    bool isFirst = partitioner.currArea().Cb().valid() && partitioner.currArea().chromaPos() == cs.area.chromaPos();
-
-    PredictionUnit &pu = *cs.getPU( partitioner.currArea().chromaPos(), CHANNEL_TYPE_CHROMA );
+  cs.getPredBuf().fill(0);
+  cs.getResiBuf().fill(0);
+  cs.getOrgResiBuf().fill(0);
 
-    if( isFirst )
+  cs.fracBits = MAX_UINT;
+  cs.cost = MAX_DOUBLE;
+  Distortion distortion = 0;
+  for (uint32_t comp = compBegin; comp < (compBegin + numComp); comp++)
+  {
+    const ComponentID compID = ComponentID(comp);
+    CPelBuf reco = cs.getRecoBuf(compID);
+    CPelBuf org = cs.getOrgBuf(compID);
+#if WCG_EXT
+    if (m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() || (
+      m_pcEncCfg->getLmcs() && (cs.picHeader->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag())))
     {
-      m_CABACEstimator->intra_chroma_pred_mode( pu );
+      const CPelBuf orgLuma = cs.getOrgBuf(cs.area.blocks[COMPONENT_Y]);
+
+      if (compID == COMPONENT_Y && !(m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled()))
+      {
+        const CompArea &areaY = cu.Y();
+        CompArea tmpArea1(COMPONENT_Y, areaY.chromaFormat, Position(0, 0), areaY.size());
+        PelBuf   tmpRecLuma = m_tmpStorageLCU.getBuf(tmpArea1);
+        tmpRecLuma.copyFrom(reco);
+        tmpRecLuma.rspSignal(m_pcReshape->getInvLUT());
+        distortion += m_pcRdCost->getDistPart(org, tmpRecLuma, cs.sps->getBitDepth(toChannelType(compID)), compID, DF_SSE_WTD, &orgLuma);
+      }
+      else
+      {
+        distortion += m_pcRdCost->getDistPart(org, reco, cs.sps->getBitDepth(toChannelType(compID)), compID, DF_SSE_WTD, &orgLuma);
+      }
     }
+    else
+#endif
+      distortion += m_pcRdCost->getDistPart(org, reco, cs.sps->getBitDepth(toChannelType(compID)), compID, DF_SSE);
   }
-}
 
-void IntraSearch::xEncSubdivCbfQT( CodingStructure &cs, Partitioner &partitioner, const bool &bLuma, const bool &bChroma, const int subTuIdx, const PartSplit ispType )
+  cs.dist += distortion;
+  const CompArea &area = cu.blocks[compBegin];
+  cs.setDecomp(area);
+  cs.picture->getRecoBuf(area).copyFrom(cs.getRecoBuf(area));
+}
+void IntraSearch::calcPixelPredRD(CodingStructure& cs, Partitioner& partitioner, Pel* orgBuf, Pel* paPixelValue, Pel* paRecoValue, ComponentID compBegin, uint32_t numComp)
 {
-  const UnitArea &currArea = partitioner.currArea();
-          int subTuCounter = subTuIdx;
-  TransformUnit &currTU = *cs.getTU( currArea.blocks[partitioner.chType], partitioner.chType, subTuCounter );
-  CodingUnit    &currCU = *currTU.cu;
-  uint32_t currDepth           = partitioner.currTrDepth;
-
-  const bool subdiv        = currTU.depth > currDepth;
-  ComponentID compID = partitioner.chType == CHANNEL_TYPE_LUMA ? COMPONENT_Y : COMPONENT_Cb;
-  const bool chromaCbfISP = currArea.blocks[COMPONENT_Cb].valid() && currCU.ispMode && !subdiv;
-
-  if( partitioner.canSplit( TU_MAX_TR_SPLIT, cs ) )
-  {
-    CHECK( !subdiv, "TU split implied" );
-  }
-  else
+  CodingUnit &cu = *cs.getCU(partitioner.chType);
+  TransformUnit &tu = *cs.getTU(partitioner.chType);
+
+  int qp[3];
+  int qpRem[3];
+  int qpPer[3];
+  int quantiserScale[3];
+  int quantiserRightShift[3];
+  int rightShiftOffset[3];
+  int invquantiserRightShift[3];
+  int add[3];
+  for (uint32_t ch = compBegin; ch < (compBegin + numComp); ch++)
   {
-    CHECK( subdiv && !currCU.ispMode && isLuma( compID ), "No TU subdivision is allowed with QTBT" );
+    QpParam cQP(tu, ComponentID(ch));
+    qp[ch] = cQP.Qp(true);
+    qpRem[ch] = qp[ch] % 6;
+    qpPer[ch] = qp[ch] / 6;
+    quantiserScale[ch] = g_quantScales[0][qpRem[ch]];
+    quantiserRightShift[ch] = QUANT_SHIFT + qpPer[ch];
+    rightShiftOffset[ch] = 1 << (quantiserRightShift[ch] - 1);
+    invquantiserRightShift[ch] = IQUANT_SHIFT;
+    add[ch] = 1 << (invquantiserRightShift[ch] - 1);
   }
 
-  if( bChroma && ( !currCU.ispMode || chromaCbfISP ) )
+  for (uint32_t ch = compBegin; ch < (compBegin + numComp); ch++)
   {
-    const uint32_t numberValidComponents = getNumberValidComponents(currArea.chromaFormat);
-    const uint32_t cbfDepth = ( chromaCbfISP ? currDepth - 1 : currDepth );
-
-    for (uint32_t ch = COMPONENT_Cb; ch < numberValidComponents; ch++)
-    {
-      const ComponentID compID = ComponentID(ch);
-
-      if( currDepth == 0 || TU::getCbfAtDepth( currTU, compID, currDepth - 1 ) || chromaCbfISP )
-      {
-        const bool prevCbf = ( compID == COMPONENT_Cr ? TU::getCbfAtDepth( currTU, COMPONENT_Cb, currDepth ) : false );
-        m_CABACEstimator->cbf_comp( cs, TU::getCbfAtDepth( currTU, compID, currDepth ), currArea.blocks[compID], cbfDepth, prevCbf );
-
-      }
-    }
+    const int  channelBitDepth = cu.cs->sps->getBitDepth(toChannelType((ComponentID)ch));
+    paPixelValue[ch] = Pel(std::max<int>(0, ((orgBuf[ch] * quantiserScale[ch] + rightShiftOffset[ch]) >> quantiserRightShift[ch])));
+    assert(paPixelValue[ch] < (1 << (channelBitDepth + 1)));
+    paRecoValue[ch] = (((paPixelValue[ch] * g_invQuantScales[0][qpRem[ch]]) << qpPer[ch]) + add[ch]) >> invquantiserRightShift[ch];
+    paRecoValue[ch] = Pel(ClipBD<int>(paRecoValue[ch], channelBitDepth));//to be checked
   }
+}
 
-  if (subdiv)
-  {
+void IntraSearch::preCalcPLTIndexRD(CodingStructure& cs, Partitioner& partitioner, ComponentID compBegin, uint32_t numComp)
+{
+  CodingUnit &cu = *cs.getCU(partitioner.chType);
+  uint32_t height = cu.block(compBegin).height;
+  uint32_t width = cu.block(compBegin).width;
 
-    if( partitioner.canSplit( TU_MAX_TR_SPLIT, cs ) )
-    {
-      partitioner.splitCurrArea( TU_MAX_TR_SPLIT, cs );
-    }
-    else if( currCU.ispMode && isLuma( compID ) )
+  CPelBuf   orgBuf[3];
+  for (int comp = compBegin; comp < (compBegin + numComp); comp++)
+  {
+    CompArea  area = cu.blocks[comp];
+    if (m_pcEncCfg->getLmcs() && (cs.picHeader->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag()))
     {
-      partitioner.splitCurrArea( ispType, cs );
+      orgBuf[comp] = cs.getPredBuf(area);
     }
     else
-    THROW( "Cannot perform an implicit split!" );
-
-    do
     {
-      xEncSubdivCbfQT( cs, partitioner, bLuma, bChroma, subTuCounter, ispType );
-      subTuCounter += subTuCounter != -1 ? 1 : 0;
-    } while( partitioner.nextPart( cs ) );
-
-    partitioner.exitCurrSplit();
+      orgBuf[comp] = cs.getOrgBuf(area);
+    }
   }
-  else
+
+  int rasPos;
+  uint32_t scaleX = getComponentScaleX(COMPONENT_Cb, cs.sps->getChromaFormatIdc());
+  uint32_t scaleY = getComponentScaleY(COMPONENT_Cb, cs.sps->getChromaFormatIdc());
+  for (uint32_t y = 0; y < height; y++)
   {
-    //===== Cbfs =====
-    if (bLuma)
+    for (uint32_t x = 0; x < width; x++)
     {
-      bool previousCbf       = false;
-      bool lastCbfIsInferred = false;
-      if( ispType != TU_NO_ISP )
+      rasPos = y * width + x;;
+      // chroma discard
+      bool discardChroma = (compBegin == COMPONENT_Y) && (y&scaleY || x&scaleX);
+      Pel curPel[3];
+      for (int comp = compBegin; comp < (compBegin + numComp); comp++)
       {
-        bool rootCbfSoFar = false;
-        uint32_t nTus = currCU.ispMode == HOR_INTRA_SUBPARTITIONS ? currCU.lheight() >> g_aucLog2[currTU.lheight()] : currCU.lwidth() >> g_aucLog2[currTU.lwidth()];
-        if( subTuCounter == nTus - 1 )
+        uint32_t pX1 = (comp > 0 && compBegin == COMPONENT_Y) ? (x >> scaleX) : x;
+        uint32_t pY1 = (comp > 0 && compBegin == COMPONENT_Y) ? (y >> scaleY) : y;
+        curPel[comp] = orgBuf[comp].at(pX1, pY1);
+      }
+
+      uint8_t  pltIdx = 0;
+      double minError = MAX_DOUBLE;
+      uint8_t  bestIdx = 0;
+      while (pltIdx < cu.curPLTSize[compBegin])
+      {
+        uint64_t sqrtError = 0;
+        for (int comp = compBegin; comp < (discardChroma ? 1 : (compBegin + numComp)); comp++)
         {
-          TransformUnit* tuPointer = currCU.firstTU;
-          for( int tuIdx = 0; tuIdx < nTus - 1; tuIdx++ )
+          int64_t tmpErr = int64_t(curPel[comp] - cu.curPLT[comp][pltIdx]);
+          if (isChroma((ComponentID)comp))
           {
-            rootCbfSoFar |= TU::getCbfAtDepth( *tuPointer, COMPONENT_Y, currDepth );
-            tuPointer = tuPointer->next;
+            sqrtError += uint64_t(tmpErr*tmpErr*ENC_CHROMA_WEIGHTING);
           }
-          if( !rootCbfSoFar )
+          else
           {
-            lastCbfIsInferred = true;
+            sqrtError += tmpErr*tmpErr;
           }
         }
-        if( !lastCbfIsInferred )
+        m_indexError[pltIdx][rasPos] = (double)sqrtError;
+        if (sqrtError < minError)
         {
-          previousCbf = TU::getPrevTuCbfAtDepth( currTU, COMPONENT_Y, partitioner.currTrDepth );
+          minError = (double)sqrtError;
+          bestIdx = pltIdx;
         }
+        pltIdx++;
       }
-      if( !lastCbfIsInferred )
+
+      Pel paPixelValue[3], paRecoValue[3];
+      calcPixelPredRD(cs, partitioner, curPel, paPixelValue, paRecoValue, compBegin, numComp);
+      uint64_t error = 0, rate = 0;
+      for (int comp = compBegin; comp < (discardChroma ? 1 : (compBegin + numComp)); comp++)
       {
-        m_CABACEstimator->cbf_comp( cs, TU::getCbfAtDepth( currTU, COMPONENT_Y, currDepth ), currTU.Y(), currTU.depth, previousCbf, currCU.ispMode );
+        int64_t tmpErr = int64_t(curPel[comp] - paRecoValue[comp]);
+        if (isChroma((ComponentID)comp))
+        {
+          error += uint64_t(tmpErr*tmpErr*ENC_CHROMA_WEIGHTING);
+        }
+        else
+        {
+          error += tmpErr*tmpErr;
+        }
+        rate += m_escapeNumBins[paPixelValue[comp]]; // encode quantized escape color
       }
+      double rdCost = (double)error + m_pcRdCost->getLambda()*(double)rate;
+      m_indexError[cu.curPLTSize[compBegin]][rasPos] = rdCost;
+      if (rdCost < minError) 
+      {
+        minError = rdCost;
+        bestIdx = (uint8_t)cu.curPLTSize[compBegin];
+      }
+      m_minErrorIndexMap[rasPos] = bestIdx; // save the optimal index of the current pixel
     }
   }
 }
 
-void IntraSearch::xEncCoeffQT( CodingStructure &cs, Partitioner &partitioner, const ComponentID compID, const int subTuIdx, const PartSplit ispType )
+void IntraSearch::deriveIndexMap(CodingStructure& cs, Partitioner& partitioner, ComponentID compBegin, uint32_t numComp, PLTScanMode pltScanMode, double& dMinCost)
 {
-  const UnitArea &currArea  = partitioner.currArea();
-
-       int subTuCounter     = subTuIdx;
-  TransformUnit &currTU     = *cs.getTU( currArea.blocks[partitioner.chType], partitioner.chType, subTuIdx );
-  uint32_t      currDepth       = partitioner.currTrDepth;
-  const bool subdiv         = currTU.depth > currDepth;
+  CodingUnit    &cu = *cs.getCU(partitioner.chType);
+  TransformUnit &tu = *cs.getTU(partitioner.chType);
+  uint32_t      height = cu.block(compBegin).height;
+  uint32_t      width = cu.block(compBegin).width;
+
+  int   total     = height*width;
+  Pel  *runIndex = tu.getPLTIndex(compBegin);
+  bool *runType  = tu.getRunTypes(compBegin);
+  m_scanOrder = g_scanOrder[SCAN_UNGROUPED][pltScanMode ? SCAN_TRAV_VER : SCAN_TRAV_HOR][gp_sizeIdxInfo->idxFrom(width)][gp_sizeIdxInfo->idxFrom(height)];
+// Trellis initialization
+  for (int i = 0; i < 2; i++)
+  {
+    memset(m_prevRunTypeRDOQ[i], 0, sizeof(Pel)*NUM_TRELLIS_STATE);
+    memset(m_prevRunPosRDOQ[i],  0, sizeof(int)*NUM_TRELLIS_STATE);
+    memset(m_stateCostRDOQ[i],  0, sizeof (double)*NUM_TRELLIS_STATE);
+  }
+  for (int state = 0; state < NUM_TRELLIS_STATE; state++)
+  {
+    m_statePtRDOQ[state][0] = 0;
+  }
+// Context modeling
+  const FracBitsAccess& fracBits = m_CABACEstimator->getCtx().getFracBitsAcess();
+  BinFracBits fracBitsPltCopyFlagIndex[RUN_IDX_THRE + 1];
+  for (int dist = 0; dist <= RUN_IDX_THRE; dist++)
+  {
+    const unsigned  ctxId = DeriveCtx::CtxPltCopyFlag(PLT_RUN_INDEX, dist);
+    fracBitsPltCopyFlagIndex[dist] = fracBits.getFracBitsArray(Ctx::IdxRunModel( ctxId ) );
+  }
+  BinFracBits fracBitsPltCopyFlagAbove[RUN_IDX_THRE + 1];
+  for (int dist = 0; dist <= RUN_IDX_THRE; dist++)
+  {
+    const unsigned  ctxId = DeriveCtx::CtxPltCopyFlag(PLT_RUN_COPY, dist);
+    fracBitsPltCopyFlagAbove[dist] = fracBits.getFracBitsArray(Ctx::CopyRunModel( ctxId ) );
+  }
+  const BinFracBits fracBitsPltRunType = fracBits.getFracBitsArray( Ctx::RunTypeFlag() );
 
-  if (subdiv)
+// Trellis RDO per CG
+  bool contTrellisRD = true;
+  for (int subSetId = 0; ( subSetId <= (total - 1) >> LOG2_PALETTE_CG_SIZE ) && contTrellisRD; subSetId++)
   {
-    if (partitioner.canSplit(TU_MAX_TR_SPLIT, cs))
-    {
-      partitioner.splitCurrArea(TU_MAX_TR_SPLIT, cs);
-    }
-    else if( currTU.cu->ispMode )
+    int minSubPos = subSetId << LOG2_PALETTE_CG_SIZE;
+    int maxSubPos = minSubPos + (1 << LOG2_PALETTE_CG_SIZE);
+    maxSubPos = (maxSubPos > total) ? total : maxSubPos; // if last position is out of the current CU size
+    contTrellisRD = deriveSubblockIndexMap(cs, partitioner, compBegin, pltScanMode, minSubPos, maxSubPos, fracBitsPltRunType, fracBitsPltCopyFlagIndex, fracBitsPltCopyFlagAbove, dMinCost, (bool)pltScanMode);
+  }
+  if (!contTrellisRD)
+  {
+    return;
+  }
+
+
+// best state at the last scan position
+  double  sumRdCost = MAX_DOUBLE;
+  uint8_t bestState = 0;
+  for (uint8_t state = 0; state < NUM_TRELLIS_STATE; state++)
+  {
+    if (m_stateCostRDOQ[0][state] < sumRdCost)
     {
-      partitioner.splitCurrArea( ispType, cs );
+      sumRdCost = m_stateCostRDOQ[0][state];
+      bestState = state;
     }
-    else
-      THROW("Implicit TU split not available!");
+  }
 
-    do
+     bool checkRunTable  [MAX_CU_BLKSIZE_PLT*MAX_CU_BLKSIZE_PLT];
+  uint8_t checkIndexTable[MAX_CU_BLKSIZE_PLT*MAX_CU_BLKSIZE_PLT];
+  uint8_t bestStateTable [MAX_CU_BLKSIZE_PLT*MAX_CU_BLKSIZE_PLT];
+  uint8_t nextState = bestState;
+// best trellis path
+  for (int i = (width*height - 1); i >= 0; i--)
+  {
+    bestStateTable[i] = nextState;
+    int rasterPos = m_scanOrder[i].idx;
+    nextState = m_statePtRDOQ[nextState][rasterPos];
+  }
+// reconstruct index and runs based on the state pointers
+  for (int i = 0; i < (width*height); i++)
+  {
+    int rasterPos = m_scanOrder[i].idx;
+    int  abovePos = (pltScanMode == PLT_SCAN_HORTRAV) ? m_scanOrder[i].idx - width : m_scanOrder[i].idx - 1;
+        nextState = bestStateTable[i];
+    if ( nextState == 0 ) // same as the previous
     {
-      xEncCoeffQT( cs, partitioner, compID, subTuCounter, ispType );
-      subTuCounter += subTuCounter != -1 ? 1 : 0;
-    } while( partitioner.nextPart( cs ) );
-
-    partitioner.exitCurrSplit();
+      checkRunTable[rasterPos] = checkRunTable[ m_scanOrder[i - 1].idx ];
+      if ( checkRunTable[rasterPos] == PLT_RUN_INDEX )
+      {
+        checkIndexTable[rasterPos] = checkIndexTable[m_scanOrder[i - 1].idx];
+      }
+      else
+      {
+        checkIndexTable[rasterPos] = checkIndexTable[ abovePos ];
+      }
+    }
+    else if (nextState == 1) // CopyAbove mode
+    {
+      checkRunTable[rasterPos] = PLT_RUN_COPY;
+      checkIndexTable[rasterPos] = checkIndexTable[abovePos];
+    }
+    else if (nextState == 2) // Index mode
+    {
+      checkRunTable[rasterPos] = PLT_RUN_INDEX;
+      checkIndexTable[rasterPos] = m_minErrorIndexMap[rasterPos];
+    }
   }
-  else
 
-  if( currArea.blocks[compID].valid() )
+// Escape flag
+  m_bestEscape = false;
+  for (int pos = 0; pos < (width*height); pos++)
   {
-    if( TU::hasCrossCompPredInfo( currTU, compID ) )
+    uint8_t index = checkIndexTable[pos];
+    if (index == cu.curPLTSize[compBegin])
     {
-      m_CABACEstimator->cross_comp_pred( currTU, compID );
+      m_bestEscape = true;
+      break;
     }
-    if( TU::getCbf( currTU, compID ) )
+  }
+
+// Horizontal scan vs. vertical scan
+  if (sumRdCost < dMinCost)
+  {
+    cu.useEscape[compBegin] = m_bestEscape;
+    m_bestScanRotationMode = pltScanMode;
+    for (int pos = 0; pos < (width*height); pos++)
     {
-      m_CABACEstimator->residual_coding( currTU, compID );
+      runIndex[pos] = checkIndexTable[pos];
+      runType[pos] = checkRunTable[pos];
     }
+    dMinCost = sumRdCost;
   }
 }
 
-uint64_t IntraSearch::xGetIntraFracBitsQT( CodingStructure &cs, Partitioner &partitioner, const bool &bLuma, const bool &bChroma, const int subTuIdx, const PartSplit ispType )
+bool IntraSearch::deriveSubblockIndexMap(
+  CodingStructure& cs,
+  Partitioner&  partitioner,
+  ComponentID   compBegin,
+  PLTScanMode   pltScanMode,
+  int           minSubPos,
+  int           maxSubPos,
+  const BinFracBits& fracBitsPltRunType,
+  const BinFracBits* fracBitsPltIndexINDEX,
+  const BinFracBits* fracBitsPltIndexCOPY,
+  const double minCost,
+  bool         useRotate
+)
 {
-  m_CABACEstimator->resetBits();
+  CodingUnit &cu    = *cs.getCU(partitioner.chType);
+  uint32_t   height = cu.block(compBegin).height;
+  uint32_t   width  = cu.block(compBegin).width;
+  int indexMaxValue = cu.curPLTSize[compBegin];
+
+  int refId = 0;
+  int currRasterPos, currScanPos, prevScanPos, aboveScanPos, roffset;
+  int log2Width = (pltScanMode == PLT_SCAN_HORTRAV) ? floorLog2(width): floorLog2(height);
+  int buffersize = (pltScanMode == PLT_SCAN_HORTRAV) ? 2*width: 2*height;
+  for (int curPos = minSubPos; curPos < maxSubPos; curPos++)
+  {
+    currRasterPos = m_scanOrder[curPos].idx;
+    prevScanPos = (curPos == 0) ? 0 : (curPos - 1) % buffersize;
+    roffset = (curPos >> log2Width) << log2Width;
+    aboveScanPos = roffset - (curPos - roffset + 1);
+    aboveScanPos %= buffersize;
+    currScanPos = curPos % buffersize;
+    if ((pltScanMode == PLT_SCAN_HORTRAV && curPos < width) || (pltScanMode == PLT_SCAN_VERTRAV && curPos < height))
+    {
+      aboveScanPos = -1; // first column/row: above row is not valid
+    }
 
-  xEncIntraHeader( cs, partitioner, bLuma, bChroma, subTuIdx );
-  xEncSubdivCbfQT( cs, partitioner, bLuma, bChroma, subTuIdx, ispType );
+// Trellis states:
+// 1st state: same as the previously scanned sample
+// 2nd state: Copy_Above mode
+// 3rd state: Index mode
+// Loop over the current state
+    for ( int curState = 0; curState < NUM_TRELLIS_STATE; curState++ ) 
+    {
+      double    minRdCost          = MAX_DOUBLE;
+      int       minState           = 0; // best prevState
+      uint8_t   bestRunIndex       = 0;
+      bool      bestRunType        = 0;
+      bool      bestPrevCodedType  = 0;
+      int       bestPrevCodedPos   = 0;
+      if ( ( curState == 0 && curPos == 0 ) || ( curState == 1 && aboveScanPos < 0 ) ) // state not available
+      {
+        m_stateCostRDOQ[1 - refId][curState] = MAX_DOUBLE;
+        continue;
+      }
 
+      bool    runType  = 0;
+      uint8_t runIndex = 0;
+      if ( curState == 1 ) // 2nd state: Copy_Above mode
+      {
+        runType = PLT_RUN_COPY;
+      }
+      else if ( curState == 2 ) // 3rd state: Index mode 
+      {
+        runType = PLT_RUN_INDEX;
+        runIndex = m_minErrorIndexMap[currRasterPos];
+      }
 
-  if( bLuma )
-  {
-    xEncCoeffQT( cs, partitioner, COMPONENT_Y, subTuIdx, ispType );
-  }
-  if( bChroma )
-  {
-    xEncCoeffQT( cs, partitioner, COMPONENT_Cb, subTuIdx, ispType );
-    xEncCoeffQT( cs, partitioner, COMPONENT_Cr, subTuIdx, ispType );
-  }
+// Loop over the previous state
+      for ( int stateID = 0; stateID < NUM_TRELLIS_STATE; stateID++ ) 
+      {
+        if ( m_stateCostRDOQ[refId][stateID] == MAX_DOUBLE )
+        {
+          continue;
+        }
+        if ( curState == 0 ) // 1st state: same as previous scanned sample
+        {
+          runType = m_runMapRDOQ[refId][stateID][prevScanPos];
+          runIndex = ( runType == PLT_RUN_INDEX ) ? m_indexMapRDOQ[refId][stateID][ prevScanPos ] : m_indexMapRDOQ[refId][stateID][ aboveScanPos ];
+        }
+        else if ( curState == 1 ) // 2nd state: Copy_Above mode
+        {
+          runIndex = m_indexMapRDOQ[refId][stateID][aboveScanPos];
+        }
+        bool    prevRunType   = m_runMapRDOQ[refId][stateID][prevScanPos];
+        uint8_t prevRunIndex  = m_indexMapRDOQ[refId][stateID][prevScanPos];
+        uint8_t aboveRunIndex = (aboveScanPos >= 0) ? m_indexMapRDOQ[refId][stateID][aboveScanPos] : 0;
+        int      dist = curPos - m_prevRunPosRDOQ[refId][stateID] - 1;
+        double rdCost = m_stateCostRDOQ[refId][stateID];
+        if ( rdCost >= minRdCost ) continue;
+
+// Calculate RD cost
+        bool prevCodedRunType = m_prevRunTypeRDOQ[refId][stateID];
+        int  prevCodedPos     = m_prevRunPosRDOQ [refId][stateID];
+        const BinFracBits* fracBitsPt = (m_prevRunTypeRDOQ[refId][stateID] == PLT_RUN_INDEX) ? fracBitsPltIndexINDEX : fracBitsPltIndexCOPY;
+        rdCost += rateDistOptPLT(runType, runIndex, prevRunType, prevRunIndex, aboveRunIndex, prevCodedRunType, prevCodedPos, curPos, (pltScanMode == PLT_SCAN_HORTRAV) ? width : height, dist, indexMaxValue, fracBitsPt, fracBitsPltRunType);
+        if (rdCost < minRdCost) // update minState ( minRdCost )
+        {
+          minRdCost    = rdCost;
+          minState     = stateID;
+          bestRunType  = runType;
+          bestRunIndex = runIndex;
+          bestPrevCodedType = prevCodedRunType;
+          bestPrevCodedPos  = prevCodedPos;
+        }
+      }
+// Update trellis info of current state
+      m_stateCostRDOQ  [1 - refId][curState]  = minRdCost;
+      m_prevRunTypeRDOQ[1 - refId][curState]  = bestPrevCodedType;
+      m_prevRunPosRDOQ [1 - refId][curState]  = bestPrevCodedPos;
+      m_statePtRDOQ[curState][currRasterPos] = minState;
+      int buffer2update = std::min(buffersize, curPos);
+      memcpy(m_indexMapRDOQ[1 - refId][curState], m_indexMapRDOQ[refId][minState], sizeof(uint8_t)*buffer2update);
+      memcpy(m_runMapRDOQ[1 - refId][curState], m_runMapRDOQ[refId][minState], sizeof(bool)*buffer2update);
+      m_indexMapRDOQ[1 - refId][curState][currScanPos] = bestRunIndex;
+      m_runMapRDOQ  [1 - refId][curState][currScanPos] = bestRunType;
+    }
 
-  uint64_t fracBits = m_CABACEstimator->getEstFracBits();
-  return fracBits;
+    if (useRotate) // early terminate: Rd cost >= min cost in horizontal scan
+    {
+      if ((m_stateCostRDOQ[1 - refId][0] >= minCost) &&
+         (m_stateCostRDOQ[1 - refId][1] >= minCost) &&
+         (m_stateCostRDOQ[1 - refId][2] >= minCost) )
+      {
+        return 0;
+      }
+    }
+    refId = 1 - refId;
+  }
+  return 1;
 }
 
-uint64_t IntraSearch::xGetIntraFracBitsQTSingleChromaComponent( CodingStructure &cs, Partitioner &partitioner, const ComponentID compID )
+double IntraSearch::rateDistOptPLT(
+  bool      runType,
+  uint8_t   runIndex,
+  bool      prevRunType,
+  uint8_t   prevRunIndex,
+  uint8_t   aboveRunIndex,
+  bool&     prevCodedRunType,
+  int&      prevCodedPos,
+  int       scanPos,
+  uint32_t  width,
+  int       dist,
+  int       indexMaxValue,
+  const BinFracBits* IndexfracBits,
+  const BinFracBits& TypefracBits)
 {
-  m_CABACEstimator->resetBits();
-
-  if( compID == COMPONENT_Cb )
-  {
-    //intra mode coding
-    PredictionUnit &pu = *cs.getPU( partitioner.currArea().lumaPos(), partitioner.chType );
-    m_CABACEstimator->intra_chroma_pred_mode( pu );
-    //xEncIntraHeader(cs, partitioner, false, true);
-  }
-  CHECK( partitioner.currTrDepth != 1, "error in the depth!" );
-  const UnitArea &currArea = partitioner.currArea();
-
-  TransformUnit &currTU = *cs.getTU( currArea.blocks[partitioner.chType], partitioner.chType );
+  double rdCost = 0.0;
+  bool identityFlag = !( (runType != prevRunType) || ( (runType == PLT_RUN_INDEX) && (runIndex != prevRunIndex) ) );
 
-  //cbf coding
-  m_CABACEstimator->cbf_comp( cs, TU::getCbfAtDepth( currTU, compID, partitioner.currTrDepth ), currArea.blocks[compID], partitioner.currTrDepth - 1 );
-  //coeffs coding and cross comp coding
-  if( TU::hasCrossCompPredInfo( currTU, compID ) )
+  if ( ( !identityFlag && runType == PLT_RUN_INDEX ) || scanPos == 0 ) // encode index value
   {
-    m_CABACEstimator->cross_comp_pred( currTU, compID );
+    uint8_t refIndex = (prevRunType == PLT_RUN_INDEX) ? prevRunIndex : aboveRunIndex;
+    refIndex = (scanPos == 0) ? ( indexMaxValue + 1) : refIndex;
+    if ( runIndex == refIndex )
+    {
+      rdCost = MAX_DOUBLE;
+      return rdCost;
+    }
+    rdCost += m_pcRdCost->getLambda()*m_truncBinBits[(runIndex > refIndex) ? runIndex - 1 : runIndex][(scanPos == 0) ? (indexMaxValue + 1) : indexMaxValue];
   }
-  if( TU::getCbf( currTU, compID ) )
+  rdCost += m_indexError[runIndex][m_scanOrder[scanPos].idx];
+  if (scanPos > 0)
   {
-    m_CABACEstimator->residual_coding( currTU, compID );
+    rdCost += m_pcRdCost->getLambda()*( identityFlag ? (IndexfracBits[(dist < RUN_IDX_THRE) ? dist : RUN_IDX_THRE].intBits[1] >> SCALE_BITS) : (IndexfracBits[(dist < RUN_IDX_THRE) ? dist : RUN_IDX_THRE].intBits[0] >> SCALE_BITS));
   }
-
-  uint64_t fracBits = m_CABACEstimator->getEstFracBits();
-  return fracBits;
-}
-
-uint64_t IntraSearch::xGetIntraFracBitsQTChroma(TransformUnit& currTU, const ComponentID &compID)
-{
-  m_CABACEstimator->resetBits();
-
-  if( TU::hasCrossCompPredInfo( currTU, compID ) )
+  if ( !identityFlag && scanPos >= width && prevRunType != PLT_RUN_COPY )
   {
-    m_CABACEstimator->cross_comp_pred( currTU, compID );
+    rdCost += m_pcRdCost->getLambda()*(TypefracBits.intBits[runType] >> SCALE_BITS);
   }
-  if( TU::getCbf( currTU, compID ) )
+  if (!identityFlag || scanPos == 0)
   {
-    m_CABACEstimator->residual_coding( currTU, compID );
+    prevCodedRunType = runType;
+    prevCodedPos = scanPos;
   }
-
-  uint64_t fracBits = m_CABACEstimator->getEstFracBits();
-  return fracBits;
+  return rdCost;
 }
-
-void IntraSearch::xIntraCodingTUBlock(TransformUnit &tu, const ComponentID &compID, const bool &checkCrossCPrediction, Distortion& ruiDist, const int &default0Save1Load2, uint32_t* numSig, std::vector<TrMode>* trModes, const bool loadTr)
+uint32_t IntraSearch::getEpExGolombNumBins(uint32_t symbol, uint32_t count)
 {
-  if (!tu.blocks[compID].valid())
+  uint32_t numBins = 0;
+  while (symbol >= (uint32_t)(1 << count))
   {
-    return;
+    numBins++;
+    symbol -= 1 << count;
+    count++;
   }
+  numBins++;
+  numBins += count;
+  assert(numBins <= 32);
+  return numBins;
+}
 
-  CodingStructure &cs                       = *tu.cs;
-
-  const CompArea      &area                 = tu.blocks[compID];
-  const SPS           &sps                  = *cs.sps;
-  const PPS           &pps                  = *cs.pps;
-
-  const ChannelType    chType               = toChannelType(compID);
-  const int            bitDepth             = sps.getBitDepth(chType);
-
-  PelBuf         piOrg                      = cs.getOrgBuf    (area);
-  PelBuf         piPred                     = cs.getPredBuf   (area);
-  PelBuf         piResi                     = cs.getResiBuf   (area);
-  PelBuf         piOrgResi                  = cs.getOrgResiBuf(area);
-  PelBuf         piReco                     = cs.getRecoBuf   (area);
-
-  const PredictionUnit &pu                  = *cs.getPU(area.pos(), chType);
-  const uint32_t           uiChFinalMode        = PU::getFinalIntraMode(pu, chType);
-
-  const bool           bUseCrossCPrediction = pps.getPpsRangeExtension().getCrossComponentPredictionEnabledFlag() && isChroma( compID ) && PU::isChromaIntraModeCrossCheckMode( pu ) && checkCrossCPrediction;
-  const bool           ccUseRecoResi        = m_pcEncCfg->getUseReconBasedCrossCPredictionEstimate();
-  const bool           ispSplitIsAllowed    = CU::canUseISPSplit( *tu.cu, compID );
-
-
-  //===== init availability pattern =====
-  PelBuf sharedPredTS( m_pSharedPredTransformSkip[compID], area );
-  if( default0Save1Load2 != 2 )
+uint32_t IntraSearch::getTruncBinBits(uint32_t symbol, uint32_t maxSymbol)
+{
+  uint32_t idxCodeBit = 0;
+  uint32_t thresh;
+  if (maxSymbol > 256)
   {
-    const bool bUseFilteredPredictions = IntraPrediction::useFilteredIntraRefSamples( compID, pu, true, tu );
-    initIntraPatternChType( *tu.cu, area, bUseFilteredPredictions );
-
-    //===== get prediction signal =====
-    if( compID != COMPONENT_Y && PU::isLMCMode( uiChFinalMode ) )
-    {
-      {
-        xGetLumaRecPixels( pu, area );
-      }
-      predIntraChromaLM( compID, piPred, pu, area, uiChFinalMode );
-    }
-    else
+    uint32_t threshVal = 1 << 8;
+    thresh = 8;
+    while (threshVal <= maxSymbol)
     {
-      predIntraAng( compID, piPred, pu, bUseFilteredPredictions );
-    }
-
-
-    // save prediction
-    if( default0Save1Load2 == 1 )
-    {
-      sharedPredTS.copyFrom( piPred );
+      thresh++;
+      threshVal <<= 1;
     }
+    thresh--;
   }
   else
   {
-    // load prediction
-    piPred.copyFrom( sharedPredTS );
+    thresh = g_tbMax[maxSymbol];
   }
-
-
-  DTRACE( g_trace_ctx, D_PRED, "@(%4d,%4d) [%2dx%2d] IMode=%d\n", tu.lx(), tu.ly(), tu.lwidth(), tu.lheight(), uiChFinalMode );
-  //DTRACE_PEL_BUF( D_PRED, piPred, tu, tu.cu->predMode, COMPONENT_Y );
-
-  const Slice           &slice = *cs.slice;
-  bool flag = slice.getReshapeInfo().getUseSliceReshaper() && (slice.isIntra() || (!slice.isIntra() && m_pcReshape->getCTUFlag()));
-  if (flag && slice.getReshapeInfo().getSliceReshapeChromaAdj() && isChroma(compID))
+  uint32_t uiVal = 1 << thresh;
+  assert(uiVal <= maxSymbol);
+  assert((uiVal << 1) > maxSymbol);
+  assert(symbol < maxSymbol);
+  uint32_t b = maxSymbol - uiVal;
+  assert(b < uiVal);
+  if (symbol < uiVal - b)
   {
-    const Area area = tu.Y().valid() ? tu.Y() : Area(recalcPosition(tu.chromaFormat, tu.chType, CHANNEL_TYPE_LUMA, tu.blocks[tu.chType].pos()), recalcSize(tu.chromaFormat, tu.chType, CHANNEL_TYPE_LUMA, tu.blocks[tu.chType].size()));
-    const CompArea &areaY = CompArea(COMPONENT_Y, tu.chromaFormat, area );
-    PelBuf piPredY;
-    piPredY = cs.picture->getPredBuf(areaY);
-    const Pel avgLuma = piPredY.computeAvg();
-    int adj = m_pcReshape->calculateChromaAdj(avgLuma);
-    tu.setChromaAdj(adj);
+    idxCodeBit = thresh;
   }
-  //===== get residual signal =====
-  piResi.copyFrom( piOrg  );
-  if (slice.getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag() && compID==COMPONENT_Y)
+  else
   {
-    CompArea      tmpArea(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size());
-    PelBuf tmpPred = m_tmpStorageLCU.getBuf(tmpArea);
-    tmpPred.copyFrom(piPred);
-    piResi.rspSignal(m_pcReshape->getFwdLUT());
-    piResi.subtract(tmpPred);
+    idxCodeBit = thresh + 1;
   }
-  else
-  piResi.subtract( piPred );
+  return idxCodeBit;
+}
 
-  if (pps.getPpsRangeExtension().getCrossComponentPredictionEnabledFlag() && isLuma(compID))
+void IntraSearch::initTBCTable(int bitDepth)
+{  // Fills the m_truncBinBits and m_escapeNumBins lookup tables; bitDepth is not read in this body.
+  for (uint32_t i = 0; i < m_symbolSize; i++)
   {
-    piOrgResi.copyFrom (piResi);
+    memset(m_truncBinBits[i], 0, sizeof(uint16_t)*(m_symbolSize + 1));  // clear row i (m_symbolSize + 1 entries)
   }
-
-  if (bUseCrossCPrediction)
+  for (uint32_t i = 0; i < (m_symbolSize + 1); i++)
   {
-    if (xCalcCrossComponentPredictionAlpha(tu, compID, ccUseRecoResi) == 0)
+    for (uint32_t j = 0; j < i; j++)
     {
-      return;
+      m_truncBinBits[j][i] = getTruncBinBits(j, i);  // only entries with j < i are filled; the others stay 0
     }
-    CrossComponentPrediction::crossComponentPrediction(tu, compID, cs.getResiBuf(tu.Y()), piResi, piResi, false);
   }
-
-  //===== transform and quantization =====
-  //--- init rate estimation arrays for RDOQ ---
-  //--- transform and quantization           ---
-  TCoeff uiAbsSum = 0;
-
-  const QpParam cQP(tu, compID);
-
-#if RDOQ_CHROMA_LAMBDA
-  m_pcTrQuant->selectLambda(compID);
-#endif
-
-  flag =flag && (tu.blocks[compID].width*tu.blocks[compID].height > 4);
-  if (flag && isChroma(compID) && slice.getReshapeInfo().getSliceReshapeChromaAdj() )
+  memset(m_escapeNumBins, 0, sizeof(uint16_t)*m_symbolSize);
+  for (uint32_t i = 0; i < m_symbolSize; i++)
   {
-    int cResScaleInv = tu.getChromaAdj();
-    double cResScale = round((double)(1 << CSCALE_FP_PREC) / (double)cResScaleInv);
-    m_pcTrQuant->setLambda(m_pcTrQuant->getLambda() / (cResScale*cResScale));
-    piResi.scaleSignal(cResScaleInv, 1, tu.cu->cs->slice->clpRng(compID));
+    m_escapeNumBins[i] = getEpExGolombNumBins(i, 3);  // NOTE(review): the literal 3 is presumably the Exp-Golomb order - confirm against getEpExGolombNumBins
   }
+}
+void IntraSearch::calcPixelPred(CodingStructure& cs, Partitioner& partitioner, uint32_t yPos, uint32_t xPos, ComponentID compBegin, uint32_t numComp)
+{  // Quantises the source sample at (xPos, yPos) into tu's escape-value buffer and writes the de-quantised, clipped value to the reco buffer, for components [compBegin, compBegin + numComp).
+  CodingUnit    &cu = *cs.getCU(partitioner.chType);
+  TransformUnit &tu = *cs.getTU(partitioner.chType);
 
-  double diagRatio = 0, horVerRatio = 0;
-
-  if( trModes )
+  CPelBuf   orgBuf[3];
+  for (int comp = compBegin; comp < (compBegin + numComp); comp++)
   {
-    m_pcTrQuant->transformNxN( tu, compID, cQP, trModes, CU::isIntra( *tu.cu ) ? m_pcEncCfg->getIntraMTSMaxCand() : m_pcEncCfg->getInterMTSMaxCand(), ispSplitIsAllowed ? &diagRatio : nullptr, ispSplitIsAllowed ? &horVerRatio : nullptr );
-    tu.mtsIdx = trModes->at(0).first;
+    CompArea  area = cu.blocks[comp];
+    if (m_pcEncCfg->getLmcs() && (cs.picHeader->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag()))
+    {
+      orgBuf[comp] = cs.getPredBuf(area);  // NOTE(review): with all LMCS flags set the pred buffer is the source - presumably the caller stored the reshaped original there; confirm
+    }
+    else
+    {
+      orgBuf[comp] = cs.getOrgBuf(area);
+    }
   }
-  m_pcTrQuant->transformNxN( tu, compID, cQP, uiAbsSum, m_CABACEstimator->getCtx(), loadTr, &diagRatio, &horVerRatio );
-  if (!tu.cu->ispMode && isLuma(compID) && ispSplitIsAllowed &&
-    tu.mtsIdx == 0
-    )
+
+  int qp[3];
+  int qpRem[3];
+  int qpPer[3];
+  int quantiserScale[3];
+  int quantiserRightShift[3];
+  int rightShiftOffset[3];
+  int invquantiserRightShift[3];
+  int add[3];
+  for (uint32_t ch = compBegin; ch < (compBegin + numComp); ch++)
   {
-    m_intraModeDiagRatio        .push_back(diagRatio);
-    m_intraModeHorVerRatio      .push_back(horVerRatio);
-    m_intraModeTestedNormalIntra.push_back((int)uiChFinalMode);
+    QpParam cQP(tu, ComponentID(ch));
+    qp[ch] = cQP.Qp(true);
+    qpRem[ch] = qp[ch] % 6;  // index into the scale tables
+    qpPer[ch] = qp[ch] / 6;  // used as an extra shift amount
+    quantiserScale[ch] = g_quantScales[0][qpRem[ch]];
+    quantiserRightShift[ch] = QUANT_SHIFT + qpPer[ch];
+    rightShiftOffset[ch] = 1 << (quantiserRightShift[ch] - 1);  // half of the divisor: rounding offset for the forward shift
+    invquantiserRightShift[ch] = IQUANT_SHIFT;
+    add[ch] = 1 << (invquantiserRightShift[ch] - 1);  // rounding offset for the inverse shift
   }
 
+  uint32_t scaleX = getComponentScaleX(COMPONENT_Cb, cs.sps->getChromaFormatIdc());
+  uint32_t scaleY = getComponentScaleY(COMPONENT_Cb, cs.sps->getChromaFormatIdc());
+  for (uint32_t ch = compBegin; ch < (compBegin + numComp); ch++)
+  {
+    const int channelBitDepth = cu.cs->sps->getBitDepth(toChannelType((ComponentID)ch));
+    CompArea  area = cu.blocks[ch];
+    PelBuf    recBuf = cs.getRecoBuf(area);
+    PLTescapeBuf escapeValue = tu.getescapeValue((ComponentID)ch);
+    if (compBegin != COMPONENT_Y || ch == 0)  // coordinates are used unscaled in this branch
+    {
+      escapeValue.at(xPos, yPos) = TCoeff(std::max<int>(0, ((orgBuf[ch].at(xPos, yPos) * quantiserScale[ch] + rightShiftOffset[ch]) >> quantiserRightShift[ch])));
+      assert(escapeValue.at(xPos, yPos) < (1 << (channelBitDepth + 1)));
+      recBuf.at(xPos, yPos) = (((escapeValue.at(xPos, yPos)*g_invQuantScales[0][qpRem[ch]]) << qpPer[ch]) + add[ch]) >> invquantiserRightShift[ch];
+      recBuf.at(xPos, yPos) = Pel(ClipBD<int>(recBuf.at(xPos, yPos), channelBitDepth));//to be checked
+    }
+    else if (compBegin == COMPONENT_Y && ch > 0 && yPos % (1 << scaleY) == 0 && xPos % (1 << scaleX) == 0)  // non-first component handled together with luma: only positions on the subsampling grid are written
+    {
+      uint32_t yPosC = yPos >> scaleY;
+      uint32_t xPosC = xPos >> scaleX;
+      escapeValue.at(xPosC, yPosC) = TCoeff(std::max<int>(0, ((orgBuf[ch].at(xPosC, yPosC) * quantiserScale[ch] + rightShiftOffset[ch]) >> quantiserRightShift[ch])));
+      assert(escapeValue.at(xPosC, yPosC) < (1 << (channelBitDepth + 1)));
+      recBuf.at(xPosC, yPosC) = (((escapeValue.at(xPosC, yPosC)*g_invQuantScales[0][qpRem[ch]]) << qpPer[ch]) + add[ch]) >> invquantiserRightShift[ch];
+      recBuf.at(xPosC, yPosC) = Pel(ClipBD<int>(recBuf.at(xPosC, yPosC), channelBitDepth));//to be checked
+    }
+  }
+}
+void IntraSearch::derivePLTLossy(CodingStructure& cs, Partitioner& partitioner, ComponentID compBegin, uint32_t numComp)
+{  // Derives cu.curPLT and cu.curPLTSize[compBegin]: groups the block's source samples into clusters, orders clusters by sample count, and turns them into palette entries.
+  CodingUnit &cu = *cs.getCU(partitioner.chType);
+  const int channelBitDepth_L = cs.sps->getBitDepth(CHANNEL_TYPE_LUMA);
+  const int channelBitDepth_C = cs.sps->getBitDepth(CHANNEL_TYPE_CHROMA);
+  const int pcmShiftRight_L = (channelBitDepth_L - PLT_ENCBITDEPTH);
+  const int pcmShiftRight_C = (channelBitDepth_C - PLT_ENCBITDEPTH);
 
-  DTRACE( g_trace_ctx, D_TU_ABS_SUM, "%d: comp=%d, abssum=%d\n", DTRACE_GET_COUNTER( g_trace_ctx, D_TU_ABS_SUM ), compID, uiAbsSum );
-
+  uint32_t height = cu.block(compBegin).height;
+  uint32_t width = cu.block(compBegin).width;
 
-  //--- inverse transform ---
-  if (uiAbsSum > 0)
+  CPelBuf   orgBuf[3];
+  for (int comp = compBegin; comp < (compBegin + numComp); comp++)
   {
-    m_pcTrQuant->invTransformNxN(tu, compID, piResi, cQP);
+    CompArea  area = cu.blocks[comp];
+    if (m_pcEncCfg->getLmcs() && (cs.picHeader->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag()))
+    {
+      orgBuf[comp] = cs.getPredBuf(area);  // NOTE(review): with all LMCS flags set the pred buffer is the source - presumably it holds the reshaped original; confirm with caller
+    }
+    else
+    {
+      orgBuf[comp] = cs.getOrgBuf(area);
+    }
   }
-  else
+
+  int errorLimit = g_paletteQuant[cu.qp];
+  uint32_t totalSize = height*width;
+  SortingElement *pelList = new SortingElement[totalSize];  // one slot per sample: at most every sample opens its own cluster
+  SortingElement  element;
+  SortingElement *pelListSort = new SortingElement[MAXPLTSIZE + 1];  // one spare slot for the shift done while sorting
+  uint32_t dictMaxSize = MAXPLTSIZE;
+  uint32_t idx = 0;  // number of clusters in pelList
+  int last = -1;  // index of the cluster used for the previous sample, -1 before the first
+
+  uint32_t scaleX = getComponentScaleX(COMPONENT_Cb, cs.sps->getChromaFormatIdc());
+  uint32_t scaleY = getComponentScaleY(COMPONENT_Cb, cs.sps->getChromaFormatIdc());
+  for (uint32_t y = 0; y < height; y++)
   {
-    piResi.fill(0);
+    for (uint32_t x = 0; x < width; x++)
+    {
+      uint32_t org[3], pX, pY;
+      for (int comp = compBegin; comp < (compBegin + numComp); comp++)
+      {
+        pX = (comp > 0 && compBegin == COMPONENT_Y) ? (x >> scaleX) : x;  // non-first components use scaled coordinates when luma is part of the set
+        pY = (comp > 0 && compBegin == COMPONENT_Y) ? (y >> scaleY) : y;
+        org[comp] = orgBuf[comp].at(pX, pY);
+      }
+      element.setAll(org, compBegin, numComp);
+      int besti = last, bestSAD = (last == -1) ? MAX_UINT : pelList[last].getSAD(element, cs.sps->getBitDepths(), compBegin, numComp);  // start from the previously used cluster
+      if (bestSAD)  // a zero SAD against the previous cluster skips the search
+      {
+        for (int i = idx - 1; i >= 0; i--)
+        {
+          uint32_t sad = pelList[i].getSAD(element, cs.sps->getBitDepths(), compBegin, numComp);
+          if (sad < bestSAD)
+          {
+            bestSAD = sad;
+            besti = i;
+            if (!sad) break;
+          }
+        }
+      }
+      if (besti >= 0 && pelList[besti].almostEqualData(element, errorLimit, cs.sps->getBitDepths(), compBegin, numComp))
+      {
+        pelList[besti].addElement(element, compBegin, numComp);  // sample joins the closest cluster
+        last = besti;
+      }
+      else
+      {
+        pelList[idx].copyDataFrom(element, compBegin, numComp);  // otherwise the sample opens a new cluster
+        pelList[idx].setCnt(1);
+        last = idx;
+        idx++;
+      }
+    }
   }
 
-  //===== reconstruction =====
-  if (flag && uiAbsSum > 0 && isChroma(compID) && slice.getReshapeInfo().getSliceReshapeChromaAdj() )
+  for (int i = 0; i < dictMaxSize; i++)
   {
-    piResi.scaleSignal(tu.getChromaAdj(), 0, tu.cu->cs->slice->clpRng(compID));
+    pelListSort[i].setCnt(0);
+    pelListSort[i].resetAll(compBegin, numComp);
+  }
+
+  //bubble sorting
+  dictMaxSize = 1;
+  for (int i = 0; i < idx; i++)
+  {
+    if (pelList[i].getCnt() > pelListSort[dictMaxSize - 1].getCnt())
+    {
+      int j;
+      for (j = dictMaxSize; j > 0; j--)
+      {
+        if (pelList[i].getCnt() > pelListSort[j - 1].getCnt() )
+        {
+          pelListSort[j].copyAllFrom(pelListSort[j - 1], compBegin, numComp);  // shift the smaller entry one slot down
+          dictMaxSize = std::min(dictMaxSize + 1, (uint32_t)MAXPLTSIZE);  // grows on every shift, capped at MAXPLTSIZE
+        }
+        else
+        {
+          break;
+        }
+      }
+      pelListSort[j].copyAllFrom(pelList[i], compBegin, numComp);
+    }
+  }
+
+  uint32_t paletteSize = 0;
+  uint64_t numColorBits = 0;
+  for (int comp = compBegin; comp < (compBegin + numComp); comp++)
+  {
+    numColorBits += (comp > 0) ? channelBitDepth_C : channelBitDepth_L;
+  }
+  const int plt_lambda_shift = (compBegin > 0) ? pcmShiftRight_C : pcmShiftRight_L;
+  double    bitCost          = m_pcRdCost->getLambda() / (double) (1 << (2 * plt_lambda_shift)) * numColorBits;  // added only to the cost of keeping the cluster's own colour (see bestCost below)
+  for (int i = 0; i < MAXPLTSIZE; i++)
+  {
+    if (pelListSort[i].getCnt())
+    {
+      int half = pelListSort[i].getCnt() >> 1;
+      for (int comp = compBegin; comp < (compBegin + numComp); comp++)
+      {
+        cu.curPLT[comp][paletteSize] = (pelListSort[i].getSumData(comp) + half) / pelListSort[i].getCnt();  // rounded mean of the cluster
+      }
+
+      int best = -1;  // index into cs.prevPLT of a cheaper entry, -1 if none
+      if (errorLimit)
+      {
+        double pal[MAX_NUM_COMPONENT], err = 0.0, bestCost = 0.0;
+        for (int comp = compBegin; comp < (compBegin + numComp); comp++)
+        {
+          pal[comp] = pelListSort[i].getSumData(comp) / (double)pelListSort[i].getCnt();  // unrounded mean
+          err = pal[comp] - cu.curPLT[comp][paletteSize];
+          if (isChroma((ComponentID) comp))
+          {
+            bestCost += (err * err * PLT_CHROMA_WEIGHTING) / (1 << (2 * pcmShiftRight_C));
+          }
+          else
+          {
+            bestCost += (err * err) / (1 << (2 * pcmShiftRight_L));
+          }
+        }
+        bestCost = bestCost * pelListSort[i].getCnt() + bitCost;
+
+        for (int t = 0; t < cs.prevPLT.curPLTSize[compBegin]; t++)
+        {
+          double cost = 0.0;
+          for (int comp = compBegin; comp < (compBegin + numComp); comp++)
+          {
+            err = pal[comp] - cs.prevPLT.curPLT[comp][t];
+            if (isChroma((ComponentID) comp))
+            {
+              cost += (err * err * PLT_CHROMA_WEIGHTING) / (1 << (2 * pcmShiftRight_C));
+            }
+            else
+            {
+              cost += (err * err) / (1 << (2 * pcmShiftRight_L));
+            }
+          }
+          cost *= pelListSort[i].getCnt();  // no bitCost term for an entry taken from cs.prevPLT
+          if (cost < bestCost)
+          {
+            best = t;
+            bestCost = cost;
+          }
+        }
+        if (best != -1)
+        {
+          for (int comp = compBegin; comp < (compBegin + numComp); comp++)
+          {
+            cu.curPLT[comp][paletteSize] = cs.prevPLT.curPLT[comp][best];
+          }
+        }
+      }
+
+      bool duplicate = false;
+      if (pelListSort[i].getCnt() == 1 && best == -1)  // a single-sample cluster with no cs.prevPLT match is dropped via the duplicate flag
+      {
+        duplicate = true;
+      }
+      else
+      {
+        for (int t = 0; t<paletteSize; t++)
+        {
+          bool duplicateTmp = true;
+          for (int comp = compBegin; comp < (compBegin + numComp); comp++)
+          {
+            duplicateTmp = duplicateTmp && (cu.curPLT[comp][paletteSize] == cu.curPLT[comp][t]);
+          }
+          if (duplicateTmp)
+          {
+            duplicate = true;
+            break;
+          }
+        }
+      }
+      if (!duplicate) paletteSize++;  // otherwise the slot is overwritten by the next candidate
+    }
+    else
+    {
+      break;
+    }
+  }
+  cu.curPLTSize[compBegin] = paletteSize;
+
+  delete[] pelList;
+  delete[] pelListSort;
+}
+// -------------------------------------------------------------------------------------------------------------------
+// Intra search
+// -------------------------------------------------------------------------------------------------------------------
+
+void IntraSearch::xEncIntraHeader( CodingStructure &cs, Partitioner &partitioner, const bool &bLuma, const bool &bChroma, const int subTuIdx )
+{  // Passes the CU-level intra header syntax (skip flag, pred mode, BDPCM mode, luma/chroma intra pred modes) to m_CABACEstimator.
+  CodingUnit &cu = *cs.getCU( partitioner.chType );
+
+  if (bLuma)
+  {
+    bool isFirst = cu.ispMode ? subTuIdx == 0 : partitioner.currArea().lumaPos() == cs.area.lumaPos();  // with ISP only sub-TU 0 counts as first; otherwise the current area must start at cs.area's luma position
+
+    // CU header
+    if( isFirst )
+    {
+      if ((!cs.slice->isIntra() || cs.slice->getSPS()->getIBCFlag() || cs.slice->getSPS()->getPLTMode())
+      && cu.Y().valid()
+      )
+      {
+        m_CABACEstimator->cu_skip_flag( cu );
+        m_CABACEstimator->pred_mode   ( cu );
+      }
+      if (CU::isPLT(cu))
+      {
+        return;  // palette CU: nothing further is passed to the estimator here
+      }
+      m_CABACEstimator->bdpcm_mode  ( cu, ComponentID(partitioner.chType) );
+      if (!CS::isDualITree(cs) && isLuma(partitioner.chType))
+          m_CABACEstimator->bdpcm_mode(cu, ComponentID(CHANNEL_TYPE_CHROMA));
+    }
+
+    PredictionUnit &pu = *cs.getPU(partitioner.currArea().lumaPos(), partitioner.chType);
+
+    // luma prediction mode
+    if (isFirst)
+    {
+      if ( !cu.Y().valid())  // the CU-header branch above requires cu.Y().valid(), so pred_mode is passed here instead
+        m_CABACEstimator->pred_mode( cu );
+      m_CABACEstimator->intra_luma_pred_mode( pu );
+    }
+  }
+
+  if (bChroma)
+  {
+    bool isFirst = partitioner.currArea().Cb().valid() && partitioner.currArea().chromaPos() == cs.area.chromaPos();  // chroma mode is only passed for the area starting at cs.area's chroma position
+
+    PredictionUnit &pu = *cs.getPU( partitioner.currArea().chromaPos(), CHANNEL_TYPE_CHROMA );
+
+    if( isFirst )
+    {
+      m_CABACEstimator->intra_chroma_pred_mode( pu );
+    }
+  }
+}
+
+void IntraSearch::xEncSubdivCbfQT( CodingStructure &cs, Partitioner &partitioner, const bool &bLuma, const bool &bChroma, const int subTuIdx, const PartSplit ispType )
+{  // Recursive walk over the TU tree below the partitioner's current area; passes chroma and luma CBFs to m_CABACEstimator.
+  const UnitArea &currArea = partitioner.currArea();
+          int subTuCounter = subTuIdx;
+  TransformUnit &currTU = *cs.getTU( currArea.blocks[partitioner.chType], partitioner.chType, subTuCounter );
+  CodingUnit    &currCU = *currTU.cu;
+  uint32_t currDepth           = partitioner.currTrDepth;
+
+  const bool subdiv        = currTU.depth > currDepth;  // the TU sits deeper than the current transform depth, so recurse below
+  ComponentID compID = partitioner.chType == CHANNEL_TYPE_LUMA ? COMPONENT_Y : COMPONENT_Cb;
+  const bool chromaCbfISP = currArea.blocks[COMPONENT_Cb].valid() && currCU.ispMode && !subdiv;
+
+  if( partitioner.canSplit( TU_MAX_TR_SPLIT, cs ) )
+  {
+    CHECK( !subdiv, "TU split implied" );
+  }
+  else
+  {
+    CHECK( subdiv && !currCU.ispMode && isLuma( compID ), "No TU subdivision is allowed with QTBT" );
+  }
+
+  if( bChroma && ( !currCU.ispMode || chromaCbfISP ) )
+  {
+    const uint32_t numberValidComponents = getNumberValidComponents(currArea.chromaFormat);
+    const uint32_t cbfDepth = ( chromaCbfISP ? currDepth - 1 : currDepth );
+
+    for (uint32_t ch = COMPONENT_Cb; ch < numberValidComponents; ch++)
+    {
+      const ComponentID compID = ComponentID(ch);  // shadows the outer compID inside this loop
+
+      if( currDepth == 0 || TU::getCbfAtDepth( currTU, compID, currDepth - 1 ) || chromaCbfISP )
+      {
+        const bool prevCbf = ( compID == COMPONENT_Cr ? TU::getCbfAtDepth( currTU, COMPONENT_Cb, currDepth ) : false );  // the Cb CBF is handed over as prevCbf when coding Cr
+        m_CABACEstimator->cbf_comp( cs, TU::getCbfAtDepth( currTU, compID, currDepth ), currArea.blocks[compID], cbfDepth, prevCbf );
+
+      }
+    }
+  }
+
+  if (subdiv)
+  {
+
+    if( partitioner.canSplit( TU_MAX_TR_SPLIT, cs ) )
+    {
+      partitioner.splitCurrArea( TU_MAX_TR_SPLIT, cs );
+    }
+    else if( currCU.ispMode && isLuma( compID ) )
+    {
+      partitioner.splitCurrArea( ispType, cs );
+    }
+    else
+    THROW( "Cannot perform an implicit split!" );
+
+    do
+    {
+      xEncSubdivCbfQT( cs, partitioner, bLuma, bChroma, subTuCounter, ispType );
+      subTuCounter += subTuCounter != -1 ? 1 : 0;  // -1 is left untouched; any other value advances to the next sub-TU index
+    } while( partitioner.nextPart( cs ) );
+
+    partitioner.exitCurrSplit();
+  }
+  else
+  {
+    //===== Cbfs =====
+    if (bLuma)
+    {
+      bool previousCbf       = false;
+      bool lastCbfIsInferred = false;
+      if( ispType != TU_NO_ISP )
+      {
+        bool rootCbfSoFar = false;
+        uint32_t nTus = currCU.ispMode == HOR_INTRA_SUBPARTITIONS ? currCU.lheight() >> floorLog2(currTU.lheight()) : currCU.lwidth() >> floorLog2(currTU.lwidth());
+        if( subTuCounter == nTus - 1 )  // last sub-partition
+        {
+          TransformUnit* tuPointer = currCU.firstTU;
+          for( int tuIdx = 0; tuIdx < nTus - 1; tuIdx++ )
+          {
+            rootCbfSoFar |= TU::getCbfAtDepth( *tuPointer, COMPONENT_Y, currDepth );
+            tuPointer = tuPointer->next;
+          }
+          if( !rootCbfSoFar )
+          {
+            lastCbfIsInferred = true;  // every earlier sub-TU has a zero luma CBF, so the last CBF is not passed to the estimator
+          }
+        }
+        if( !lastCbfIsInferred )
+        {
+          previousCbf = TU::getPrevTuCbfAtDepth( currTU, COMPONENT_Y, partitioner.currTrDepth );
+        }
+      }
+      if( !lastCbfIsInferred )
+      {
+        m_CABACEstimator->cbf_comp( cs, TU::getCbfAtDepth( currTU, COMPONENT_Y, currDepth ), currTU.Y(), currTU.depth, previousCbf, currCU.ispMode );
+      }
+    }
+  }
+}
+
+void IntraSearch::xEncCoeffQT( CodingStructure &cs, Partitioner &partitioner, const ComponentID compID, const int subTuIdx, const PartSplit ispType, CUCtx* cuCtx )
+{  // Recursive walk over the TU tree; at leaf TUs passes the joint-CbCr flag, cross-component prediction info and residual of compID to m_CABACEstimator.
+  const UnitArea &currArea  = partitioner.currArea();
+
+       int subTuCounter     = subTuIdx;
+  TransformUnit &currTU     = *cs.getTU( currArea.blocks[partitioner.chType], partitioner.chType, subTuIdx );
+  uint32_t      currDepth       = partitioner.currTrDepth;
+  const bool subdiv         = currTU.depth > currDepth;  // the TU sits deeper than the current transform depth, so recurse below
+
+  if (subdiv)
+  {
+    if (partitioner.canSplit(TU_MAX_TR_SPLIT, cs))
+    {
+      partitioner.splitCurrArea(TU_MAX_TR_SPLIT, cs);
+    }
+    else if( currTU.cu->ispMode )
+    {
+      partitioner.splitCurrArea( ispType, cs );
+    }
+    else
+      THROW("Implicit TU split not available!");
+
+    do
+    {
+      xEncCoeffQT( cs, partitioner, compID, subTuCounter, ispType, cuCtx );
+      subTuCounter += subTuCounter != -1 ? 1 : 0;  // -1 is left untouched; any other value advances to the next sub-TU index
+    } while( partitioner.nextPart( cs ) );
+
+    partitioner.exitCurrSplit();
+  }
+  else  // leaf TU: this else pairs with the if that follows the blank line
+
+  if( currArea.blocks[compID].valid() )
+  {
+    if( compID == COMPONENT_Cr )
+    {
+      const int cbfMask = ( TU::getCbf( currTU, COMPONENT_Cb ) ? 2 : 0 ) + ( TU::getCbf( currTU, COMPONENT_Cr ) ? 1 : 0 );  // bit 1 = Cb CBF, bit 0 = Cr CBF
+      m_CABACEstimator->joint_cb_cr( currTU, cbfMask );
+    }
+    if( TU::hasCrossCompPredInfo( currTU, compID ) )
+    {
+      m_CABACEstimator->cross_comp_pred( currTU, compID );
+    }
+    if( TU::getCbf( currTU, compID ) )
+    {
+      if( isLuma(compID) )
+      {
+        m_CABACEstimator->residual_coding( currTU, compID, cuCtx );
+        m_CABACEstimator->mts_idx( *currTU.cu, cuCtx );  // the MTS index is only passed on the luma path
+      }
+      else
+      m_CABACEstimator->residual_coding( currTU, compID );
+    }
+  }
+}
+
+uint64_t IntraSearch::xGetIntraFracBitsQT( CodingStructure &cs, Partitioner &partitioner, const bool &bLuma, const bool &bChroma, const int subTuIdx, const PartSplit ispType, CUCtx* cuCtx )
+{  // Returns the estimator's fractional bit count for the intra header, CBFs and coefficients of the partitioner's current area.
+  m_CABACEstimator->resetBits();  // the value returned below is what the estimator reports after the calls that follow
+
+  xEncIntraHeader( cs, partitioner, bLuma, bChroma, subTuIdx );
+  xEncSubdivCbfQT( cs, partitioner, bLuma, bChroma, subTuIdx, ispType );
+
+
+  if( bLuma )
+  {
+    xEncCoeffQT( cs, partitioner, COMPONENT_Y, subTuIdx, ispType, cuCtx );
+  }
+  if( bChroma )
+  {
+    xEncCoeffQT( cs, partitioner, COMPONENT_Cb, subTuIdx, ispType );  // cuCtx is omitted for chroma, so the declaration's default argument applies
+    xEncCoeffQT( cs, partitioner, COMPONENT_Cr, subTuIdx, ispType );
+  }
+
+  CodingUnit& cu = *cs.getCU(partitioner.chType);
+  if ( cuCtx && bLuma && cu.isSepTree() && ( !cu.ispMode || ( cu.lfnstIdx && subTuIdx == 0 ) || ( !cu.lfnstIdx && subTuIdx == m_ispTestedModes[cu.lfnstIdx].numTotalParts[cu.ispMode - 1] - 1 ) ) )  // with ISP: at sub-TU 0 when lfnstIdx is non-zero, at the last sub-TU when it is zero
+  {
+    m_CABACEstimator->residual_lfnst_mode(cu, *cuCtx);
+  }
+
+  uint64_t fracBits = m_CABACEstimator->getEstFracBits();
+  return fracBits;
+}
+
+uint64_t IntraSearch::xGetIntraFracBitsQTSingleChromaComponent( CodingStructure &cs, Partitioner &partitioner, const ComponentID compID )
+{  // Returns the estimator's fractional bit count for one chroma component: chroma pred mode (Cb only), CBF, cross-component info and residual.
+  m_CABACEstimator->resetBits();
+
+  if( compID == COMPONENT_Cb )  // the chroma pred mode is passed once, with Cb
+  {
+    //intra mode coding
+    PredictionUnit &pu = *cs.getPU( partitioner.currArea().lumaPos(), partitioner.chType );
+    m_CABACEstimator->intra_chroma_pred_mode( pu );
+    //xEncIntraHeader(cs, partitioner, false, true);
+  }
+  CHECK( partitioner.currTrDepth != 1, "error in the depth!" );  // this helper is only valid at transform depth 1
+  const UnitArea &currArea = partitioner.currArea();
+
+  TransformUnit &currTU = *cs.getTU( currArea.blocks[partitioner.chType], partitioner.chType );
+
+  //cbf coding
+  const bool prevCbf = ( compID == COMPONENT_Cr ? TU::getCbfAtDepth( currTU, COMPONENT_Cb, partitioner.currTrDepth ) : false );  // the Cb CBF is handed over as prevCbf when coding Cr
+  m_CABACEstimator->cbf_comp( cs, TU::getCbfAtDepth( currTU, compID, partitioner.currTrDepth ), currArea.blocks[compID], partitioner.currTrDepth - 1, prevCbf );
+  //coeffs coding and cross comp coding
+  if( TU::hasCrossCompPredInfo( currTU, compID ) )
+  {
+    m_CABACEstimator->cross_comp_pred( currTU, compID );
+  }
+  if( TU::getCbf( currTU, compID ) )
+  {
+    m_CABACEstimator->residual_coding( currTU, compID );
+  }
+
+  uint64_t fracBits = m_CABACEstimator->getEstFracBits();
+  return fracBits;
+}
+
+uint64_t IntraSearch::xGetIntraFracBitsQTChroma(TransformUnit& currTU, const ComponentID &compID)
+{  // Returns the estimator's fractional bit count for the chroma CBFs, joint-CbCr flag and residual of currTU.
+  m_CABACEstimator->resetBits();
+
+  if( TU::hasCrossCompPredInfo( currTU, compID ) )
+  {
+    m_CABACEstimator->cross_comp_pred( currTU, compID );
+  }
+
+  // Include Cbf and jointCbCr flags here as we make decisions across components
+  CodingStructure &cs = *currTU.cs;
+
+  if ( currTU.jointCbCr )  // joint mode: both CBFs and both residuals are counted in this branch
+  {
+    const int cbfMask = ( TU::getCbf( currTU, COMPONENT_Cb ) ? 2 : 0 ) + ( TU::getCbf( currTU, COMPONENT_Cr ) ? 1 : 0 );  // bit 1 = Cb CBF, bit 0 = Cr CBF
+    m_CABACEstimator->cbf_comp( cs, cbfMask>>1, currTU.blocks[ COMPONENT_Cb ], currTU.depth, false );
+    m_CABACEstimator->cbf_comp( cs, cbfMask &1, currTU.blocks[ COMPONENT_Cr ], currTU.depth, cbfMask>>1 );
+    if( cbfMask )
+      m_CABACEstimator->joint_cb_cr( currTU, cbfMask );
+    if( cbfMask >> 1 )
+      m_CABACEstimator->residual_coding( currTU, COMPONENT_Cb );
+    if( cbfMask & 1 )
+      m_CABACEstimator->residual_coding( currTU, COMPONENT_Cr );
+  }
+  else
+  {
+    if ( compID == COMPONENT_Cb )
+      m_CABACEstimator->cbf_comp( cs, TU::getCbf( currTU, compID ), currTU.blocks[ compID ], currTU.depth, false );
+    else
+    {
+      const bool cbCbf    = TU::getCbf( currTU, COMPONENT_Cb );
+      const bool crCbf    = TU::getCbf( currTU, compID );
+      const int  cbfMask  = ( cbCbf ? 2 : 0 ) + ( crCbf ? 1 : 0 );
+      m_CABACEstimator->cbf_comp( cs, crCbf, currTU.blocks[ compID ], currTU.depth, cbCbf );
+      m_CABACEstimator->joint_cb_cr( currTU, cbfMask );  // the joint flag is passed together with the non-Cb component
+    }
+  }
+
+  if( !currTU.jointCbCr && TU::getCbf( currTU, compID ) )  // the joint branch above has already passed its residuals
+  {
+    m_CABACEstimator->residual_coding( currTU, compID );
+  }
+
+  uint64_t fracBits = m_CABACEstimator->getEstFracBits();
+  return fracBits;
+}
+
+void IntraSearch::xIntraCodingTUBlock(TransformUnit &tu, const ComponentID &compID, const bool &checkCrossCPrediction, Distortion& ruiDist, const int &default0Save1Load2, uint32_t* numSig, std::vector<TrMode>* trModes, const bool loadTr)
+{
+  if (!tu.blocks[compID].valid())
+  {
+    return;
+  }
+
+  CodingStructure &cs                       = *tu.cs;
+  m_pcRdCost->setChromaFormat(cs.sps->getChromaFormatIdc());
+
+  const CompArea      &area                 = tu.blocks[compID];
+  const SPS           &sps                  = *cs.sps;
+  const PPS           &pps                  = *cs.pps;
+
+  const ChannelType    chType               = toChannelType(compID);
+  const int            bitDepth             = sps.getBitDepth(chType);
+
+  PelBuf         piOrg                      = cs.getOrgBuf    (area);
+  PelBuf         piPred                     = cs.getPredBuf   (area);
+  PelBuf         piResi                     = cs.getResiBuf   (area);
+  PelBuf         piOrgResi                  = cs.getOrgResiBuf(area);
+  PelBuf         piReco                     = cs.getRecoBuf   (area);
+
+  const PredictionUnit &pu                  = *cs.getPU(area.pos(), chType);
+  const uint32_t           uiChFinalMode        = PU::getFinalIntraMode(pu, chType);
+
+  const bool           bUseCrossCPrediction = pps.getPpsRangeExtension().getCrossComponentPredictionEnabledFlag() && isChroma( compID ) && PU::isChromaIntraModeCrossCheckMode( pu ) && checkCrossCPrediction;
+  const bool           ccUseRecoResi        = m_pcEncCfg->getUseReconBasedCrossCPredictionEstimate();
+
+
+  //===== init availability pattern =====
+  CHECK( tu.jointCbCr && compID == COMPONENT_Cr, "wrong combination of compID and jointCbCr" );
+  bool jointCbCr = tu.jointCbCr && compID == COMPONENT_Cb;
+
+  if (compID == COMPONENT_Y || (isChroma(compID) && tu.cu->bdpcmModeChroma))
+  {
+  PelBuf sharedPredTS( m_pSharedPredTransformSkip[compID], area );
+  if( default0Save1Load2 != 2 )
+  {
+    bool predRegDiffFromTB = CU::isPredRegDiffFromTB(*tu.cu, compID);
+    bool firstTBInPredReg = CU::isFirstTBInPredReg(*tu.cu, compID, area);
+    CompArea areaPredReg(COMPONENT_Y, tu.chromaFormat, area);
+    if (tu.cu->ispMode && isLuma(compID))
+    {
+      if (predRegDiffFromTB)
+      {
+        if (firstTBInPredReg)
+        {
+          CU::adjustPredArea(areaPredReg);
+          initIntraPatternChTypeISP(*tu.cu, areaPredReg, piReco);
+        }
+      }
+      else
+        initIntraPatternChTypeISP(*tu.cu, area, piReco);
+    }
+    else
+    {
+      initIntraPatternChType(*tu.cu, area);
+    }
+
+    //===== get prediction signal =====
+    if(compID != COMPONENT_Y && !tu.cu->bdpcmModeChroma && PU::isLMCMode(uiChFinalMode))
+    {
+      {
+        xGetLumaRecPixels( pu, area );
+      }
+      predIntraChromaLM( compID, piPred, pu, area, uiChFinalMode );
+    }
+    else
+    {
+      if( PU::isMIP( pu, chType ) )
+      {
+        initIntraMip( pu, area );
+        predIntraMip( compID, piPred, pu );
+      }
+      else
+      {
+        if (predRegDiffFromTB)
+        {
+          if (firstTBInPredReg)
+          {
+            PelBuf piPredReg = cs.getPredBuf(areaPredReg);
+            predIntraAng(compID, piPredReg, pu);
+          }
+        }
+        else
+          predIntraAng(compID, piPred, pu);
+      }
+    }
+
+
+    // save prediction
+    if( default0Save1Load2 == 1 )
+    {
+      sharedPredTS.copyFrom( piPred );
+    }
+  }
+  else
+  {
+    // load prediction
+    piPred.copyFrom( sharedPredTS );
+  }
+  }
+
+
+  DTRACE( g_trace_ctx, D_PRED, "@(%4d,%4d) [%2dx%2d] IMode=%d\n", tu.lx(), tu.ly(), tu.lwidth(), tu.lheight(), uiChFinalMode );
+  //DTRACE_PEL_BUF( D_PRED, piPred, tu, tu.cu->predMode, COMPONENT_Y );
+
+  const Slice           &slice = *cs.slice;
+  bool flag = slice.getPicHeader()->getLmcsEnabledFlag() && (slice.isIntra() || (!slice.isIntra() && m_pcReshape->getCTUFlag()));
+  if (isLuma(compID))
+  {
+  //===== get residual signal =====
+  piResi.copyFrom( piOrg  );
+  if (slice.getPicHeader()->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag() && compID == COMPONENT_Y)
+  {
+    CompArea      tmpArea(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size());
+    PelBuf tmpPred = m_tmpStorageLCU.getBuf(tmpArea);
+    tmpPred.copyFrom(piPred);
+    piResi.rspSignal(m_pcReshape->getFwdLUT());
+    piResi.subtract(tmpPred);
+  }
+  else
+  piResi.subtract( piPred );
+
+  if (pps.getPpsRangeExtension().getCrossComponentPredictionEnabledFlag() && isLuma(compID))
+  {
+    piOrgResi.copyFrom (piResi);
+  }
+
+  if (bUseCrossCPrediction)
+  {
+    if (xCalcCrossComponentPredictionAlpha(tu, compID, ccUseRecoResi) == 0)
+    {
+      return;
+    }
+    CrossComponentPrediction::crossComponentPrediction(tu, compID, cs.getResiBuf(tu.Y()), piResi, piResi, false);
+  }
+  }
+
+  //===== transform and quantization =====
+  //--- init rate estimation arrays for RDOQ ---
+  //--- transform and quantization           ---
+  TCoeff uiAbsSum = 0;
+
+  const QpParam cQP(tu, compID);
+
+#if RDOQ_CHROMA_LAMBDA
+  m_pcTrQuant->selectLambda(compID);
+#endif
+
+  flag =flag && (tu.blocks[compID].width*tu.blocks[compID].height > 4);
+  if (flag && isChroma(compID) && slice.getPicHeader()->getLmcsChromaResidualScaleFlag() )
+  {
+    int cResScaleInv = tu.getChromaAdj();
+    double cResScale = (double)(1 << CSCALE_FP_PREC) / (double)cResScaleInv;
+    m_pcTrQuant->setLambda(m_pcTrQuant->getLambda() / (cResScale*cResScale));
+  }
+
+  const CompArea &crArea = tu.blocks     [ COMPONENT_Cr ];
+  PelBuf          crOrg  = cs.getOrgBuf  ( crArea );
+  PelBuf          crPred = cs.getPredBuf ( crArea );
+  PelBuf          crResi = cs.getResiBuf ( crArea );
+  PelBuf          crReco = cs.getRecoBuf ( crArea );
+
+  if ( jointCbCr )
+  {
+    // Lambda is loosened for the joint mode with respect to single modes as the same residual is used for both chroma blocks
+    const int    absIct = abs( TU::getICTMode(tu) );
+    const double lfact  = ( absIct == 1 || absIct == 3 ? 0.8 : 0.5 );
+    m_pcTrQuant->setLambda( lfact * m_pcTrQuant->getLambda() );
+  }
+  if ( sps.getJointCbCrEnabledFlag() && isChroma(compID) && (tu.cu->cs->slice->getSliceQp() > 18) )
+  {
+    m_pcTrQuant->setLambda( 1.3 * m_pcTrQuant->getLambda() );
+  }
+
+  if( isLuma(compID) )
+  {
+    if (trModes)
+    {
+      m_pcTrQuant->transformNxN(tu, compID, cQP, trModes, m_pcEncCfg->getMTSIntraMaxCand());
+      tu.mtsIdx[compID] = trModes->at(0).first;
+    }
+#if JVET_AHG14_LOSSLESS
+    if( !( m_pcEncCfg->getCostMode() == COST_LOSSLESS_CODING && tu.mtsIdx[compID] == 0 ) || tu.cu->bdpcmMode != 0 )
+    {
+      m_pcTrQuant->transformNxN(tu, compID, cQP, uiAbsSum, m_CABACEstimator->getCtx(), loadTr);
+    }
+#else
+    m_pcTrQuant->transformNxN(tu, compID, cQP, uiAbsSum, m_CABACEstimator->getCtx(), loadTr);
+#endif
+
+
+  DTRACE( g_trace_ctx, D_TU_ABS_SUM, "%d: comp=%d, abssum=%d\n", DTRACE_GET_COUNTER( g_trace_ctx, D_TU_ABS_SUM ), compID, uiAbsSum );
+
+  if (tu.cu->ispMode && isLuma(compID) && CU::isISPLast(*tu.cu, area, area.compID) && CU::allLumaCBFsAreZero(*tu.cu))
+  {
+    // ISP has to have at least one non-zero CBF
+    ruiDist = MAX_INT;
+    return;
+  }
+
+#if JVET_AHG14_LOSSLESS
+  if( ( m_pcEncCfg->getCostMode() == COST_LOSSLESS_CODING && tu.mtsIdx[compID] == 0 ) && 0 == tu.cu->bdpcmMode )
+  {
+    uiAbsSum = 0;
+    tu.getCoeffs( compID ).fill( 0 );
+    TU::setCbfAtDepth( tu, compID, tu.depth, 0 );
+  }
+#endif
+
+  //--- inverse transform ---
+  if (uiAbsSum > 0)
+  {
+    m_pcTrQuant->invTransformNxN(tu, compID, piResi, cQP);
+  }
+  else
+  {
+    piResi.fill(0);
+  }
+  }
+  else // chroma
+  {
+    int         codedCbfMask  = 0;
+    ComponentID codeCompId    = (tu.jointCbCr ? (tu.jointCbCr >> 1 ? COMPONENT_Cb : COMPONENT_Cr) : compID);
+    const QpParam qpCbCr(tu, codeCompId);
+
+    if( tu.jointCbCr )
+    {
+      ComponentID otherCompId = ( codeCompId==COMPONENT_Cr ? COMPONENT_Cb : COMPONENT_Cr );
+      tu.getCoeffs( otherCompId ).fill(0); // do we need that?
+      TU::setCbfAtDepth (tu, otherCompId, tu.depth, false );
+    }
+    PelBuf& codeResi = ( codeCompId == COMPONENT_Cr ? crResi : piResi );
+    uiAbsSum = 0;
+
+    if (trModes)
+    {
+        m_pcTrQuant->transformNxN(tu, compID, qpCbCr, trModes, m_pcEncCfg->getMTSIntraMaxCand());
+        tu.mtsIdx[compID] = trModes->at(0).first;
+    }
+    // encoder bugfix: Set loadTr to avoid redundant transform process
+#if JVET_AHG14_LOSSLESS
+    if (!(m_pcEncCfg->getCostMode() == COST_LOSSLESS_CODING && tu.mtsIdx[compID] == 0) || tu.cu->bdpcmModeChroma != 0)
+    {
+        m_pcTrQuant->transformNxN(tu, codeCompId, qpCbCr, uiAbsSum, m_CABACEstimator->getCtx(), loadTr);
+    }
+#else
+    m_pcTrQuant->transformNxN(tu, codeCompId, qpCbCr, uiAbsSum, m_CABACEstimator->getCtx(), loadTr);
+#endif
+
+#if JVET_AHG14_LOSSLESS
+    if ((m_pcEncCfg->getCostMode() == COST_LOSSLESS_CODING && tu.mtsIdx[compID] == 0) && 0 == tu.cu->bdpcmModeChroma)
+    {
+        uiAbsSum = 0;
+        tu.getCoeffs(compID).fill(0);
+        TU::setCbfAtDepth(tu, compID, tu.depth, 0);
+    }
+#endif
+
+    DTRACE( g_trace_ctx, D_TU_ABS_SUM, "%d: comp=%d, abssum=%d\n", DTRACE_GET_COUNTER( g_trace_ctx, D_TU_ABS_SUM ), codeCompId, uiAbsSum );
+    if( uiAbsSum > 0 )
+    {
+      m_pcTrQuant->invTransformNxN(tu, codeCompId, codeResi, qpCbCr);
+      codedCbfMask += ( codeCompId == COMPONENT_Cb ? 2 : 1 );
+    }
+    else
+    {
+      codeResi.fill(0);
+    }
+
+    if( tu.jointCbCr )
+    {
+      if( tu.jointCbCr == 3 && codedCbfMask == 2 )
+      {
+        codedCbfMask = 3;
+        TU::setCbfAtDepth (tu, COMPONENT_Cr, tu.depth, true );
+      }
+      if( tu.jointCbCr != codedCbfMask )
+      {
+        ruiDist = std::numeric_limits<Distortion>::max();
+        return;
+      }
+      m_pcTrQuant->invTransformICT( tu, piResi, crResi );
+      uiAbsSum = codedCbfMask;
+    }
+  }
+
+  //===== reconstruction =====
+  if ( flag && uiAbsSum > 0 && isChroma(compID) && slice.getPicHeader()->getLmcsChromaResidualScaleFlag() )
+  {
+    piResi.scaleSignal(tu.getChromaAdj(), 0, tu.cu->cs->slice->clpRng(compID));
+    if( jointCbCr )
+    {
+      crResi.scaleSignal(tu.getChromaAdj(), 0, tu.cu->cs->slice->clpRng(COMPONENT_Cr));
+    }
   }
   if (bUseCrossCPrediction)
   {
-    CrossComponentPrediction::crossComponentPrediction(tu, compID, cs.getResiBuf(tu.Y()), piResi, piResi, true);
+    CrossComponentPrediction::crossComponentPrediction(tu, compID, cs.getResiBuf(tu.Y()), piResi, piResi, true);
+    if( jointCbCr )
+    {
+      CrossComponentPrediction::crossComponentPrediction(tu, COMPONENT_Cr, cs.getResiBuf(tu.Y()), crResi, crResi, true);
+    }
+  }
+
+  if (slice.getPicHeader()->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag() && compID == COMPONENT_Y)
+  {
+    CompArea      tmpArea(COMPONENT_Y, area.chromaFormat, Position(0,0), area.size());
+    PelBuf tmpPred = m_tmpStorageLCU.getBuf(tmpArea);
+    tmpPred.copyFrom(piPred);
+    piReco.reconstruct(tmpPred, piResi, cs.slice->clpRng(compID));
+  }
+  else
+  {
+    piReco.reconstruct(piPred, piResi, cs.slice->clpRng( compID ));
+    if( jointCbCr )
+    {
+      crReco.reconstruct(crPred, crResi, cs.slice->clpRng( COMPONENT_Cr ));
+    }
+  }
+
+
+  //===== update distortion =====
+#if WCG_EXT
+  if (m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() || (m_pcEncCfg->getLmcs()
+    && slice.getPicHeader()->getLmcsEnabledFlag() && (m_pcReshape->getCTUFlag() || (isChroma(compID) && m_pcEncCfg->getReshapeIntraCMD()))))
+  {
+    const CPelBuf orgLuma = cs.getOrgBuf( cs.area.blocks[COMPONENT_Y] );
+    if (compID == COMPONENT_Y  && !(m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled()))
+    {
+      CompArea      tmpArea1(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size());
+      PelBuf tmpRecLuma = m_tmpStorageLCU.getBuf(tmpArea1);
+      tmpRecLuma.copyFrom(piReco);
+      tmpRecLuma.rspSignal(m_pcReshape->getInvLUT());
+      ruiDist += m_pcRdCost->getDistPart(piOrg, tmpRecLuma, sps.getBitDepth(toChannelType(compID)), compID, DF_SSE_WTD, &orgLuma);
+    }
+    else
+    {
+      ruiDist += m_pcRdCost->getDistPart(piOrg, piReco, bitDepth, compID, DF_SSE_WTD, &orgLuma);
+      if( jointCbCr )
+      {
+        ruiDist += m_pcRdCost->getDistPart(crOrg, crReco, bitDepth, COMPONENT_Cr, DF_SSE_WTD, &orgLuma);
+      }
+    }
+  }
+  else
+#endif
+  {
+    ruiDist += m_pcRdCost->getDistPart( piOrg, piReco, bitDepth, compID, DF_SSE );
+    if( jointCbCr )
+    {
+      ruiDist += m_pcRdCost->getDistPart( crOrg, crReco, bitDepth, COMPONENT_Cr, DF_SSE );
+    }
+  }
+}
+
+void IntraSearch::xIntraCodingACTTUBlock(TransformUnit &tu, const ComponentID &compID, Distortion& ruiDist, std::vector<TrMode>* trModes, const bool loadTr)
+{ // Runs transformNxN / invTransformNxN for one TU component with colour-transform lambda adjustment enabled, then adds the SSE between original and reconstructed residual to ruiDist.
+  if (!tu.blocks[compID].valid())
+  {
+    CHECK(1, "tu does not exist"); // unconditional CHECK: reached only when the component block is not valid
+  }
+
+  CodingStructure     &cs = *tu.cs;
+  const SPS           &sps = *cs.sps;
+  const Slice         &slice = *cs.slice;
+  const CompArea      &area = tu.blocks[compID];
+  const CompArea &crArea = tu.blocks[COMPONENT_Cr]; // Cr buffers are fetched unconditionally; they are only used on the joint Cb-Cr path
+
+  PelBuf              piOrgResi = cs.getOrgResiBuf(area);
+  PelBuf              piResi = cs.getResiBuf(area);
+  PelBuf              crOrgResi = cs.getOrgResiBuf(crArea);
+  PelBuf              crResi = cs.getResiBuf(crArea);
+  TCoeff              uiAbsSum = 0;
+
+  CHECK(tu.jointCbCr && compID == COMPONENT_Cr, "wrong combination of compID and jointCbCr");
+  bool jointCbCr = tu.jointCbCr && compID == COMPONENT_Cb; // joint Cb-Cr coding is only handled when called for Cb
+
+  m_pcRdCost->setChromaFormat(cs.sps->getChromaFormatIdc());
+
+  m_pcTrQuant->lambdaAdjustColorTrans(true); // switched back with lambdaAdjustColorTrans(false) on both exit paths below
+
+  if (jointCbCr)
+  {
+    ComponentID compIdCode = (tu.jointCbCr >> 1 ? COMPONENT_Cb : COMPONENT_Cr); // jointCbCr values 2 and 3 select Cb as the coded component, value 1 selects Cr
+    m_pcTrQuant->selectLambda(compIdCode);
+  }
+  else
+  {
+    m_pcTrQuant->selectLambda(compID);
+  }
+
+  bool flag = slice.getPicHeader()->getLmcsEnabledFlag() && (slice.isIntra() || (!slice.isIntra() && m_pcReshape->getCTUFlag())) && (tu.blocks[compID].width*tu.blocks[compID].height > 4); // LMCS active for this slice/CTU and block larger than 4 samples
+  if (flag && isChroma(compID) && slice.getPicHeader()->getLmcsChromaResidualScaleFlag())
+  {
+    int    cResScaleInv = tu.getChromaAdj();
+    double cResScale = (double)(1 << CSCALE_FP_PREC) / (double)cResScaleInv;
+    m_pcTrQuant->setLambda(m_pcTrQuant->getLambda() / (cResScale*cResScale)); // lambda is divided by the squared chroma residual scale
+  }
+
+  if (jointCbCr)
+  {
+    // Lambda is loosened for the joint mode with respect to single modes as the same residual is used for both chroma blocks
+    const int    absIct = abs(TU::getICTMode(tu));
+    const double lfact = (absIct == 1 || absIct == 3 ? 0.8 : 0.5);
+    m_pcTrQuant->setLambda(lfact * m_pcTrQuant->getLambda());
+  }
+  if (sps.getJointCbCrEnabledFlag() && isChroma(compID) && (slice.getSliceQp() > 18))
+  {
+    m_pcTrQuant->setLambda(1.3 * m_pcTrQuant->getLambda()); // applied to every chroma call when joint Cb-Cr is enabled in the SPS and slice QP > 18
+  }
+
+  if (isLuma(compID))
+  {
+    QpParam cQP(tu, compID);
+    for (int qpIdx = 0; qpIdx < 2; qpIdx++)
+    {
+      cQP.Qps[qpIdx] = cQP.Qps[qpIdx] + (compID == COMPONENT_Cr ? DELTA_QP_FOR_Co : DELTA_QP_FOR_Y_Cg); // NOTE(review): inside the isLuma branch the COMPONENT_Cr alternative is presumably never taken
+      cQP.pers[qpIdx] = cQP.Qps[qpIdx] / 6;
+      cQP.rems[qpIdx] = cQP.Qps[qpIdx] % 6; // NOTE(review): no clipping here; a negative adjusted QP would give a negative remainder — confirm the QP range
+    }
+
+    if (trModes)
+    {
+      m_pcTrQuant->transformNxN(tu, compID, cQP, trModes, m_pcEncCfg->getMTSIntraMaxCand());
+      tu.mtsIdx[compID] = trModes->at(0).first; // continue with the first entry of the candidate list after the call above
+    }
+    m_pcTrQuant->transformNxN(tu, compID, cQP, uiAbsSum, m_CABACEstimator->getCtx(), loadTr);
+
+    if (uiAbsSum > 0)
+    {
+      m_pcTrQuant->invTransformNxN(tu, compID, piResi, cQP);
+    }
+    else
+    {
+      piResi.fill(0); // no coefficients: the residual buffer is zeroed
+    }
+  }
+  else
+  {
+    int         codedCbfMask = 0; // 2 is added when Cb is coded, 1 when Cr is coded
+    ComponentID codeCompId = (tu.jointCbCr ? (tu.jointCbCr >> 1 ? COMPONENT_Cb : COMPONENT_Cr) : compID);
+    QpParam qpCbCr(tu, codeCompId);
+    for (int qpIdx = 0; qpIdx < 2; qpIdx++)
+    {
+      qpCbCr.Qps[qpIdx] = qpCbCr.Qps[qpIdx] + (codeCompId == COMPONENT_Cr ? DELTA_QP_FOR_Co : DELTA_QP_FOR_Y_Cg);
+      qpCbCr.pers[qpIdx] = qpCbCr.Qps[qpIdx] / 6;
+      qpCbCr.rems[qpIdx] = qpCbCr.Qps[qpIdx] % 6;
+    }
+
+    if (tu.jointCbCr)
+    {
+      ComponentID otherCompId = (codeCompId == COMPONENT_Cr ? COMPONENT_Cb : COMPONENT_Cr);
+      tu.getCoeffs(otherCompId).fill(0); // the chroma component that is not coded gets zero coefficients and a cleared CBF
+      TU::setCbfAtDepth(tu, otherCompId, tu.depth, false);
+    }
+
+    PelBuf& codeResi = (codeCompId == COMPONENT_Cr ? crResi : piResi);
+    uiAbsSum = 0;
+    m_pcTrQuant->transformNxN(tu, codeCompId, qpCbCr, uiAbsSum, m_CABACEstimator->getCtx()); // NOTE(review): unlike the luma path, trModes and loadTr are not used for chroma
+    if (uiAbsSum > 0)
+    {
+      m_pcTrQuant->invTransformNxN(tu, codeCompId, codeResi, qpCbCr);
+      codedCbfMask += (codeCompId == COMPONENT_Cb ? 2 : 1);
+    }
+    else
+    {
+      codeResi.fill(0);
+    }
+
+    if (tu.jointCbCr)
+    {
+      if (tu.jointCbCr == 3 && codedCbfMask == 2)
+      {
+        codedCbfMask = 3; // mode 3 with Cb coded: the Cr CBF is set as well
+        TU::setCbfAtDepth(tu, COMPONENT_Cr, tu.depth, true);
+      }
+      if (tu.jointCbCr != codedCbfMask)
+      {
+        ruiDist = std::numeric_limits<Distortion>::max(); // coded CBF pattern does not match the requested joint mode: overwrite (not accumulate) the distortion with the maximum
+        m_pcTrQuant->lambdaAdjustColorTrans(false);
+        return;
+      }
+      m_pcTrQuant->invTransformICT(tu, piResi, crResi);
+      uiAbsSum = codedCbfMask; // non-zero from here on, so the LMCS scaling below is not skipped
+    }
+  }
+
+  if (flag && uiAbsSum > 0 && isChroma(compID) && slice.getPicHeader()->getLmcsChromaResidualScaleFlag())
+  {
+    piResi.scaleSignal(tu.getChromaAdj(), 0, slice.clpRng(compID));
+    if (jointCbCr)
+    {
+      crResi.scaleSignal(tu.getChromaAdj(), 0, slice.clpRng(COMPONENT_Cr));
+    }
+  }
+
+  m_pcTrQuant->lambdaAdjustColorTrans(false);
+
+  ruiDist += m_pcRdCost->getDistPart(piOrgResi, piResi, sps.getBitDepth(toChannelType(compID)), compID, DF_SSE); // distortion is measured between residual buffers, not between original and reconstruction
+  if (jointCbCr)
+  {
+    ruiDist += m_pcRdCost->getDistPart(crOrgResi, crResi, sps.getBitDepth(toChannelType(COMPONENT_Cr)), COMPONENT_Cr, DF_SSE);
+  }
+}
+
+bool IntraSearch::xIntraCodingLumaISP(CodingStructure& cs, Partitioner& partitioner, const double bestCostSoFar)
+{ // Codes the luma sub-partitions of the current ISP split one by one, accumulating cost/dist/fracBits in cs; returns false (with cs.cost = MAX_DOUBLE on the early-exit paths) when ISP cannot beat bestCostSoFar.
+  int               subTuCounter = 0;
+  const CodingUnit& cu = *cs.getCU(partitioner.currArea().lumaPos(), partitioner.chType);
+  bool              earlySkipISP = false;
+  bool              splitCbfLuma = false; // OR of the luma CBFs of all sub-partitions coded so far
+  const PartSplit   ispType = CU::getISPType(cu, COMPONENT_Y);
+
+  cs.cost = 0;
+
+  partitioner.splitCurrArea(ispType, cs);
+
+  CUCtx cuCtx;
+  cuCtx.isDQPCoded = true;
+  cuCtx.isChromaQpAdjCoded = true;
+
+  do   // subpartitions loop
+  {
+    uint32_t   numSig = 0;
+    Distortion singleDistTmpLuma = 0;
+    uint64_t   singleTmpFracBits = 0;
+    double     singleCostTmp = 0;
+
+    TransformUnit& tu = cs.addTU(CS::getArea(cs, partitioner.currArea(), partitioner.chType), partitioner.chType);
+    tu.depth = partitioner.currTrDepth;
+
+    // Encode TU
+    xIntraCodingTUBlock(tu, COMPONENT_Y, false, singleDistTmpLuma, 0, &numSig);
+
+    if (singleDistTmpLuma == MAX_INT)   // all zero CBF skip (NOTE(review): sentinel presumably written by xIntraCodingTUBlock — confirm)
+    {
+      earlySkipISP = true; // dead store: the function returns two statements later
+      partitioner.exitCurrSplit();
+      cs.cost = MAX_DOUBLE;
+      return false;
+    }
+
+    {
+      if (m_pcRdCost->calcRdCost(cs.fracBits, cs.dist + singleDistTmpLuma) > bestCostSoFar)
+      {
+        // The accumulated cost + distortion is already larger than the best cost so far, so it is not necessary to calculate the rate
+        earlySkipISP = true;
+      }
+      else
+      {
+        singleTmpFracBits = xGetIntraFracBitsQT(cs, partitioner, true, false, subTuCounter, ispType, &cuCtx);
+      }
+      singleCostTmp = m_pcRdCost->calcRdCost(singleTmpFracBits, singleDistTmpLuma); // singleTmpFracBits is still 0 when the rate estimation was skipped above
+    }
+
+    cs.cost += singleCostTmp;
+    cs.dist += singleDistTmpLuma;
+    cs.fracBits += singleTmpFracBits;
+
+    subTuCounter++;
+
+    splitCbfLuma |= TU::getCbfAtDepth(*cs.getTU(partitioner.currArea().lumaPos(), partitioner.chType, subTuCounter - 1), COMPONENT_Y, partitioner.currTrDepth);
+    int nSubPartitions = m_ispTestedModes[cu.lfnstIdx].numTotalParts[cu.ispMode - 1];
+    if (subTuCounter < nSubPartitions)
+    {
+      // exit condition if the accumulated cost is already larger than the best cost so far (no impact in RD performance)
+      if (cs.cost > bestCostSoFar)
+      {
+        earlySkipISP = true;
+        break;
+      }
+      else if (subTuCounter < nSubPartitions) // redundant test: already guaranteed by the enclosing if
+      {
+        // more restrictive exit condition
+        double threshold = nSubPartitions == 2 ? 0.95 : subTuCounter == 1 ? 0.83 : 0.91;
+        if (subTuCounter < nSubPartitions && cs.cost > bestCostSoFar * threshold) // first operand is always true here
+        {
+          earlySkipISP = true;
+          break;
+        }
+      }
+    }
+  } while (partitioner.nextPart(cs));   // subpartitions loop
+
+  partitioner.exitCurrSplit();
+  const UnitArea& currArea = partitioner.currArea();
+  const uint32_t  currDepth = partitioner.currTrDepth;
+
+  if (earlySkipISP)
+  {
+    cs.cost = MAX_DOUBLE;
+  }
+  else
+  {
+    cs.cost = m_pcRdCost->calcRdCost(cs.fracBits, cs.dist);
+    // The cost check is necessary here again to avoid superfluous operations if the maximum number of coded subpartitions was reached and yet ISP did not win
+    if (cs.cost < bestCostSoFar)
+    {
+      cs.setDecomp(cu.Y());
+      cs.picture->getRecoBuf(currArea.Y()).copyFrom(cs.getRecoBuf(currArea.Y()));
+
+      for (auto& ptu : cs.tus)
+      {
+        if (currArea.Y().contains(ptu->Y()))
+        {
+          TU::setCbfAtDepth(*ptu, COMPONENT_Y, currDepth, splitCbfLuma ? 1 : 0); // every luma TU inside the area gets the combined CBF at the parent depth
+        }
+      }
+    }
+    else
+    {
+      earlySkipISP = true; // ISP did not win: cs.cost keeps the computed value (not MAX_DOUBLE) but false is returned
+    }
+  }
+  return !earlySkipISP;
+}
+
+
+bool IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &partitioner, const double bestCostSoFar, const int subTuIdx, const PartSplit ispType, const bool ispIsCurrentWinner, bool mtsCheckRangeFlag, int mtsFirstCheckId, int mtsLastCheckId, bool moreProbMTSIdxFirst )
+{ // Either tests the luma transform candidates for the current area as one TU, or recurses into its TU/ISP split; returns true when a usable RD cost was written to cs.cost.
+        int   subTuCounter = subTuIdx;
+  const UnitArea &currArea = partitioner.currArea();
+  const CodingUnit     &cu = *cs.getCU( currArea.lumaPos(), partitioner.chType );
+        bool  earlySkipISP = false;
+  uint32_t currDepth       = partitioner.currTrDepth;
+  const SPS &sps           = *cs.sps;
+  const PPS &pps           = *cs.pps;
+  const bool keepResi      = pps.getPpsRangeExtension().getCrossComponentPredictionEnabledFlag() || KEEP_PRED_AND_RESI_SIGNALS;
+  bool bCheckFull          = true;
+  bool bCheckSplit         = false;
+  bCheckFull               = !partitioner.canSplit( TU_MAX_TR_SPLIT, cs ); // the two flags are set as complements of each other, here and in the ISP override below
+  bCheckSplit              = partitioner.canSplit( TU_MAX_TR_SPLIT, cs );
+
+  if( cu.ispMode )
+  {
+    bCheckSplit = partitioner.canSplit( ispType, cs );
+    bCheckFull = !bCheckSplit;
+  }
+  uint32_t    numSig           = 0;
+
+  double     dSingleCost                        = MAX_DOUBLE;
+  Distortion uiSingleDistLuma                   = 0;
+  uint64_t   singleFracBits                     = 0;
+  bool       checkTransformSkip                 = sps.getTransformSkipEnabledFlag();
+  int        bestModeId[ MAX_NUM_COMPONENT ]    = { 0, 0, 0 };
+  uint8_t    nNumTransformCands                 = cu.mtsFlag ? 4 : 1;
+  uint8_t    numTransformIndexCands             = nNumTransformCands; // keeps the initial value; nNumTransformCands is recomputed below when LFNST is off
+
+  const TempCtx ctxStart  ( m_CtxCache, m_CABACEstimator->getCtx() );
+  TempCtx       ctxBest   ( m_CtxCache );
+
+  CodingStructure *csSplit = nullptr;
+  CodingStructure *csFull  = nullptr;
+
+  CUCtx cuCtx;
+  cuCtx.isDQPCoded = true;
+  cuCtx.isChromaQpAdjCoded = true;
+
+  if( bCheckSplit )
+  {
+    csSplit = &cs; // both pointers alias cs; only one of them is non-null
+  }
+  else if( bCheckFull )
+  {
+    csFull = &cs;
+  }
+
+  bool validReturnFull = false;
+
+  if( bCheckFull )
+  {
+    csFull->cost = 0.0;
+
+    TransformUnit &tu = csFull->addTU( CS::getArea( *csFull, currArea, partitioner.chType ), partitioner.chType );
+    tu.depth = currDepth;
+
+    const bool tsAllowed  = TU::isTSAllowed( tu, COMPONENT_Y );
+    const bool mtsAllowed = CU::isMTSAllowed( cu, COMPONENT_Y );
+    std::vector<TrMode> trModes;
+
+    if( sps.getUseLFNST() )
+    {
+      checkTransformSkip &= tsAllowed;
+      checkTransformSkip &= !cu.mtsFlag;
+      checkTransformSkip &= !cu.lfnstIdx;
+
+      if( !cu.mtsFlag && checkTransformSkip )
+      {
+        trModes.push_back( TrMode( 0, true ) ); //DCT2
+        trModes.push_back( TrMode( 1, true ) ); //TS
+      }
+    }
+    else
+    {
+      nNumTransformCands = 1 + ( tsAllowed ? 1 : 0 ) + ( mtsAllowed ? 4 : 0 ); // DCT + TS + 4 MTS = 6 tests
+
+      trModes.push_back( TrMode( 0, true ) ); //DCT2
+      if( tsAllowed )
+      {
+        trModes.push_back( TrMode( 1, true ) );
+      }
+      if( mtsAllowed )
+      {
+        for( int i = 2; i < 6; i++ )
+        {
+          trModes.push_back( TrMode( i, true ) );
+        }
+      }
+    }
+
+    CHECK( !tu.Y().valid(), "Invalid TU" );
+
+    CodingStructure &saveCS = *m_pSaveCS[0];
+
+    TransformUnit *tmpTU = nullptr; // only assigned when more than one mode is tested (isNotOnlyOneMode)
+
+    Distortion singleDistTmpLuma = 0;
+    uint64_t     singleTmpFracBits = 0;
+    double     singleCostTmp     = 0;
+    int        firstCheckId      = ( sps.getUseLFNST() && mtsCheckRangeFlag && cu.mtsFlag ) ? mtsFirstCheckId : 0;
+
+    //we add the MTS candidates to the loop. TransformSkip will still be the last one to be checked (when modeId == lastCheckId) as long as checkTransformSkip is true
+    int        lastCheckId       = sps.getUseLFNST() ? ( ( mtsCheckRangeFlag && cu.mtsFlag ) ? ( mtsLastCheckId + ( int ) checkTransformSkip ) : ( numTransformIndexCands - ( firstCheckId + 1 ) + ( int ) checkTransformSkip ) ) :
+                                   trModes[ nNumTransformCands - 1 ].first;
+    bool isNotOnlyOneMode        = sps.getUseLFNST() ? lastCheckId != firstCheckId : nNumTransformCands != 1;
+
+    if( isNotOnlyOneMode )
+    {
+      saveCS.pcv     = cs.pcv;
+      saveCS.picture = cs.picture;
+      saveCS.area.repositionTo(cs.area);
+      saveCS.clearTUs();
+      tmpTU = &saveCS.addTU(currArea, partitioner.chType);
+    }
+
+    bool    cbfBestMode      = false;
+    bool    cbfBestModeValid = false;
+    bool    cbfDCT2  = true;
+
+    double bestDCT2cost = MAX_DOUBLE;
+    double threshold = m_pcEncCfg->getUseFastISP() && !cu.ispMode && ispIsCurrentWinner && nNumTransformCands > 1 ? 1 + 1.4 / sqrt( cu.lwidth() * cu.lheight() ) : 1; // larger than 1 only for fast ISP, non-ISP CU, ISP currently winning and several candidates
+    for( int modeId = firstCheckId; modeId <= ( sps.getUseLFNST() ? lastCheckId : ( nNumTransformCands - 1 ) ); modeId++ )
+    {
+      uint8_t transformIndex = modeId;
+
+      if( sps.getUseLFNST() )
+      {
+        if( ( transformIndex < lastCheckId ) || ( ( transformIndex == lastCheckId ) && !checkTransformSkip ) ) //we avoid this if the mode is transformSkip
+        {
+          // Skip checking other transform candidates if zero CBF is encountered and it is the best transform so far
+          if( m_pcEncCfg->getUseFastLFNST() && transformIndex && !cbfBestMode && cbfBestModeValid )
+          {
+            continue;
+          }
+        }
+      }
+      else
+      {
+#if JVET_AHG14_LOSSLESS
+        if( !( m_pcEncCfg->getCostMode() == COST_LOSSLESS_CODING ) ) // the fast skips below are bypassed in lossless cost mode
+        {
+#endif
+        if( !cbfDCT2 || ( m_pcEncCfg->getUseTransformSkipFast() && bestModeId[ COMPONENT_Y ] == MTS_SKIP))
+        {
+          break;
+        }
+        if( !trModes[ modeId ].second )
+        {
+          continue;
+        }
+        //we compare the DCT-II cost against the best ISP cost so far (except for TS)
+        if (m_pcEncCfg->getUseFastISP() && !cu.ispMode && ispIsCurrentWinner && trModes[modeId].first != MTS_DCT2_DCT2 && (trModes[modeId].first != MTS_SKIP || !tsAllowed) && bestDCT2cost > bestCostSoFar * threshold)
+        {
+          continue;
+        }
+#if JVET_AHG14_LOSSLESS
+        }
+#endif
+        tu.mtsIdx[COMPONENT_Y] = trModes[modeId].first;
+      }
+
+
+      if ((modeId != firstCheckId) && isNotOnlyOneMode)
+      {
+        m_CABACEstimator->getCtx() = ctxStart; // every candidate after the first starts from the same entropy-coder state
+      }
+
+      int default0Save1Load2 = 0; // passed to xIntraCodingTUBlock; 0 = default, 1 = save, 2 = load as the name suggests — TODO confirm exact meaning in the callee
+      singleDistTmpLuma = 0;
+
+      if( modeId == firstCheckId && ( sps.getUseLFNST() ? ( modeId != lastCheckId ) : ( nNumTransformCands > 1 ) ) )
+      {
+        default0Save1Load2 = 1;
+      }
+      else if (modeId != firstCheckId)
+      {
+        if( sps.getUseLFNST() && !cbfBestModeValid )
+        {
+          default0Save1Load2 = 1;
+        }
+        else
+        {
+          default0Save1Load2 = 2;
+        }
+      }
+      if( cu.ispMode )
+      {
+        default0Save1Load2 = 0; // ISP CUs always use the default mode
+      }
+      if( sps.getUseLFNST() )
+      {
+        if( cu.mtsFlag )
+        {
+          if( moreProbMTSIdxFirst )
+          {
+            const ChannelType     chType      = toChannelType( COMPONENT_Y );
+            const CompArea&       area        = tu.blocks[ COMPONENT_Y ];
+            const PredictionUnit& pu          = *cs.getPU( area.pos(), chType );
+            uint32_t              uiIntraMode = pu.intraDir[ chType ];
+
+            if( transformIndex == 1 ) // indices 1 and 2 swap their MTS type depending on whether the intra mode is below 34
+            {
+              tu.mtsIdx[COMPONENT_Y] = (uiIntraMode < 34) ? MTS_DST7_DCT8 : MTS_DCT8_DST7;
+            }
+            else if( transformIndex == 2 )
+            {
+              tu.mtsIdx[COMPONENT_Y] = (uiIntraMode < 34) ? MTS_DCT8_DST7 : MTS_DST7_DCT8;
+            }
+            else
+            {
+              tu.mtsIdx[COMPONENT_Y] = MTS_DST7_DST7 + transformIndex;
+            }
+          }
+          else
+          {
+            tu.mtsIdx[COMPONENT_Y] = MTS_DST7_DST7 + transformIndex;
+          }
+        }
+        else
+        {
+          tu.mtsIdx[COMPONENT_Y] = transformIndex;
+        }
+
+        if( !cu.mtsFlag && checkTransformSkip )
+        {
+          xIntraCodingTUBlock( tu, COMPONENT_Y, false, singleDistTmpLuma, default0Save1Load2, &numSig, modeId == 0 ? &trModes : nullptr, true ); // the candidate list is handed over only for modeId 0
+          if( modeId == 0 )
+          {
+            for( int i = 0; i < 2; i++ )
+            {
+              if( trModes[ i ].second ) // NOTE(review): .second is presumably updated by the call above to flag candidates still worth testing — confirm
+              {
+                lastCheckId = trModes[ i ].first;
+              }
+            }
+          }
+        }
+        else
+        {
+          xIntraCodingTUBlock( tu, COMPONENT_Y, false, singleDistTmpLuma, default0Save1Load2, &numSig );
+        }
+      }
+      else
+      {
+        if( nNumTransformCands > 1 )
+        {
+          xIntraCodingTUBlock( tu, COMPONENT_Y, false, singleDistTmpLuma, default0Save1Load2, &numSig, modeId == 0 ? &trModes : nullptr, true );
+          if( modeId == 0 )
+          {
+            for( int i = 0; i < nNumTransformCands; i++ )
+            {
+              if( trModes[ i ].second )
+              {
+                lastCheckId = trModes[ i ].first; // ends up as the id of the last entry whose .second flag is set
+              }
+            }
+          }
+        }
+        else
+        {
+          xIntraCodingTUBlock( tu, COMPONENT_Y, false, singleDistTmpLuma, default0Save1Load2, &numSig );
+        }
+      }
+
+      //----- determine rate and r-d cost -----
+      if( ( sps.getUseLFNST() ? ( modeId == lastCheckId && modeId != 0 && checkTransformSkip ) : ( trModes[ modeId ].first != 0 ) ) && !TU::getCbfAtDepth( tu, COMPONENT_Y, currDepth ) )
+      {
+        //In order not to code TS flag when cbf is zero, the case for TS with cbf being zero is forbidden.
+        singleCostTmp = MAX_DOUBLE;
+      }
+      else
+      {
+        if( cu.ispMode && m_pcRdCost->calcRdCost( csFull->fracBits, csFull->dist + singleDistTmpLuma ) > bestCostSoFar )
+        {
+          earlySkipISP = true; // rate estimation is skipped; singleTmpFracBits keeps its previous value for the cost below
+        }
+        else
+        {
+          singleTmpFracBits = xGetIntraFracBitsQT( *csFull, partitioner, true, false, subTuCounter, ispType, &cuCtx );
+        }
+        singleCostTmp     = m_pcRdCost->calcRdCost( singleTmpFracBits, singleDistTmpLuma );
+      }
+
+      if ( !cu.ispMode && nNumTransformCands > 1 && modeId == firstCheckId )
+      {
+        bestDCT2cost = singleCostTmp; // cost of the first tested candidate, used by the fast-ISP comparison above
+      }
+
+      if (singleCostTmp < dSingleCost)
+      {
+        dSingleCost       = singleCostTmp;
+        uiSingleDistLuma  = singleDistTmpLuma;
+        singleFracBits    = singleTmpFracBits;
+
+        if( sps.getUseLFNST() )
+        {
+          bestModeId[ COMPONENT_Y ] = modeId;
+          cbfBestMode = TU::getCbfAtDepth( tu, COMPONENT_Y, currDepth );
+          cbfBestModeValid = true;
+          validReturnFull = true;
+        }
+        else
+        {
+          bestModeId[ COMPONENT_Y ] = trModes[ modeId ].first;
+          if( trModes[ modeId ].first == 0 )
+          {
+            cbfDCT2 = TU::getCbfAtDepth( tu, COMPONENT_Y, currDepth );
+          }
+        }
+
+        if( bestModeId[COMPONENT_Y] != lastCheckId ) // a later candidate may overwrite csFull, so the current best is backed up in saveCS/tmpTU
+        {
+          saveCS.getPredBuf( tu.Y() ).copyFrom( csFull->getPredBuf( tu.Y() ) );
+          saveCS.getRecoBuf( tu.Y() ).copyFrom( csFull->getRecoBuf( tu.Y() ) );
+
+          if( keepResi )
+          {
+            saveCS.getResiBuf   ( tu.Y() ).copyFrom( csFull->getResiBuf   ( tu.Y() ) );
+            saveCS.getOrgResiBuf( tu.Y() ).copyFrom( csFull->getOrgResiBuf( tu.Y() ) );
+          }
+
+          tmpTU->copyComponentFrom( tu, COMPONENT_Y );
+
+          ctxBest = m_CABACEstimator->getCtx();
+        }
+      }
+    }
+
+    if( sps.getUseLFNST() && !validReturnFull ) // with LFNST, no candidate improved on MAX_DOUBLE: the full test has no valid result
+    {
+      csFull->cost = MAX_DOUBLE;
+
+      if( bCheckSplit )
+      {
+        ctxBest = m_CABACEstimator->getCtx();
+      }
+    }
+    else
+    {
+      if( bestModeId[COMPONENT_Y] != lastCheckId ) // the best candidate was not the last one stored in csFull: restore it from the backup
+      {
+        csFull->getPredBuf( tu.Y() ).copyFrom( saveCS.getPredBuf( tu.Y() ) );
+        csFull->getRecoBuf( tu.Y() ).copyFrom( saveCS.getRecoBuf( tu.Y() ) );
+
+        if( keepResi )
+        {
+          csFull->getResiBuf   ( tu.Y() ).copyFrom( saveCS.getResiBuf   ( tu.Y() ) );
+          csFull->getOrgResiBuf( tu.Y() ).copyFrom( saveCS.getOrgResiBuf( tu.Y() ) );
+        }
+
+        tu.copyComponentFrom( *tmpTU, COMPONENT_Y );
+
+        if( !bCheckSplit )
+        {
+          m_CABACEstimator->getCtx() = ctxBest;
+        }
+      }
+      else if( bCheckSplit )
+      {
+        ctxBest = m_CABACEstimator->getCtx();
+      }
+
+      csFull->cost     += dSingleCost;
+      csFull->dist     += uiSingleDistLuma;
+      csFull->fracBits += singleFracBits;
+    }
+  }
+
+  bool validReturnSplit = false;
+  if( bCheckSplit )
+  {
+    //----- store full entropy coding status, load original entropy coding status -----
+    if( bCheckFull )
+    {
+      m_CABACEstimator->getCtx() = ctxStart;
+    }
+    //----- code splitted block -----
+    csSplit->cost = 0;
+
+    bool uiSplitCbfLuma  = false;
+    bool splitIsSelected = true;
+    if( partitioner.canSplit( TU_MAX_TR_SPLIT, cs ) )
+    {
+      partitioner.splitCurrArea( TU_MAX_TR_SPLIT, cs );
+    }
+
+    if( cu.ispMode )
+    {
+      partitioner.splitCurrArea( ispType, *csSplit );
+    }
+    do
+    {
+      bool tmpValidReturnSplit = xRecurIntraCodingLumaQT( *csSplit, partitioner, bestCostSoFar, subTuCounter, ispType, false, mtsCheckRangeFlag, mtsFirstCheckId, mtsLastCheckId ); // NOTE(review): moreProbMTSIdxFirst is not forwarded; this relies on a default argument declared elsewhere — confirm
+      subTuCounter += subTuCounter != -1 ? 1 : 0; // -1 is left untouched; any other value counts the coded sub-TUs
+      if( sps.getUseLFNST() && !tmpValidReturnSplit )
+      {
+        splitIsSelected = false;
+        break;
+      }
+
+      if( !cu.ispMode )
+      {
+        csSplit->setDecomp( partitioner.currArea().Y() );
+      }
+      else if( CU::isISPFirst( cu, partitioner.currArea().Y(), COMPONENT_Y ) )
+      {
+        csSplit->setDecomp( cu.Y() );
+      }
+
+      uiSplitCbfLuma |= TU::getCbfAtDepth( *csSplit->getTU( partitioner.currArea().lumaPos(), partitioner.chType, subTuCounter - 1 ), COMPONENT_Y, partitioner.currTrDepth );
+      if( cu.ispMode )
+      {
+        //exit condition if the accumulated cost is already larger than the best cost so far (no impact in RD performance)
+        if( csSplit->cost > bestCostSoFar )
+        {
+          earlySkipISP    = true;
+          splitIsSelected = false;
+          break;
+        }
+        else
+        {
+          //more restrictive exit condition
+          bool tuIsDividedInRows = CU::divideTuInRows( cu );
+          int nSubPartitions = tuIsDividedInRows ? cu.lheight() >> floorLog2(cu.firstTU->lheight()) : cu.lwidth() >> floorLog2(cu.firstTU->lwidth());
+          double threshold = nSubPartitions == 2 ? 0.95 : subTuCounter == 1 ? 0.83 : 0.91;
+          if( subTuCounter < nSubPartitions && csSplit->cost > bestCostSoFar*threshold )
+          {
+            earlySkipISP    = true;
+            splitIsSelected = false;
+            break;
+          }
+        }
+      }
+
+
+
+    } while( partitioner.nextPart( *csSplit ) );
+
+    partitioner.exitCurrSplit();
+
+    if( splitIsSelected )
+    {
+      for( auto &ptu : csSplit->tus )
+      {
+        if( currArea.Y().contains( ptu->Y() ) )
+        {
+          TU::setCbfAtDepth( *ptu, COMPONENT_Y, currDepth, uiSplitCbfLuma ? 1 : 0 ); // all luma TUs inside the area get the combined CBF at the parent depth
+        }
+      }
+
+      //----- restore context states -----
+      m_CABACEstimator->getCtx() = ctxStart;
+
+      cuCtx.violatesLfnstConstrained[CHANNEL_TYPE_LUMA] = false;
+      cuCtx.violatesLfnstConstrained[CHANNEL_TYPE_CHROMA] = false;
+      cuCtx.lfnstLastScanPos = false;
+      cuCtx.violatesMtsCoeffConstraint = false;
+
+      //----- determine rate and r-d cost -----
+      csSplit->fracBits = xGetIntraFracBitsQT( *csSplit, partitioner, true, false, cu.ispMode ? 0 : -1, ispType, &cuCtx ); // sub-TU index 0 for ISP, -1 otherwise
+
+      //--- update cost ---
+      csSplit->cost     = m_pcRdCost->calcRdCost(csSplit->fracBits, csSplit->dist);
+
+      validReturnSplit = true;
+    }
+  }
+
+  bool retVal = false;
+  if( csFull || csSplit )
+  {
+    if( !sps.getUseLFNST() || validReturnFull || validReturnSplit )
+    {
+      {
+        // otherwise this would've happened in useSubStructure
+        cs.picture->getRecoBuf( currArea.Y() ).copyFrom( cs.getRecoBuf( currArea.Y() ) );
+        cs.picture->getPredBuf( currArea.Y() ).copyFrom( cs.getPredBuf( currArea.Y() ) );
+      }
+
+      if( cu.ispMode && earlySkipISP )
+      {
+        cs.cost = MAX_DOUBLE; // ISP early termination: retVal stays false
+      }
+      else
+      {
+        cs.cost = m_pcRdCost->calcRdCost( cs.fracBits, cs.dist );
+        retVal = true;
+      }
+    }
+  }
+  return retVal;
+}
+
+bool IntraSearch::xRecurIntraCodingACTQT(CodingStructure &cs, Partitioner &partitioner, bool mtsCheckRangeFlag, int mtsFirstCheckId, int mtsLastCheckId, bool moreProbMTSIdxFirst)
+{
+  const UnitArea &currArea = partitioner.currArea();
+  uint32_t       currDepth = partitioner.currTrDepth;
+  const Slice    &slice = *cs.slice;
+  const SPS      &sps = *cs.sps;
+
+  bool bCheckFull = !partitioner.canSplit(TU_MAX_TR_SPLIT, cs);
+  bool bCheckSplit = !bCheckFull;
+
+  TempCtx ctxStart(m_CtxCache, m_CABACEstimator->getCtx());
+  TempCtx ctxBest(m_CtxCache);
+
+  CodingStructure *csSplit = nullptr;
+  CodingStructure *csFull = nullptr;
+  if (bCheckSplit)
+  {
+    csSplit = &cs;
+  }
+  else if (bCheckFull)
+  {
+    csFull = &cs;
+  }
+
+  bool validReturnFull = false;
+
+  if (bCheckFull)
+  {
+    TransformUnit        &tu = csFull->addTU(CS::getArea(*csFull, currArea, partitioner.chType), partitioner.chType);
+    tu.depth = currDepth;
+    const CodingUnit     &cu = *csFull->getCU(tu.Y().pos(), CHANNEL_TYPE_LUMA);
+    const PredictionUnit &pu = *csFull->getPU(tu.Y().pos(), CHANNEL_TYPE_LUMA);
+    CHECK(!tu.Y().valid() || !tu.Cb().valid() || !tu.Cr().valid(), "Invalid TU");
+    CHECK(tu.cu != &cu, "wrong CU fetch");
+    CHECK(cu.ispMode, "adaptive color transform cannot be applied to ISP");
+    CHECK(pu.intraDir[CHANNEL_TYPE_CHROMA] != DM_CHROMA_IDX, "chroma should use DM mode for adaptive color transform");
+
+    // 1. intra prediction and forward color transform
+
+    PelUnitBuf orgBuf = csFull->getOrgBuf(tu);
+    PelUnitBuf predBuf = csFull->getPredBuf(tu);
+    PelUnitBuf resiBuf = csFull->getResiBuf(tu);
+    PelUnitBuf orgResiBuf = csFull->getOrgResiBuf(tu);
+
+    for (int i = 0; i < getNumberValidComponents(tu.chromaFormat); i++)
+    {
+      ComponentID          compID = (ComponentID)i;
+      const CompArea       &area = tu.blocks[compID];
+      const ChannelType    chType = toChannelType(compID);
+
+      PelBuf         piOrg = orgBuf.bufs[compID];
+      PelBuf         piPred = predBuf.bufs[compID];
+      PelBuf         piResi = resiBuf.bufs[compID];
+
+      initIntraPatternChType(*tu.cu, area);
+      if (PU::isMIP(pu, chType))
+      {
+        initIntraMip(pu, area);
+        predIntraMip(compID, piPred, pu);
+      }
+      else
+      {
+        predIntraAng(compID, piPred, pu);
+      }
+
+      piResi.copyFrom(piOrg);
+      if (slice.getPicHeader()->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag() && compID == COMPONENT_Y)
+      {
+        CompArea tmpArea(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size());
+        PelBuf   tmpPred = m_tmpStorageLCU.getBuf(tmpArea);
+        tmpPred.copyFrom(piPred);
+        piResi.rspSignal(m_pcReshape->getFwdLUT());
+        piResi.subtract(tmpPred);
+      }
+      else
+        piResi.subtract(piPred);
+    }
+
+    resiBuf.colorSpaceConvert(orgResiBuf, true);
+
+    // 2. luma residual optimization 
+    double     dSingleCostLuma = MAX_DOUBLE;
+    bool       checkTransformSkip = sps.getTransformSkipEnabledFlag();
+    int        bestLumaModeId = 0;
+    uint8_t    nNumTransformCands = cu.mtsFlag ? 4 : 1;
+    uint8_t    numTransformIndexCands = nNumTransformCands;
+
+    const bool tsAllowed = TU::isTSAllowed(tu, COMPONENT_Y);
+    const bool mtsAllowed = CU::isMTSAllowed(cu, COMPONENT_Y);
+    std::vector<TrMode> trModes;
+
+    if (sps.getUseLFNST())
+    {
+      checkTransformSkip &= tsAllowed;
+      checkTransformSkip &= !cu.mtsFlag;
+      checkTransformSkip &= !cu.lfnstIdx;
+
+      if (!cu.mtsFlag && checkTransformSkip)
+      {
+        trModes.push_back(TrMode(0, true)); //DCT2
+        trModes.push_back(TrMode(1, true)); //TS
+      }
+    }
+    else
+    {
+      nNumTransformCands = 1 + (tsAllowed ? 1 : 0) + (mtsAllowed ? 4 : 0); // DCT + TS + 4 MTS = 6 tests
+
+      trModes.push_back(TrMode(0, true)); //DCT2
+      if (tsAllowed)
+      {
+        trModes.push_back(TrMode(1, true));
+      }
+      if (mtsAllowed)
+      {
+        for (int i = 2; i < 6; i++)
+        {
+          trModes.push_back(TrMode(i, true));
+        }
+      }
+    }
+
+    CodingStructure &saveLumaCS = *m_pSaveCS[0];
+    TransformUnit   *tmpTU = nullptr;
+    Distortion      singleDistTmpLuma = 0;
+    uint64_t        singleTmpFracBits = 0;
+    double          singleCostTmp = 0;
+    int             firstCheckId = (sps.getUseLFNST() && mtsCheckRangeFlag && cu.mtsFlag) ? mtsFirstCheckId : 0;
+    int             lastCheckId = sps.getUseLFNST() ? ((mtsCheckRangeFlag && cu.mtsFlag) ? (mtsLastCheckId + (int)checkTransformSkip) : (numTransformIndexCands - (firstCheckId + 1) + (int)checkTransformSkip)) : trModes[nNumTransformCands - 1].first;
+    bool            isNotOnlyOneMode = sps.getUseLFNST() ? lastCheckId != firstCheckId : nNumTransformCands != 1;
+
+    if (isNotOnlyOneMode)
+    {
+      saveLumaCS.pcv = csFull->pcv;
+      saveLumaCS.picture = csFull->picture;
+      saveLumaCS.area.repositionTo(csFull->area);
+      saveLumaCS.clearTUs();
+      tmpTU = &saveLumaCS.addTU(currArea, partitioner.chType);
+    }
+
+    bool    cbfBestMode = false;
+    bool    cbfBestModeValid = false;
+    bool    cbfDCT2 = true;
+
+    m_pcRdCost->lambdaAdjustColorTrans(true, COMPONENT_Y);
+
+    for (int modeId = firstCheckId; modeId <= lastCheckId; modeId++)
+    {
+      uint8_t transformIndex = modeId;
+      csFull->getResiBuf(tu.Y()).copyFrom(csFull->getOrgResiBuf(tu.Y()));
+
+      m_CABACEstimator->getCtx() = ctxStart;
+      m_CABACEstimator->resetBits();
+
+      if (sps.getUseLFNST())
+      {
+        if ((transformIndex < lastCheckId) || ((transformIndex == lastCheckId) && !checkTransformSkip)) //we avoid this if the mode is transformSkip
+        {
+          // Skip checking other transform candidates if zero CBF is encountered and it is the best transform so far
+          if (m_pcEncCfg->getUseFastLFNST() && transformIndex && !cbfBestMode && cbfBestModeValid)
+          {
+            continue;
+          }
+        }
+      }
+      else
+      {
+#if JVET_AHG14_LOSSLESS
+        if (!(m_pcEncCfg->getCostMode() == COST_LOSSLESS_CODING))
+        {
+#endif
+        if (!cbfDCT2 || (m_pcEncCfg->getUseTransformSkipFast() && bestLumaModeId == 1))
+        {
+          break;
+        }
+        if (!trModes[modeId].second)
+        {
+          continue;
+        }
+#if JVET_AHG14_LOSSLESS
+        }
+#endif
+        tu.mtsIdx[COMPONENT_Y] = trModes[modeId].first;
+      }
+
+      singleDistTmpLuma = 0;
+      if (sps.getUseLFNST())
+      {
+        if (cu.mtsFlag)
+        {
+          if (moreProbMTSIdxFirst)
+          {
+            uint32_t uiIntraMode = pu.intraDir[CHANNEL_TYPE_LUMA];
+
+            if (transformIndex == 1)
+            {
+              tu.mtsIdx[COMPONENT_Y] = (uiIntraMode < 34) ? MTS_DST7_DCT8 : MTS_DCT8_DST7;
+            }
+            else if (transformIndex == 2)
+            {
+              tu.mtsIdx[COMPONENT_Y] = (uiIntraMode < 34) ? MTS_DCT8_DST7 : MTS_DST7_DCT8;
+            }
+            else
+            {
+              tu.mtsIdx[COMPONENT_Y] = MTS_DST7_DST7 + transformIndex;
+            }
+          }
+          else
+          {
+            tu.mtsIdx[COMPONENT_Y] = MTS_DST7_DST7 + transformIndex;
+          }
+        }
+        else
+        {
+          tu.mtsIdx[COMPONENT_Y] = transformIndex;
+        }
+
+        if (!cu.mtsFlag && checkTransformSkip)
+        {
+          xIntraCodingACTTUBlock(tu, COMPONENT_Y, singleDistTmpLuma, modeId == 0 ? &trModes : nullptr, true);
+          if (modeId == 0)
+          {
+            for (int i = 0; i < 2; i++)
+            {
+              if (trModes[i].second)
+              {
+                lastCheckId = trModes[i].first;
+              }
+            }
+          }
+        }
+        else
+        {
+          xIntraCodingACTTUBlock(tu, COMPONENT_Y, singleDistTmpLuma);
+        }
+      }
+      else
+      {
+        if (nNumTransformCands > 1)
+        {
+          xIntraCodingACTTUBlock(tu, COMPONENT_Y, singleDistTmpLuma, modeId == 0 ? &trModes : nullptr, true);
+          if (modeId == 0)
+          {
+            for (int i = 0; i < nNumTransformCands; i++)
+            {
+              if (trModes[i].second)
+              {
+                lastCheckId = trModes[i].first;
+              }
+            }
+          }
+        }
+        else
+        {
+          xIntraCodingACTTUBlock(tu, COMPONENT_Y, singleDistTmpLuma);
+        }
+      }
+
+      //----- determine rate and r-d cost -----
+      if ((sps.getUseLFNST() ? (modeId == lastCheckId && modeId != 0 && checkTransformSkip) : (trModes[modeId].first != 0)) && !TU::getCbfAtDepth(tu, COMPONENT_Y, currDepth))
+      {
+        // To avoid coding the TS flag when cbf is zero, the TS mode with a zero cbf is forbidden.
+        singleCostTmp = MAX_DOUBLE;
+      }
+      else
+      {
+        singleTmpFracBits = xGetIntraFracBitsQT(*csFull, partitioner, true, false, -1, TU_NO_ISP);
+        singleCostTmp = m_pcRdCost->calcRdCost(singleTmpFracBits, singleDistTmpLuma);
+      }
+
+      if (singleCostTmp < dSingleCostLuma)
+      {
+        dSingleCostLuma = singleCostTmp;
+        validReturnFull = true;
+
+        if (sps.getUseLFNST())
+        {
+          bestLumaModeId = modeId;
+          cbfBestMode = TU::getCbfAtDepth(tu, COMPONENT_Y, currDepth);
+          cbfBestModeValid = true;
+        }
+        else
+        {
+          bestLumaModeId = trModes[modeId].first;
+          if (trModes[modeId].first == 0)
+          {
+            cbfDCT2 = TU::getCbfAtDepth(tu, COMPONENT_Y, currDepth);
+          }
+        }
+
+        if (bestLumaModeId != lastCheckId)
+        {
+          saveLumaCS.getResiBuf(tu.Y()).copyFrom(csFull->getResiBuf(tu.Y()));
+          tmpTU->copyComponentFrom(tu, COMPONENT_Y);
+          ctxBest = m_CABACEstimator->getCtx();
+        }
+      }
+    }
+
+    m_pcRdCost->lambdaAdjustColorTrans(false, COMPONENT_Y);
+
+    if (sps.getUseLFNST())
+    {
+      if (!validReturnFull)
+      {
+        csFull->cost = MAX_DOUBLE;
+        return false;
+      }
+    }
+    else
+    {
+      CHECK(!validReturnFull, "no transform mode was tested for luma");
+    }
+
+    csFull->setDecomp(currArea.Y(), true);
+    csFull->setDecomp(currArea.Cb(), true);
+
+    if (bestLumaModeId != lastCheckId)
+    {
+      csFull->getResiBuf(tu.Y()).copyFrom(saveLumaCS.getResiBuf(tu.Y()));
+      tu.copyComponentFrom(*tmpTU, COMPONENT_Y);
+      m_CABACEstimator->getCtx() = ctxBest;
+    }
+
+    // 3. chroma residual optimization
+    CodingStructure &saveChromaCS = *m_pSaveCS[1];
+    saveChromaCS.pcv = csFull->pcv;
+    saveChromaCS.picture = csFull->picture;
+    saveChromaCS.area.repositionTo(csFull->area);
+    saveChromaCS.initStructData(MAX_INT, true);
+    tmpTU = &saveChromaCS.addTU(currArea, partitioner.chType);
+
+    CompArea&  cbArea = tu.blocks[COMPONENT_Cb];
+    CompArea&  crArea = tu.blocks[COMPONENT_Cr];
+
+    ctxStart = m_CABACEstimator->getCtx();
+    m_CABACEstimator->resetBits();
+    tu.jointCbCr = 0;
+
+    bool doReshaping = (slice.getPicHeader()->getLmcsEnabledFlag() && slice.getPicHeader()->getLmcsChromaResidualScaleFlag() && (slice.isIntra() || m_pcReshape->getCTUFlag()) && (cbArea.width * cbArea.height > 4));
+    if (doReshaping)
+    {
+      const Area      area = tu.Y().valid() ? tu.Y() : Area(recalcPosition(tu.chromaFormat, tu.chType, CHANNEL_TYPE_LUMA, tu.blocks[tu.chType].pos()), recalcSize(tu.chromaFormat, tu.chType, CHANNEL_TYPE_LUMA, tu.blocks[tu.chType].size()));
+      const CompArea &areaY = CompArea(COMPONENT_Y, tu.chromaFormat, area);
+      int             adj = m_pcReshape->calculateChromaAdjVpduNei(tu, areaY);
+      tu.setChromaAdj(adj);
+    }
+
+    CompStorage  orgResiCb[5], orgResiCr[5]; // 0:std, 1-3:jointCbCr (placeholder at this stage), 4:crossComp
+    orgResiCb[0].create(cbArea);
+    orgResiCr[0].create(crArea);
+    orgResiCb[0].copyFrom(csFull->getOrgResiBuf(cbArea));
+    orgResiCr[0].copyFrom(csFull->getOrgResiBuf(crArea));
+    if (doReshaping)
+    {
+      int cResScaleInv = tu.getChromaAdj();
+      orgResiCb[0].scaleSignal(cResScaleInv, 1, slice.clpRng(COMPONENT_Cb));
+      orgResiCr[0].scaleSignal(cResScaleInv, 1, slice.clpRng(COMPONENT_Cr));
+    }
+
+    // 3.1 regular chroma residual coding
+    csFull->getResiBuf(cbArea).copyFrom(orgResiCb[0]);
+    csFull->getResiBuf(crArea).copyFrom(orgResiCr[0]);
+
+    for (uint32_t c = COMPONENT_Cb; c < ::getNumberValidTBlocks(*csFull->pcv); c++)
+    {
+      const ComponentID compID = ComponentID(c);
+      Distortion singleDistChroma = 0;
+      xIntraCodingACTTUBlock(tu, compID, singleDistChroma);
+      xGetIntraFracBitsQTChroma(tu, compID);
+    }
+
+    Position tuPos = tu.Y();
+    tuPos.relativeTo(cu.Y());
+    const UnitArea relativeUnitArea(tu.chromaFormat, Area(tuPos, tu.Y().size()));
+    PelUnitBuf     invColorTransResidual = m_colorTransResiBuf.getBuf(relativeUnitArea);
+    csFull->getResiBuf(tu).colorSpaceConvert(invColorTransResidual, false);
+
+    Distortion totalDist = 0;
+    for (uint32_t c = COMPONENT_Y; c < ::getNumberValidTBlocks(*csFull->pcv); c++)
+    {
+      const ComponentID compID = ComponentID(c);
+      const CompArea&   area = tu.blocks[compID];
+      PelBuf            piOrg = csFull->getOrgBuf(area);
+      PelBuf            piReco = csFull->getRecoBuf(area);
+      PelBuf            piPred = csFull->getPredBuf(area);
+      PelBuf            piResi = invColorTransResidual.bufs[compID];
+
+      piReco.reconstruct(piPred, piResi, cs.slice->clpRng(compID));
+
+      if (m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() || (m_pcEncCfg->getLmcs()
+        & slice.getPicHeader()->getLmcsEnabledFlag() && (m_pcReshape->getCTUFlag() || (isChroma(compID) && m_pcEncCfg->getReshapeIntraCMD()))))
+      {
+        const CPelBuf orgLuma = csFull->getOrgBuf(csFull->area.blocks[COMPONENT_Y]);
+        if (compID == COMPONENT_Y && !(m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled()))
+        {
+          CompArea      tmpArea1(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size());
+          PelBuf tmpRecLuma = m_tmpStorageLCU.getBuf(tmpArea1);
+          tmpRecLuma.copyFrom(piReco);
+          tmpRecLuma.rspSignal(m_pcReshape->getInvLUT());
+          totalDist += m_pcRdCost->getDistPart(piOrg, tmpRecLuma, sps.getBitDepth(toChannelType(compID)), compID, DF_SSE_WTD, &orgLuma);
+        }
+        else
+        {
+          totalDist += m_pcRdCost->getDistPart(piOrg, piReco, sps.getBitDepth(toChannelType(compID)), compID, DF_SSE_WTD, &orgLuma);
+        }
+      }
+      else
+      {
+        totalDist += m_pcRdCost->getDistPart(piOrg, piReco, sps.getBitDepth(toChannelType(compID)), compID, DF_SSE);
+      }
+    }
+
+    m_CABACEstimator->getCtx() = ctxStart;
+    uint64_t totalBits = xGetIntraFracBitsQT(*csFull, partitioner, true, true, -1, TU_NO_ISP);
+    double   totalCost = m_pcRdCost->calcRdCost(totalBits, totalDist);
+
+    saveChromaCS.getResiBuf(cbArea).copyFrom(csFull->getResiBuf(cbArea));
+    saveChromaCS.getResiBuf(crArea).copyFrom(csFull->getResiBuf(crArea));
+    saveChromaCS.getRecoBuf(tu).copyFrom(csFull->getRecoBuf(tu));
+    tmpTU->copyComponentFrom(tu, COMPONENT_Cb);
+    tmpTU->copyComponentFrom(tu, COMPONENT_Cr);
+    ctxBest = m_CABACEstimator->getCtx();
+
+    // 3.2 jointCbCr
+    double     bestCostJointCbCr = totalCost;
+    Distortion bestDistJointCbCr = totalDist;
+    uint64_t   bestBitsJointCbCr = totalBits;
+    int        bestJointCbCr = tu.jointCbCr; assert(!bestJointCbCr);
+
+    bool       lastIsBest = false;
+    std::vector<int>  jointCbfMasksToTest;
+    if (sps.getJointCbCrEnabledFlag() && (TU::getCbf(tu, COMPONENT_Cb) || TU::getCbf(tu, COMPONENT_Cr)))
+    {
+      jointCbfMasksToTest = m_pcTrQuant->selectICTCandidates(tu, orgResiCb, orgResiCr);
+    }
+
+    for (int cbfMask : jointCbfMasksToTest)
+    {
+      m_CABACEstimator->getCtx() = ctxStart;
+      m_CABACEstimator->resetBits();
+
+      Distortion distTmp = 0;
+      tu.jointCbCr = (uint8_t)cbfMask;
+
+      csFull->getResiBuf(cbArea).copyFrom(orgResiCb[cbfMask]);
+      csFull->getResiBuf(crArea).copyFrom(orgResiCr[cbfMask]);
+      xIntraCodingACTTUBlock(tu, COMPONENT_Cb, distTmp);
+
+      double   costTmp = std::numeric_limits<double>::max();
+      uint64_t bitsTmp = 0;
+      if (distTmp < std::numeric_limits<Distortion>::max())
+      {
+        csFull->getResiBuf(tu).colorSpaceConvert(invColorTransResidual, false);
+        distTmp = 0;
+        for (uint32_t c = COMPONENT_Y; c < ::getNumberValidTBlocks(*csFull->pcv); c++)
+        {
+          const ComponentID compID = ComponentID(c);
+          const CompArea&   area = tu.blocks[compID];
+          PelBuf            piOrg = csFull->getOrgBuf(area);
+          PelBuf            piReco = csFull->getRecoBuf(area);
+          PelBuf            piPred = csFull->getPredBuf(area);
+          PelBuf            piResi = invColorTransResidual.bufs[compID];
+
+          piReco.reconstruct(piPred, piResi, cs.slice->clpRng(compID));
+          if (m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() || (m_pcEncCfg->getLmcs()
+            & slice.getPicHeader()->getLmcsEnabledFlag() && (m_pcReshape->getCTUFlag() || (isChroma(compID) && m_pcEncCfg->getReshapeIntraCMD()))))
+          {
+            const CPelBuf orgLuma = csFull->getOrgBuf(csFull->area.blocks[COMPONENT_Y]);
+            if (compID == COMPONENT_Y && !(m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled()))
+            {
+              CompArea      tmpArea1(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size());
+              PelBuf tmpRecLuma = m_tmpStorageLCU.getBuf(tmpArea1);
+              tmpRecLuma.copyFrom(piReco);
+              tmpRecLuma.rspSignal(m_pcReshape->getInvLUT());
+              distTmp += m_pcRdCost->getDistPart(piOrg, tmpRecLuma, sps.getBitDepth(toChannelType(compID)), compID, DF_SSE_WTD, &orgLuma);
+            }
+            else
+            {
+              distTmp += m_pcRdCost->getDistPart(piOrg, piReco, sps.getBitDepth(toChannelType(compID)), compID, DF_SSE_WTD, &orgLuma);
+            }
+          }
+          else
+          {
+            distTmp += m_pcRdCost->getDistPart(piOrg, piReco, sps.getBitDepth(toChannelType(compID)), compID, DF_SSE);
+          }
+        }
+
+        bitsTmp = xGetIntraFracBitsQT(*csFull, partitioner, true, true, -1, TU_NO_ISP);
+        costTmp = m_pcRdCost->calcRdCost(bitsTmp, distTmp);
+      }
+
+      if (costTmp < bestCostJointCbCr)
+      {
+        bestCostJointCbCr = costTmp;
+        bestDistJointCbCr = distTmp;
+        bestBitsJointCbCr = bitsTmp;
+        bestJointCbCr = tu.jointCbCr;
+        lastIsBest = (cbfMask == jointCbfMasksToTest.back());
+
+        // store data
+        if (!lastIsBest)
+        {
+          saveChromaCS.getResiBuf(cbArea).copyFrom(csFull->getResiBuf(cbArea));
+          saveChromaCS.getResiBuf(crArea).copyFrom(csFull->getResiBuf(crArea));
+          saveChromaCS.getRecoBuf(tu).copyFrom(csFull->getRecoBuf(tu));
+          tmpTU->copyComponentFrom(tu, COMPONENT_Cb);
+          tmpTU->copyComponentFrom(tu, COMPONENT_Cr);
+
+          ctxBest = m_CABACEstimator->getCtx();
+        }
+      }
+    }
+
+    if (!lastIsBest)
+    {
+      csFull->getResiBuf(cbArea).copyFrom(saveChromaCS.getResiBuf(cbArea));
+      csFull->getResiBuf(crArea).copyFrom(saveChromaCS.getResiBuf(crArea));
+      csFull->getRecoBuf(tu).copyFrom(saveChromaCS.getRecoBuf(tu));
+      tu.copyComponentFrom(*tmpTU, COMPONENT_Cb);
+      tu.copyComponentFrom(*tmpTU, COMPONENT_Cr);
+
+      m_CABACEstimator->getCtx() = ctxBest;
+    }
+    tu.jointCbCr = bestJointCbCr;
+    csFull->picture->getRecoBuf(tu).copyFrom(csFull->getRecoBuf(tu));
+
+    csFull->dist += bestDistJointCbCr;
+    csFull->fracBits += bestBitsJointCbCr;
+    csFull->cost = m_pcRdCost->calcRdCost(csFull->fracBits, csFull->dist);
   }
 
-  if (slice.getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag() && compID == COMPONENT_Y)
-  {
-    CompArea      tmpArea(COMPONENT_Y, area.chromaFormat, Position(0,0), area.size());
-    PelBuf tmpPred = m_tmpStorageLCU.getBuf(tmpArea);
-    tmpPred.copyFrom(piPred);
-    piReco.reconstruct(tmpPred, piResi, cs.slice->clpRng(compID));
+  bool validReturnSplit = false;
+  if (bCheckSplit)
+  {
+    if (partitioner.canSplit(TU_MAX_TR_SPLIT, *csSplit))
+    {
+      partitioner.splitCurrArea(TU_MAX_TR_SPLIT, *csSplit);
+    }
+
+    bool splitIsSelected = true;
+    do
+    {
+      bool tmpValidReturnSplit = xRecurIntraCodingACTQT(*csSplit, partitioner, mtsCheckRangeFlag, mtsFirstCheckId, mtsLastCheckId, moreProbMTSIdxFirst);
+      if (sps.getUseLFNST())
+      {
+        if (!tmpValidReturnSplit)
+        {
+          splitIsSelected = false;
+          break;
+        }
+      }
+      else
+      {
+        CHECK(!tmpValidReturnSplit, "invalid RD of sub-TU partitions for ACT");
+      }
+    } while (partitioner.nextPart(*csSplit));
+
+    partitioner.exitCurrSplit();
+
+    if (splitIsSelected)
+    {
+      unsigned compCbf[3] = { 0, 0, 0 };
+      for (auto &currTU : csSplit->traverseTUs(currArea, partitioner.chType))
+      {
+        for (unsigned ch = 0; ch < getNumberValidTBlocks(*csSplit->pcv); ch++)
+        {
+          compCbf[ch] |= (TU::getCbfAtDepth(currTU, ComponentID(ch), currDepth + 1) ? 1 : 0);
+        }
+      }
+
+      for (auto &currTU : csSplit->traverseTUs(currArea, partitioner.chType))
+      {
+        TU::setCbfAtDepth(currTU, COMPONENT_Y, currDepth, compCbf[COMPONENT_Y]);
+        TU::setCbfAtDepth(currTU, COMPONENT_Cb, currDepth, compCbf[COMPONENT_Cb]);
+        TU::setCbfAtDepth(currTU, COMPONENT_Cr, currDepth, compCbf[COMPONENT_Cr]);
+      }
+
+      m_CABACEstimator->getCtx() = ctxStart;
+      csSplit->fracBits = xGetIntraFracBitsQT(*csSplit, partitioner, true, true, -1, TU_NO_ISP);
+      csSplit->cost = m_pcRdCost->calcRdCost(csSplit->fracBits, csSplit->dist);
+
+      validReturnSplit = true;
+    }
   }
-  else
-  piReco.reconstruct(piPred, piResi, cs.slice->clpRng( compID ));
 
-  //===== update distortion =====
-#if WCG_EXT
-  if (m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() || (m_pcEncCfg->getReshaper()
-    && slice.getReshapeInfo().getUseSliceReshaper() && (m_pcReshape->getCTUFlag() || (isChroma(compID) && m_pcEncCfg->getReshapeIntraCMD()))))
+  bool retVal = false;
+  if (csFull || csSplit)
   {
-    const CPelBuf orgLuma = cs.getOrgBuf( cs.area.blocks[COMPONENT_Y] );
-    if (compID == COMPONENT_Y  && !(m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled()))
+    if (sps.getUseLFNST())
     {
-      CompArea      tmpArea1(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size());
-      PelBuf tmpRecLuma = m_tmpStorageLCU.getBuf(tmpArea1);
-      tmpRecLuma.copyFrom(piReco);
-      tmpRecLuma.rspSignal(m_pcReshape->getInvLUT());
-      ruiDist += m_pcRdCost->getDistPart(piOrg, tmpRecLuma, sps.getBitDepth(toChannelType(compID)), compID, DF_SSE_WTD, &orgLuma);
+      if (validReturnFull || validReturnSplit)
+      {
+        retVal = true;
+      }
     }
     else
-      ruiDist += m_pcRdCost->getDistPart(piOrg, piReco, bitDepth, compID, DF_SSE_WTD, &orgLuma);
-  }
-  else
-#endif
-  {
-    ruiDist += m_pcRdCost->getDistPart( piOrg, piReco, bitDepth, compID, DF_SSE );
+    {
+      CHECK(!validReturnFull && !validReturnSplit, "illegal TU optimization");
+      retVal = true;
+    }
   }
+  return retVal;
 }
 
-void IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &partitioner, const double bestCostSoFar, const int subTuIdx, const PartSplit ispType, const bool ispIsCurrentWinnder )
+ChromaCbfs IntraSearch::xRecurIntraChromaCodingQT( CodingStructure &cs, Partitioner& partitioner, const double bestCostSoFar, const PartSplit ispType )
 {
-        int   subTuCounter = subTuIdx;
-  const UnitArea &currArea = partitioner.currArea();
-  const CodingUnit     &cu = *cs.getCU( currArea.lumaPos(), partitioner.chType );
-        bool  earlySkipISP = false;
-  uint32_t currDepth       = partitioner.currTrDepth;
-  const PPS &pps           = *cs.pps;
-  const bool keepResi      = pps.getPpsRangeExtension().getCrossComponentPredictionEnabledFlag() || KEEP_PRED_AND_RESI_SIGNALS;
-  bool bCheckFull          = true;
-  bool bCheckSplit         = false;
-  bCheckFull               = !partitioner.canSplit( TU_MAX_TR_SPLIT, cs );
-  bCheckSplit              = partitioner.canSplit( TU_MAX_TR_SPLIT, cs );
-
-  if( cu.ispMode )
-  {
-    bCheckSplit = partitioner.canSplit( ispType, cs );
-    bCheckFull = !bCheckSplit;
-  }
-  uint32_t    numSig           = 0;
+  UnitArea currArea                   = partitioner.currArea();
+  const bool keepResi                 = cs.sps->getUseLMChroma() || KEEP_PRED_AND_RESI_SIGNALS;
+  if( !currArea.Cb().valid() ) return ChromaCbfs( false );
 
-  double     dSingleCost                        = MAX_DOUBLE;
-  Distortion uiSingleDistLuma                   = 0;
-  uint64_t   singleFracBits                     = 0;
-  int        bestModeId[MAX_NUM_COMPONENT]      = { 0, 0, 0 };
 
-  const TempCtx ctxStart  ( m_CtxCache, m_CABACEstimator->getCtx() );
-  TempCtx       ctxBest   ( m_CtxCache );
+  TransformUnit &currTU               = *cs.getTU( currArea.chromaPos(), CHANNEL_TYPE_CHROMA );
+  const PredictionUnit &pu            = *cs.getPU( currArea.chromaPos(), CHANNEL_TYPE_CHROMA );
 
-  CodingStructure *csSplit = nullptr;
-  CodingStructure *csFull  = nullptr;
+  bool lumaUsesISP                    = false;
+  uint32_t     currDepth                  = partitioner.currTrDepth;
+  const PPS &pps                      = *cs.pps;
+  ChromaCbfs cbfs                     ( false );
 
-  if( bCheckSplit )
-  {
-    csSplit = &cs;
-  }
-  else if( bCheckFull )
+  if (currDepth == currTU.depth)
   {
-    csFull = &cs;
-  }
+    if (!currArea.Cb().valid() || !currArea.Cr().valid())
+    {
+      return cbfs;
+    }
 
-  if( bCheckFull )
-  {
-    csFull->cost = 0.0;
 
-    TransformUnit &tu = csFull->addTU( CS::getArea( *csFull, currArea, partitioner.chType ), partitioner.chType );
-    tu.depth = currDepth;
+    CodingStructure &saveCS = *m_pSaveCS[1];
+    saveCS.pcv      = cs.pcv;
+    saveCS.picture  = cs.picture;
+    saveCS.area.repositionTo( cs.area );
+    saveCS.initStructData( MAX_INT, true );
 
-    const bool tsAllowed  = TU::isTSAllowed ( tu, COMPONENT_Y );
-    const bool mtsAllowed = TU::isMTSAllowed( tu, COMPONENT_Y );
-    uint8_t nNumTransformCands = 1 + ( tsAllowed ? 1 : 0 ) + ( mtsAllowed ? 4 : 0 ); // DCT + TS + 4 MTS = 6 tests
-    std::vector<TrMode> trModes;
-    trModes.push_back( TrMode( 0, true ) ); //DCT2
-    if( tsAllowed )
-    {
-      trModes.push_back( TrMode( 1, true ) );
-    }
-    if( mtsAllowed )
+    if( !currTU.cu->isSepTree() && currTU.cu->ispMode )
     {
-      for( int i = 2; i < 6; i++ )
-      {
-        trModes.push_back( TrMode( i, true) );
-      }
+      saveCS.clearCUs();
+      CodingUnit& auxCU = saveCS.addCU( *currTU.cu, partitioner.chType );
+      auxCU.ispMode = currTU.cu->ispMode;
+      saveCS.sps = currTU.cs->sps;
+      saveCS.clearPUs();
+      saveCS.addPU( *currTU.cu->firstPU, partitioner.chType );
     }
 
-    CHECK( !tu.Y().valid(), "Invalid TU" );
+    TransformUnit &tmpTU = saveCS.addTU(currArea, partitioner.chType);
 
-    CodingStructure &saveCS = *m_pSaveCS[0];
 
-    TransformUnit *tmpTU = nullptr;
+    cs.setDecomp(currArea.Cb(), true); // set in advance (required for Cb2/Cr2 in 4:2:2 video)
 
-    Distortion singleDistTmpLuma = 0;
-    uint64_t     singleTmpFracBits = 0;
-    double     singleCostTmp     = 0;
-    int        firstCheckId      = 0;
+    const unsigned      numTBlocks  = ::getNumberValidTBlocks( *cs.pcv );
 
-    int       lastCheckId        = trModes[nNumTransformCands-1].first;
-    bool isNotOnlyOneMode        = nNumTransformCands != 1;
+    CompArea&  cbArea         = currTU.blocks[COMPONENT_Cb];
+    CompArea&  crArea         = currTU.blocks[COMPONENT_Cr];
+    double     bestCostCb     = MAX_DOUBLE;
+    double     bestCostCr     = MAX_DOUBLE;
+    Distortion bestDistCb     = 0;
+    Distortion bestDistCr     = 0;
+    int        maxModesTested = 0;
+    bool       earlyExitISP   = false;
 
-    if( isNotOnlyOneMode )
+    TempCtx ctxStartTU( m_CtxCache );
+    TempCtx ctxStart  ( m_CtxCache );
+    TempCtx ctxBest   ( m_CtxCache );
+
+    ctxStartTU       = m_CABACEstimator->getCtx();
+    currTU.jointCbCr = 0;
+
+    // Do predictions here to avoid repeating the "default0Save1Load2" stuff
+    int  predMode   = pu.cu->bdpcmModeChroma ? BDPCM_IDX : PU::getFinalIntraMode(pu, CHANNEL_TYPE_CHROMA);
+
+    PelBuf piPredCb = cs.getPredBuf(cbArea);
+    PelBuf piPredCr = cs.getPredBuf(crArea);
+
+    initIntraPatternChType( *currTU.cu, cbArea);
+    initIntraPatternChType( *currTU.cu, crArea);
+
+    if( PU::isLMCMode( predMode ) )
     {
-      saveCS.pcv     = cs.pcv;
-      saveCS.picture = cs.picture;
-      saveCS.area.repositionTo(cs.area);
-      saveCS.clearTUs();
-      tmpTU = &saveCS.addTU(currArea, partitioner.chType);
+      xGetLumaRecPixels( pu, cbArea );
+      predIntraChromaLM( COMPONENT_Cb, piPredCb, pu, cbArea, predMode );
+      predIntraChromaLM( COMPONENT_Cr, piPredCr, pu, crArea, predMode );
+    }
+    else
+    {
+      predIntraAng( COMPONENT_Cb, piPredCb, pu);
+      predIntraAng( COMPONENT_Cr, piPredCr, pu);
     }
 
-    bool    cbfDCT2  = true;
+    // determination of chroma residuals including reshaping and cross-component prediction
+    //----- get chroma residuals -----
+    PelBuf resiCb  = cs.getResiBuf(cbArea);
+    PelBuf resiCr  = cs.getResiBuf(crArea);
+    resiCb.copyFrom( cs.getOrgBuf (cbArea) );
+    resiCr.copyFrom( cs.getOrgBuf (crArea) );
+    resiCb.subtract( piPredCb );
+    resiCr.subtract( piPredCr );
+
+    //----- get reshape parameter ----
+    bool doReshaping = ( cs.picHeader->getLmcsEnabledFlag() && cs.picHeader->getLmcsChromaResidualScaleFlag()
+                         && (cs.slice->isIntra() || m_pcReshape->getCTUFlag()) && (cbArea.width * cbArea.height > 4) );
+    if( doReshaping )
+    {
+      const Area area = currTU.Y().valid() ? currTU.Y() : Area(recalcPosition(currTU.chromaFormat, currTU.chType, CHANNEL_TYPE_LUMA, currTU.blocks[currTU.chType].pos()), recalcSize(currTU.chromaFormat, currTU.chType, CHANNEL_TYPE_LUMA, currTU.blocks[currTU.chType].size()));
+      const CompArea &areaY = CompArea(COMPONENT_Y, currTU.chromaFormat, area);
+      int adj = m_pcReshape->calculateChromaAdjVpduNei(currTU, areaY);
+      currTU.setChromaAdj(adj);
+    }
 
-    double bestDCT2cost = MAX_DOUBLE;
-    double threshold = m_pcEncCfg->getUseFastISP() && !cu.ispMode && ispIsCurrentWinnder && nNumTransformCands > 1 ? 1 + 1.4 / sqrt( cu.lwidth() * cu.lheight() ) : 1;
-    for( int modeId = firstCheckId; modeId < nNumTransformCands; modeId++ )
+    //----- get cross component prediction parameters -----
+    bool checkCrossComponentPrediction = PU::isChromaIntraModeCrossCheckMode( pu ) && pps.getPpsRangeExtension().getCrossComponentPredictionEnabledFlag() && TU::getCbf( currTU, COMPONENT_Y );
+    int  compAlpha[MAX_NUM_COMPONENT] = { 0, 0, 0 };
+    if( checkCrossComponentPrediction )
     {
-      if( !cbfDCT2 || ( m_pcEncCfg->getUseTransformSkipFast() && bestModeId[COMPONENT_Y] == 1 ) )
+      compAlpha[COMPONENT_Cb] = xCalcCrossComponentPredictionAlpha( currTU, COMPONENT_Cb, m_pcEncCfg->getUseReconBasedCrossCPredictionEstimate() );
+      compAlpha[COMPONENT_Cr] = xCalcCrossComponentPredictionAlpha( currTU, COMPONENT_Cr, m_pcEncCfg->getUseReconBasedCrossCPredictionEstimate() );
+      if( compAlpha[COMPONENT_Cb] == 0 && compAlpha[COMPONENT_Cr] == 0 )
       {
-        break;
+        checkCrossComponentPrediction = false;
       }
-      if( !trModes[modeId].second )
-      {
-        continue;
+    }
+
+    //===== store original residual signals (std and crossCompPred) =====
+    CompStorage  orgResiCb[5], orgResiCr[5]; // 0:std, 1-3:jointCbCr (placeholder at this stage), 4:crossComp
+    for( int k = 0; k < (checkCrossComponentPrediction?5:1); k+=4 )
+    {
+      orgResiCb[k].create( cbArea );
+      orgResiCr[k].create( crArea );
+      if( k >= 4 ) {
+        CrossComponentPrediction::crossComponentPrediction( currTU, COMPONENT_Cb, cs.getResiBuf(currTU.Y()), resiCb, orgResiCb[k], false);
+        CrossComponentPrediction::crossComponentPrediction( currTU, COMPONENT_Cr, cs.getResiBuf(currTU.Y()), resiCr, orgResiCr[k], false);
+      } else {
+        orgResiCb[k].copyFrom( resiCb );
+        orgResiCr[k].copyFrom( resiCr );
       }
-      //we compare the DCT-II cost against the best ISP cost so far (except for TS)
-      if ( m_pcEncCfg->getUseFastISP() && !cu.ispMode && ispIsCurrentWinnder && trModes[modeId].first != 0 && ( trModes[modeId].first != 1 || !tsAllowed ) && bestDCT2cost > bestCostSoFar * threshold )
+      if( doReshaping )
       {
-        continue;
+        int cResScaleInv = currTU.getChromaAdj();
+        orgResiCb[k].scaleSignal( cResScaleInv, 1, currTU.cu->cs->slice->clpRng(COMPONENT_Cb) );
+        orgResiCr[k].scaleSignal( cResScaleInv, 1, currTU.cu->cs->slice->clpRng(COMPONENT_Cr) );
       }
-      tu.mtsIdx = trModes[modeId].first;
+    }
 
-      if ((modeId != firstCheckId) && isNotOnlyOneMode)
+    for( uint32_t c = COMPONENT_Cb; c < numTBlocks; c++)
+    {
+      const ComponentID compID  = ComponentID(c);
+      const CompArea&   area    = currTU.blocks[compID];
+
+      double     dSingleCost    = MAX_DOUBLE;
+      int        bestModeId     = 0;
+      Distortion singleDistCTmp = 0;
+      double     singleCostTmp  = 0;
+      const int  crossCPredictionModesToTest = checkCrossComponentPrediction ? 2 : 1;
+      const bool tsAllowed = TU::isTSAllowed(currTU, compID) && (m_pcEncCfg->getUseChromaTS());
+      uint8_t nNumTransformCands = 1 + (tsAllowed ? 1 : 0); // DCT + TS = 2 tests
+      std::vector<TrMode> trModes;
+      trModes.push_back(TrMode(0, true)); // DCT2
+
+      if (tsAllowed)
       {
-        m_CABACEstimator->getCtx() = ctxStart;
+          trModes.push_back(TrMode(1, true));//TS
       }
+      CHECK(!currTU.Cb().valid(), "Invalid TU");
+
+      const int  totalModesToTest            = crossCPredictionModesToTest * nNumTransformCands;
+      bool cbfDCT2 = true;
+      const bool isOneMode                   = false;
+      maxModesTested                         = totalModesToTest > maxModesTested ? totalModesToTest : maxModesTested;
 
+      int currModeId = 0;
       int default0Save1Load2 = 0;
-      singleDistTmpLuma = 0;
 
-      if( modeId == firstCheckId && nNumTransformCands > 1 )
-      {
-        default0Save1Load2 = 1;
-      }
-      else if (modeId != firstCheckId)
-      {
-        default0Save1Load2 = 2;
-      }
-      if( cu.ispMode )
+
+      if (!isOneMode)
       {
-        default0Save1Load2 = 0;
+        ctxStart = m_CABACEstimator->getCtx();
       }
-      if( nNumTransformCands > 1 )
+
+      for (int modeId = 0; modeId < nNumTransformCands; modeId++)
       {
-        xIntraCodingTUBlock( tu, COMPONENT_Y, false, singleDistTmpLuma, default0Save1Load2, &numSig, modeId == 0 ? &trModes : nullptr, true );
-        if( modeId == 0 )
+        for (int crossCPredictionModeId = 0; crossCPredictionModeId < crossCPredictionModesToTest; crossCPredictionModeId++)
         {
-          for( int i = 0; i < nNumTransformCands; i++ )
+          resiCb.copyFrom( orgResiCb[4*crossCPredictionModeId] );
+          resiCr.copyFrom( orgResiCr[4*crossCPredictionModeId] );
+
+          currTU.compAlpha    [compID] = ( crossCPredictionModeId ? compAlpha[compID] : 0 );
+
+          currTU.mtsIdx[compID] = currTU.cu->bdpcmModeChroma ? MTS_SKIP : trModes[modeId].first;
+
+          currModeId++;
+
+          const bool isFirstMode = (currModeId == 1);
+          const bool isLastMode  = false; // Always store output to saveCS and tmpTU
+
+#if JVET_AHG14_LOSSLESS
+          if( !( m_pcEncCfg->getCostMode() == COST_LOSSLESS_CODING ) )
+          {
+#endif
+           //if DCT2's cbf==0, skip ts search
+          if (!cbfDCT2 && trModes[modeId].first == MTS_SKIP)
+          {
+              break;
+          }
+          if (!trModes[modeId].second)
+          {
+              continue;
+          }
+#if JVET_AHG14_LOSSLESS
+          }
+#endif
+
+          if (!isFirstMode) // if not first mode to be tested
+          {
+            m_CABACEstimator->getCtx() = ctxStart;
+          }
+
+          singleDistCTmp = 0;
+
+          if (nNumTransformCands > 1)
+          {
+              xIntraCodingTUBlock(currTU, compID, crossCPredictionModeId != 0, singleDistCTmp, default0Save1Load2, nullptr, modeId == 0 ? &trModes : nullptr, true);
+          }
+          else
+          {
+              xIntraCodingTUBlock(currTU, compID, crossCPredictionModeId != 0, singleDistCTmp, default0Save1Load2);
+          }
+
+          if (((crossCPredictionModeId == 1) && (currTU.compAlpha[compID] == 0)) || ((currTU.mtsIdx[compID] == MTS_SKIP && !currTU.cu->bdpcmModeChroma) && !TU::getCbf(currTU, compID))) //In order not to code TS flag when cbf is zero, the case for TS with cbf being zero is forbidden.
+          {
+            singleCostTmp = MAX_DOUBLE;
+          }
+          else if( lumaUsesISP && bestCostSoFar != MAX_DOUBLE && c == COMPONENT_Cb )
+          {
+            uint64_t fracBitsTmp = xGetIntraFracBitsQTSingleChromaComponent( cs, partitioner, ComponentID( c ) );
+            singleCostTmp = m_pcRdCost->calcRdCost( fracBitsTmp, singleDistCTmp );
+            if( isOneMode || ( !isOneMode && !isLastMode ) )
+            {
+              m_CABACEstimator->getCtx() = ctxStart;
+            }
+          }
+          else if( !isOneMode )
+          {
+            uint64_t fracBitsTmp = xGetIntraFracBitsQTChroma( currTU, compID );
+            singleCostTmp = m_pcRdCost->calcRdCost( fracBitsTmp, singleDistCTmp );
+          }
+
+          if( singleCostTmp < dSingleCost )
           {
-            if( trModes[i].second )
+            dSingleCost = singleCostTmp;
+            bestModeId  = currModeId;
+
+            if ( c == COMPONENT_Cb )
+            {
+              bestCostCb = singleCostTmp;
+              bestDistCb = singleDistCTmp;
+            }
+            else
+            {
+              bestCostCr = singleCostTmp;
+              bestDistCr = singleDistCTmp;
+            }
+
+            if (currTU.mtsIdx[compID] == MTS_DCT2_DCT2)
+            {
+                cbfDCT2 = TU::getCbfAtDepth(currTU, compID, currDepth);
+            }
+
+            if( !isLastMode )
             {
-              lastCheckId = trModes[i].first;
+#if KEEP_PRED_AND_RESI_SIGNALS
+              saveCS.getPredBuf   (area).copyFrom(cs.getPredBuf   (area));
+              saveCS.getOrgResiBuf(area).copyFrom(cs.getOrgResiBuf(area));
+#endif
+              saveCS.getPredBuf   (area).copyFrom(cs.getPredBuf   (area));
+              if( keepResi )
+              {
+                saveCS.getResiBuf (area).copyFrom(cs.getResiBuf   (area));
+              }
+              saveCS.getRecoBuf   (area).copyFrom(cs.getRecoBuf   (area));
+
+              tmpTU.copyComponentFrom(currTU, compID);
+
+              ctxBest = m_CABACEstimator->getCtx();
             }
           }
         }
       }
-      else
-      {
-        xIntraCodingTUBlock( tu, COMPONENT_Y, false, singleDistTmpLuma, default0Save1Load2, &numSig );
-      }
 
-      //----- determine rate and r-d cost -----
-      if( ( trModes[modeId].first != 0 && !TU::getCbfAtDepth( tu, COMPONENT_Y, currDepth ) ) )
-      {
-        //In order not to code TS flag when cbf is zero, the case for TS with cbf being zero is forbidden.
-        singleCostTmp = MAX_DOUBLE;
-      }
-      else
+      if( lumaUsesISP && dSingleCost > bestCostSoFar && c == COMPONENT_Cb )
       {
-        if( cu.ispMode && m_pcRdCost->calcRdCost( csFull->fracBits, csFull->dist + singleDistTmpLuma ) > bestCostSoFar )
-        {
-          earlySkipISP = true;
-        }
-        else
-        {
-          singleTmpFracBits = xGetIntraFracBitsQT( *csFull, partitioner, true, false, subTuCounter, ispType );
-        }
-        singleCostTmp     = m_pcRdCost->calcRdCost( singleTmpFracBits, singleDistTmpLuma );
+        //Luma + Cb cost is already larger than the best cost, so we don't need to test Cr
+        cs.dist = MAX_UINT;
+        m_CABACEstimator->getCtx() = ctxStart;
+        earlyExitISP               = true;
+        break;
+        //return cbfs;
       }
 
-      if ( !cu.ispMode && nNumTransformCands > 1 && modeId == firstCheckId )
+      // Done with one component of separate coding of Cr and Cb, just switch to the best Cb contexts if Cr coding is still to be done
+      if ( c == COMPONENT_Cb && bestModeId < totalModesToTest)
       {
-        bestDCT2cost = singleCostTmp;
+        m_CABACEstimator->getCtx() = ctxBest;
+
+        currTU.copyComponentFrom(tmpTU, COMPONENT_Cb); // Cbf of Cb is needed to estimate cost for Cr Cbf
       }
+    }
 
-      if (singleCostTmp < dSingleCost)
+    if ( !earlyExitISP )
+    {
+      // Test using joint chroma residual coding
+      double     bestCostCbCr   = bestCostCb + bestCostCr;
+      Distortion bestDistCbCr   = bestDistCb + bestDistCr;
+      int        bestJointCbCr  = 0;
+      bool       lastIsBest     = false;
+      std::vector<int>  jointCbfMasksToTest;
+      if ( cs.sps->getJointCbCrEnabledFlag() && (TU::getCbf(tmpTU, COMPONENT_Cb) || TU::getCbf(tmpTU, COMPONENT_Cr)))
       {
-        dSingleCost       = singleCostTmp;
-        uiSingleDistLuma  = singleDistTmpLuma;
-        singleFracBits    = singleTmpFracBits;
+        jointCbfMasksToTest = m_pcTrQuant->selectICTCandidates(currTU, orgResiCb, orgResiCr);
+      }
+      for( int cbfMask : jointCbfMasksToTest )
+      {
+        Distortion distTmp = 0;
+
+        currTU.jointCbCr               = (uint8_t)cbfMask;
+        currTU.compAlpha[COMPONENT_Cb] = 0;
+        currTU.compAlpha[COMPONENT_Cr] = 0;
+        // encoder bugfix: initialize mtsIdx for chroma under JointCbCrMode.
+        currTU.mtsIdx[COMPONENT_Cb] = currTU.mtsIdx[COMPONENT_Cr]  = MTS_DCT2_DCT2;
+        m_CABACEstimator->getCtx() = ctxStartTU;
 
-        bestModeId[COMPONENT_Y] = trModes[modeId].first;
-        if( trModes[modeId].first == 0 )
+        resiCb.copyFrom( orgResiCb[cbfMask] );
+        resiCr.copyFrom( orgResiCr[cbfMask] );
+        xIntraCodingTUBlock( currTU, COMPONENT_Cb, false, distTmp, 0 );
+
+        double costTmp = std::numeric_limits<double>::max();
+        if( distTmp < std::numeric_limits<Distortion>::max() )
         {
-          cbfDCT2  = TU::getCbfAtDepth( tu, COMPONENT_Y, currDepth );
+          uint64_t bits  = xGetIntraFracBitsQTChroma( currTU, COMPONENT_Cb );
+          costTmp = m_pcRdCost->calcRdCost( bits, distTmp );
         }
 
-        if( bestModeId[COMPONENT_Y] != lastCheckId )
+        if( costTmp < bestCostCbCr )
         {
-          saveCS.getPredBuf( tu.Y() ).copyFrom( csFull->getPredBuf( tu.Y() ) );
-          saveCS.getRecoBuf( tu.Y() ).copyFrom( csFull->getRecoBuf( tu.Y() ) );
+          bestCostCbCr  = costTmp;
+          bestDistCbCr  = distTmp;
+          bestJointCbCr = currTU.jointCbCr;
 
-          if( keepResi )
+          // store data
+          if( cbfMask != jointCbfMasksToTest.back() )
           {
-            saveCS.getResiBuf   ( tu.Y() ).copyFrom( csFull->getResiBuf   ( tu.Y() ) );
-            saveCS.getOrgResiBuf( tu.Y() ).copyFrom( csFull->getOrgResiBuf( tu.Y() ) );
-          }
+#if KEEP_PRED_AND_RESI_SIGNALS
+            saveCS.getOrgResiBuf(cbArea).copyFrom(cs.getOrgResiBuf(cbArea));
+            saveCS.getOrgResiBuf(crArea).copyFrom(cs.getOrgResiBuf(crArea));
+#endif
+            saveCS.getPredBuf   (cbArea).copyFrom(cs.getPredBuf   (cbArea));
+            saveCS.getPredBuf   (crArea).copyFrom(cs.getPredBuf   (crArea));
+            if( keepResi )
+            {
+              saveCS.getResiBuf (cbArea).copyFrom(cs.getResiBuf   (cbArea));
+              saveCS.getResiBuf (crArea).copyFrom(cs.getResiBuf   (crArea));
+            }
+            saveCS.getRecoBuf   (cbArea).copyFrom(cs.getRecoBuf   (cbArea));
+            saveCS.getRecoBuf   (crArea).copyFrom(cs.getRecoBuf   (crArea));
 
-          tmpTU->copyComponentFrom( tu, COMPONENT_Y );
+            tmpTU.copyComponentFrom(currTU, COMPONENT_Cb);
+            tmpTU.copyComponentFrom(currTU, COMPONENT_Cr);
 
-          ctxBest = m_CABACEstimator->getCtx();
+            ctxBest = m_CABACEstimator->getCtx();
+          }
+          else
+          {
+            lastIsBest = true;
+          }
         }
       }
-    }
-
-    if( bestModeId[COMPONENT_Y] != lastCheckId )
-    {
-      csFull->getPredBuf( tu.Y() ).copyFrom( saveCS.getPredBuf( tu.Y() ) );
-      csFull->getRecoBuf( tu.Y() ).copyFrom( saveCS.getRecoBuf( tu.Y() ) );
 
-      if( keepResi )
+      // Retrieve the best CU data (unless it was the very last one tested)
+      if ( !( maxModesTested == 1 && jointCbfMasksToTest.empty() ) && !lastIsBest )
       {
-        csFull->getResiBuf   ( tu.Y() ).copyFrom( saveCS.getResiBuf   ( tu.Y() ) );
-        csFull->getOrgResiBuf( tu.Y() ).copyFrom( saveCS.getOrgResiBuf( tu.Y() ) );
-      }
+#if KEEP_PRED_AND_RESI_SIGNALS
+        cs.getPredBuf   (cbArea).copyFrom(saveCS.getPredBuf   (cbArea));
+        cs.getOrgResiBuf(cbArea).copyFrom(saveCS.getOrgResiBuf(cbArea));
+        cs.getPredBuf   (crArea).copyFrom(saveCS.getPredBuf   (crArea));
+        cs.getOrgResiBuf(crArea).copyFrom(saveCS.getOrgResiBuf(crArea));
+#endif
+        cs.getPredBuf   (cbArea).copyFrom(saveCS.getPredBuf   (cbArea));
+        cs.getPredBuf   (crArea).copyFrom(saveCS.getPredBuf   (crArea));
 
-      tu.copyComponentFrom( *tmpTU, COMPONENT_Y );
+        if( keepResi )
+        {
+          cs.getResiBuf (cbArea).copyFrom(saveCS.getResiBuf   (cbArea));
+          cs.getResiBuf (crArea).copyFrom(saveCS.getResiBuf   (crArea));
+        }
+        cs.getRecoBuf   (cbArea).copyFrom(saveCS.getRecoBuf   (cbArea));
+        cs.getRecoBuf   (crArea).copyFrom(saveCS.getRecoBuf   (crArea));
+
+        currTU.copyComponentFrom(tmpTU, COMPONENT_Cb);
+        currTU.copyComponentFrom(tmpTU, COMPONENT_Cr);
 
-      if( !bCheckSplit )
-      {
         m_CABACEstimator->getCtx() = ctxBest;
       }
-    }
-    else if( bCheckSplit )
-    {
-      ctxBest = m_CABACEstimator->getCtx();
-    }
 
-    csFull->cost     += dSingleCost;
-    csFull->dist     += uiSingleDistLuma;
-    csFull->fracBits += singleFracBits;
-  }
+      // Copy results to the picture structures
+      cs.picture->getRecoBuf(cbArea).copyFrom(cs.getRecoBuf(cbArea));
+      cs.picture->getRecoBuf(crArea).copyFrom(cs.getRecoBuf(crArea));
+      cs.picture->getPredBuf(cbArea).copyFrom(cs.getPredBuf(cbArea));
+      cs.picture->getPredBuf(crArea).copyFrom(cs.getPredBuf(crArea));
 
-  if( bCheckSplit )
-  {
-    //----- store full entropy coding status, load original entropy coding status -----
-    if( bCheckFull )
-    {
-      m_CABACEstimator->getCtx() = ctxStart;
+      cbfs.cbf(COMPONENT_Cb) = TU::getCbf(currTU, COMPONENT_Cb);
+      cbfs.cbf(COMPONENT_Cr) = TU::getCbf(currTU, COMPONENT_Cr);
+
+      currTU.jointCbCr = ( (cbfs.cbf(COMPONENT_Cb) + cbfs.cbf(COMPONENT_Cr)) ? bestJointCbCr : 0 );
+      cs.dist         += bestDistCbCr;
     }
-    //----- code splitted block -----
-    csSplit->cost = 0;
+  }
+  else
+  {
+    unsigned    numValidTBlocks   = ::getNumberValidTBlocks( *cs.pcv );
+    ChromaCbfs  SplitCbfs         ( false );
 
-    bool uiSplitCbfLuma  = false;
-    bool splitIsSelected = true;
     if( partitioner.canSplit( TU_MAX_TR_SPLIT, cs ) )
     {
       partitioner.splitCurrArea( TU_MAX_TR_SPLIT, cs );
     }
-
-    if( cu.ispMode )
+    else if( currTU.cu->ispMode )
     {
-      partitioner.splitCurrArea( ispType, *csSplit );
+      partitioner.splitCurrArea( ispType, cs );
     }
+    else
+      THROW( "Implicit TU split not available" );
+
     do
     {
-      xRecurIntraCodingLumaQT( *csSplit, partitioner, bestCostSoFar, subTuCounter, ispType );
-      subTuCounter += subTuCounter != -1 ? 1 : 0;
+      ChromaCbfs subCbfs = xRecurIntraChromaCodingQT( cs, partitioner, bestCostSoFar, ispType );
 
-      if( !cu.ispMode )
-      {
-        csSplit->setDecomp( partitioner.currArea().Y() );
-      }
-      else if( CU::isISPFirst( cu, partitioner.currArea().Y(), COMPONENT_Y ) )
+      for( uint32_t ch = COMPONENT_Cb; ch < numValidTBlocks; ch++ )
       {
-        csSplit->setDecomp( cu.Y() );
+        const ComponentID compID = ComponentID( ch );
+        SplitCbfs.cbf( compID ) |= subCbfs.cbf( compID );
       }
+    } while( partitioner.nextPart( cs ) );
 
-      uiSplitCbfLuma |= TU::getCbfAtDepth( *csSplit->getTU( partitioner.currArea().lumaPos(), partitioner.chType, subTuCounter - 1 ), COMPONENT_Y, partitioner.currTrDepth );
-      if( cu.ispMode )
+    partitioner.exitCurrSplit();
+
+    if( lumaUsesISP && cs.dist == MAX_UINT )
+    {
+      return cbfs;
+    }
+    {
+
+      cbfs.Cb |= SplitCbfs.Cb;
+      cbfs.Cr |= SplitCbfs.Cr;
+
+      if( !lumaUsesISP )
       {
-        //exit condition if the accumulated cost is already larger than the best cost so far (no impact in RD performance)
-        if( csSplit->cost > bestCostSoFar )
-        {
-          earlySkipISP    = true;
-          splitIsSelected = false;
-          break;
-        }
-        else
+        for( auto &ptu : cs.tus )
         {
-          //more restrictive exit condition
-          bool tuIsDividedInRows = CU::divideTuInRows( cu );
-          int nSubPartitions = tuIsDividedInRows ? cu.lheight() >> g_aucLog2[cu.firstTU->lheight()] : cu.lwidth() >> g_aucLog2[cu.firstTU->lwidth()];
-          double threshold = nSubPartitions == 2 ? 0.95 : subTuCounter == 1 ? 0.83 : 0.91;
-          if( subTuCounter < nSubPartitions && csSplit->cost > bestCostSoFar*threshold )
+          if( currArea.Cb().contains( ptu->Cb() ) || ( !ptu->Cb().valid() && currArea.Y().contains( ptu->Y() ) ) )
           {
-            earlySkipISP    = true;
-            splitIsSelected = false;
-            break;
+            TU::setCbfAtDepth( *ptu, COMPONENT_Cb, currDepth, SplitCbfs.Cb );
+            TU::setCbfAtDepth( *ptu, COMPONENT_Cr, currDepth, SplitCbfs.Cr );
           }
         }
       }
+    }
+  }
 
+  return cbfs;
+}
 
+uint64_t IntraSearch::xFracModeBitsIntra(PredictionUnit &pu, const uint32_t &uiMode, const ChannelType &chType)
+{
+  uint32_t orgMode = uiMode;
 
-    } while( partitioner.nextPart( *csSplit ) );
+  if (!pu.ciipFlag)
+  std::swap(orgMode, pu.intraDir[chType]);
 
-    partitioner.exitCurrSplit();
+  m_CABACEstimator->resetBits();
 
-    if( splitIsSelected )
+  if( isLuma( chType ) )
+  {
+    if (!pu.ciipFlag)
     {
-      for( auto &ptu : csSplit->tus )
-      {
-        if( currArea.Y().contains( ptu->Y() ) )
-        {
-          TU::setCbfAtDepth( *ptu, COMPONENT_Y, currDepth, uiSplitCbfLuma ? 1 : 0 );
-        }
-      }
+      m_CABACEstimator->intra_luma_pred_mode(pu);
+    }
+  }
+  else
+  {
+    m_CABACEstimator->intra_chroma_pred_mode( pu );
+  }
 
-      //----- restore context states -----
-      m_CABACEstimator->getCtx() = ctxStart;
+  if ( !pu.ciipFlag )
+  std::swap(orgMode, pu.intraDir[chType]);
 
-      //----- determine rate and r-d cost -----
-      csSplit->fracBits = xGetIntraFracBitsQT( *csSplit, partitioner, true, false, cu.ispMode ? 0 : -1, ispType );
+  return m_CABACEstimator->getEstFracBits();
+}
 
-      //--- update cost ---
-      csSplit->cost     = m_pcRdCost->calcRdCost(csSplit->fracBits, csSplit->dist);
-    }
+void IntraSearch::sortRdModeListFirstColorSpace(ModeInfo mode, double cost, char bdpcmMode, ModeInfo* rdModeList, double* rdCostList, char* bdpcmModeList, int& candNum)
+{
+  if (candNum == 0)
+  {
+    rdModeList[0] = mode;
+    rdCostList[0] = cost;
+    bdpcmModeList[0] = bdpcmMode;
+    candNum++;
+    return;
   }
 
-  if( csFull || csSplit )
+  int insertPos = -1;
+  for (int pos = candNum - 1; pos >= 0; pos--)
   {
+    if (cost < rdCostList[pos])
     {
-      // otherwise this would've happened in useSubStructure
-      cs.picture->getRecoBuf( currArea.Y() ).copyFrom( cs.getRecoBuf( currArea.Y() ) );
-      cs.picture->getPredBuf( currArea.Y() ).copyFrom( cs.getPredBuf( currArea.Y() ) );
+      insertPos = pos;
     }
+  }
 
-    if( cu.ispMode && earlySkipISP )
-    {
-      cs.cost = MAX_DOUBLE;
-    }
-    else
+  if (insertPos >= 0)
+  {
+    for (int i = candNum - 1; i >= insertPos; i--)
     {
-      cs.cost = m_pcRdCost->calcRdCost( cs.fracBits, cs.dist );
+      rdModeList[i + 1] = rdModeList[i];
+      rdCostList[i + 1] = rdCostList[i];
+      bdpcmModeList[i + 1] = bdpcmModeList[i];
     }
+    rdModeList[insertPos] = mode;
+    rdCostList[insertPos] = cost;
+    bdpcmModeList[insertPos] = bdpcmMode;
+    candNum++;
+  }
+  else
+  {
+    rdModeList[candNum] = mode;
+    rdCostList[candNum] = cost;
+    bdpcmModeList[candNum] = bdpcmMode;
+    candNum++;
   }
-}
-
-ChromaCbfs IntraSearch::xRecurIntraChromaCodingQT( CodingStructure &cs, Partitioner& partitioner, const double bestCostSoFar, const PartSplit ispType )
-{
-  UnitArea currArea                   = partitioner.currArea();
-  const bool keepResi                 = cs.sps->getUseLMChroma() || KEEP_PRED_AND_RESI_SIGNALS;
-  if( !currArea.Cb().valid() ) return ChromaCbfs( false );
 
+  CHECK(candNum > FAST_UDI_MAX_RDMODE_NUM, "exceed intra mode candidate list capacity");
 
-  TransformUnit &currTU               = *cs.getTU( currArea.chromaPos(), CHANNEL_TYPE_CHROMA );
-  const PredictionUnit &pu            = *cs.getPU( currArea.chromaPos(), CHANNEL_TYPE_CHROMA );
+  return;
+}
 
-  bool lumaUsesISP                    = !CS::isDualITree( cs ) && currTU.cu->ispMode;
-  uint32_t     currDepth                  = partitioner.currTrDepth;
-  const PPS &pps                      = *cs.pps;
-  ChromaCbfs cbfs                     ( false );
+void IntraSearch::invalidateBestRdModeFirstColorSpace()
+{
+  int numSaveRdClass = 4 * NUM_LFNST_NUM_PER_SET * 2;
+  int savedRdModeListSize = FAST_UDI_MAX_RDMODE_NUM;
 
-  if (currDepth == currTU.depth)
+  for (int i = 0; i < numSaveRdClass; i++)
   {
-    if (!currArea.Cb().valid() || !currArea.Cr().valid())
+    m_numSavedRdModeFirstColorSpace[i] = 0;
+    for (int j = 0; j < savedRdModeListSize; j++)
     {
-      return cbfs;
+      m_savedRdModeFirstColorSpace[i][j] = ModeInfo(false, false, 0, NOT_INTRA_SUBPARTITIONS, 0);
+      m_savedBDPCMModeFirstColorSpace[i][j] = 0;
+      m_savedRdCostFirstColorSpace[i][j] = MAX_DOUBLE;
     }
+  }
+}
 
+template<typename T, size_t N>
+void IntraSearch::reduceHadCandList(static_vector<T, N>& candModeList, static_vector<double, N>& candCostList, int& numModesForFullRD, const double thresholdHadCost, const double* mipHadCost, const PredictionUnit &pu, const bool fastMip)
+{
+  const int maxCandPerType = numModesForFullRD >> 1;
+  static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> tempRdModeList;
+  static_vector<double, FAST_UDI_MAX_RDMODE_NUM> tempCandCostList;
+  const double minCost = candCostList[0];
+  bool keepOneMip = candModeList.size() > numModesForFullRD;
+
+  int numConv = 0;
+  int numMip = 0;
+  for (int idx = 0; idx < candModeList.size() - (keepOneMip?0:1); idx++)
+  {
+    bool addMode = false;
+    const ModeInfo& orgMode = candModeList[idx];
 
-    CodingStructure &saveCS = *m_pSaveCS[1];
-    saveCS.pcv      = cs.pcv;
-    saveCS.picture  = cs.picture;
-    saveCS.area.repositionTo( cs.area );
-    saveCS.initStructData( MAX_INT, false, true );
-
-    if( !CS::isDualITree( cs ) && currTU.cu->ispMode )
+    if (!orgMode.mipFlg)
     {
-      saveCS.clearCUs();
-      CodingUnit& auxCU = saveCS.addCU( *currTU.cu, partitioner.chType );
-      auxCU.ispMode = currTU.cu->ispMode;
-      saveCS.sps = currTU.cs->sps;
-      saveCS.clearPUs();
-      saveCS.addPU( *currTU.cu->firstPU, partitioner.chType );
+      addMode = (numConv < 3);
+      numConv += addMode ? 1:0;
     }
+    else
+    {
+      addMode = ( numMip < maxCandPerType || (candCostList[idx] < thresholdHadCost * minCost) || keepOneMip );
+      keepOneMip = false;
+      numMip += addMode ? 1:0;
+    }
+    if( addMode )
+    {
+      tempRdModeList.push_back(orgMode);
+      tempCandCostList.push_back(candCostList[idx]);
+    }
+  }
 
-    TransformUnit &tmpTU = saveCS.addTU(currArea, partitioner.chType);
+  if ((pu.lwidth() > 8 && pu.lheight() > 8))
+  {
+    // Sort MIP candidates by Hadamard cost
+    const int transpOff = getNumModesMip( pu.Y() );
+    static_vector<uint8_t, FAST_UDI_MAX_RDMODE_NUM> sortedMipModes(0);
+    static_vector<double, FAST_UDI_MAX_RDMODE_NUM> sortedMipCost(0);
+    for( uint8_t mode : { 0, 1, 2 } )
+    {
+      uint8_t candMode = mode + uint8_t((mipHadCost[mode + transpOff] < mipHadCost[mode]) ? transpOff : 0);
+      updateCandList(candMode, mipHadCost[candMode], sortedMipModes, sortedMipCost, 3);
+    }
 
+    // Append MIP mode to RD mode list
+    const int modeListSize = int(tempRdModeList.size());
+    for (int idx = 0; idx < 3; idx++)
+    {
+      const bool     isTransposed = (sortedMipModes[idx] >= transpOff ? true : false);
+      const uint32_t mipIdx       = (isTransposed ? sortedMipModes[idx] - transpOff : sortedMipModes[idx]);
+      const ModeInfo mipMode( true, isTransposed, 0, NOT_INTRA_SUBPARTITIONS, mipIdx );
+      bool alreadyIncluded = false;
+      for (int modeListIdx = 0; modeListIdx < modeListSize; modeListIdx++)
+      {
+        if (tempRdModeList[modeListIdx] == mipMode)
+        {
+          alreadyIncluded = true;
+          break;
+        }
+      }
 
-    cs.setDecomp(currArea.Cb(), true); // set in advance (required for Cb2/Cr2 in 4:2:2 video)
+      if (!alreadyIncluded)
+      {
+        tempRdModeList.push_back(mipMode);
+        tempCandCostList.push_back(0);
+        if( fastMip ) break;
+      }
+    }
+  }
 
-    const unsigned      numTBlocks  = ::getNumberValidTBlocks( *cs.pcv );
+  candModeList = tempRdModeList;
+  candCostList = tempCandCostList;
+  numModesForFullRD = int(candModeList.size());
+}
 
-    for( uint32_t c = COMPONENT_Cb; c < numTBlocks; c++)
-    {
-      const ComponentID compID  = ComponentID(c);
-      const CompArea&   area    = currTU.blocks[compID];
+// It decides which modes from the ISP lists can be full RD tested
+void IntraSearch::xGetNextISPMode(ModeInfo& modeInfo, const ModeInfo* lastMode, const Size cuSize)
+{
+  static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM>* rdModeLists[2] = { &m_ispCandListHor, &m_ispCandListVer };
 
-      double     dSingleCost    = MAX_DOUBLE;
-      int        bestModeId     = 0;
-      Distortion singleDistC    = 0;
-      Distortion singleDistCTmp = 0;
-      double     singleCostTmp  = 0;
+  const int curIspLfnstIdx = m_curIspLfnstIdx;
+  if (curIspLfnstIdx >= NUM_LFNST_NUM_PER_SET)
+  {
+    //All lfnst indices have been checked
+    return;
+  }
 
-      const bool checkCrossComponentPrediction = PU::isChromaIntraModeCrossCheckMode( pu ) && pps.getPpsRangeExtension().getCrossComponentPredictionEnabledFlag() && TU::getCbf( currTU, COMPONENT_Y );
+  ISPType nextISPcandSplitType;
+  auto& ispTestedModes = m_ispTestedModes[curIspLfnstIdx];
+  const bool horSplitIsTerminated = ispTestedModes.splitIsFinished[HOR_INTRA_SUBPARTITIONS - 1];
+  const bool verSplitIsTerminated = ispTestedModes.splitIsFinished[VER_INTRA_SUBPARTITIONS - 1];
+  if (!horSplitIsTerminated && !verSplitIsTerminated)
+  {
+    nextISPcandSplitType = !lastMode ? HOR_INTRA_SUBPARTITIONS : lastMode->ispMod == HOR_INTRA_SUBPARTITIONS ? VER_INTRA_SUBPARTITIONS : HOR_INTRA_SUBPARTITIONS;
+  }
+  else if (!horSplitIsTerminated && verSplitIsTerminated)
+  {
+    nextISPcandSplitType = HOR_INTRA_SUBPARTITIONS;
+  }
+  else if (horSplitIsTerminated && !verSplitIsTerminated)
+  {
+    nextISPcandSplitType = VER_INTRA_SUBPARTITIONS;
+  }
+  else
+  {
+    xFinishISPModes();
+    return;   // no more modes will be tested
+  }
 
-      const int  crossCPredictionModesToTest = checkCrossComponentPrediction ? 2 : 1;
-      const int  totalModesToTest            = crossCPredictionModesToTest;
-      const bool isOneMode                   = (totalModesToTest == 1);
+  int maxNumSubPartitions = ispTestedModes.numTotalParts[nextISPcandSplitType - 1];
 
-      int currModeId = 0;
-      int default0Save1Load2 = 0;
+  // We try to break the split here for lfnst > 0 according to the first mode 
+  if (curIspLfnstIdx > 0 && ispTestedModes.numTestedModes[nextISPcandSplitType - 1] == 1)
+  {
+    int firstModeThisSplit = ispTestedModes.getTestedIntraMode(nextISPcandSplitType, 0);
+    int numSubPartsFirstModeThisSplit = ispTestedModes.getNumCompletedSubParts(nextISPcandSplitType, firstModeThisSplit);
+    CHECK(numSubPartsFirstModeThisSplit < 0, "wrong number of subpartitions!");
+    bool stopThisSplit = false;
+    bool stopThisSplitAllLfnsts = false;
+    if (numSubPartsFirstModeThisSplit < maxNumSubPartitions)
+    {
+      stopThisSplit = true;
+      if (m_pcEncCfg->getUseFastISP() && curIspLfnstIdx == 1 && numSubPartsFirstModeThisSplit < maxNumSubPartitions - 1)
+      {
+        stopThisSplitAllLfnsts = true;
+      }
+    }
 
-      TempCtx ctxStart  ( m_CtxCache );
-      TempCtx ctxBest   ( m_CtxCache );
+    if (stopThisSplit)
+    {
+      ispTestedModes.splitIsFinished[nextISPcandSplitType - 1] = true;
+      if (curIspLfnstIdx == 1 && stopThisSplitAllLfnsts)
+      {
+        m_ispTestedModes[2].splitIsFinished[nextISPcandSplitType - 1] = true;
+      }
+      return;
+    }
+  }
 
-      if (!isOneMode)
+  // We try to break the split here for lfnst = 0 or all lfnst indices according to the first two modes 
+  if (curIspLfnstIdx == 0 && ispTestedModes.numTestedModes[nextISPcandSplitType - 1] == 2)
+  {
+    // Split stop criteria after checking the performance of previously tested intra modes
+    const int thresholdSplit1 = maxNumSubPartitions;
+    bool stopThisSplit = false;
+    bool stopThisSplitForAllLFNSTs = false;
+    const int thresholdSplit1ForAllLFNSTs = maxNumSubPartitions - 1;
+
+    int mode1 = ispTestedModes.getTestedIntraMode((ISPType)nextISPcandSplitType, 0);
+    mode1 = mode1 == DC_IDX ? -1 : mode1;
+    int numSubPartsBestMode1 = mode1 != -1 ? ispTestedModes.getNumCompletedSubParts((ISPType)nextISPcandSplitType, mode1) : -1;
+    int mode2 = ispTestedModes.getTestedIntraMode((ISPType)nextISPcandSplitType, 1);
+    mode2 = mode2 == DC_IDX ? -1 : mode2;
+    int numSubPartsBestMode2 = mode2 != -1 ? ispTestedModes.getNumCompletedSubParts((ISPType)nextISPcandSplitType, mode2) : -1;
+
+    // 1) The 2 most promising modes do not reach a certain number of sub-partitions
+    if (numSubPartsBestMode1 != -1 && numSubPartsBestMode2 != -1)
+    {
+      if (numSubPartsBestMode1 < thresholdSplit1 && numSubPartsBestMode2 < thresholdSplit1)
       {
-        ctxStart = m_CABACEstimator->getCtx();
+        stopThisSplit = true;
+        if (curIspLfnstIdx == 0 && numSubPartsBestMode1 < thresholdSplit1ForAllLFNSTs && numSubPartsBestMode2 < thresholdSplit1ForAllLFNSTs)
+        {
+          stopThisSplitForAllLFNSTs = true;
+        }
       }
-
+      else
       {
-        for (int crossCPredictionModeId = 0; crossCPredictionModeId < crossCPredictionModesToTest; crossCPredictionModeId++)
+        //we stop also if the cost is MAX_DOUBLE for both modes
+        double mode1Cost = ispTestedModes.getRDCost(nextISPcandSplitType, mode1);
+        double mode2Cost = ispTestedModes.getRDCost(nextISPcandSplitType, mode2);
+        if (!(mode1Cost < MAX_DOUBLE || mode2Cost < MAX_DOUBLE))
         {
-          currTU.compAlpha    [compID] = 0;
-
-          currModeId++;
-
-          const bool isFirstMode = (currModeId == 1);
-          const bool isLastMode  = (currModeId == totalModesToTest); // currModeId is indexed from 1
-
-          if (isOneMode)
-          {
-            default0Save1Load2 = 0;
-          }
-          else if (!isOneMode && (crossCPredictionModeId == 0))
-          {
-            default0Save1Load2 = 1; //save prediction on first mode
-          }
-          else
-          {
-            default0Save1Load2 = 2; //load it on subsequent modes
-          }
-
-          if (!isFirstMode) // if not first mode to be tested
-          {
-            m_CABACEstimator->getCtx() = ctxStart;
-          }
-
-          singleDistCTmp = 0;
-
-          xIntraCodingTUBlock( currTU, compID, crossCPredictionModeId != 0, singleDistCTmp, default0Save1Load2 );
+          stopThisSplit = true;
+        }
+      }
+    }
 
-          if( ( ( crossCPredictionModeId == 1 ) && ( currTU.compAlpha[compID] == 0 ) ) ) //In order not to code TS flag when cbf is zero, the case for TS with cbf being zero is forbidden.
-          {
-            singleCostTmp = MAX_DOUBLE;
-          }
-          else if( lumaUsesISP && bestCostSoFar != MAX_DOUBLE && c == COMPONENT_Cb )
+    if (!stopThisSplit)
+    {
+      // 2) One split type may be discarded by comparing the number of sub-partitions of the best angle modes of both splits 
+      ISPType otherSplit = nextISPcandSplitType == HOR_INTRA_SUBPARTITIONS ? VER_INTRA_SUBPARTITIONS : HOR_INTRA_SUBPARTITIONS;
+      int  numSubPartsBestMode2OtherSplit = mode2 != -1 ? ispTestedModes.getNumCompletedSubParts(otherSplit, mode2) : -1;
+      if (numSubPartsBestMode2OtherSplit != -1 && numSubPartsBestMode2 != -1 && ispTestedModes.bestSplitSoFar != nextISPcandSplitType)
+      {
+        if (numSubPartsBestMode2OtherSplit > numSubPartsBestMode2)
+        {
+          stopThisSplit = true;
+        }
+        // both have the same number of subpartitions
+        else if (numSubPartsBestMode2OtherSplit == numSubPartsBestMode2)
+        {
+          // both have the maximum number of subpartitions, so it compares RD costs to decide
+          if (numSubPartsBestMode2OtherSplit == maxNumSubPartitions)
           {
-            uint64_t fracBitsTmp = xGetIntraFracBitsQTSingleChromaComponent( cs, partitioner, ComponentID( c ) );
-            singleCostTmp = m_pcRdCost->calcRdCost( fracBitsTmp, singleDistCTmp );
-            if( isOneMode || ( !isOneMode && !isLastMode ) )
+            double rdCostBestMode2ThisSplit = ispTestedModes.getRDCost(nextISPcandSplitType, mode2);
+            double rdCostBestMode2OtherSplit = ispTestedModes.getRDCost(otherSplit, mode2);
+            double threshold = 1.3;
+            if (rdCostBestMode2ThisSplit == MAX_DOUBLE || rdCostBestMode2OtherSplit < rdCostBestMode2ThisSplit * threshold)
             {
-              m_CABACEstimator->getCtx() = ctxStart;
+              stopThisSplit = true;
             }
           }
-          else if( !isOneMode )
-          {
-            uint64_t fracBitsTmp = xGetIntraFracBitsQTChroma( currTU, compID );
-            singleCostTmp = m_pcRdCost->calcRdCost( fracBitsTmp, singleDistCTmp );
-          }
-
-          if( singleCostTmp < dSingleCost )
+          else // none of them reached the maximum number of subpartitions with the best angle modes, so it compares the results with the planar mode
           {
-            dSingleCost = singleCostTmp;
-            singleDistC = singleDistCTmp;
-            bestModeId  = currModeId;
-
-            if( !isLastMode )
+            int  numSubPartsBestMode1OtherSplit = mode1 != -1 ? ispTestedModes.getNumCompletedSubParts(otherSplit, mode1) : -1;
+            if (numSubPartsBestMode1OtherSplit != -1 && numSubPartsBestMode1 != -1 && numSubPartsBestMode1OtherSplit > numSubPartsBestMode1)
             {
-#if KEEP_PRED_AND_RESI_SIGNALS
-              saveCS.getPredBuf   (area).copyFrom(cs.getPredBuf   (area));
-              saveCS.getOrgResiBuf(area).copyFrom(cs.getOrgResiBuf(area));
-#endif
-              saveCS.getPredBuf   (area).copyFrom(cs.getPredBuf   (area));
-              if( keepResi )
-              {
-                saveCS.getResiBuf (area).copyFrom(cs.getResiBuf   (area));
-              }
-              saveCS.getRecoBuf   (area).copyFrom(cs.getRecoBuf   (area));
-
-              tmpTU.copyComponentFrom(currTU, compID);
-
-              ctxBest = m_CABACEstimator->getCtx();
+              stopThisSplit = true;
             }
           }
         }
       }
-
-      if( lumaUsesISP && dSingleCost > bestCostSoFar && c == COMPONENT_Cb )
+    }
+    if (stopThisSplit)
+    {
+      ispTestedModes.splitIsFinished[nextISPcandSplitType - 1] = true;
+      if (stopThisSplitForAllLFNSTs)
       {
-        //Luma + Cb cost is already larger than the best cost, so we don't need to test Cr
-        cs.dist = MAX_UINT;
-        m_CABACEstimator->getCtx() = ctxStart;
-        break;
-        //return cbfs;
+        for (int lfnstIdx = 1; lfnstIdx < NUM_LFNST_NUM_PER_SET; lfnstIdx++)
+        {
+          m_ispTestedModes[lfnstIdx].splitIsFinished[nextISPcandSplitType - 1] = true;
+        }
       }
+      return;
+    }
+  }
+
+  // Now a new mode is retrieved from the list and it has to be decided whether it should be tested or not
+  if (ispTestedModes.candIndexInList[nextISPcandSplitType - 1] < rdModeLists[nextISPcandSplitType - 1]->size())
+  {
+    ModeInfo candidate = rdModeLists[nextISPcandSplitType - 1]->at(ispTestedModes.candIndexInList[nextISPcandSplitType - 1]);
+    ispTestedModes.candIndexInList[nextISPcandSplitType - 1]++;
 
-      if (bestModeId < totalModesToTest)
+    // extra modes are only tested if ISP has won so far
+    if (ispTestedModes.candIndexInList[nextISPcandSplitType - 1] > ispTestedModes.numOrigModesToTest)
+    {
+      if (ispTestedModes.bestSplitSoFar != candidate.ispMod || ispTestedModes.bestModeSoFar == PLANAR_IDX)
       {
-#if KEEP_PRED_AND_RESI_SIGNALS
-        cs.getPredBuf   (area).copyFrom(saveCS.getPredBuf   (area));
-        cs.getOrgResiBuf(area).copyFrom(saveCS.getOrgResiBuf(area));
-#endif
-        cs.getPredBuf   (area).copyFrom(saveCS.getPredBuf   (area));
-        if( keepResi )
-        {
-          cs.getResiBuf (area).copyFrom(saveCS.getResiBuf   (area));
-        }
-        cs.getRecoBuf   (area).copyFrom(saveCS.getRecoBuf   (area));
+        ispTestedModes.splitIsFinished[nextISPcandSplitType - 1] = true;
+        return;
+      }
+    }
 
-        currTU.copyComponentFrom(tmpTU, compID);
+    bool testCandidate = true;
 
-        m_CABACEstimator->getCtx() = ctxBest;
+    // we look for a reference mode that has already been tested within the window and decide to test the new one according to the reference mode costs
+    if (maxNumSubPartitions > 2 && (curIspLfnstIdx > 0 || (candidate.modeId >= DC_IDX && ispTestedModes.numTestedModes[nextISPcandSplitType - 1] >= 2)))
+    {
+      int       refLfnstIdx = -1;
+      const int angWindowSize = 5;
+      int       numSubPartsLeftMode, numSubPartsRightMode, numSubPartsRefMode, leftIntraMode = -1, rightIntraMode = -1;
+      int       windowSize = candidate.modeId > DC_IDX ? angWindowSize : 1;
+      int       numSamples = cuSize.width << floorLog2(cuSize.height);
+      int       numSubPartsLimit = numSamples >= 256 ? maxNumSubPartitions - 1 : 2;
+
+      xFindAlreadyTestedNearbyIntraModes(curIspLfnstIdx, (int)candidate.modeId, &refLfnstIdx, &leftIntraMode, &rightIntraMode, (ISPType)candidate.ispMod, windowSize);
+
+      if (refLfnstIdx != -1 && refLfnstIdx != curIspLfnstIdx)
+      {
+        CHECK(leftIntraMode != candidate.modeId || rightIntraMode != candidate.modeId, "wrong intra mode and lfnstIdx values!");
+        numSubPartsRefMode = m_ispTestedModes[refLfnstIdx].getNumCompletedSubParts((ISPType)candidate.ispMod, candidate.modeId);
+        CHECK(numSubPartsRefMode <= 0, "Wrong value of the number of subpartitions completed!");
       }
+      else
+      {
+        numSubPartsLeftMode = leftIntraMode != -1 ? ispTestedModes.getNumCompletedSubParts((ISPType)candidate.ispMod, leftIntraMode) : -1;
+        numSubPartsRightMode = rightIntraMode != -1 ? ispTestedModes.getNumCompletedSubParts((ISPType)candidate.ispMod, rightIntraMode) : -1;
 
-      cs.picture->getPredBuf(area).copyFrom(cs.getPredBuf(area));
-      cs.picture->getRecoBuf(area).copyFrom(cs.getRecoBuf(area));
+        numSubPartsRefMode = std::max(numSubPartsLeftMode, numSubPartsRightMode);
+      }
 
-      cbfs.cbf(compID) = TU::getCbf(currTU, compID);
+      if (numSubPartsRefMode > 0)
+      {
+        // The mode was found. Now we check the condition
+        testCandidate = numSubPartsRefMode > numSubPartsLimit;
+      }
+    }
 
-      cs.dist += singleDistC;
+    if (testCandidate)
+    {
+      modeInfo = candidate;
     }
   }
   else
   {
-    unsigned    numValidTBlocks   = ::getNumberValidTBlocks( *cs.pcv );
-    ChromaCbfs  SplitCbfs         ( false );
+    //the end of the list was reached, so the split is invalidated
+    ispTestedModes.splitIsFinished[nextISPcandSplitType - 1] = true;
+  }
+}
 
-    if( partitioner.canSplit( TU_MAX_TR_SPLIT, cs ) )
+void IntraSearch::xFindAlreadyTestedNearbyIntraModes(int lfnstIdx, int currentIntraMode, int* refLfnstIdx, int* leftIntraMode, int* rightIntraMode, ISPType ispOption, int windowSize)
+{ // Looks for an already tested reference for currentIntraMode: the same mode under a lower lfnstIdx, or a nearby mode (distance <= windowSize) under the same lfnstIdx
+  bool leftModeFound = false, rightModeFound = false;
+  *leftIntraMode = -1;  // all three outputs stay at -1 when no reference is found
+  *rightIntraMode = -1;
+  *refLfnstIdx = -1;
+  const unsigned st = ispOption - 1; // zero-based split index used for the [mode][split] tables
+
+  // First we check whether the exact intra mode was already tested for another (lower) lfnstIdx value
+  if (lfnstIdx > 0)
+  {
+    bool sameIntraModeFound = false;
+    if (lfnstIdx == 2 && m_ispTestedModes[1].modeHasBeenTested[currentIntraMode][st]) // lfnstIdx 1 is preferred over 0 as reference for lfnstIdx 2
     {
-      partitioner.splitCurrArea( TU_MAX_TR_SPLIT, cs );
+      sameIntraModeFound = true;
+      *refLfnstIdx = 1;
     }
-    else if( currTU.cu->ispMode )
+    else if (m_ispTestedModes[0].modeHasBeenTested[currentIntraMode][st])
     {
-      partitioner.splitCurrArea( ispType, cs );
+      sameIntraModeFound = true;
+      *refLfnstIdx = 0;
     }
-    else
-      THROW( "Implicit TU split not available" );
 
-    do
+    if (sameIntraModeFound)
     {
-      ChromaCbfs subCbfs = xRecurIntraChromaCodingQT( cs, partitioner, bestCostSoFar, ispType );
-
-      for( uint32_t ch = COMPONENT_Cb; ch < numValidTBlocks; ch++ )
-      {
-        const ComponentID compID = ComponentID( ch );
-        SplitCbfs.cbf( compID ) |= subCbfs.cbf( compID );
-      }
-    } while( partitioner.nextPart( cs ) );
+      *leftIntraMode = currentIntraMode;  // both neighbours are reported as the mode itself in this case
+      *rightIntraMode = currentIntraMode;
+      return;
+    }
+  }
 
-    partitioner.exitCurrSplit();
+  // The mode has not been checked for another lfnstIdx value, so now we look for a similar mode within a window using the same lfnstIdx
+  for (int k = 1; k <= windowSize; k++) // the closest distance is examined first; the search stops at the first hit
+  {
+    int off = currentIntraMode - 2 - k; // negative when stepping k modes to the left would go below mode 2
+    int leftMode = (off < 0) ? NUM_LUMA_MODE + off : currentIntraMode - k; // wraps around to the top of the mode range when off < 0
+    int rightMode = currentIntraMode > DC_IDX ? (((int)currentIntraMode - 2 + k) % 65) + 2 : PLANAR_IDX; // modes above DC wrap within [2, 66]; otherwise PLANAR is used
 
-    if( lumaUsesISP && cs.dist == MAX_UINT )
+    leftModeFound  = leftMode  != (int)currentIntraMode ? m_ispTestedModes[lfnstIdx].modeHasBeenTested[leftMode][st]  : false; // the mode itself never counts as its own neighbour
+    rightModeFound = rightMode != (int)currentIntraMode ? m_ispTestedModes[lfnstIdx].modeHasBeenTested[rightMode][st] : false;
+    if (leftModeFound || rightModeFound)
     {
-      return cbfs;
+      *leftIntraMode = leftModeFound ? leftMode : -1;
+      *rightIntraMode = rightModeFound ? rightMode : -1;
+      *refLfnstIdx = lfnstIdx;
+      break;
     }
-    {
+  }
+}
 
-      cbfs.Cb |= SplitCbfs.Cb;
-      cbfs.Cr |= SplitCbfs.Cr;
+// Prepares the lists of intra mode candidates (m_ispCandListHor / m_ispCandListVer) that will be RD tested with ISP. Returns false when the ISP tests are cancelled, true otherwise.
+bool IntraSearch::xSortISPCandList(double bestCostSoFar, double bestNonISPCost, ModeInfo bestNonISPMode)
+{
+  int bestISPModeInRelCU = -1; // filled by updateISPStatusFromRelCU(); -1 means no mode is taken from a related CU
+  m_modeCtrl->setStopNonDCT2Transforms(false);
 
-      if( !lumaUsesISP )
+  if (m_pcEncCfg->getUseFastISP())
+  {
+    // We check if the ISP tests can be cancelled
+    double thSkipISP = 1.4;
+    if (bestNonISPCost > bestCostSoFar * thSkipISP) // the best non-ISP cost is more than 40% above the best cost so far
+    {
+      for (int splitIdx = 0; splitIdx < NUM_INTRA_SUBPARTITIONS_MODES - 1; splitIdx++)
       {
-        for( auto &ptu : cs.tus )
+        for (int j = 0; j < NUM_LFNST_NUM_PER_SET; j++)
         {
-          if( currArea.Cb().contains( ptu->Cb() ) || ( !ptu->Cb().valid() && currArea.Y().contains( ptu->Y() ) ) )
-          {
-            TU::setCbfAtDepth( *ptu, COMPONENT_Cb, currDepth, SplitCbfs.Cb );
-            TU::setCbfAtDepth( *ptu, COMPONENT_Cr, currDepth, SplitCbfs.Cr );
-          }
+          m_ispTestedModes[j].splitIsFinished[splitIdx] = true; // every split is marked finished for every LFNST index
         }
       }
+      return false;
+    }
+    if (!updateISPStatusFromRelCU(bestNonISPCost, bestNonISPMode, bestISPModeInRelCU))
+    {
+      return false;
     }
   }
 
-  return cbfs;
-}
+  for (int k = 0; k < m_ispCandListHor.size(); k++)
+  {
+    m_ispCandListHor.at(k).ispMod = HOR_INTRA_SUBPARTITIONS; // we set the correct ISP split type value
+  }
 
-uint64_t IntraSearch::xFracModeBitsIntra(PredictionUnit &pu, const uint32_t &uiMode, const ChannelType &chType)
-{
-  uint32_t orgMode = uiMode;
+  auto origHadList = m_ispCandListHor;   // save the original hadamard list of regular intra
+  bool modeIsInList[NUM_LUMA_MODE] = { false }; // tracks which mode ids were already pushed, to avoid duplicates
 
-  if (!pu.mhIntraFlag)
-  std::swap(orgMode, pu.intraDir[chType]);
+  m_ispCandListHor.clear();
+  m_ispCandListVer.clear();
 
-  m_CABACEstimator->resetBits();
+  // we sort the normal intra modes according to their full RD costs (ascending rdCost)
+  std::sort(m_regIntraRDListWithCosts.begin(), m_regIntraRDListWithCosts.end(), ModeInfoWithCost::compareModeInfoWithCost);
 
-  if( isLuma( chType ) )
+  // we get the best angle from the regular intra list (first entry with modeId > DC_IDX after sorting)
+  int bestNormalIntraAngle = -1;
+  for (int modeIdx = 0; modeIdx < m_regIntraRDListWithCosts.size(); modeIdx++)
   {
-    if ( pu.mhIntraFlag )
-      m_CABACEstimator->MHIntra_luma_pred_modes(*pu.cu);
-    else
+    if (bestNormalIntraAngle == -1 && m_regIntraRDListWithCosts.at(modeIdx).modeId > DC_IDX)
     {
-      m_CABACEstimator->extend_ref_line(pu);
-      m_CABACEstimator->intra_luma_pred_mode(pu);
+      bestNormalIntraAngle = m_regIntraRDListWithCosts.at(modeIdx).modeId;
+      break;
     }
   }
-  else
+
+  int mode1 = PLANAR_IDX;
+  int mode2 = bestNormalIntraAngle; // stays -1 when the regular list holds no mode above DC
+
+  ModeInfo refMode = origHadList.at(0); // template for the non-mode fields of every pushed entry. NOTE(review): assumes origHadList is not empty - confirm against the caller
+  auto* destListPtr = &m_ispCandListHor;
+  // List creation
+
+  if (m_pcEncCfg->getUseFastISP() && bestISPModeInRelCU != -1) //RelCU intra mode
   {
-    m_CABACEstimator->intra_chroma_pred_mode( pu );
+   destListPtr->push_back(ModeInfo(refMode.mipFlg, refMode.mipTrFlg, refMode.mRefId, refMode.ispMod, bestISPModeInRelCU));
+    modeIsInList[bestISPModeInRelCU] = true;
   }
 
-  if ( !pu.mhIntraFlag )
-  std::swap(orgMode, pu.intraDir[chType]);
+  // Planar
+  if (!modeIsInList[mode1])
+  {
+    destListPtr->push_back(ModeInfo(refMode.mipFlg, refMode.mipTrFlg, refMode.mRefId, refMode.ispMod, mode1));
+    modeIsInList[mode1] = true;
+  }
+  // Best angle in regular intra
+  if (mode2 != -1 && !modeIsInList[mode2])
+  {
+    destListPtr->push_back(ModeInfo(refMode.mipFlg, refMode.mipTrFlg, refMode.mRefId, refMode.ispMod, mode2));
+    modeIsInList[mode2] = true;
+  }
+  // Remaining regular intra modes that were full RD tested (except DC, which is added after the angles from regular intra)
+  int dcModeIndex = -1; // position of DC in the regular list, or -1 if DC was not encountered
+  for (int remModeIdx = 0; remModeIdx < m_regIntraRDListWithCosts.size(); remModeIdx++)
+  {
+    int currentMode = m_regIntraRDListWithCosts.at(remModeIdx).modeId;
+    if (currentMode != mode1 && currentMode != mode2 && !modeIsInList[currentMode])
+    {
+      if (currentMode > DC_IDX)
+      {
+        destListPtr->push_back(ModeInfo(refMode.mipFlg, refMode.mipTrFlg, refMode.mRefId, refMode.ispMod, currentMode));
+        modeIsInList[currentMode] = true;
+      }
+      else if (currentMode == DC_IDX)
+      {
+        dcModeIndex = remModeIdx; // only remembered here; DC itself is pushed after this loop
+      }
+    }
+  }
 
-  return m_CABACEstimator->getEstFracBits();
-}
+  // DC is added after the angles from regular intra
+  if (dcModeIndex != -1 && !modeIsInList[DC_IDX])
+  {
+    destListPtr->push_back(ModeInfo(refMode.mipFlg, refMode.mipTrFlg, refMode.mRefId, refMode.ispMod, DC_IDX));
+    modeIsInList[DC_IDX] = true;
+  }
 
+  // We add extra candidates to the list that will only be tested if ISP is likely to win
+  for (int j = 0; j < NUM_LFNST_NUM_PER_SET; j++)
+  {
+    m_ispTestedModes[j].numOrigModesToTest = (int)destListPtr->size(); // list size before the extra candidates are appended
+  }
+  const int addedModesFromHadList = 3; // maximum number of extra candidates taken from the hadamard list
+  int       newModesAdded = 0;
 
+  for (int k = 0; k < origHadList.size(); k++)
+  {
+    if (newModesAdded == addedModesFromHadList)
+    {
+      break;
+    }
+    if (!modeIsInList[origHadList.at(k).modeId])
+    {
+      destListPtr->push_back( ModeInfo( refMode.mipFlg, refMode.mipTrFlg, refMode.mRefId, refMode.ispMod, origHadList.at(k).modeId ) );
+      newModesAdded++; // note: modeIsInList is not updated for these extra candidates
+    }
+  }
 
-void IntraSearch::encPredIntraDPCM( const ComponentID &compID, PelBuf &pOrg, PelBuf &pDst, const uint32_t &uiDirMode )
-{
-  CHECK( pOrg.buf == 0, "Encoder DPCM called without original buffer" );
+  if (m_pcEncCfg->getUseFastISP() && bestISPModeInRelCU != -1)
+  {
+    destListPtr->resize(1); // only the first entry (the related-CU mode pushed above) is kept
+  }
 
-  const int srcStride = m_topRefLength + 1;
-  CPelBuf   pSrc = CPelBuf(getPredictorPtr(compID), srcStride, m_leftRefLength + 1);
+  // Copy modes to other split-type list
+  m_ispCandListVer = m_ispCandListHor;
+  for (int i = 0; i < m_ispCandListVer.size(); i++)
+  {
+    m_ispCandListVer[i].ispMod = VER_INTRA_SUBPARTITIONS;
+  }
 
-  // Sample Adaptive intra-Prediction (SAP)
-  if( uiDirMode == HOR_IDX )
+  // Reset the tested modes information of every candidate to its cleared state, for all LFNST indices
+  for (int j = 0; j < NUM_LFNST_NUM_PER_SET; j++)
   {
-    // left column filled with reference samples, remaining columns filled with pOrg data
-    for( int y = 0; y < pDst.height; y++ )
+    for (int i = 0; i < m_ispCandListHor.size(); i++)
     {
-      pDst.at( 0, y ) = pSrc.at( 0, 1 + y );
+      m_ispTestedModes[j].clearISPModeInfo(m_ispCandListHor[i].modeId);
     }
-    CPelBuf orgRest  = pOrg.subBuf( 0, 0, pOrg.width - 1, pOrg.height );
-    PelBuf  predRest = pDst.subBuf( 1, 0, pDst.width - 1, pDst.height );
+  }
+  return true;
+}
+
+void IntraSearch::xSortISPCandListLFNST()
+{
+  // Re-sorts the lists of intra mode candidates for lfnstIdx > 0 by checking the results stored for lfnstIdx = 0: the best mode found there is moved to the front of each list
+  ISPTestedModesInfo& ispTestedModesRef = m_ispTestedModes[0]; // results of the lfnstIdx = 0 pass
+  for (int splitIdx = 0; splitIdx < NUM_INTRA_SUBPARTITIONS_MODES - 1; splitIdx++)
+  {
+    ISPType ispMode = splitIdx ? VER_INTRA_SUBPARTITIONS : HOR_INTRA_SUBPARTITIONS;
+    if (!m_ispTestedModes[m_curIspLfnstIdx].splitIsFinished[splitIdx] && ispTestedModesRef.testedModes[splitIdx].size() > 1) // only when the split is still active and more than one mode was tested with lfnstIdx = 0
+    {
+      auto& candList   = ispMode == HOR_INTRA_SUBPARTITIONS ? m_ispCandListHor : m_ispCandListVer;
+      int bestModeId   = candList[1].modeId > DC_IDX ? candList[1].modeId : -1; // the second list entry seeds the search only if it is above DC. NOTE(review): assumes candList has at least two entries - confirm
+      int bestSubParts = candList[1].modeId > DC_IDX ? ispTestedModesRef.getNumCompletedSubParts(ispMode, bestModeId) : -1;
+      double bestCost  = candList[1].modeId > DC_IDX ? ispTestedModesRef.getRDCost(ispMode, bestModeId) : MAX_DOUBLE;
+      for (int i = 0; i < candList.size(); i++)
+      {
+        const int candSubParts = ispTestedModesRef.getNumCompletedSubParts(ispMode, candList[i].modeId);
+        const double candCost = ispTestedModesRef.getRDCost(ispMode, candList[i].modeId);
+        if (candSubParts > bestSubParts || candCost < bestCost) // either criterion alone is enough to replace the current best
+        {
+          bestModeId = candList[i].modeId;
+          bestCost = candCost;
+          bestSubParts = candSubParts;
+        }
+      }
 
-    predRest.copyFrom( orgRest );
+      if (bestModeId != -1)
+      {
+        if (bestModeId != candList[0].modeId)
+        {
+          // Move the best mode to position 0 and shift the preceding entries one position down, up to the old position of the best mode
+          auto prevMode = candList[0];
+          candList[0].modeId = bestModeId; // only modeId is overwritten; the other fields of entry 0 are kept
+          for (int i = 1; i < candList.size(); i++)
+          {
+            auto nextMode = candList[i];
+            candList[i] = prevMode;
+            if (nextMode.modeId == bestModeId)
+            {
+              break; // reached the slot the best mode came from; the rest of the list is unchanged
+            }
+            prevMode = nextMode;
+          }
+        }
+      }
+    }
   }
-  else // VER_IDX
+}
+
+bool IntraSearch::updateISPStatusFromRelCU( double bestNonISPCostCurrCu, ModeInfo bestNonISPModeCurrCu, int& bestISPModeInRelCU )
+{
+  // Compares the data of a related CU with the current CU to cancel or reduce the ISP tests. Returns false when all ISP tests are cancelled; bestISPModeInRelCU is set to the related CU's ISP intra mode when its ISP result is reused, -1 otherwise
+  bestISPModeInRelCU = -1;
+  if (m_modeCtrl->getRelatedCuIsValid())
   {
-    // top row filled with reference samples, remaining rows filled with pOrg data
-    for( int x = 0; x < pDst.width; x++ )
+    double bestNonISPCostRelCU = m_modeCtrl->getBestDCT2NonISPCostRelCU();
+    double costRatio           = bestNonISPCostCurrCu / bestNonISPCostRelCU; // NOTE(review): divisor is not checked against 0 - confirm it is always positive
+    bool   bestModeRelCuIsMip  = (m_modeCtrl->getIspPredModeValRelCU() >> 5) & 0x1; // bit 5 of the packed status
+    bool   bestModeCurrCuIsMip = bestNonISPModeCurrCu.mipFlg;
+    int    relatedCuIntraMode  = m_modeCtrl->getIspPredModeValRelCU() >> 9; // bits 9 and above of the packed status
+    bool   isSameTypeOfMode    = (bestModeRelCuIsMip && bestModeCurrCuIsMip) || (!bestModeRelCuIsMip && !bestModeCurrCuIsMip);
+    bool   bothModesAreAngular = bestNonISPModeCurrCu.modeId > DC_IDX && relatedCuIntraMode > DC_IDX;
+    bool   modesAreComparable  = isSameTypeOfMode && (bestModeCurrCuIsMip || bestNonISPModeCurrCu.modeId == relatedCuIntraMode || (bothModesAreAngular && abs(relatedCuIntraMode - (int)bestNonISPModeCurrCu.modeId) <= 5));
+    int    status              = m_modeCtrl->getIspPredModeValRelCU(); // bits 0-1 tell whether ISP was selected in the related CU
+
+    if ((status & 0x3) == 0x3) //ISP was not selected in the relCU
     {
-      pDst.at( x, 0 ) = pSrc.at( 1 + x, 0 );
+      double bestNonDCT2Cost = m_modeCtrl->getBestNonDCT2Cost();
+      double ratioWithNonDCT2 = bestNonDCT2Cost / bestNonISPCostRelCU;
+      double margin = ratioWithNonDCT2 < 0.95 ? 0.2 : 0.1; // a wider tolerance is used when the non-DCT2 cost is below 95% of the related CU cost
+
+      if (costRatio > 1 - margin && costRatio < 1 + margin && modesAreComparable)
+      {
+        for (int lfnstVal = 0; lfnstVal < NUM_LFNST_NUM_PER_SET; lfnstVal++)
+        {
+          m_ispTestedModes[lfnstVal].splitIsFinished[HOR_INTRA_SUBPARTITIONS - 1] = true;
+          m_ispTestedModes[lfnstVal].splitIsFinished[VER_INTRA_SUBPARTITIONS - 1] = true;
+        }
+        return false; // both splits are finished for every LFNST index, so no ISP test remains
+      }
     }
-    CPelBuf orgRest  = pOrg.subBuf( 0, 0, pOrg.width, pOrg.height - 1 );
-    PelBuf  predRest = pDst.subBuf( 0, 1, pDst.width, pDst.height - 1 );
+    else if ((status & 0x3) == 0x1) //ISP was selected in the relCU
+    {
+      double margin = 0.05;
+
+      if (costRatio > 1 - margin && costRatio < 1 + margin && modesAreComparable)
+      {
+        int  ispSplitIdx = (m_modeCtrl->getIspPredModeValRelCU() >> 2) & 0x1; // bit 2: split index chosen in the related CU
+        bool lfnstIdxIsNot0 = (bool)((m_modeCtrl->getIspPredModeValRelCU() >> 3) & 0x1); // bit 3
+        bool lfnstIdxIs2 = (bool)((m_modeCtrl->getIspPredModeValRelCU() >> 4) & 0x1); // bit 4
+        int  lfnstIdx = !lfnstIdxIsNot0 ? 0 : lfnstIdxIs2 ? 2 : 1;
+        bestISPModeInRelCU = (int)m_modeCtrl->getBestISPIntraModeRelCU();
+
+        // Every (split, lfnst) combination except the one chosen in the related CU is marked as finished
+        for (int splitIdx = 0; splitIdx < NUM_INTRA_SUBPARTITIONS_MODES - 1; splitIdx++)
+        {
+          for (int lfnstVal = 0; lfnstVal < NUM_LFNST_NUM_PER_SET; lfnstVal++)
+          {
+            if (lfnstVal == lfnstIdx && splitIdx == ispSplitIdx)
+            {
+              continue;
+            }
+            m_ispTestedModes[lfnstVal].splitIsFinished[splitIdx] = true;
+          }
+        }
 
-    predRest.copyFrom( orgRest );
+        bool stopNonDCT2Transforms = (bool)((m_modeCtrl->getIspPredModeValRelCU() >> 6) & 0x1); // bit 6
+        m_modeCtrl->setStopNonDCT2Transforms(stopNonDCT2Transforms);
+      }
+    }
+    else
+    {
+      THROW("Wrong ISP relCU status");
+    }
   }
+
+  return true;
 }
 
-bool IntraSearch::useDPCMForFirstPassIntraEstimation( const PredictionUnit &pu, const uint32_t &uiDirMode )
+void IntraSearch::xFinishISPModes()
 {
-  return CU::isRDPCMEnabled( *pu.cu ) && pu.cu->transQuantBypass && (uiDirMode == HOR_IDX || uiDirMode == VER_IDX);
+  // Advances to the next lfnst index; when entering lfnstIdx 1, the candidate lists are re-sorted if any split is still open for lfnstIdx > 0
+  m_curIspLfnstIdx++;
+
+  if (m_curIspLfnstIdx < NUM_LFNST_NUM_PER_SET)
+  {
+    //Check if LFNST is applicable
+    if (m_curIspLfnstIdx == 1) // the re-sorting is done only once, on the transition from lfnstIdx 0 to 1
+    {
+      bool canTestLFNST = false;
+      for (int lfnstIdx = 1; lfnstIdx < NUM_LFNST_NUM_PER_SET; lfnstIdx++)
+      {
+        canTestLFNST |= !m_ispTestedModes[lfnstIdx].splitIsFinished[HOR_INTRA_SUBPARTITIONS - 1] || !m_ispTestedModes[lfnstIdx].splitIsFinished[VER_INTRA_SUBPARTITIONS - 1];
+      }
+      if (canTestLFNST)
+      {
+        //Construct the intra modes candidates list for the lfnst > 0 cases
+        xSortISPCandListLFNST();
+      }
+    }
+  }
 }
+
diff --git a/source/Lib/EncoderLib/IntraSearch.h b/source/Lib/EncoderLib/IntraSearch.h
index 6350f8076c3f9767b35c686a37e75a35edac4900..72ccaa02148956bceded9d45694cbe2c7c58baf5 100644
--- a/source/Lib/EncoderLib/IntraSearch.h
+++ b/source/Lib/EncoderLib/IntraSearch.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -56,9 +56,119 @@
 // ====================================================================================================================
 // Class definition
 // ====================================================================================================================
-
 class EncModeCtrl;
 
+enum PLTScanMode // scan modes identified by the PLT_SCAN_* prefix (presumably palette-mode scan orders - confirm against the users of this enum)
+{
+  PLT_SCAN_HORTRAV = 0, // horizontal traverse scan (per the name)
+  PLT_SCAN_VERTRAV = 1, // vertical traverse scan (per the name)
+  NUM_PLT_SCAN = 2      // number of scan modes
+};
+class SortingElement // up to three component values with an occurrence count and running sums (presumably a palette colour candidate, given the PLT_* constants used below)
+{
+public:
+  inline bool operator<(const SortingElement &other) const
+  {
+    return cnt > other.cnt; // inverted on purpose: an element with a larger count compares as "less"
+  }
+  SortingElement() {
+    cnt = shift = lastCnt = 0;
+    data[0] = data[1] = data[2] = 0;
+    sumData[0] = sumData[1] = sumData[2] = 0;
+  }
+  uint32_t  getCnt() const        { return cnt; }
+  void      setCnt(uint32_t val)  { cnt = val; }
+  int       getSumData (int id) const   { return sumData[id]; }
+
+  void resetAll(ComponentID compBegin, uint32_t numComp) // zeroes data/sumData of components [compBegin, compBegin + numComp) and the averaging state; cnt is left untouched
+  {
+    shift = lastCnt = 0;
+    for (int ch = compBegin; ch < (compBegin + numComp); ch++)
+    {
+      data[ch] = 0;
+      sumData[ch] = 0;
+    }
+  }
+  void setAll(uint32_t* ui, ComponentID compBegin, uint32_t numComp) // copies ui[ch] into data[ch]; ui is indexed by component id, not from zero
+  {
+    for (int ch = compBegin; ch < (compBegin + numComp); ch++)
+    {
+      data[ch] = ui[ch];
+    }
+  }
+  bool almostEqualData(const SortingElement& element, int errorLimit, const BitDepths& bitDepths, ComponentID compBegin, uint32_t numComp) const // true when no component's scaled absolute difference exceeds errorLimit
+  {
+    bool almostEqual = true;
+    for (int comp = compBegin; comp < (compBegin + numComp); comp++)
+    {
+      uint32_t absError = 0;
+      if (isChroma((ComponentID) comp))
+      {
+        absError += int(double(std::abs(data[comp] - element.data[comp])) * PLT_CHROMA_WEIGHTING) >> (bitDepths.recon[CHANNEL_TYPE_CHROMA] - PLT_ENCBITDEPTH); // chroma differences are weighted before the bit-depth shift
+      }
+      else
+      {
+        absError += (std::abs(data[comp] - element.data[comp]))>> (bitDepths.recon[CHANNEL_TYPE_LUMA] - PLT_ENCBITDEPTH);
+      }
+      if (absError > errorLimit)
+      {
+        almostEqual = false;
+        break;
+      }
+    }
+    return almostEqual;
+  }
+  uint32_t getSAD(const SortingElement& element, const BitDepths& bitDepths, ComponentID compBegin, uint32_t numComp) const // sum over components of the absolute differences, each shifted by (recon bit depth - PLT_ENCBITDEPTH)
+  {
+    uint32_t sumAd = 0;
+    for (int comp = compBegin; comp < (compBegin + numComp); comp++)
+    {
+      ChannelType chType = (comp > 0) ? CHANNEL_TYPE_CHROMA : CHANNEL_TYPE_LUMA;
+      sumAd += (std::abs(data[comp] - element.data[comp]) >> (bitDepths.recon[chType] - PLT_ENCBITDEPTH));
+    }
+    return sumAd;
+  }
+  void copyDataFrom(SortingElement element, ComponentID compBegin, uint32_t numComp) // takes element's data and restarts the running average from that single sample
+  {
+    for (int comp = compBegin; comp < (compBegin + numComp); comp++)
+    {
+      data[comp] = element.data[comp];
+      sumData[comp] = data[comp];
+    }
+    shift = 0; lastCnt = 1;
+  }
+  void copyAllFrom(SortingElement element, ComponentID compBegin, uint32_t numComp) // by-value parameter kept deliberately: the snapshot keeps a self-copy from clobbering the state before it is read back
+  {
+    copyDataFrom(element, compBegin, numComp);
+    cnt = element.cnt;
+    for (int comp = compBegin; comp < (compBegin + numComp); comp++)
+    {
+      sumData[comp] = element.sumData[comp];
+    }
+    lastCnt = element.lastCnt; shift = element.shift;
+  }
+  void addElement(const SortingElement& element, ComponentID compBegin, uint32_t numComp) // accumulates element into the sums; data is refreshed with the rounded average each time cnt reaches twice lastCnt
+  {
+    cnt++;
+    for (int i = compBegin; i<(compBegin + numComp); i++)
+    {
+      sumData[i] += element.data[i];
+    }
+    if (cnt>1 && cnt == 2 * lastCnt)
+    {
+      uint32_t rnd = 1 << shift; // half of the new divisor (1 << (shift + 1)), used for rounding
+      shift++;
+      for (int i = compBegin; i<(compBegin + numComp); i++)
+      {
+        data[i] = (sumData[i] + rnd) >> shift;
+      }
+      lastCnt = cnt;
+    }
+  }
+private:
+  uint32_t cnt;                            // number of samples merged into this element
+  int shift, lastCnt, data[3], sumData[3]; // averaging state (shift, cnt at last refresh), current values and running sums per component
+};
 /// encoder search class
 class IntraSearch : public IntraPrediction, CrossComponentPrediction
 {
@@ -76,15 +186,171 @@ private:
 
   CodingStructure **m_pSaveCS;
 
+  bool            m_saveCuCostInSCIPU;
+  uint8_t         m_numCuInSCIPU;
+  Area            m_cuAreaInSCIPU[NUM_INTER_CU_INFO_SAVE];
+  double          m_cuCostInSCIPU[NUM_INTER_CU_INFO_SAVE];
+
+  struct ModeInfo // describes one luma intra mode candidate: MIP flags, reference line index, ISP split type and intra direction
+  {
+    bool     mipFlg; // CU::mipFlag
+    bool     mipTrFlg; // PU::mipTransposedFlag
+    int      mRefId; // PU::multiRefIdx
+    uint8_t  ispMod; // CU::ispMode
+    uint32_t modeId; // PU::intraDir[CHANNEL_TYPE_LUMA]
+
+    ModeInfo() : mipFlg(false), mipTrFlg(false), mRefId(0), ispMod(NOT_INTRA_SUBPARTITIONS), modeId(0) {} // default: no MIP, reference index 0, no ISP, mode id 0
+    ModeInfo(const bool mipf, const bool miptf, const int mrid, const uint8_t ispm, const uint32_t mode) : mipFlg(mipf), mipTrFlg(miptf), mRefId(mrid), ispMod(ispm), modeId(mode) {}
+    bool operator==(const ModeInfo cmp) const { return (mipFlg == cmp.mipFlg && mipTrFlg == cmp.mipTrFlg && mRefId == cmp.mRefId && ispMod == cmp.ispMod && modeId == cmp.modeId); } // equal only when all five fields match
+  };
+  struct ModeInfoWithCost : public ModeInfo // a ModeInfo together with the RD cost obtained for it
+  {
+    double rdCost; // MAX_DOUBLE until a cost is assigned
+    ModeInfoWithCost() : ModeInfo(), rdCost(MAX_DOUBLE) {}
+    ModeInfoWithCost(const bool mipf, const bool miptf, const int mrid, const uint8_t ispm, const uint32_t mode, double cost) : ModeInfo(mipf, miptf, mrid, ispm, mode), rdCost(cost) {}
+    bool operator==(const ModeInfoWithCost cmp) const { return (mipFlg == cmp.mipFlg && mipTrFlg == cmp.mipTrFlg && mRefId == cmp.mRefId && ispMod == cmp.ispMod && modeId == cmp.modeId && rdCost == cmp.rdCost); } // exact comparison, including the floating-point cost
+    static bool compareModeInfoWithCost(ModeInfoWithCost a, ModeInfoWithCost b) { return a.rdCost < b.rdCost; } // ordering predicate: ascending RD cost
+  };
+
+  struct ISPTestedModeInfo // result of testing one intra mode with one ISP split type
+  {
+    int    numCompSubParts; // number of sub-partitions that were completed, -1 when cleared
+    double rdCost;          // RD cost stored for the mode, MAX_DOUBLE when cleared
+
+    ISPTestedModeInfo() : numCompSubParts(-1), rdCost(MAX_DOUBLE) {} // starts in the cleared state instead of leaving the members indeterminate
+
+    void setMode(int numParts, double cost) // stores the outcome of a test
+    {
+      numCompSubParts = numParts;
+      rdCost = cost;
+    }
+    void clear() // back to the "not tested" values
+    {
+      numCompSubParts = -1;
+      rdCost = MAX_DOUBLE;
+    }
+  };
+  struct ISPTestedModesInfo // bookkeeping of the ISP mode tests; arrays of size 2 are indexed by (split type - 1)
+  {
+    ISPTestedModeInfo                           intraMode[NUM_LUMA_MODE][2];         // per mode and split: completed sub-partitions and RD cost
+    bool                                        modeHasBeenTested[NUM_LUMA_MODE][2]; // guards the reads of intraMode
+    int                                         numTotalParts[2];                    // total number of sub-partitions per split
+    static_vector<int, FAST_UDI_MAX_RDMODE_NUM> testedModes[2];                      // mode ids in the order they were tested
+    int                                         bestModeSoFar;                       // mode that last beat the caller-supplied best cost, -1 if none
+    ISPType                                     bestSplitSoFar;                      // split type belonging to bestModeSoFar
+    int                                         bestMode[2];                         // best fully completed mode per split, -1 if none
+    double                                      bestCost[2];                         // RD cost of bestMode per split
+    int                                         numTestedModes[2];                   // number of setModeResults() calls per split
+    int                                         candIndexInList[2];                  // next position to read in the candidate list of each split
+    bool                                        splitIsFinished[2];                  // true when no more modes are to be tested for the split
+    int                                         numOrigModesToTest;                  // size of the candidate list before the extra candidates, -1 after clear()
+
+    // set a tested mode results; the RD cost is only kept when all sub-partitions were completed
+    void setModeResults(ISPType splitType, int iModeIdx, int numCompletedParts, double rdCost, double currentBestCost)
+    {
+      const unsigned st = splitType - 1;
+      CHECKD(st > 1, "The split type is invalid!");
+      const int maxNumParts = numTotalParts[st];
+      intraMode[iModeIdx][st].setMode(numCompletedParts, numCompletedParts == maxNumParts ? rdCost : MAX_DOUBLE);
+      testedModes[st].push_back(iModeIdx);
+      numTestedModes[st]++;
+      modeHasBeenTested[iModeIdx][st] = true;
+      if (numCompletedParts == maxNumParts && rdCost < bestCost[st])   // best mode update for this split
+      {
+        bestMode[st] = iModeIdx;
+        bestCost[st] = rdCost;
+      }
+      if (numCompletedParts == maxNumParts && rdCost < currentBestCost)   // best mode update against the cost supplied by the caller
+      {
+        bestModeSoFar = iModeIdx;
+        bestSplitSoFar = splitType;
+      }
+    }
+
+    int getNumCompletedSubParts(ISPType splitType, int iModeIdx) // returns -1 when the mode has not been tested with this split
+    {
+      const unsigned st = splitType - 1;
+      CHECK(st > 1, "The split type is invalid!"); // st is unsigned, so an upper-bound test also catches splitType == 0 (it wraps around)
+      CHECK(iModeIdx < 0 || iModeIdx >(NUM_LUMA_MODE - 1), "The modeIdx is invalid");
+      return modeHasBeenTested[iModeIdx][st] ? intraMode[iModeIdx][st].numCompSubParts : -1;
+    }
+
+    double getRDCost(ISPType splitType, int iModeIdx) // returns MAX_DOUBLE when the mode has not been tested with this split
+    {
+      const unsigned st = splitType - 1;
+      CHECKD(st > 1, "The split type is invalid!");
+      return modeHasBeenTested[iModeIdx][st] ? intraMode[iModeIdx][st].rdCost : MAX_DOUBLE;
+    }
+
+    // get a tested intra mode index; returns -1 when pos is past the end of the tested list
+    int getTestedIntraMode(ISPType splitType, int pos)
+    {
+      const unsigned st = splitType - 1;
+      CHECKD(st > 1, "The split type is invalid!");
+      return pos < testedModes[st].size() ? testedModes[st].at(pos) : -1;
+    }
+
+    // set everything to default values (intraMode entries are not touched here; see clearISPModeInfo)
+    void clear()
+    {
+      for (int splitIdx = 0; splitIdx < NUM_INTRA_SUBPARTITIONS_MODES - 1; splitIdx++)
+      {
+        numTestedModes [splitIdx] = 0;
+        candIndexInList[splitIdx] = 0;
+        numTotalParts  [splitIdx] = 0;
+        splitIsFinished[splitIdx] = false;
+        testedModes    [splitIdx].clear();
+        bestCost       [splitIdx] = MAX_DOUBLE;
+        bestMode       [splitIdx] = -1;
+      }
+      bestModeSoFar = -1;
+      bestSplitSoFar = NOT_INTRA_SUBPARTITIONS;
+      numOrigModesToTest = -1;
+      memset(modeHasBeenTested, 0, sizeof(modeHasBeenTested));
+    }
+    void clearISPModeInfo(int idx) // clears the stored result of mode idx for both splits
+    {
+      intraMode[idx][0].clear();
+      intraMode[idx][1].clear();
+    }
+    void init(const int numTotalPartsHor, const int numTotalPartsVer) // resets the state and sets the sub-partition totals; a split with 0 parts is finished from the start
+    {
+      clear();
+      const int horSplit = HOR_INTRA_SUBPARTITIONS - 1, verSplit = VER_INTRA_SUBPARTITIONS - 1;
+      numTotalParts  [horSplit] = numTotalPartsHor;
+      numTotalParts  [verSplit] = numTotalPartsVer;
+      splitIsFinished[horSplit] = (numTotalParts[horSplit] == 0);
+      splitIsFinished[verSplit] = (numTotalParts[verSplit] == 0);
+    }
+  };
+
+  static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> m_ispCandListHor, m_ispCandListVer;
+  static_vector<ModeInfoWithCost, FAST_UDI_MAX_RDMODE_NUM> m_regIntraRDListWithCosts;
+
+  ISPTestedModesInfo m_ispTestedModes[NUM_LFNST_NUM_PER_SET];
+  int m_curIspLfnstIdx;
+
   //cost variables for the EMT algorithm and new modes list
-  static_vector<uint32_t, FAST_UDI_MAX_RDMODE_NUM> m_rdModeListWithoutMrl;
-  static_vector<uint32_t, FAST_UDI_MAX_RDMODE_NUM> m_rdModeListWithoutMrlHor;
-  static_vector<uint32_t, FAST_UDI_MAX_RDMODE_NUM> m_rdModeListWithoutMrlVer;
+  double     m_bestModeCostStore[ NUM_LFNST_NUM_PER_SET ];                                    // RD cost of the best mode for each PU using DCT2
+  bool       m_bestModeCostValid[ NUM_LFNST_NUM_PER_SET ];
+  double     m_modeCostStore[ NUM_LFNST_NUM_PER_SET ][ NUM_LUMA_MODE ];                   // RD cost of each mode for each PU using DCT2
+  ModeInfo   m_savedRdModeList[ NUM_LFNST_NUM_PER_SET ][ NUM_LUMA_MODE ];
+  int32_t    m_savedNumRdModes[ NUM_LFNST_NUM_PER_SET ];
+
+  ModeInfo                                           m_savedRdModeFirstColorSpace[4 * NUM_LFNST_NUM_PER_SET * 2][FAST_UDI_MAX_RDMODE_NUM];
+  char                                               m_savedBDPCMModeFirstColorSpace[4 * NUM_LFNST_NUM_PER_SET * 2][FAST_UDI_MAX_RDMODE_NUM];
+  double                                             m_savedRdCostFirstColorSpace[4 * NUM_LFNST_NUM_PER_SET * 2][FAST_UDI_MAX_RDMODE_NUM];
+  int                                                m_numSavedRdModeFirstColorSpace[4 * NUM_LFNST_NUM_PER_SET * 2];
+  int                                                m_savedRdModeIdx;
+
+  static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> m_uiSavedRdModeListLFNST;
+  static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> m_uiSavedHadModeListLFNST;
+  uint32_t                                         m_uiSavedNumRdModesLFNST;
+  static_vector<double,   FAST_UDI_MAX_RDMODE_NUM> m_dSavedModeCostLFNST;
+  static_vector<double,   FAST_UDI_MAX_RDMODE_NUM> m_dSavedHadListLFNST;
 
-  static_vector<double, FAST_UDI_MAX_RDMODE_NUM> m_intraModeDiagRatio;
-  static_vector<double, FAST_UDI_MAX_RDMODE_NUM> m_intraModeHorVerRatio;
-  static_vector<int,    FAST_UDI_MAX_RDMODE_NUM> m_intraModeTestedNormalIntra;
   PelStorage      m_tmpStorageLCU;
+  PelStorage      m_colorTransResiBuf;
 protected:
   // interface to option
   EncCfg*         m_pcEncCfg;
@@ -99,7 +365,18 @@ protected:
   CtxCache*       m_CtxCache;
 
   bool            m_isInitialized;
-
+  uint32_t        m_symbolSize;
+  uint16_t**      m_truncBinBits;
+  uint16_t*       m_escapeNumBins;
+  bool            m_bestEscape;
+  double*         m_indexError[MAXPLTSIZE + 1];
+  uint8_t*        m_minErrorIndexMap; // store the best index in terms of distortion for each pixel 
+  uint8_t         m_indexMapRDOQ   [2][NUM_TRELLIS_STATE][2 * MAX_CU_BLKSIZE_PLT];
+  bool            m_runMapRDOQ     [2][NUM_TRELLIS_STATE][2 * MAX_CU_BLKSIZE_PLT];
+  uint8_t*        m_statePtRDOQ    [NUM_TRELLIS_STATE];
+  bool            m_prevRunTypeRDOQ[2][NUM_TRELLIS_STATE];
+  int             m_prevRunPosRDOQ [2][NUM_TRELLIS_STATE];
+  double          m_stateCostRDOQ  [2][NUM_TRELLIS_STATE];
 public:
 
   IntraSearch();
@@ -114,6 +391,7 @@ public:
                                     const uint32_t     maxCUHeight,
                                     const uint32_t     maxTotalCUDepth
                                   , EncReshape*   m_pcReshape
+                                  , const unsigned bitDepthY
                                   );
 
   void destroy                    ();
@@ -124,12 +402,23 @@ public:
 
   void setModeCtrl                ( EncModeCtrl *modeCtrl ) { m_modeCtrl = modeCtrl; }
 
-public:
+  bool getSaveCuCostInSCIPU       ()               { return m_saveCuCostInSCIPU; } // returns the current m_saveCuCostInSCIPU flag
+  void setSaveCuCostInSCIPU       ( bool b )       { m_saveCuCostInSCIPU = b;  } // overwrites the m_saveCuCostInSCIPU flag
+  void setNumCuInSCIPU            ( uint8_t i )    { m_numCuInSCIPU = i; } // overwrites m_numCuInSCIPU
+  void saveCuAreaCostInSCIPU      ( Area area, double cost );
+  void initCuAreaCostInSCIPU      ();
+  double findInterCUCost          ( CodingUnit &cu );
 
-  void estIntraPredLumaQT         ( CodingUnit &cu, Partitioner& pm, const double bestCostSoFar  = MAX_DOUBLE );
+public:
+  bool estIntraPredLumaQT(CodingUnit &cu, Partitioner& pm, const double bestCostSoFar = MAX_DOUBLE, bool mtsCheckRangeFlag = false, int mtsFirstCheckId = 0, int mtsLastCheckId = 0, bool moreProbMTSIdxFirst = false, CodingStructure* bestCS = NULL);
   void estIntraPredChromaQT       ( CodingUnit &cu, Partitioner& pm, const double maxCostAllowed = MAX_DOUBLE );
-  void IPCMSearch                 (CodingStructure &cs, Partitioner& partitioner);
+  void PLTSearch                  ( CodingStructure &cs, Partitioner& partitioner, ComponentID compBegin, uint32_t numComp);
   uint64_t xFracModeBitsIntra     (PredictionUnit &pu, const uint32_t &uiMode, const ChannelType &compID);
+  void invalidateBestModeCost     () { for( int i = 0; i < NUM_LFNST_NUM_PER_SET; i++ ) m_bestModeCostValid[ i ] = false; } // clears all NUM_LFNST_NUM_PER_SET entries of m_bestModeCostValid
+
+  void sortRdModeListFirstColorSpace(ModeInfo mode, double cost, char bdpcmMode, ModeInfo* rdModeList, double* rdCostList, char* bdpcmModeList, int& candNum);
+  void invalidateBestRdModeFirstColorSpace();
+  void setSavedRdModeIdx(int idx) { m_savedRdModeIdx = idx; } // overwrites m_savedRdModeIdx; NOTE(review): presumably the row index into the m_saved*FirstColorSpace tables - confirm
 
 protected:
 
@@ -137,7 +426,6 @@ protected:
   // T & Q & Q-1 & T-1
   // -------------------------------------------------------------------------------------------------------------------
 
-  void xEncPCM                    (CodingStructure &cs, Partitioner& partitioner, const ComponentID &compID);
 
   // -------------------------------------------------------------------------------------------------------------------
   // Intra search
@@ -145,21 +433,38 @@ protected:
 
   void     xEncIntraHeader                         ( CodingStructure &cs, Partitioner& pm, const bool &luma, const bool &chroma, const int subTuIdx = -1 );
   void     xEncSubdivCbfQT                         ( CodingStructure &cs, Partitioner& pm, const bool &luma, const bool &chroma, const int subTuIdx = -1, const PartSplit ispType = TU_NO_ISP );
-  uint64_t xGetIntraFracBitsQT                     ( CodingStructure &cs, Partitioner& pm, const bool &luma, const bool &chroma, const int subTuIdx = -1, const PartSplit ispType = TU_NO_ISP );
+  uint64_t xGetIntraFracBitsQT                     ( CodingStructure &cs, Partitioner& pm, const bool &luma, const bool &chroma, const int subTuIdx = -1, const PartSplit ispType = TU_NO_ISP, CUCtx * cuCtx = nullptr  );
   uint64_t xGetIntraFracBitsQTSingleChromaComponent( CodingStructure &cs, Partitioner& pm, const ComponentID compID );
 
   uint64_t xGetIntraFracBitsQTChroma(TransformUnit& tu, const ComponentID &compID);
-  void xEncCoeffQT                                 ( CodingStructure &cs, Partitioner& pm, const ComponentID compID, const int subTuIdx = -1, const PartSplit ispType = TU_NO_ISP );
-
+  void xEncCoeffQT                                 ( CodingStructure &cs, Partitioner& pm, const ComponentID compID, const int subTuIdx = -1, const PartSplit ispType = TU_NO_ISP, CUCtx * cuCtx = nullptr );
 
   void xIntraCodingTUBlock        (TransformUnit &tu, const ComponentID &compID, const bool &checkCrossCPrediction, Distortion& ruiDist, const int &default0Save1Load2 = 0, uint32_t* numSig = nullptr, std::vector<TrMode>* trModes=nullptr, const bool loadTr=false );
+  void xIntraCodingACTTUBlock(TransformUnit &tu, const ComponentID &compID, Distortion& ruiDist, std::vector<TrMode>* trModes = nullptr, const bool loadTr = false);
 
   ChromaCbfs xRecurIntraChromaCodingQT( CodingStructure &cs, Partitioner& pm, const double bestCostSoFar = MAX_DOUBLE,                          const PartSplit ispType = TU_NO_ISP );
-  void       xRecurIntraCodingLumaQT  ( CodingStructure &cs, Partitioner& pm, const double bestCostSoFar = MAX_DOUBLE, const int subTuIdx = -1, const PartSplit ispType = TU_NO_ISP, const bool ispIsCurrentWinnder = false );
-
-
-  void encPredIntraDPCM( const ComponentID &compID, PelBuf &pOrg, PelBuf &pDst, const uint32_t &uiDirMode );
-  static bool useDPCMForFirstPassIntraEstimation( const PredictionUnit &pu, const uint32_t &uiDirMode );
+  bool       xRecurIntraCodingLumaQT  ( CodingStructure &cs, Partitioner& pm, const double bestCostSoFar = MAX_DOUBLE, const int subTuIdx = -1, const PartSplit ispType = TU_NO_ISP, const bool ispIsCurrentWinner = false, bool mtsCheckRangeFlag = false, int mtsFirstCheckId = 0, int mtsLastCheckId = 0, bool moreProbMTSIdxFirst = false );
+  bool       xRecurIntraCodingACTQT(CodingStructure &cs, Partitioner& pm, bool mtsCheckRangeFlag = false, int mtsFirstCheckId = 0, int mtsLastCheckId = 0, bool moreProbMTSIdxFirst = false);
+  bool       xIntraCodingLumaISP      ( CodingStructure& cs, Partitioner& pm, const double bestCostSoFar = MAX_DOUBLE );
+
+  template<typename T, size_t N>
+  void reduceHadCandList(static_vector<T, N>& candModeList, static_vector<double, N>& candCostList, int& numModesForFullRD, const double thresholdHadCost, const double* mipHadCost, const PredictionUnit &pu, const bool fastMip);
+  void   derivePLTLossy  (      CodingStructure& cs, Partitioner& partitioner, ComponentID compBegin, uint32_t numComp);
+  void   calcPixelPred   (      CodingStructure& cs, Partitioner& partitioner, uint32_t    yPos,      uint32_t xPos,             ComponentID compBegin, uint32_t  numComp);
+  void     preCalcPLTIndexRD      (CodingStructure& cs, Partitioner& partitioner, ComponentID compBegin, uint32_t numComp);
+  void     calcPixelPredRD        (CodingStructure& cs, Partitioner& partitioner, Pel* orgBuf, Pel* pixelValue, Pel* recoValue, ComponentID compBegin, uint32_t numComp);
+  void     deriveIndexMap         (CodingStructure& cs, Partitioner& partitioner, ComponentID compBegin, uint32_t numComp, PLTScanMode pltScanMode, double& dCost);
+  bool     deriveSubblockIndexMap(CodingStructure& cs, Partitioner& partitioner, ComponentID compBegin, PLTScanMode pltScanMode, int minSubPos, int maxSubPos, const BinFracBits& fracBitsPltRunType, const BinFracBits* fracBitsPltIndexINDEX, const BinFracBits* fracBitsPltIndexCOPY, const double minCost, bool useRotate);
+  double   rateDistOptPLT         (bool RunType, uint8_t RunIndex, bool prevRunType, uint8_t prevRunIndex, uint8_t aboveRunIndex, bool& prevCodedRunType, int& prevCodedRunPos, int scanPos, uint32_t width, int dist, int indexMaxValue, const BinFracBits* IndexfracBits, const BinFracBits& TypefracBits);
+  void     initTBCTable           (int bitDepth);
+  uint32_t getTruncBinBits        (uint32_t symbol, uint32_t maxSymbol);
+  uint32_t getEpExGolombNumBins   (uint32_t symbol, uint32_t count);
+  void xGetNextISPMode                    ( ModeInfo& modeInfo, const ModeInfo* lastMode, const Size cuSize );
+  bool xSortISPCandList                   ( double bestCostSoFar, double bestNonISPCost, ModeInfo bestNonISPMode );
+  void xSortISPCandListLFNST              ( );
+  void xFindAlreadyTestedNearbyIntraModes ( int currentLfnstIdx, int currentIntraMode, int* refLfnstIdx, int* leftIntraMode, int* rightIntraMode, ISPType ispOption, int windowSize );
+  bool updateISPStatusFromRelCU           ( double bestNonISPCostCurrCu, ModeInfo bestNonISPModeCurrCu, int& bestISPModeInRelCU );
+  void xFinishISPModes                    ( );
 };// END CLASS DEFINITION EncSearch
 
 //! \}
diff --git a/source/Lib/EncoderLib/NALwrite.cpp b/source/Lib/EncoderLib/NALwrite.cpp
index 93b09b4d90535882605a4c272a0246dc76114690..4764029a595e9daa55484faec86163de40b055cd 100644
--- a/source/Lib/EncoderLib/NALwrite.cpp
+++ b/source/Lib/EncoderLib/NALwrite.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -49,11 +49,14 @@ static const uint8_t emulation_prevention_three_byte = 3;
 void writeNalUnitHeader(ostream& out, OutputNALUnit& nalu)       // nal_unit_header()
 {
 OutputBitstream bsNALUHeader;
-
-  bsNALUHeader.write(0,1);                    // forbidden_zero_bit
-  bsNALUHeader.write(nalu.m_nalUnitType, 6);  // nal_unit_type
-  bsNALUHeader.write(nalu.m_nuhLayerId, 6);   // nuh_layer_id
-  bsNALUHeader.write(nalu.m_temporalId+1, 3); // nuh_temporal_id_plus1
+  int forbiddenZero = 0;                  // the fields written below total 1 + 1 + 6 + 5 + 3 = 16 bits
+  bsNALUHeader.write(forbiddenZero, 1);   // forbidden_zero_bit
+  int nuhReservedZeroBit = 0;
+  bsNALUHeader.write(nuhReservedZeroBit, 1);   // nuh_reserved_zero_bit
+  CHECK(nalu.m_nuhLayerId > 55, "The value of nuh_layer_id shall be in the range of 0 to 55, inclusive");
+  bsNALUHeader.write(nalu.m_nuhLayerId, 6);       // nuh_layer_id (range-checked by the CHECK above)
+  bsNALUHeader.write(nalu.m_nalUnitType, 5);      // nal_unit_type (5 bits; the value is not range-checked here)
+  bsNALUHeader.write(nalu.m_temporalId + 1, 3);   // nuh_temporal_id_plus1
 
   out.write(reinterpret_cast<const char*>(bsNALUHeader.getByteStream()), bsNALUHeader.getByteStreamLength());
 }
diff --git a/source/Lib/EncoderLib/NALwrite.h b/source/Lib/EncoderLib/NALwrite.h
index b107a2f4c3425e7d3d123c25372e7946bbc1140a..34a787d4064aca797b52f4be8289fdaa199a9ab3 100644
--- a/source/Lib/EncoderLib/NALwrite.h
+++ b/source/Lib/EncoderLib/NALwrite.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -58,9 +58,10 @@ struct OutputNALUnit : public NALUnit
    */
   OutputNALUnit(
     NalUnitType nalUnitType,
+    uint32_t layerId = 0,               // forwarded as the fifth NALUnit constructor argument
     uint32_t temporalID = 0,
     uint32_t reserved_zero_6bits = 0)
-  : NALUnit(nalUnitType, temporalID, reserved_zero_6bits)
+  : NALUnit( nalUnitType, temporalID, reserved_zero_6bits, 0, layerId ) // NOTE(review): layerId now precedes temporalID, so callers passing (type, temporalID) positionally must be updated
   , m_Bitstream()
   {}
 
diff --git a/source/Lib/EncoderLib/RateCtrl.cpp b/source/Lib/EncoderLib/RateCtrl.cpp
index da6c26d849c4f665402d2456c7ab9522e1d39fa5..e53ce655f3cc8206950f0ce5867649614d408d9d 100644
--- a/source/Lib/EncoderLib/RateCtrl.cpp
+++ b/source/Lib/EncoderLib/RateCtrl.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -306,6 +306,8 @@ EncRCGOP::EncRCGOP()
   m_targetBits = 0;
   m_picLeft    = 0;
   m_bitsLeft   = 0;
+  m_minEstLambda = 0.0;
+  m_maxEstLambda = 0.0;
 }
 
 EncRCGOP::~EncRCGOP()
@@ -476,6 +478,11 @@ void EncRCGOP::create( EncRCSeq* encRCSeq, int numPic )
   m_targetBits   = targetBits;
   m_picLeft      = m_numPic;
   m_bitsLeft     = m_targetBits;
+  int bitdepth_luma_scale =
+    2 * (encRCSeq->getbitDepth() - 8
+      - DISTORTION_PRECISION_ADJUSTMENT(encRCSeq->getbitDepth()));
+  m_minEstLambda = 0.1;
+  m_maxEstLambda = 10000.0 * pow(2.0, bitdepth_luma_scale);
 }
 
 void EncRCGOP::xCalEquaCoeff( EncRCSeq* encRCSeq, double* lambdaRatio, double* equaCoeffA, double* equaCoeffB, int GOPSize )
@@ -493,8 +500,8 @@ void EncRCGOP::xCalEquaCoeff( EncRCSeq* encRCSeq, double* lambdaRatio, double* e
 double EncRCGOP::xSolveEqua(EncRCSeq* encRCSeq, double targetBpp, double* equaCoeffA, double* equaCoeffB, int GOPSize)
 {
   double solution = 100.0;
-  double minNumber = 0.1;
-  double maxNumber = 10000.0;
+  double minNumber = m_minEstLambda;
+  double maxNumber = m_maxEstLambda;
   for ( int i=0; i<g_RCIterationNum; i++ )
   {
     double fx = 0.0;
@@ -522,7 +529,7 @@ double EncRCGOP::xSolveEqua(EncRCSeq* encRCSeq, double targetBpp, double* equaCo
     }
   }
 
-  solution = Clip3( 0.1, 10000.0, solution );
+  solution = Clip3(m_minEstLambda, m_maxEstLambda, solution);
   return solution;
 }
 
@@ -776,6 +783,10 @@ double EncRCPic::estimatePicLambda( list<EncRCPic*>& listPreviousPictures, bool
   double beta          = m_encRCSeq->getPicPara( m_frameLevel ).m_beta;
   double bpp       = (double)m_targetBits/(double)m_numberOfPixel;
 
+  int bitdepth_luma_scale =
+    2 * (m_encRCSeq->getbitDepth() - 8
+      - DISTORTION_PRECISION_ADJUSTMENT(m_encRCSeq->getbitDepth()));
+
   int lastPicValPix = 0;
   if (listPreviousPictures.size() > 0)
   {
@@ -816,28 +827,28 @@ double EncRCPic::estimatePicLambda( list<EncRCPic*>& listPreviousPictures, bool
 
   if ( lastLevelLambda > 0.0 )
   {
-    lastLevelLambda = Clip3( 0.1, 10000.0, lastLevelLambda );
+    lastLevelLambda = Clip3(m_encRCGOP->getMinEstLambda(), m_encRCGOP->getMaxEstLambda(), lastLevelLambda);
     estLambda = Clip3( lastLevelLambda * pow( 2.0, -3.0/3.0 ), lastLevelLambda * pow( 2.0, 3.0/3.0 ), estLambda );
   }
 
   if ( lastPicLambda > 0.0 )
   {
-    lastPicLambda = Clip3( 0.1, 2000.0, lastPicLambda );
+    lastPicLambda = Clip3(m_encRCGOP->getMinEstLambda(), 2000.0 * pow(2.0, bitdepth_luma_scale), lastPicLambda);
     estLambda = Clip3( lastPicLambda * pow( 2.0, -10.0/3.0 ), lastPicLambda * pow( 2.0, 10.0/3.0 ), estLambda );
   }
   else if ( lastValidLambda > 0.0 )
   {
-    lastValidLambda = Clip3( 0.1, 2000.0, lastValidLambda );
+    lastValidLambda = Clip3(m_encRCGOP->getMinEstLambda(), 2000.0 * pow(2.0, bitdepth_luma_scale), lastValidLambda);
     estLambda = Clip3( lastValidLambda * pow(2.0, -10.0/3.0), lastValidLambda * pow(2.0, 10.0/3.0), estLambda );
   }
   else
   {
-    estLambda = Clip3( 0.1, 10000.0, estLambda );
+    estLambda = Clip3(m_encRCGOP->getMinEstLambda(), m_encRCGOP->getMaxEstLambda(), estLambda);
   }
 
-  if ( estLambda < 0.1 )
+  if ( estLambda < m_encRCGOP->getMinEstLambda())
   {
-    estLambda = 0.1;
+    estLambda = m_encRCGOP->getMinEstLambda();
   }
 
   //Avoid different results in different platforms. The problem is caused by the different results of pow() in different platforms.
@@ -1007,7 +1018,11 @@ double EncRCPic::getLCUEstLambda( double bpp )
   }
   else
   {
-    estLambda = Clip3( 10.0, 1000.0, estLambda );
+    int bitdepth_luma_scale =
+      2
+      * (m_encRCSeq->getbitDepth() - 8
+        - DISTORTION_PRECISION_ADJUSTMENT(m_encRCSeq->getbitDepth()));
+    estLambda = Clip3(10.0 * pow(2.0, bitdepth_luma_scale), 1000.0 * pow(2.0, bitdepth_luma_scale), estLambda);
   }
 
   if ( estLambda < 0.1 )
@@ -1086,8 +1101,8 @@ void EncRCPic::updateAfterCTU(int LCUIdx, int bits, int QP, double lambda, doubl
     alpha *= ( 1.0 - m_encRCSeq->getAlphaUpdate() / 2.0 );
     beta  *= ( 1.0 - m_encRCSeq->getBetaUpdate() / 2.0 );
 
-    alpha = Clip3( g_RCAlphaMinValue, g_RCAlphaMaxValue, alpha );
-    beta  = Clip3( g_RCBetaMinValue,  g_RCBetaMaxValue,  beta  );
+    alpha = clipRcAlpha( m_encRCSeq->getbitDepth(), alpha );
+    beta = clipRcBeta( beta );
 
     TRCParameter rcPara;
     rcPara.m_alpha = alpha;
@@ -1103,21 +1118,16 @@ void EncRCPic::updateAfterCTU(int LCUIdx, int bits, int QP, double lambda, doubl
     }
 
     double MSE = m_LCUs[LCUIdx].m_actualMSE;
-    double updatedK = bpp * inputLambda / MSE;
+    double updatedK = MSE > 0 ? bpp * inputLambda / MSE : 0.0;
     double updatedC = MSE / pow(bpp, -updatedK);
     rcPara.m_alpha = updatedC * updatedK;
     rcPara.m_beta = -updatedK - 1.0;
-
-    if (bpp > 0 && updatedK > 0.0001)
+    if (MSE > 0)
     {
+      rcPara.m_alpha = clipRcAlpha( m_encRCSeq->getbitDepth(), rcPara.m_alpha );
+      rcPara.m_beta = clipRcBeta( rcPara.m_beta );
       m_encRCSeq->setLCUPara(m_frameLevel, LCUIdx, rcPara);
     }
-    else
-    {
-      rcPara.m_alpha = Clip3(0.0001, g_RCAlphaMaxValue, rcPara.m_alpha);
-      m_encRCSeq->setLCUPara(m_frameLevel, LCUIdx, rcPara);
-    }
-
     return;
   }
 
@@ -1127,8 +1137,8 @@ void EncRCPic::updateAfterCTU(int LCUIdx, int bits, int QP, double lambda, doubl
   lnbpp = Clip3( -5.0, -0.1, lnbpp );
   beta  += m_encRCSeq->getBetaUpdate() * ( log( inputLambda ) - log( calLambda ) ) * lnbpp;
 
-  alpha = Clip3( g_RCAlphaMinValue, g_RCAlphaMaxValue, alpha );
-  beta  = Clip3( g_RCBetaMinValue,  g_RCBetaMaxValue,  beta  );
+  alpha = clipRcAlpha( m_encRCSeq->getbitDepth(), alpha );
+  beta = clipRcBeta( beta );
 
   TRCParameter rcPara;
   rcPara.m_alpha = alpha;
@@ -1144,21 +1154,17 @@ void EncRCPic::updateAfterCTU(int LCUIdx, int bits, int QP, double lambda, doubl
   }
 
   double MSE = m_LCUs[LCUIdx].m_actualMSE;
-  double updatedK = bpp * inputLambda / MSE;
+  double updatedK = MSE > 0 ? bpp * inputLambda / MSE : 0.0;
   double updatedC = MSE / pow(bpp, -updatedK);
   rcPara.m_alpha = updatedC * updatedK;
   rcPara.m_beta = -updatedK - 1.0;
 
-  if (bpp > 0 && updatedK > 0.0001)
+  if (MSE > 0)
   {
+    rcPara.m_alpha = clipRcAlpha( m_encRCSeq->getbitDepth(), rcPara.m_alpha );
+    rcPara.m_beta = clipRcBeta( rcPara.m_beta );
     m_encRCSeq->setLCUPara(m_frameLevel, LCUIdx, rcPara);
   }
-  else
-  {
-    rcPara.m_alpha = Clip3(0.0001, g_RCAlphaMaxValue, rcPara.m_alpha);
-    m_encRCSeq->setLCUPara(m_frameLevel, LCUIdx, rcPara);
-  }
-
 }
 
 double EncRCPic::calAverageQP()
@@ -1243,7 +1249,6 @@ void EncRCPic::updateAfterPicture( int actualHeaderBits, int actualTotalBits, do
     m_picQP             = g_RCInvalidQPValue;
   }
   m_picLambda           = averageLambda;
-
   double alpha = m_encRCSeq->getPicPara( m_frameLevel ).m_alpha;
   double beta  = m_encRCSeq->getPicPara( m_frameLevel ).m_beta;
   double skipRatio = 0;
@@ -1262,7 +1267,7 @@ void EncRCPic::updateAfterPicture( int actualHeaderBits, int actualTotalBits, do
   {
     // update parameters
     double picActualBits = ( double )m_picActualBits;
-    double picActualBpp = picActualBits / (double)m_validPixelsInPic;
+    double picActualBpp = m_validPixelsInPic > 0 ? picActualBits / (double)m_validPixelsInPic : 0.001;
     double calLambda     = alpha * pow( picActualBpp, beta );
     double inputLambda   = m_picLambda;
 
@@ -1271,8 +1276,8 @@ void EncRCPic::updateAfterPicture( int actualHeaderBits, int actualTotalBits, do
       alpha *= ( 1.0 - m_encRCSeq->getAlphaUpdate() / 2.0 );
       beta  *= ( 1.0 - m_encRCSeq->getBetaUpdate() / 2.0 );
 
-      alpha = Clip3( g_RCAlphaMinValue, g_RCAlphaMaxValue, alpha );
-      beta  = Clip3( g_RCBetaMinValue,  g_RCBetaMaxValue,  beta  );
+      alpha = clipRcAlpha( m_encRCSeq->getbitDepth(), alpha );
+      beta = clipRcBeta( beta );
 
       TRCParameter rcPara;
       rcPara.m_alpha = alpha;
@@ -1292,6 +1297,8 @@ void EncRCPic::updateAfterPicture( int actualHeaderBits, int actualTotalBits, do
 
       if (m_validPixelsInPic > 0)
       {
+        rcPara.m_alpha = clipRcAlpha( m_encRCSeq->getbitDepth(), rcPara.m_alpha );
+        rcPara.m_beta = clipRcBeta( rcPara.m_beta );
         m_encRCSeq->setPicPara(m_frameLevel, rcPara);
       }
 
@@ -1302,19 +1309,17 @@ void EncRCPic::updateAfterPicture( int actualHeaderBits, int actualTotalBits, do
     alpha += m_encRCSeq->getAlphaUpdate() * ( log( inputLambda ) - log( calLambda ) ) * alpha;
     double lnbpp = log( picActualBpp );
     lnbpp = Clip3( -5.0, -0.1, lnbpp );
-
     beta  += m_encRCSeq->getBetaUpdate() * ( log( inputLambda ) - log( calLambda ) ) * lnbpp;
 
-    alpha = Clip3( g_RCAlphaMinValue, g_RCAlphaMaxValue, alpha );
-    beta  = Clip3( g_RCBetaMinValue,  g_RCBetaMaxValue,  beta  );
+    alpha = clipRcAlpha( m_encRCSeq->getbitDepth(), alpha );
+    beta = clipRcBeta( beta );
   }
 
   TRCParameter rcPara;
   rcPara.m_alpha = alpha;
   rcPara.m_beta  = beta;
   rcPara.m_skipRatio = skipRatio;
-  double picActualBpp = (double)m_picActualBits / (double)m_validPixelsInPic;
-
+  double picActualBpp = m_validPixelsInPic > 0 ? m_picActualBits / (double)m_validPixelsInPic : 0.001;
   double avgMSE = getPicMSE();
   double updatedK = picActualBpp * averageLambda / avgMSE;
   double updatedC = avgMSE / pow(picActualBpp, -updatedK);
@@ -1328,17 +1333,33 @@ void EncRCPic::updateAfterPicture( int actualHeaderBits, int actualTotalBits, do
 
   if (m_validPixelsInPic > 0)
   {
+    rcPara.m_alpha = clipRcAlpha(m_encRCSeq->getbitDepth(), rcPara.m_alpha);
+    rcPara.m_beta = clipRcBeta( rcPara.m_beta );
     m_encRCSeq->setPicPara(m_frameLevel, rcPara);
   }
 
   if ( m_frameLevel == 1 )
   {
-    double currLambda = Clip3( 0.1, 10000.0, m_picLambda );
+    double currLambda = Clip3(m_encRCGOP->getMinEstLambda(), m_encRCGOP->getMaxEstLambda(), m_picLambda);
     double updateLastLambda = g_RCWeightHistoryLambda * m_encRCSeq->getLastLambda() + g_RCWeightCurrentLambda * currLambda;
     m_encRCSeq->setLastLambda( updateLastLambda );
   }
 }
 
+// Clamps alpha to [g_RCAlphaMinValue, g_RCAlphaMaxValue * 2^scale]; scale is derived from bitdepth.
+double EncRCPic::clipRcAlpha(const int bitdepth, const double alpha)
+{
+  // Both terms use the bitdepth argument (the precision adjustment previously read
+  // m_encRCSeq->getbitDepth() instead), so the bound always matches the requested depth.
+  const int bitdepth_luma_scale = 2 * (bitdepth - 8 - DISTORTION_PRECISION_ADJUSTMENT(bitdepth));
+  return Clip3(g_RCAlphaMinValue, g_RCAlphaMaxValue * pow(2.0, bitdepth_luma_scale), alpha);
+}
+
+double EncRCPic::clipRcBeta(const double beta) // returns beta clipped to [g_RCBetaMinValue, g_RCBetaMaxValue]
+{
+  return Clip3(g_RCBetaMinValue, g_RCBetaMaxValue, beta);
+}
+
 int EncRCPic::getRefineBitsForIntra( int orgBits )
 {
   double alpha=0.25, beta=0.5582;
@@ -1822,7 +1843,7 @@ int  RateCtrl::updateCpbState(int actualBits)
   return cpbState;
 }
 
-void RateCtrl::initHrdParam(const HRD* pcHrd, int iFrameRate, double fInitialCpbFullness)
+void RateCtrl::initHrdParam(const HRDParameters* pcHrd, int iFrameRate, double fInitialCpbFullness)
 {
   m_CpbSaturationEnabled = true;
   m_cpbSize = (pcHrd->getCpbSizeValueMinus1(0, 0, 0) + 1) << (4 + pcHrd->getCpbSizeScale());
diff --git a/source/Lib/EncoderLib/RateCtrl.h b/source/Lib/EncoderLib/RateCtrl.h
index 09e84409f6444d836277cba90bbdcc239c695853..afb298a2ebdd9559a9a0c50749090d977555d4e9 100644
--- a/source/Lib/EncoderLib/RateCtrl.h
+++ b/source/Lib/EncoderLib/RateCtrl.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -209,6 +209,8 @@ public:
   int  getPicLeft()               { return m_picLeft; }
   int  getBitsLeft()              { return m_bitsLeft; }
   int  getTargetBitInGOP( int i ) { return m_picTargetBitInGOP[i]; }
+  double getMinEstLambda()        { return m_minEstLambda; } // returns m_minEstLambda (zeroed in the constructor, set to 0.1 in create())
+  double getMaxEstLambda()        { return m_maxEstLambda; } // returns m_maxEstLambda (zeroed in the constructor, bit-depth scaled in create())
 
 private:
   EncRCSeq* m_encRCSeq;
@@ -217,6 +219,8 @@ private:
   int m_targetBits;
   int m_picLeft;
   int m_bitsLeft;
+  double m_minEstLambda;
+  double m_maxEstLambda;
 };
 
 class EncRCPic
@@ -243,6 +247,9 @@ public:
   void updateAfterCTU(int LCUIdx, int bits, int QP, double lambda, double skipRatio, bool updateLCUParameter = true);
   void updateAfterPicture( int actualHeaderBits, int actualTotalBits, double averageQP, double averageLambda, bool isIRAP);
 
+  double clipRcAlpha(const int bitdepth, const double alpha);
+  double clipRcBeta(const double beta);
+
   void addToPictureLsit( list<EncRCPic*>& listPreviousPictures );
   double calAverageQP();
   double calAverageLambda();
@@ -347,7 +354,7 @@ public:
   uint32_t       getCpbSize()               { return m_cpbSize;        }
   uint32_t       getBufferingRate()         { return m_bufferingRate;  }
   int        updateCpbState(int actualBits);
-  void       initHrdParam(const HRD* pcHrd, int iFrameRate, double fInitialCpbFullness);
+  void       initHrdParam(const HRDParameters* pcHrd, int iFrameRate, double fInitialCpbFullness);
 #endif
 
 private:
diff --git a/source/Lib/EncoderLib/SEIEncoder.cpp b/source/Lib/EncoderLib/SEIEncoder.cpp
index 748036209ad0e43a8a787e61b85fd30de2ee90cf..7e94dcd31fbca6d6ab8f90270b3d6185dc5873e6 100644
--- a/source/Lib/EncoderLib/SEIEncoder.cpp
+++ b/source/Lib/EncoderLib/SEIEncoder.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -44,29 +44,6 @@ std::string hashToString(const PictureHash &digest, int numChar);
 //! \ingroup EncoderLib
 //! \{
 
-#if HEVC_VPS
-void SEIEncoder::initSEIActiveParameterSets (SEIActiveParameterSets *seiActiveParameterSets, const VPS *vps, const SPS *sps)
-#else
-void SEIEncoder::initSEIActiveParameterSets (SEIActiveParameterSets *seiActiveParameterSets, const SPS *sps)
-#endif
-{
-  CHECK(!(m_isInitialized), "Unspecified error");
-  CHECK(!(seiActiveParameterSets!=NULL), "Unspecified error");
-#if HEVC_VPS
-  CHECK(!(vps!=NULL), "Unspecified error");
-#endif
-  CHECK(!(sps!=NULL), "Unspecified error");
-
-#if HEVC_VPS
-  seiActiveParameterSets->activeVPSId = vps->getVPSId();
-#endif
-  seiActiveParameterSets->m_selfContainedCvsFlag = false;
-  seiActiveParameterSets->m_noParameterSetUpdateFlag = false;
-  seiActiveParameterSets->numSpsIdsMinus1 = 0;
-  seiActiveParameterSets->activeSeqParameterSetId.resize(seiActiveParameterSets->numSpsIdsMinus1 + 1);
-  seiActiveParameterSets->activeSeqParameterSetId[0] = sps->getSPSId();
-}
-
 void SEIEncoder::initSEIFramePacking(SEIFramePacking *seiFramePacking, int currPicNum)
 {
   CHECK(!(m_isInitialized), "Unspecified error");
@@ -93,178 +70,308 @@ void SEIEncoder::initSEIFramePacking(SEIFramePacking *seiFramePacking, int currP
   seiFramePacking->m_upsampledAspectRatio = 0;
 }
 
-void SEIEncoder::initSEISegmentedRectFramePacking(SEISegmentedRectFramePacking *seiSegmentedRectFramePacking)
+void SEIEncoder::initSEIBufferingPeriod(SEIBufferingPeriod *bufferingPeriodSEI, bool noLeadingPictures) // fills *bufferingPeriodSEI with fixed defaults and config values; noLeadingPictures selects the CPB removal delay delta table
 {
-  CHECK(!(m_isInitialized), "Unspecified error");
-  CHECK(!(seiSegmentedRectFramePacking!=NULL), "Unspecified error");
+  CHECK(!(m_isInitialized), "SEIEncoder must be initialized before setting up bufferingPeriodSEI");
+  CHECK(!(bufferingPeriodSEI != nullptr), "Need a bufferingPeriodSEI for initialization (got nullptr)");
 
-  seiSegmentedRectFramePacking->m_arrangementCancelFlag = m_pcCfg->getSegmentedRectFramePackingArrangementSEICancel();
-  seiSegmentedRectFramePacking->m_contentInterpretationType = m_pcCfg->getSegmentedRectFramePackingArrangementSEIType();
-  seiSegmentedRectFramePacking->m_arrangementPersistenceFlag = m_pcCfg->getSegmentedRectFramePackingArrangementSEIPersistence();
+  uint32_t uiInitialCpbRemovalDelay = (90000/2);                      // 0.5 sec (90,000 units per second)
+  bufferingPeriodSEI->m_bpNalCpbParamsPresentFlag = true;
+  bufferingPeriodSEI->m_bpVclCpbParamsPresentFlag = true;
+  bufferingPeriodSEI->m_bpMaxSubLayers = m_pcCfg->getMaxTempLayer();
+  bufferingPeriodSEI->m_bpCpbCnt = 1;
+  for(int i=0; i < bufferingPeriodSEI->m_bpMaxSubLayers; i++)
+  {
+    for(int j=0; j < bufferingPeriodSEI->m_bpCpbCnt; j++)
+    {
+      bufferingPeriodSEI->m_initialCpbRemovalDelay[j][i][0] = uiInitialCpbRemovalDelay;
+      bufferingPeriodSEI->m_initialCpbRemovalDelay[j][i][1] = uiInitialCpbRemovalDelay;
+      bufferingPeriodSEI->m_initialCpbRemovalOffset[j][i][0] = uiInitialCpbRemovalDelay;
+      bufferingPeriodSEI->m_initialCpbRemovalOffset[j][i][1] = uiInitialCpbRemovalDelay;
+    }
+  }
+  // The concatenation flag is left cleared here. max_initial_removal_delay_for_concatenation depends on the usage scenario.
+  // The parameters could be added to config file, but as long as the initialisation of generic buffering parameters is
+  // not controllable, it does not seem to make sense to provide settings for these.
+  bufferingPeriodSEI->m_concatenationFlag = false;
+  bufferingPeriodSEI->m_maxInitialRemovalDelayForConcatenation = uiInitialCpbRemovalDelay;
+
+  bufferingPeriodSEI->m_bpDecodingUnitHrdParamsPresentFlag = m_pcCfg->getNoPicPartitionFlag() == false;
+  bufferingPeriodSEI->m_decodingUnitCpbParamsInPicTimingSeiFlag = !m_pcCfg->getDecodingUnitInfoSEIEnabled();
+
+  bufferingPeriodSEI->m_initialCpbRemovalDelayLength = 16;                  // assuming 0.5 sec: ceil( log2( 90,000 * 0.5 ) ) = 16 bits
+  // Note: The following parameters require some knowledge about the GOP structure.
+  //       Using getIntraPeriod() should be avoided though, because it assumes certain GOP
+  //       properties, which are only valid in CTC.
+  //       Still copying this setting from HM for consistency, improvements welcome
+  bool isRandomAccess  = m_pcCfg->getIntraPeriod() > 0;
+  if( isRandomAccess )
+  {
+    bufferingPeriodSEI->m_cpbRemovalDelayLength = 6;                        // 32 = 2^5 (plus 1)
+    bufferingPeriodSEI->m_dpbOutputDelayLength =  6;                        // 32 + 3 = 35, which fits in 6 bits (< 2^6)
+  }
+  else
+  {
+    bufferingPeriodSEI->m_cpbRemovalDelayLength = 9;                        // max. 2^10 -- NOTE(review): 9 bits span 2^9 values; confirm intended length
+    bufferingPeriodSEI->m_dpbOutputDelayLength =  9;                        // max. 2^10 -- NOTE(review): 9 bits span 2^9 values; confirm intended length
+  }
+  bufferingPeriodSEI->m_duCpbRemovalDelayIncrementLength = 7;               // ceil( log2( tick_divisor_minus2 + 2 ) )
+  bufferingPeriodSEI->m_dpbOutputDelayDuLength = bufferingPeriodSEI->m_dpbOutputDelayLength + bufferingPeriodSEI->m_duCpbRemovalDelayIncrementLength;
+  // Concatenation can be signalled (set to one) during splicing; the flag was already cleared above.
+  bufferingPeriodSEI->m_concatenationFlag = 0;
+  // Temporal-layer HRD parameters are not available yet, so a fixed delta is assumed.
+  bufferingPeriodSEI->m_auCpbRemovalDelayDelta = 1;
+  bufferingPeriodSEI->m_cpbRemovalDelayDeltasPresentFlag = m_pcCfg->getBpDeltasGOPStructure();
+  if (bufferingPeriodSEI->m_cpbRemovalDelayDeltasPresentFlag)
+  {
+    switch (m_pcCfg->getGOPSize()) // delta tables exist only for GOP sizes 8 and 16; any other size throws
+    {
+      case 8:
+      {
+        if (noLeadingPictures)
+        {
+          bufferingPeriodSEI->m_numCpbRemovalDelayDeltas         = 5;
+          bufferingPeriodSEI->m_cpbRemovalDelayDelta[0]          = 1;
+          bufferingPeriodSEI->m_cpbRemovalDelayDelta[1]          = 2;
+          bufferingPeriodSEI->m_cpbRemovalDelayDelta[2]          = 3;
+          bufferingPeriodSEI->m_cpbRemovalDelayDelta[3]          = 6;
+          bufferingPeriodSEI->m_cpbRemovalDelayDelta[4]          = 7;
+        }
+        else
+        {
+          bufferingPeriodSEI->m_numCpbRemovalDelayDeltas         = 3;
+          bufferingPeriodSEI->m_cpbRemovalDelayDelta[0]          = 1;
+          bufferingPeriodSEI->m_cpbRemovalDelayDelta[1]          = 2;
+          bufferingPeriodSEI->m_cpbRemovalDelayDelta[2]          = 3;
+        }
+      }
+        break;
+      case 16:
+      {
+        if (noLeadingPictures)
+        {
+          bufferingPeriodSEI->m_numCpbRemovalDelayDeltas         = 9;
+          bufferingPeriodSEI->m_cpbRemovalDelayDelta[0]          = 1;
+          bufferingPeriodSEI->m_cpbRemovalDelayDelta[1]          = 2;
+          bufferingPeriodSEI->m_cpbRemovalDelayDelta[2]          = 3;
+          bufferingPeriodSEI->m_cpbRemovalDelayDelta[3]          = 4;
+          bufferingPeriodSEI->m_cpbRemovalDelayDelta[4]          = 6;
+          bufferingPeriodSEI->m_cpbRemovalDelayDelta[5]          = 7;
+          bufferingPeriodSEI->m_cpbRemovalDelayDelta[6]          = 9;
+          bufferingPeriodSEI->m_cpbRemovalDelayDelta[7]          = 14;
+          bufferingPeriodSEI->m_cpbRemovalDelayDelta[8]          = 15;
+        }
+        else
+        {
+          bufferingPeriodSEI->m_numCpbRemovalDelayDeltas         = 5;
+          bufferingPeriodSEI->m_cpbRemovalDelayDelta[0]          = 1;
+          bufferingPeriodSEI->m_cpbRemovalDelayDelta[1]          = 2;
+          bufferingPeriodSEI->m_cpbRemovalDelayDelta[2]          = 3;
+          bufferingPeriodSEI->m_cpbRemovalDelayDelta[3]          = 6;
+          bufferingPeriodSEI->m_cpbRemovalDelayDelta[4]          = 7;
+        }
+      }
+        break;
+      default:
+      {
+        THROW("m_cpbRemovalDelayDelta not applicable for the GOP size");
+      }
+        break;
+    }
+  }
+  // A commercial encoder should track the buffer state for all layers and sub-layers
+  // to ensure CPB conformance. Such tracking is required for calculating alternative
+  // CPB parameters.
+  // Unfortunately VTM does not have such tracking. Thus we cannot encode alternative
+  // CPB parameters here.
+  bufferingPeriodSEI->m_altCpbParamsPresentFlag = false;
+  bufferingPeriodSEI->m_useAltCpbParamsFlag = false;
 }
 
-void SEIEncoder::initSEIDisplayOrientation(SEIDisplayOrientation* seiDisplayOrientation)
+void SEIEncoder::initSEIErp(SEIEquirectangularProjection* seiEquirectangularProjection) // copies the equirectangular projection SEI settings from the encoder configuration
 {
-  CHECK(!(m_isInitialized), "Unspecified error");
-  CHECK(!(seiDisplayOrientation!=NULL), "Unspecified error");
+  CHECK(!(m_isInitialized), "SEIEncoder must be initialized before setting up seiEquirectangularProjection");
+  CHECK(!(seiEquirectangularProjection != nullptr), "Need a seiEquirectangularProjection for initialization (got nullptr)");
 
-  seiDisplayOrientation->cancelFlag = false;
-  seiDisplayOrientation->horFlip = false;
-  seiDisplayOrientation->verFlip = false;
-  seiDisplayOrientation->anticlockwiseRotation = m_pcCfg->getDisplayOrientationSEIAngle();
+  seiEquirectangularProjection->m_erpCancelFlag = m_pcCfg->getErpSEICancelFlag();
+  if (!seiEquirectangularProjection->m_erpCancelFlag) // remaining fields are only filled when the SEI is not a cancel message
+  {
+    seiEquirectangularProjection->m_erpPersistenceFlag   = m_pcCfg->getErpSEIPersistenceFlag();
+    seiEquirectangularProjection->m_erpGuardBandFlag     = m_pcCfg->getErpSEIGuardBandFlag();
+    if (seiEquirectangularProjection->m_erpGuardBandFlag == 1) // guard band details are only read when the flag is set
+    {
+      seiEquirectangularProjection->m_erpGuardBandType       = m_pcCfg->getErpSEIGuardBandType();
+      seiEquirectangularProjection->m_erpLeftGuardBandWidth  = m_pcCfg->getErpSEILeftGuardBandWidth();
+      seiEquirectangularProjection->m_erpRightGuardBandWidth = m_pcCfg->getErpSEIRightGuardBandWidth();
+    }
+  }
 }
 
-void SEIEncoder::initSEIToneMappingInfo(SEIToneMappingInfo *seiToneMappingInfo)
+void SEIEncoder::initSEISphereRotation(SEISphereRotation* seiSphereRotation) // copies the sphere rotation SEI settings from the encoder configuration
 {
-  CHECK(!(m_isInitialized), "Unspecified error");
-  CHECK(!(seiToneMappingInfo!=NULL), "Unspecified error");
+  CHECK(!(m_isInitialized), "SEIEncoder must be initialized before setting up seiSphereRotation");
+  CHECK(!(seiSphereRotation != nullptr), "Need a seiSphereRotation for initialization (got nullptr)");
 
-  seiToneMappingInfo->m_toneMapId = m_pcCfg->getTMISEIToneMapId();
-  seiToneMappingInfo->m_toneMapCancelFlag = m_pcCfg->getTMISEIToneMapCancelFlag();
-  seiToneMappingInfo->m_toneMapPersistenceFlag = m_pcCfg->getTMISEIToneMapPersistenceFlag();
+  seiSphereRotation->m_sphereRotationCancelFlag = m_pcCfg->getSphereRotationSEICancelFlag();
+  if ( !seiSphereRotation->m_sphereRotationCancelFlag ) // rotation values are only filled when the SEI is not a cancel message
+  {
+    seiSphereRotation->m_sphereRotationPersistenceFlag = m_pcCfg->getSphereRotationSEIPersistenceFlag();
+    seiSphereRotation->m_sphereRotationYaw = m_pcCfg->getSphereRotationSEIYaw();
+    seiSphereRotation->m_sphereRotationPitch = m_pcCfg->getSphereRotationSEIPitch();
+    seiSphereRotation->m_sphereRotationRoll = m_pcCfg->getSphereRotationSEIRoll();
+  }
+}
 
-  seiToneMappingInfo->m_codedDataBitDepth = m_pcCfg->getTMISEICodedDataBitDepth();
-  CHECK(!(seiToneMappingInfo->m_codedDataBitDepth >= 8 && seiToneMappingInfo->m_codedDataBitDepth <= 14), "Unspecified error");
-  seiToneMappingInfo->m_targetBitDepth = m_pcCfg->getTMISEITargetBitDepth();
-  CHECK(!(seiToneMappingInfo->m_targetBitDepth >= 1 && seiToneMappingInfo->m_targetBitDepth <= 17), "Unspecified error");
-  seiToneMappingInfo->m_modelId = m_pcCfg->getTMISEIModelID();
-  CHECK(!(seiToneMappingInfo->m_modelId >=0 &&seiToneMappingInfo->m_modelId<=4), "Unspecified error");
+void SEIEncoder::initSEIOmniViewport(SEIOmniViewport* seiOmniViewport) // copies the omnidirectional viewport SEI settings from the encoder configuration
+{
+  CHECK(!(m_isInitialized), "SEIEncoder must be initialized before setting up seiOmniViewport");
+  CHECK(!(seiOmniViewport != nullptr), "Need a seiOmniViewport for initialization (got nullptr)");
 
-  switch( seiToneMappingInfo->m_modelId)
+  seiOmniViewport->m_omniViewportId = m_pcCfg->getOmniViewportSEIId();
+  seiOmniViewport->m_omniViewportCancelFlag = m_pcCfg->getOmniViewportSEICancelFlag();
+  if ( !seiOmniViewport->m_omniViewportCancelFlag ) // viewport regions are only filled when the SEI is not a cancel message
   {
-  case 0:
-    {
-      seiToneMappingInfo->m_minValue = m_pcCfg->getTMISEIMinValue();
-      seiToneMappingInfo->m_maxValue = m_pcCfg->getTMISEIMaxValue();
-      break;
-    }
-  case 1:
+    seiOmniViewport->m_omniViewportPersistenceFlag = m_pcCfg->getOmniViewportSEIPersistenceFlag();
+    seiOmniViewport->m_omniViewportCntMinus1 = m_pcCfg->getOmniViewportSEICntMinus1();
+
+    seiOmniViewport->m_omniViewportRegions.resize(seiOmniViewport->m_omniViewportCntMinus1+1);
+    for (uint32_t i = 0; i <= seiOmniViewport->m_omniViewportCntMinus1; i++) // inclusive bound: the count is stored minus one
     {
-      seiToneMappingInfo->m_sigmoidMidpoint = m_pcCfg->getTMISEISigmoidMidpoint();
-      seiToneMappingInfo->m_sigmoidWidth = m_pcCfg->getTMISEISigmoidWidth();
-      break;
+      SEIOmniViewport::OmniViewport &viewport = seiOmniViewport->m_omniViewportRegions[i];
+      viewport.azimuthCentre   = m_pcCfg->getOmniViewportSEIAzimuthCentre(i);
+      viewport.elevationCentre = m_pcCfg->getOmniViewportSEIElevationCentre(i);
+      viewport.tiltCentre      = m_pcCfg->getOmniViewportSEITiltCentre(i);
+      viewport.horRange        = m_pcCfg->getOmniViewportSEIHorRange(i);
+      viewport.verRange        = m_pcCfg->getOmniViewportSEIVerRange(i);
     }
-  case 2:
+  }
+}
+
+void SEIEncoder::initSEIRegionWisePacking(SEIRegionWisePacking *seiRegionWisePacking) // copies the region-wise packing SEI settings from the encoder configuration
+{
+  CHECK(!(m_isInitialized), "SEIEncoder must be initialized before setting up seiRegionWisePacking");
+  CHECK(!(seiRegionWisePacking != nullptr), "Need a seiRegionWisePacking for initialization (got nullptr)");
+
+  seiRegionWisePacking->m_rwpCancelFlag                          = m_pcCfg->getRwpSEIRwpCancelFlag();
+  seiRegionWisePacking->m_rwpPersistenceFlag                     = m_pcCfg->getRwpSEIRwpPersistenceFlag();
+  seiRegionWisePacking->m_constituentPictureMatchingFlag         = m_pcCfg->getRwpSEIConstituentPictureMatchingFlag();
+  seiRegionWisePacking->m_numPackedRegions                       = m_pcCfg->getRwpSEINumPackedRegions();
+  seiRegionWisePacking->m_projPictureWidth                       = m_pcCfg->getRwpSEIProjPictureWidth();
+  seiRegionWisePacking->m_projPictureHeight                      = m_pcCfg->getRwpSEIProjPictureHeight();
+  seiRegionWisePacking->m_packedPictureWidth                     = m_pcCfg->getRwpSEIPackedPictureWidth();
+  seiRegionWisePacking->m_packedPictureHeight                    = m_pcCfg->getRwpSEIPackedPictureHeight();
+  seiRegionWisePacking->m_rwpTransformType.resize(seiRegionWisePacking->m_numPackedRegions);
+  seiRegionWisePacking->m_rwpGuardBandFlag.resize(seiRegionWisePacking->m_numPackedRegions);
+  seiRegionWisePacking->m_projRegionWidth.resize(seiRegionWisePacking->m_numPackedRegions);
+  seiRegionWisePacking->m_projRegionHeight.resize(seiRegionWisePacking->m_numPackedRegions);
+  seiRegionWisePacking->m_rwpProjRegionTop.resize(seiRegionWisePacking->m_numPackedRegions);
+  seiRegionWisePacking->m_projRegionLeft.resize(seiRegionWisePacking->m_numPackedRegions);
+  seiRegionWisePacking->m_packedRegionWidth.resize(seiRegionWisePacking->m_numPackedRegions);
+  seiRegionWisePacking->m_packedRegionHeight.resize(seiRegionWisePacking->m_numPackedRegions);
+  seiRegionWisePacking->m_packedRegionTop.resize(seiRegionWisePacking->m_numPackedRegions);
+  seiRegionWisePacking->m_packedRegionLeft.resize(seiRegionWisePacking->m_numPackedRegions);
+  seiRegionWisePacking->m_rwpLeftGuardBandWidth.resize(seiRegionWisePacking->m_numPackedRegions);
+  seiRegionWisePacking->m_rwpRightGuardBandWidth.resize(seiRegionWisePacking->m_numPackedRegions);
+  seiRegionWisePacking->m_rwpTopGuardBandHeight.resize(seiRegionWisePacking->m_numPackedRegions);
+  seiRegionWisePacking->m_rwpBottomGuardBandHeight.resize(seiRegionWisePacking->m_numPackedRegions);
+  seiRegionWisePacking->m_rwpGuardBandNotUsedForPredFlag.resize(seiRegionWisePacking->m_numPackedRegions);
+  seiRegionWisePacking->m_rwpGuardBandType.resize(4*seiRegionWisePacking->m_numPackedRegions); // four guard band type entries per region, indexed i*4 + j below
+  for( int i=0; i < seiRegionWisePacking->m_numPackedRegions; i++ )
+  {
+    seiRegionWisePacking->m_rwpTransformType[i]                  = m_pcCfg->getRwpSEIRwpTransformType(i);
+    seiRegionWisePacking->m_rwpGuardBandFlag[i]                  = m_pcCfg->getRwpSEIRwpGuardBandFlag(i);
+    seiRegionWisePacking->m_projRegionWidth[i]                   = m_pcCfg->getRwpSEIProjRegionWidth(i);
+    seiRegionWisePacking->m_projRegionHeight[i]                  = m_pcCfg->getRwpSEIProjRegionHeight(i);
+    seiRegionWisePacking->m_rwpProjRegionTop[i]                  = m_pcCfg->getRwpSEIRwpSEIProjRegionTop(i);
+    seiRegionWisePacking->m_projRegionLeft[i]                    = m_pcCfg->getRwpSEIProjRegionLeft(i);
+    seiRegionWisePacking->m_packedRegionWidth[i]                 = m_pcCfg->getRwpSEIPackedRegionWidth(i);
+    seiRegionWisePacking->m_packedRegionHeight[i]                = m_pcCfg->getRwpSEIPackedRegionHeight(i);
+    seiRegionWisePacking->m_packedRegionTop[i]                   = m_pcCfg->getRwpSEIPackedRegionTop(i);
+    seiRegionWisePacking->m_packedRegionLeft[i]                  = m_pcCfg->getRwpSEIPackedRegionLeft(i);
+    if( seiRegionWisePacking->m_rwpGuardBandFlag[i] ) // guard band fields are only read for regions that enable them
     {
-      uint32_t num = 1u<<(seiToneMappingInfo->m_targetBitDepth);
-      seiToneMappingInfo->m_startOfCodedInterval.resize(num);
-      int* ptmp = m_pcCfg->getTMISEIStartOfCodedInterva();
-      if(ptmp)
+      seiRegionWisePacking->m_rwpLeftGuardBandWidth[i]           =  m_pcCfg->getRwpSEIRwpLeftGuardBandWidth(i);
+      seiRegionWisePacking->m_rwpRightGuardBandWidth[i]          =  m_pcCfg->getRwpSEIRwpRightGuardBandWidth(i);
+      seiRegionWisePacking->m_rwpTopGuardBandHeight[i]           =  m_pcCfg->getRwpSEIRwpTopGuardBandHeight(i);
+      seiRegionWisePacking->m_rwpBottomGuardBandHeight[i]        =  m_pcCfg->getRwpSEIRwpBottomGuardBandHeight(i);
+      seiRegionWisePacking->m_rwpGuardBandNotUsedForPredFlag[i]  =  m_pcCfg->getRwpSEIRwpGuardBandNotUsedForPredFlag(i);
+      for( int j=0; j < 4; j++ )
       {
-        for(int i=0; i<num;i++)
-        {
-          seiToneMappingInfo->m_startOfCodedInterval[i] = ptmp[i];
-        }
+        seiRegionWisePacking->m_rwpGuardBandType[i*4 + j]         =  m_pcCfg->getRwpSEIRwpGuardBandType(i*4 + j);
       }
-      break;
     }
-  case 3:
+  }
+}
+
+void SEIEncoder::initSEIGcmp(SEIGeneralizedCubemapProjection* seiGeneralizedCubemapProjection) // copies the generalized cubemap projection SEI settings from the encoder configuration
+{
+  CHECK(!(m_isInitialized), "SEIEncoder must be initialized before setting up seiGeneralizedCubemapProjection");
+  CHECK(!(seiGeneralizedCubemapProjection != nullptr), "Need a seiGeneralizedCubemapProjection for initialization (got nullptr)");
+
+  seiGeneralizedCubemapProjection->m_gcmpCancelFlag                      = m_pcCfg->getGcmpSEICancelFlag();
+  if (!seiGeneralizedCubemapProjection->m_gcmpCancelFlag) // remaining fields are only filled when the SEI is not a cancel message
+  {
+    seiGeneralizedCubemapProjection->m_gcmpPersistenceFlag               = m_pcCfg->getGcmpSEIPersistenceFlag();
+    seiGeneralizedCubemapProjection->m_gcmpPackingType                   = m_pcCfg->getGcmpSEIPackingType();
+    seiGeneralizedCubemapProjection->m_gcmpMappingFunctionType           = m_pcCfg->getGcmpSEIMappingFunctionType();
+    // Packing types 4 and 5 carry five faces; every other packing type carries six.
+    int numFace = (seiGeneralizedCubemapProjection->m_gcmpPackingType == 4 || seiGeneralizedCubemapProjection->m_gcmpPackingType == 5) ? 5 : 6;
+    seiGeneralizedCubemapProjection->m_gcmpFaceIndex.resize(numFace);
+    seiGeneralizedCubemapProjection->m_gcmpFaceRotation.resize(numFace);
+    if (seiGeneralizedCubemapProjection->m_gcmpMappingFunctionType == 2) // per-face function coefficients exist only for mapping function type 2
     {
-      seiToneMappingInfo->m_numPivots = m_pcCfg->getTMISEINumPivots();
-      seiToneMappingInfo->m_codedPivotValue.resize(seiToneMappingInfo->m_numPivots);
-      seiToneMappingInfo->m_targetPivotValue.resize(seiToneMappingInfo->m_numPivots);
-      int* ptmpcoded = m_pcCfg->getTMISEICodedPivotValue();
-      int* ptmptarget = m_pcCfg->getTMISEITargetPivotValue();
-      if(ptmpcoded&&ptmptarget)
-      {
-        for(int i=0; i<(seiToneMappingInfo->m_numPivots);i++)
-        {
-          seiToneMappingInfo->m_codedPivotValue[i]=ptmpcoded[i];
-          seiToneMappingInfo->m_targetPivotValue[i]=ptmptarget[i];
-        }
-      }
-      break;
+      seiGeneralizedCubemapProjection->m_gcmpFunctionCoeffU.resize(numFace);
+      seiGeneralizedCubemapProjection->m_gcmpFunctionUAffectedByVFlag.resize(numFace);
+      seiGeneralizedCubemapProjection->m_gcmpFunctionCoeffV.resize(numFace);
+      seiGeneralizedCubemapProjection->m_gcmpFunctionVAffectedByUFlag.resize(numFace);
     }
-  case 4:
+    for (int i = 0; i < numFace; i++)
     {
-      seiToneMappingInfo->m_cameraIsoSpeedIdc = m_pcCfg->getTMISEICameraIsoSpeedIdc();
-      seiToneMappingInfo->m_cameraIsoSpeedValue = m_pcCfg->getTMISEICameraIsoSpeedValue();
-      CHECK(!( seiToneMappingInfo->m_cameraIsoSpeedValue !=0 ), "Unspecified error");
-      seiToneMappingInfo->m_exposureIndexIdc = m_pcCfg->getTMISEIExposurIndexIdc();
-      seiToneMappingInfo->m_exposureIndexValue = m_pcCfg->getTMISEIExposurIndexValue();
-      CHECK(!( seiToneMappingInfo->m_exposureIndexValue !=0 ), "Unspecified error");
-      seiToneMappingInfo->m_exposureCompensationValueSignFlag = m_pcCfg->getTMISEIExposureCompensationValueSignFlag();
-      seiToneMappingInfo->m_exposureCompensationValueNumerator = m_pcCfg->getTMISEIExposureCompensationValueNumerator();
-      seiToneMappingInfo->m_exposureCompensationValueDenomIdc = m_pcCfg->getTMISEIExposureCompensationValueDenomIdc();
-      seiToneMappingInfo->m_refScreenLuminanceWhite = m_pcCfg->getTMISEIRefScreenLuminanceWhite();
-      seiToneMappingInfo->m_extendedRangeWhiteLevel = m_pcCfg->getTMISEIExtendedRangeWhiteLevel();
-      CHECK(!( seiToneMappingInfo->m_extendedRangeWhiteLevel >= 100 ), "Unspecified error");
-      seiToneMappingInfo->m_nominalBlackLevelLumaCodeValue = m_pcCfg->getTMISEINominalBlackLevelLumaCodeValue();
-      seiToneMappingInfo->m_nominalWhiteLevelLumaCodeValue = m_pcCfg->getTMISEINominalWhiteLevelLumaCodeValue();
-      CHECK(!( seiToneMappingInfo->m_nominalWhiteLevelLumaCodeValue > seiToneMappingInfo->m_nominalBlackLevelLumaCodeValue ), "Unspecified error");
-      seiToneMappingInfo->m_extendedWhiteLevelLumaCodeValue = m_pcCfg->getTMISEIExtendedWhiteLevelLumaCodeValue();
-      CHECK(!( seiToneMappingInfo->m_extendedWhiteLevelLumaCodeValue >= seiToneMappingInfo->m_nominalWhiteLevelLumaCodeValue ), "Unspecified error");
-      break;
+      seiGeneralizedCubemapProjection->m_gcmpFaceIndex[i]                = m_pcCfg->getGcmpSEIFaceIndex(i);
+      seiGeneralizedCubemapProjection->m_gcmpFaceRotation[i]             = m_pcCfg->getGcmpSEIFaceRotation(i);
+      if (seiGeneralizedCubemapProjection->m_gcmpMappingFunctionType == 2)
+      {
+        seiGeneralizedCubemapProjection->m_gcmpFunctionCoeffU[i]           = std::max<uint8_t>(1, (uint8_t)(128.0 * m_pcCfg->getGcmpSEIFunctionCoeffU(i) + 0.5)) - 1; // config value scaled by 128, rounded, clamped to >= 1, stored minus one
+        seiGeneralizedCubemapProjection->m_gcmpFunctionUAffectedByVFlag[i] = m_pcCfg->getGcmpSEIFunctionUAffectedByVFlag(i);
+        seiGeneralizedCubemapProjection->m_gcmpFunctionCoeffV[i]           = std::max<uint8_t>(1, (uint8_t)(128.0 * m_pcCfg->getGcmpSEIFunctionCoeffV(i) + 0.5)) - 1; // same scaling as the U coefficient
+        seiGeneralizedCubemapProjection->m_gcmpFunctionVAffectedByUFlag[i] = m_pcCfg->getGcmpSEIFunctionVAffectedByUFlag(i);
+      }
     }
-  default:
+
+    seiGeneralizedCubemapProjection->m_gcmpGuardBandFlag                 = m_pcCfg->getGcmpSEIGuardBandFlag();
+    if (seiGeneralizedCubemapProjection->m_gcmpGuardBandFlag)
     {
-      CHECK(!(!"Undefined SEIToneMapModelId"), "Unspecified error");
-      break;
+      seiGeneralizedCubemapProjection->m_gcmpGuardBandBoundaryType       = m_pcCfg->getGcmpSEIGuardBandBoundaryType();
+      seiGeneralizedCubemapProjection->m_gcmpGuardBandSamplesMinus1      = m_pcCfg->getGcmpSEIGuardBandSamplesMinus1();
     }
   }
 }
 
-void SEIEncoder::initSEISOPDescription(SEISOPDescription *sopDescriptionSEI, Slice *slice, int picInGOP, int lastIdr, int currGOPSize)
+void SEIEncoder::initSEISampleAspectRatioInfo(SEISampleAspectRatioInfo* seiSampleAspectRatioInfo) // copies the sample aspect ratio SEI settings from the encoder configuration
 {
-  CHECK(!(m_isInitialized), "Unspecified error");
-  CHECK(!(sopDescriptionSEI != NULL), "Unspecified error");
-  CHECK(!(slice != NULL), "Unspecified error");
-
-  int sopCurrPOC = slice->getPOC();
-  sopDescriptionSEI->m_sopSeqParameterSetId = slice->getSPS()->getSPSId();
+  CHECK(!(m_isInitialized), "SEIEncoder must be initialized before setting up seiSampleAspectRatioInfo");
+  CHECK(!(seiSampleAspectRatioInfo != nullptr), "Need a seiSampleAspectRatioInfo for initialization (got nullptr)");
 
-  int i = 0;
-  int prevEntryId = picInGOP;
-  for (int j = picInGOP; j < currGOPSize; j++)
+  seiSampleAspectRatioInfo->m_sariCancelFlag = m_pcCfg->getSariCancelFlag();
+  if (!seiSampleAspectRatioInfo->m_sariCancelFlag) // remaining fields are only filled when the SEI is not a cancel message
   {
-    int deltaPOC = m_pcCfg->getGOPEntry(j).m_POC - m_pcCfg->getGOPEntry(prevEntryId).m_POC;
-    if ((sopCurrPOC + deltaPOC) < m_pcCfg->getFramesToBeEncoded())
+    seiSampleAspectRatioInfo->m_sariPersistenceFlag   = m_pcCfg->getSariPersistenceFlag();
+    seiSampleAspectRatioInfo->m_sariAspectRatioIdc    = m_pcCfg->getSariAspectRatioIdc();
+    if (seiSampleAspectRatioInfo->m_sariAspectRatioIdc == 255) // idc 255: explicit width/height are taken from the configuration
+    {
+      seiSampleAspectRatioInfo->m_sariSarWidth   = m_pcCfg->getSariSarWidth();
+      seiSampleAspectRatioInfo->m_sariSarHeight  = m_pcCfg->getSariSarHeight();
+    }
+    else // any other idc: explicit width/height are zeroed
     {
-      sopCurrPOC += deltaPOC;
-      sopDescriptionSEI->m_sopDescVclNaluType[i] = m_pcEncGOP->getNalUnitType(sopCurrPOC, lastIdr, slice->getPic()->fieldPic);
-      sopDescriptionSEI->m_sopDescTemporalId[i] = m_pcCfg->getGOPEntry(j).m_temporalId;
-      sopDescriptionSEI->m_sopDescStRpsIdx[i] = m_pcEncLib->getReferencePictureSetIdxForSOP(sopCurrPOC, j);
-      sopDescriptionSEI->m_sopDescPocDelta[i] = deltaPOC;
-
-      prevEntryId = j;
-      i++;
+      seiSampleAspectRatioInfo->m_sariSarWidth   = 0;
+      seiSampleAspectRatioInfo->m_sariSarHeight  = 0;
     }
   }
-
-  sopDescriptionSEI->m_numPicsInSopMinus1 = i - 1;
 }
 
-void SEIEncoder::initSEIBufferingPeriod(SEIBufferingPeriod *bufferingPeriodSEI, Slice *slice)
-{
-  CHECK(!(m_isInitialized), "Unspecified error");
-  CHECK(!(bufferingPeriodSEI != NULL), "Unspecified error");
-  CHECK(!(slice != NULL), "Unspecified error");
-
-  uint32_t uiInitialCpbRemovalDelay = (90000/2);                      // 0.5 sec
-  bufferingPeriodSEI->m_initialCpbRemovalDelay      [0][0]     = uiInitialCpbRemovalDelay;
-  bufferingPeriodSEI->m_initialCpbRemovalDelayOffset[0][0]     = uiInitialCpbRemovalDelay;
-  bufferingPeriodSEI->m_initialCpbRemovalDelay      [0][1]     = uiInitialCpbRemovalDelay;
-  bufferingPeriodSEI->m_initialCpbRemovalDelayOffset[0][1]     = uiInitialCpbRemovalDelay;
-
-  double dTmp = (double)slice->getSPS()->getVuiParameters()->getTimingInfo()->getNumUnitsInTick() / (double)slice->getSPS()->getVuiParameters()->getTimingInfo()->getTimeScale();
-
-  uint32_t uiTmp = (uint32_t)( dTmp * 90000.0 );
-  uiInitialCpbRemovalDelay -= uiTmp;
-  uiInitialCpbRemovalDelay -= uiTmp / ( slice->getSPS()->getVuiParameters()->getHrdParameters()->getTickDivisorMinus2() + 2 );
-  bufferingPeriodSEI->m_initialAltCpbRemovalDelay      [0][0]  = uiInitialCpbRemovalDelay;
-  bufferingPeriodSEI->m_initialAltCpbRemovalDelayOffset[0][0]  = uiInitialCpbRemovalDelay;
-  bufferingPeriodSEI->m_initialAltCpbRemovalDelay      [0][1]  = uiInitialCpbRemovalDelay;
-  bufferingPeriodSEI->m_initialAltCpbRemovalDelayOffset[0][1]  = uiInitialCpbRemovalDelay;
-
-  bufferingPeriodSEI->m_rapCpbParamsPresentFlag = 0;
-  //for the concatenation, it can be set to one during splicing.
-  bufferingPeriodSEI->m_concatenationFlag = 0;
-  //since the temporal layer HRD is not ready, we assumed it is fixed
-  bufferingPeriodSEI->m_auCpbRemovalDelayDelta = 1;
-  bufferingPeriodSEI->m_cpbDelayOffset = 0;
-  bufferingPeriodSEI->m_dpbDelayOffset = 0;
-}
 
+#if HEVC_SEI
 //! initialize scalable nesting SEI message.
 //! Note: The SEI message structures input into this function will become part of the scalable nesting SEI and will be
 //!       automatically freed, when the nesting SEI is disposed.
@@ -298,7 +405,7 @@ void SEIEncoder::initSEIRecoveryPoint(SEIRecoveryPoint *recoveryPointSEI, Slice
   recoveryPointSEI->m_exactMatchingFlag = ( slice->getPOC() == 0 ) ? (true) : (false);
   recoveryPointSEI->m_brokenLinkFlag    = false;
 }
-
+#endif
 
 //! calculate hashes for entire reconstructed picture
 void SEIEncoder::initDecodedPictureHashSEI(SEIDecodedPictureHash *decodedPictureHashSEI, PelUnitBuf& pic, std::string &rHashString, const BitDepths &bitDepths)
@@ -331,6 +438,13 @@ void SEIEncoder::initDecodedPictureHashSEI(SEIDecodedPictureHash *decodedPicture
   }
 }
 
+void SEIEncoder::initSEIDependentRAPIndication(SEIDependentRAPIndication *seiDependentRAPIndication) // only validates its preconditions; no SEI fields are written here
+{
+  CHECK(!(m_isInitialized), "SEIEncoder must be initialized before setting up seiDependentRAPIndication");
+  CHECK(!(seiDependentRAPIndication != nullptr), "Need a seiDependentRAPIndication for initialization (got nullptr)");
+}
+
+#if HEVC_SEI
 void SEIEncoder::initTemporalLevel0IndexSEI(SEITemporalLevel0Index *temporalLevel0IndexSEI, Slice *slice)
 {
   CHECK(!(m_isInitialized), "Unspecified error");
@@ -350,14 +464,13 @@ void SEIEncoder::initTemporalLevel0IndexSEI(SEITemporalLevel0Index *temporalLeve
   temporalLevel0IndexSEI->rapIdx = m_rapIdx;
 }
 
-#if HEVC_TILES_WPP
 void SEIEncoder::initSEITempMotionConstrainedTileSets (SEITempMotionConstrainedTileSets *sei, const PPS *pps)
 {
   CHECK(!(m_isInitialized), "Unspecified error");
   CHECK(!(sei!=NULL), "Unspecified error");
   CHECK(!(pps!=NULL), "Unspecified error");
 
-  if(pps->getTilesEnabledFlag())
+  if(!pps->getSingleTileInPicFlag())
   {
     if (m_pcCfg->getMCTSEncConstraint())
     {
@@ -395,7 +508,6 @@ void SEIEncoder::initSEITempMotionConstrainedTileSets (SEITempMotionConstrainedT
     CHECK(!(!"Tile is not enabled"), "Unspecified error");
   }
 }
-#endif
 
 void SEIEncoder::initSEIKneeFunctionInfo(SEIKneeFunctionInfo *seiKneeFunctionInfo)
 {
@@ -427,6 +539,7 @@ void SEIEncoder::initSEIKneeFunctionInfo(SEIKneeFunctionInfo *seiKneeFunctionInf
     }
   }
 }
+#endif
 
 template <typename T>
 static void readTokenValue(T            &returnedValue, /// value returned
@@ -496,204 +609,117 @@ static void readTokenValueAndValidate(T            &returnedValue, /// value ret
   }
 }
 
-// bool version does not have maximum and minimum values.
-static void readTokenValueAndValidate(bool         &returnedValue, /// value returned
-                                      bool         &failed,        /// used and updated
-                                      std::istream &is,            /// stream to read token from
-                                      const char  *pToken)        /// token string
+#if U0033_ALTERNATIVE_TRANSFER_CHARACTERISTICS_SEI
+void SEIEncoder::initSEIAlternativeTransferCharacteristics(SEIAlternativeTransferCharacteristics *seiAltTransCharacteristics)
 {
-  readTokenValue(returnedValue, failed, is, pToken);
+  CHECK(!(m_isInitialized), "Unspecified error");
+  CHECK(!(seiAltTransCharacteristics!=NULL), "Unspecified error");
+  //  Set SEI message parameters read from command line options
+  seiAltTransCharacteristics->m_preferredTransferCharacteristics = m_pcCfg->getSEIPreferredTransferCharacteristics();
 }
-
-bool SEIEncoder::initSEIColourRemappingInfo(SEIColourRemappingInfo* seiColourRemappingInfo, int currPOC) // returns true on success, false on failure.
+#endif
+void SEIEncoder::initSEIFilmGrainCharacteristics(SEIFilmGrainCharacteristics *seiFilmGrain)
 {
   CHECK(!(m_isInitialized), "Unspecified error");
-  CHECK(!(seiColourRemappingInfo!=NULL), "Unspecified error");
-
-  // reading external Colour Remapping Information SEI message parameters from file
-  if( !m_pcCfg->getColourRemapInfoSEIFileRoot().empty())
+  CHECK(!(seiFilmGrain != NULL), "Unspecified error");
+  //  Set SEI message parameters read from command line options
+  seiFilmGrain->m_filmGrainCharacteristicsCancelFlag      = m_pcCfg->getFilmGrainCharactersticsSEICancelFlag();
+  seiFilmGrain->m_filmGrainCharacteristicsPersistenceFlag = m_pcCfg->getFilmGrainCharactersticsSEIPersistenceFlag();
+  seiFilmGrain->m_filmGrainModelId                        = m_pcCfg->getFilmGrainCharactersticsSEIModelID();
+  seiFilmGrain->m_separateColourDescriptionPresentFlag    = m_pcCfg->getFilmGrainCharactersticsSEISepColourDescPresent();
+  seiFilmGrain->m_blendingModeId                          = m_pcCfg->getFilmGrainCharactersticsSEIBlendingModeID();
+  seiFilmGrain->m_log2ScaleFactor                         = m_pcCfg->getFilmGrainCharactersticsSEILog2ScaleFactor();
+  for (int i = 0; i < MAX_NUM_COMPONENT; i++)
   {
-    bool failed=false;
-
-    // building the CRI file name with poc num in prefix "_poc.txt"
-    std::string colourRemapSEIFileWithPoc(m_pcCfg->getColourRemapInfoSEIFileRoot());
-    {
-      std::stringstream suffix;
-      suffix << "_" << currPOC << ".txt";
-      colourRemapSEIFileWithPoc+=suffix.str();
-    }
-
-    std::ifstream fic(colourRemapSEIFileWithPoc.c_str());
-    if (!fic.good() || !fic.is_open())
-    {
-      std::cerr <<  "No Colour Remapping Information SEI parameters file " << colourRemapSEIFileWithPoc << " for POC " << currPOC << std::endl;
-      return false;
-    }
-
-    // TODO: identify and remove duplication with decoder parsing through abstraction.
-
-    readTokenValueAndValidate(seiColourRemappingInfo->m_colourRemapId,         failed, fic, "colour_remap_id",        uint32_t(0), uint32_t(0x7fffffff) );
-    readTokenValueAndValidate(seiColourRemappingInfo->m_colourRemapCancelFlag, failed, fic, "colour_remap_cancel_flag" );
-    if( !seiColourRemappingInfo->m_colourRemapCancelFlag )
-    {
-      readTokenValueAndValidate(seiColourRemappingInfo->m_colourRemapPersistenceFlag,            failed, fic, "colour_remap_persistence_flag" );
-      readTokenValueAndValidate(seiColourRemappingInfo->m_colourRemapVideoSignalInfoPresentFlag, failed, fic, "colour_remap_video_signal_info_present_flag");
-      if( seiColourRemappingInfo->m_colourRemapVideoSignalInfoPresentFlag )
-      {
-        readTokenValueAndValidate(seiColourRemappingInfo->m_colourRemapFullRangeFlag,      failed, fic, "colour_remap_full_range_flag" );
-        readTokenValueAndValidate(seiColourRemappingInfo->m_colourRemapPrimaries,          failed, fic, "colour_remap_primaries",           int(0), int(255) );
-        readTokenValueAndValidate(seiColourRemappingInfo->m_colourRemapTransferFunction,   failed, fic, "colour_remap_transfer_function",   int(0), int(255) );
-        readTokenValueAndValidate(seiColourRemappingInfo->m_colourRemapMatrixCoefficients, failed, fic, "colour_remap_matrix_coefficients", int(0), int(255) );
-      }
-      readTokenValueAndValidate(seiColourRemappingInfo->m_colourRemapInputBitDepth, failed, fic, "colour_remap_input_bit_depth",            int(8), int(16) );
-      readTokenValueAndValidate(seiColourRemappingInfo->m_colourRemapBitDepth,      failed, fic, "colour_remap_bit_depth",                  int(8), int(16) );
-
-      const int maximumInputValue    = (1 << (((seiColourRemappingInfo->m_colourRemapInputBitDepth + 7) >> 3) << 3)) - 1;
-      const int maximumRemappedValue = (1 << (((seiColourRemappingInfo->m_colourRemapBitDepth      + 7) >> 3) << 3)) - 1;
-
-      for( int c=0 ; c<3 ; c++ )
-      {
-        readTokenValueAndValidate(seiColourRemappingInfo->m_preLutNumValMinus1[c],         failed, fic, "pre_lut_num_val_minus1[c]",        int(0), int(32) );
-        if( seiColourRemappingInfo->m_preLutNumValMinus1[c]>0 )
-        {
-          seiColourRemappingInfo->m_preLut[c].resize(seiColourRemappingInfo->m_preLutNumValMinus1[c]+1);
-          for( int i=0 ; i<=seiColourRemappingInfo->m_preLutNumValMinus1[c] ; i++ )
-          {
-            readTokenValueAndValidate(seiColourRemappingInfo->m_preLut[c][i].codedValue,   failed, fic, "pre_lut_coded_value[c][i]",  int(0), maximumInputValue    );
-            readTokenValueAndValidate(seiColourRemappingInfo->m_preLut[c][i].targetValue,  failed, fic, "pre_lut_target_value[c][i]", int(0), maximumRemappedValue );
-          }
-        }
-      }
-      readTokenValueAndValidate(seiColourRemappingInfo->m_colourRemapMatrixPresentFlag, failed, fic, "colour_remap_matrix_present_flag" );
-      if( seiColourRemappingInfo->m_colourRemapMatrixPresentFlag )
-      {
-        readTokenValueAndValidate(seiColourRemappingInfo->m_log2MatrixDenom, failed, fic, "log2_matrix_denom", int(0), int(15) );
-        for( int c=0 ; c<3 ; c++ )
-        {
-          for( int i=0 ; i<3 ; i++ )
-          {
-            readTokenValueAndValidate(seiColourRemappingInfo->m_colourRemapCoeffs[c][i], failed, fic, "colour_remap_coeffs[c][i]", -32768, 32767 );
-          }
-        }
-      }
-      for( int c=0 ; c<3 ; c++ )
-      {
-        readTokenValueAndValidate(seiColourRemappingInfo->m_postLutNumValMinus1[c], failed, fic, "post_lut_num_val_minus1[c]", int(0), int(32) );
-        if( seiColourRemappingInfo->m_postLutNumValMinus1[c]>0 )
-        {
-          seiColourRemappingInfo->m_postLut[c].resize(seiColourRemappingInfo->m_postLutNumValMinus1[c]+1);
-          for( int i=0 ; i<=seiColourRemappingInfo->m_postLutNumValMinus1[c] ; i++ )
-          {
-            readTokenValueAndValidate(seiColourRemappingInfo->m_postLut[c][i].codedValue,  failed, fic, "post_lut_coded_value[c][i]",  int(0), maximumRemappedValue );
-            readTokenValueAndValidate(seiColourRemappingInfo->m_postLut[c][i].targetValue, failed, fic, "post_lut_target_value[c][i]", int(0), maximumRemappedValue );
-          }
-        }
-      }
-    }
+    seiFilmGrain->m_compModel[i].presentFlag = m_pcCfg->getFGCSEICompModelPresent(i);
+  }
+}
 
-    if( failed )
+void SEIEncoder::initSEIMasteringDisplayColourVolume(SEIMasteringDisplayColourVolume *seiMDCV)
+{
+  CHECK(!(m_isInitialized), "Unspecified error");
+  CHECK(!(seiMDCV != NULL), "Unspecified error");
+  //  Set SEI message parameters read from command line options
+  for (int j = 0; j <= 1; j++)
+  {
+    for (int i = 0; i <= 2; i++)
     {
-      EXIT( "Error while reading Colour Remapping Information SEI parameters file '" << colourRemapSEIFileWithPoc << "'" );
+       seiMDCV->values.primaries[i][j] = m_pcCfg->getMasteringDisplaySEI().primaries[i][j];
     }
+    seiMDCV->values.whitePoint[j] = m_pcCfg->getMasteringDisplaySEI().whitePoint[j];
   }
-  return true;
+  seiMDCV->values.maxLuminance = m_pcCfg->getMasteringDisplaySEI().maxLuminance;
+  seiMDCV->values.minLuminance = m_pcCfg->getMasteringDisplaySEI().minLuminance;
 }
 
-void SEIEncoder::initSEIChromaResamplingFilterHint(SEIChromaResamplingFilterHint *seiChromaResamplingFilterHint, int iHorFilterIndex, int iVerFilterIndex)
+void SEIEncoder::initSEIContentLightLevel(SEIContentLightLevelInfo *seiCLL)
 {
   CHECK(!(m_isInitialized), "Unspecified error");
-  CHECK(!(seiChromaResamplingFilterHint!=NULL), "Unspecified error");
+  CHECK(!(seiCLL != NULL), "Unspecified error");
+  //  Set SEI message parameters read from command line options
+  seiCLL->m_maxContentLightLevel    = m_pcCfg->getCLLSEIMaxContentLightLevel();
+  seiCLL->m_maxPicAverageLightLevel = m_pcCfg->getCLLSEIMaxPicAvgLightLevel();
+}
 
-  seiChromaResamplingFilterHint->m_verChromaFilterIdc = iVerFilterIndex;
-  seiChromaResamplingFilterHint->m_horChromaFilterIdc = iHorFilterIndex;
-  seiChromaResamplingFilterHint->m_verFilteringFieldProcessingFlag = 1;
-  seiChromaResamplingFilterHint->m_targetFormatIdc = 3;
-  seiChromaResamplingFilterHint->m_perfectReconstructionFlag = false;
+void SEIEncoder::initSEIAmbientViewingEnvironment(SEIAmbientViewingEnvironment *seiAmbViewEnvironment)
+{
+  CHECK(!(m_isInitialized), "Unspecified error");
+  CHECK(!(seiAmbViewEnvironment != NULL), "Unspecified error");
+  //  Set SEI message parameters read from command line options
+  seiAmbViewEnvironment->m_ambientIlluminance = m_pcCfg->getAmbientViewingEnvironmentSEIIlluminance();
+  seiAmbViewEnvironment->m_ambientLightX      = m_pcCfg->getAmbientViewingEnvironmentSEIAmbientLightX();
+  seiAmbViewEnvironment->m_ambientLightY      = m_pcCfg->getAmbientViewingEnvironmentSEIAmbientLightY();
+}
 
-  // this creates some example filter values, if explicit filter definition is selected
-  if (seiChromaResamplingFilterHint->m_verChromaFilterIdc == 1)
+void SEIEncoder::initSEIContentColourVolume(SEIContentColourVolume *seiContentColourVolume)
+{
+  CHECK(!(m_isInitialized), "Unspecified error");
+  CHECK(!(seiContentColourVolume != NULL), "Unspecified error");
+  seiContentColourVolume->m_ccvCancelFlag = m_pcCfg->getCcvSEICancelFlag();
+  seiContentColourVolume->m_ccvPersistenceFlag = m_pcCfg->getCcvSEIPersistenceFlag();
+
+  seiContentColourVolume->m_ccvPrimariesPresentFlag = m_pcCfg->getCcvSEIPrimariesPresentFlag();
+  seiContentColourVolume->m_ccvMinLuminanceValuePresentFlag = m_pcCfg->getCcvSEIMinLuminanceValuePresentFlag();
+  seiContentColourVolume->m_ccvMaxLuminanceValuePresentFlag = m_pcCfg->getCcvSEIMaxLuminanceValuePresentFlag();
+  seiContentColourVolume->m_ccvAvgLuminanceValuePresentFlag = m_pcCfg->getCcvSEIAvgLuminanceValuePresentFlag();
+
+  // Currently we are using a floor operation for setting up the "integer" values for this SEI.
+  // This applies to both primaries and luminance limits.
+  if (seiContentColourVolume->m_ccvPrimariesPresentFlag == true)
   {
-    const int numVerticalFilters = 3;
-    const int verTapLengthMinus1[] = {5,3,3};
-
-    seiChromaResamplingFilterHint->m_verFilterCoeff.resize(numVerticalFilters);
-    for(int i = 0; i < numVerticalFilters; i ++)
+    for (int i = 0; i < MAX_NUM_COMPONENT; i++)
     {
-      seiChromaResamplingFilterHint->m_verFilterCoeff[i].resize(verTapLengthMinus1[i]+1);
+      seiContentColourVolume->m_ccvPrimariesX[i] = (int32_t)(50000.0 * m_pcCfg->getCcvSEIPrimariesX(i));
+      seiContentColourVolume->m_ccvPrimariesY[i] = (int32_t)(50000.0 * m_pcCfg->getCcvSEIPrimariesY(i));
     }
-    // Note: C++11 -> seiChromaResamplingFilterHint->m_verFilterCoeff[0] = {-3,13,31,23,3,-3};
-    seiChromaResamplingFilterHint->m_verFilterCoeff[0][0] = -3;
-    seiChromaResamplingFilterHint->m_verFilterCoeff[0][1] = 13;
-    seiChromaResamplingFilterHint->m_verFilterCoeff[0][2] = 31;
-    seiChromaResamplingFilterHint->m_verFilterCoeff[0][3] = 23;
-    seiChromaResamplingFilterHint->m_verFilterCoeff[0][4] = 3;
-    seiChromaResamplingFilterHint->m_verFilterCoeff[0][5] = -3;
-
-    seiChromaResamplingFilterHint->m_verFilterCoeff[1][0] = -1;
-    seiChromaResamplingFilterHint->m_verFilterCoeff[1][1] = 25;
-    seiChromaResamplingFilterHint->m_verFilterCoeff[1][2] = 247;
-    seiChromaResamplingFilterHint->m_verFilterCoeff[1][3] = -15;
-
-    seiChromaResamplingFilterHint->m_verFilterCoeff[2][0] = -20;
-    seiChromaResamplingFilterHint->m_verFilterCoeff[2][1] = 186;
-    seiChromaResamplingFilterHint->m_verFilterCoeff[2][2] = 100;
-    seiChromaResamplingFilterHint->m_verFilterCoeff[2][3] = -10;
-  }
-  else
-  {
-    seiChromaResamplingFilterHint->m_verFilterCoeff.resize(0);
   }
 
-  if (seiChromaResamplingFilterHint->m_horChromaFilterIdc == 1)
+  if (seiContentColourVolume->m_ccvMinLuminanceValuePresentFlag == true)
   {
-    int const numHorizontalFilters = 1;
-    const int horTapLengthMinus1[] = {3};
-
-    seiChromaResamplingFilterHint->m_horFilterCoeff.resize(numHorizontalFilters);
-    for(int i = 0; i < numHorizontalFilters; i ++)
-    {
-      seiChromaResamplingFilterHint->m_horFilterCoeff[i].resize(horTapLengthMinus1[i]+1);
-    }
-    seiChromaResamplingFilterHint->m_horFilterCoeff[0][0] = 1;
-    seiChromaResamplingFilterHint->m_horFilterCoeff[0][1] = 6;
-    seiChromaResamplingFilterHint->m_horFilterCoeff[0][2] = 1;
+    seiContentColourVolume->m_ccvMinLuminanceValue = (uint32_t)(10000000 * m_pcCfg->getCcvSEIMinLuminanceValue());
   }
-  else
+  if (seiContentColourVolume->m_ccvMaxLuminanceValuePresentFlag == true)
   {
-    seiChromaResamplingFilterHint->m_horFilterCoeff.resize(0);
+    seiContentColourVolume->m_ccvMaxLuminanceValue = (uint32_t)(10000000 * m_pcCfg->getCcvSEIMaxLuminanceValue());
   }
-}
-
-void SEIEncoder::initSEITimeCode(SEITimeCode *seiTimeCode)
-{
-  CHECK(!(m_isInitialized), "Unspecified error");
-  CHECK(!(seiTimeCode!=NULL), "Unspecified error");
-  //  Set data as per command line options
-  seiTimeCode->numClockTs = m_pcCfg->getNumberOfTimesets();
-  for(int i = 0; i < seiTimeCode->numClockTs; i++)
+  if (seiContentColourVolume->m_ccvAvgLuminanceValuePresentFlag == true)
   {
-    seiTimeCode->timeSetArray[i] = m_pcCfg->getTimeSet(i);
+    seiContentColourVolume->m_ccvAvgLuminanceValue = (uint32_t)(10000000 * m_pcCfg->getCcvSEIAvgLuminanceValue());
   }
 }
-
-#if U0033_ALTERNATIVE_TRANSFER_CHARACTERISTICS_SEI
-void SEIEncoder::initSEIAlternativeTransferCharacteristics(SEIAlternativeTransferCharacteristics *seiAltTransCharacteristics)
-{
-  CHECK(!(m_isInitialized), "Unspecified error");
-  CHECK(!(seiAltTransCharacteristics!=NULL), "Unspecified error");
-  //  Set SEI message parameters read from command line options
-  seiAltTransCharacteristics->m_preferredTransferCharacteristics = m_pcCfg->getSEIPreferredTransferCharacteristics();
-}
-#endif
-
-void SEIEncoder::initSEIGreenMetadataInfo(SEIGreenMetadataInfo *seiGreenMetadataInfo, uint32_t u)
+void SEIEncoder::initSEISubpictureLevelInfo(SEISubpicureLevelInfo *sei, const SPS *sps)
 {
-    CHECK(!(m_isInitialized), "Unspecified error");
-    CHECK(!(seiGreenMetadataInfo!=NULL), "Unspecified error");
-
-    seiGreenMetadataInfo->m_greenMetadataType = m_pcCfg->getSEIGreenMetadataType();
-    seiGreenMetadataInfo->m_xsdMetricType = m_pcCfg->getSEIXSDMetricType();
-    seiGreenMetadataInfo->m_xsdMetricValue = u;
+  // subpicture level information should be specified via config file
+  // unfortunately the implementation of subpictures is still not available
+  // TODO: implement config file parameters and initialization
+  fprintf(stderr, "SEISubpicureLevelInfo depends on subpictures! Initializing to dummy values!\n");
+
+  sei->m_sliSeqParameterSetId = sps->getSPSId();
+  sei->m_numRefLevels = 2;
+  sei->m_refLevelIdc.resize(2);
+  sei->m_refLevelIdc[0] = Level::LEVEL4;
+  sei->m_refLevelIdc[1] = Level::LEVEL8_5;
+  sei->m_explicitFractionPresentFlag = false;
 }
 
 
diff --git a/source/Lib/EncoderLib/SEIEncoder.h b/source/Lib/EncoderLib/SEIEncoder.h
index b4058a56855213ff2ada2e3a9f26e02989420566..dcb9730f8aa1daea730d111b226447024a44a807 100644
--- a/source/Lib/EncoderLib/SEIEncoder.h
+++ b/source/Lib/EncoderLib/SEIEncoder.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -52,9 +52,11 @@ public:
     :m_pcCfg(NULL)
     ,m_pcEncLib(NULL)
     ,m_pcEncGOP(NULL)
+#if HEVC_SEI
     ,m_tl0Idx(0)
     ,m_rapIdx(0)
-    ,m_isInitialized(false)
+#endif
+  ,m_isInitialized(false)
   {};
   virtual ~SEIEncoder(){};
 
@@ -67,44 +69,40 @@ public:
   };
 
   // leading SEIs
-#if HEVC_VPS
-  void initSEIActiveParameterSets (SEIActiveParameterSets *sei, const VPS *vps, const SPS *sps);
-#else
-  void initSEIActiveParameterSets (SEIActiveParameterSets *sei, const SPS *sps);
-#endif
   void initSEIFramePacking(SEIFramePacking *sei, int currPicNum);
-  void initSEIDisplayOrientation(SEIDisplayOrientation *sei);
-  void initSEIToneMappingInfo(SEIToneMappingInfo *sei);
-  void initSEISOPDescription(SEISOPDescription *sei, Slice *slice, int picInGOP, int lastIdr, int currGOPSize);
-  void initSEIBufferingPeriod(SEIBufferingPeriod *sei, Slice *slice);
-  void initSEIScalableNesting(SEIScalableNesting *sei, SEIMessages &nestedSEIs);
-  void initSEIRecoveryPoint(SEIRecoveryPoint *sei, Slice *slice);
-  void initSEISegmentedRectFramePacking(SEISegmentedRectFramePacking *sei);
-#if HEVC_TILES_WPP
-  void initSEITempMotionConstrainedTileSets (SEITempMotionConstrainedTileSets *sei, const PPS *pps);
-#endif
-  void initSEIKneeFunctionInfo(SEIKneeFunctionInfo *sei);
-  void initSEIChromaResamplingFilterHint(SEIChromaResamplingFilterHint *sei, int iHorFilterIndex, int iVerFilterIndex);
-  void initSEITimeCode(SEITimeCode *sei);
-  bool initSEIColourRemappingInfo(SEIColourRemappingInfo *sei, int currPOC); // returns true on success, false on failure.
+  void initSEIDependentRAPIndication(SEIDependentRAPIndication *sei);
+  void initSEIBufferingPeriod(SEIBufferingPeriod *sei, bool noLeadingPictures);
 #if U0033_ALTERNATIVE_TRANSFER_CHARACTERISTICS_SEI
   void initSEIAlternativeTransferCharacteristics(SEIAlternativeTransferCharacteristics *sei);
 #endif
-
   // trailing SEIs
   void initDecodedPictureHashSEI(SEIDecodedPictureHash *sei, PelUnitBuf& pic, std::string &rHashString, const BitDepths &bitDepths);
+#if HEVC_SEI
   void initTemporalLevel0IndexSEI(SEITemporalLevel0Index *sei, Slice *slice);
   void initSEIGreenMetadataInfo(SEIGreenMetadataInfo *sei, uint32_t u);
-
+#endif
+  void initSEIErp(SEIEquirectangularProjection *sei);
+  void initSEISphereRotation(SEISphereRotation *sei);
+  void initSEIOmniViewport(SEIOmniViewport *sei);
+  void initSEIRegionWisePacking(SEIRegionWisePacking *sei);
+  void initSEIGcmp(SEIGeneralizedCubemapProjection *sei);
+  void initSEISubpictureLevelInfo(SEISubpicureLevelInfo *sei, const SPS *sps);
+  void initSEISampleAspectRatioInfo(SEISampleAspectRatioInfo *sei);
+  void initSEIFilmGrainCharacteristics(SEIFilmGrainCharacteristics *sei);
+  void initSEIMasteringDisplayColourVolume(SEIMasteringDisplayColourVolume *sei);
+  void initSEIContentLightLevel(SEIContentLightLevelInfo *sei);
+  void initSEIAmbientViewingEnvironment(SEIAmbientViewingEnvironment *sei);
+  void initSEIContentColourVolume(SEIContentColourVolume *sei);
 private:
   EncCfg* m_pcCfg;
   EncLib* m_pcEncLib;
   EncGOP* m_pcEncGOP;
 
+#if HEVC_SEI
   // for temporal level 0 index SEI
   uint32_t m_tl0Idx;
   uint32_t m_rapIdx;
-
+#endif
   bool m_isInitialized;
 };
 
diff --git a/source/Lib/EncoderLib/SEIwrite.cpp b/source/Lib/EncoderLib/SEIwrite.cpp
index f6d7c78adde76064015434952a8fa10074876164..67f80ebcd343b1ccc0213628b09f410f54269666 100644
--- a/source/Lib/EncoderLib/SEIwrite.cpp
+++ b/source/Lib/EncoderLib/SEIwrite.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -41,85 +41,85 @@
 //! \ingroup EncoderLib
 //! \{
 
-void SEIWriter::xWriteSEIpayloadData(OutputBitstream& bs, const SEI& sei, const SPS *sps)
+void SEIWriter::xWriteSEIpayloadData(OutputBitstream& bs, const SEI& sei, const SPS *sps, HRD &hrd, const uint32_t temporalId)
 {
+  const SEIBufferingPeriod *bp = NULL;
   switch (sei.payloadType())
   {
   case SEI::USER_DATA_UNREGISTERED:
     xWriteSEIuserDataUnregistered(*static_cast<const SEIuserDataUnregistered*>(&sei));
     break;
-  case SEI::ACTIVE_PARAMETER_SETS:
-    xWriteSEIActiveParameterSets(*static_cast<const SEIActiveParameterSets*>(& sei));
-    break;
   case SEI::DECODING_UNIT_INFO:
-    xWriteSEIDecodingUnitInfo(*static_cast<const SEIDecodingUnitInfo*>(& sei), sps);
+    bp = hrd.getBufferingPeriodSEI();
+    CHECK (bp == nullptr, "Buffering Period need to be initialized in HRD to allow writing of Decoding Unit Information SEI");
+    xWriteSEIDecodingUnitInfo(*static_cast<const SEIDecodingUnitInfo*>(& sei), *bp, temporalId);
     break;
   case SEI::DECODED_PICTURE_HASH:
     xWriteSEIDecodedPictureHash(*static_cast<const SEIDecodedPictureHash*>(&sei));
     break;
   case SEI::BUFFERING_PERIOD:
-    xWriteSEIBufferingPeriod(*static_cast<const SEIBufferingPeriod*>(&sei), sps);
+    xWriteSEIBufferingPeriod(*static_cast<const SEIBufferingPeriod*>(&sei));
+    hrd.setBufferingPeriodSEI(static_cast<const SEIBufferingPeriod*>(&sei));
     break;
   case SEI::PICTURE_TIMING:
-    xWriteSEIPictureTiming(*static_cast<const SEIPictureTiming*>(&sei), sps);
+    {
+      bp = hrd.getBufferingPeriodSEI();
+      CHECK (bp == nullptr, "Buffering Period need to be initialized in HRD to allow writing of Picture Timing SEI");
+      xWriteSEIPictureTiming(*static_cast<const SEIPictureTiming*>(&sei), *bp, temporalId);
+    }
+    break;
+  case SEI::FRAME_FIELD_INFO:
+    xWriteSEIFrameFieldInfo(*static_cast<const SEIFrameFieldInfo*>(&sei));
     break;
-  case SEI::RECOVERY_POINT:
-    xWriteSEIRecoveryPoint(*static_cast<const SEIRecoveryPoint*>(&sei));
+  case SEI::DEPENDENT_RAP_INDICATION:
+    xWriteSEIDependentRAPIndication(*static_cast<const SEIDependentRAPIndication*>(&sei));
     break;
   case SEI::FRAME_PACKING:
     xWriteSEIFramePacking(*static_cast<const SEIFramePacking*>(&sei));
     break;
-  case SEI::SEGM_RECT_FRAME_PACKING:
-    xWriteSEISegmentedRectFramePacking(*static_cast<const SEISegmentedRectFramePacking*>(&sei));
-    break;
-  case SEI::DISPLAY_ORIENTATION:
-    xWriteSEIDisplayOrientation(*static_cast<const SEIDisplayOrientation*>(&sei));
-    break;
-  case SEI::TEMPORAL_LEVEL0_INDEX:
-    xWriteSEITemporalLevel0Index(*static_cast<const SEITemporalLevel0Index*>(&sei));
+  case SEI::MASTERING_DISPLAY_COLOUR_VOLUME:
+    xWriteSEIMasteringDisplayColourVolume(*static_cast<const SEIMasteringDisplayColourVolume*>(&sei));
     break;
-  case SEI::REGION_REFRESH_INFO:
-    xWriteSEIGradualDecodingRefreshInfo(*static_cast<const SEIGradualDecodingRefreshInfo*>(&sei));
+#if U0033_ALTERNATIVE_TRANSFER_CHARACTERISTICS_SEI
+  case SEI::ALTERNATIVE_TRANSFER_CHARACTERISTICS:
+    xWriteSEIAlternativeTransferCharacteristics(*static_cast<const SEIAlternativeTransferCharacteristics*>(&sei));
     break;
-  case SEI::NO_DISPLAY:
-    xWriteSEINoDisplay(*static_cast<const SEINoDisplay*>(&sei));
+#endif
+  case SEI::EQUIRECTANGULAR_PROJECTION:
+    xWriteSEIEquirectangularProjection(*static_cast<const SEIEquirectangularProjection*>(&sei));
     break;
-  case SEI::TONE_MAPPING_INFO:
-    xWriteSEIToneMappingInfo(*static_cast<const SEIToneMappingInfo*>(&sei));
+  case SEI::SPHERE_ROTATION:
+    xWriteSEISphereRotation(*static_cast<const SEISphereRotation*>(&sei));
     break;
-  case SEI::SOP_DESCRIPTION:
-    xWriteSEISOPDescription(*static_cast<const SEISOPDescription*>(&sei));
+  case SEI::OMNI_VIEWPORT:
+    xWriteSEIOmniViewport(*static_cast<const SEIOmniViewport*>(&sei));
     break;
-  case SEI::SCALABLE_NESTING:
-    xWriteSEIScalableNesting(bs, *static_cast<const SEIScalableNesting*>(&sei), sps);
+  case SEI::REGION_WISE_PACKING:
+    xWriteSEIRegionWisePacking(*static_cast<const SEIRegionWisePacking*>(&sei));
     break;
-  case SEI::CHROMA_RESAMPLING_FILTER_HINT:
-    xWriteSEIChromaResamplingFilterHint(*static_cast<const SEIChromaResamplingFilterHint*>(&sei));
+  case SEI::GENERALIZED_CUBEMAP_PROJECTION:
+    xWriteSEIGeneralizedCubemapProjection(*static_cast<const SEIGeneralizedCubemapProjection*>(&sei));
     break;
-#if HEVC_TILES_WPP
-  case SEI::TEMP_MOTION_CONSTRAINED_TILE_SETS:
-    xWriteSEITempMotionConstrainedTileSets(*static_cast<const SEITempMotionConstrainedTileSets*>(&sei));
+  case SEI::USER_DATA_REGISTERED_ITU_T_T35:
+    xWriteSEIUserDataRegistered(*static_cast<const SEIUserDataRegistered*>(&sei));
     break;
-#endif
-  case SEI::TIME_CODE:
-    xWriteSEITimeCode(*static_cast<const SEITimeCode*>(&sei));
+  case SEI::FILM_GRAIN_CHARACTERISTICS:
+    xWriteSEIFilmGrainCharacteristics(*static_cast<const SEIFilmGrainCharacteristics*>(&sei));
     break;
-  case SEI::KNEE_FUNCTION_INFO:
-    xWriteSEIKneeFunctionInfo(*static_cast<const SEIKneeFunctionInfo*>(&sei));
+  case SEI::CONTENT_LIGHT_LEVEL_INFO:
+    xWriteSEIContentLightLevelInfo(*static_cast<const SEIContentLightLevelInfo*>(&sei));
     break;
-  case SEI::COLOUR_REMAPPING_INFO:
-    xWriteSEIColourRemappingInfo(*static_cast<const SEIColourRemappingInfo*>(&sei));
+  case SEI::AMBIENT_VIEWING_ENVIRONMENT:
+    xWriteSEIAmbientViewingEnvironment(*static_cast<const SEIAmbientViewingEnvironment*>(&sei));
     break;
-  case SEI::MASTERING_DISPLAY_COLOUR_VOLUME:
-    xWriteSEIMasteringDisplayColourVolume(*static_cast<const SEIMasteringDisplayColourVolume*>(&sei));
+  case SEI::CONTENT_COLOUR_VOLUME:
+    xWriteSEIContentColourVolume(*static_cast<const SEIContentColourVolume*>(&sei));
     break;
-#if U0033_ALTERNATIVE_TRANSFER_CHARACTERISTICS_SEI
-  case SEI::ALTERNATIVE_TRANSFER_CHARACTERISTICS:
-    xWriteSEIAlternativeTransferCharacteristics(*static_cast<const SEIAlternativeTransferCharacteristics*>(&sei));
+  case SEI::SUBPICTURE_LEVEL_INFO:
+    xWriteSEISubpictureLevelInfo(*static_cast<const SEISubpicureLevelInfo*>(&sei), sps);
     break;
-#endif
-  case SEI::GREEN_METADATA:
-      xWriteSEIGreenMetadataInfo(*static_cast<const SEIGreenMetadataInfo*>(&sei));
+  case SEI::SAMPLE_ASPECT_RATIO_INFO:
+    xWriteSEISampleAspectRatioInfo(*static_cast<const SEISampleAspectRatioInfo*>(&sei));
     break;
   default:
     THROW("Trying to write unhandled SEI message");
@@ -131,7 +131,7 @@ void SEIWriter::xWriteSEIpayloadData(OutputBitstream& bs, const SEI& sei, const
 /**
  * marshal all SEI messages in provided list into one bitstream bs
  */
-void SEIWriter::writeSEImessages(OutputBitstream& bs, const SEIMessages &seiList, const SPS *sps, bool isNested)
+void SEIWriter::writeSEImessages(OutputBitstream& bs, const SEIMessages &seiList, const SPS *sps, HRD &hrd, bool isNested, const uint32_t temporalId)
 {
 #if ENABLE_TRACING
   if (g_HLSTraceEnable)
@@ -151,7 +151,7 @@ void SEIWriter::writeSEImessages(OutputBitstream& bs, const SEIMessages &seiList
     bool traceEnable = g_HLSTraceEnable;
     g_HLSTraceEnable = false;
 #endif
-    xWriteSEIpayloadData(bs_count, **sei, sps);
+    xWriteSEIpayloadData(bs_count, **sei, sps, hrd, temporalId);
 #if ENABLE_TRACING
     g_HLSTraceEnable = traceEnable;
 #endif
@@ -179,7 +179,7 @@ void SEIWriter::writeSEImessages(OutputBitstream& bs, const SEIMessages &seiList
       xTraceSEIMessageType((*sei)->payloadType());
 #endif
 
-    xWriteSEIpayloadData(bs, **sei, sps);
+    xWriteSEIpayloadData(bs, **sei, sps, hrd, temporalId);
   }
   if (!isNested)
   {
@@ -229,11 +229,9 @@ void SEIWriter::xWriteSEIDecodedPictureHash(const SEIDecodedPictureHash& sei)
   }
 }
 
+#if HEVC_SEI
 void SEIWriter::xWriteSEIActiveParameterSets(const SEIActiveParameterSets& sei)
 {
-#if HEVC_VPS
-  WRITE_CODE(sei.activeVPSId,     4,         "active_video_parameter_set_id");
-#endif
   WRITE_FLAG(sei.m_selfContainedCvsFlag,     "self_contained_cvs_flag");
   WRITE_FLAG(sei.m_noParameterSetUpdateFlag, "no_parameter_set_update_flag");
   WRITE_UVLC(sei.numSpsIdsMinus1,            "num_sps_ids_minus1");
@@ -242,107 +240,198 @@ void SEIWriter::xWriteSEIActiveParameterSets(const SEIActiveParameterSets& sei)
 
   for (int i = 0; i < sei.activeSeqParameterSetId.size(); i++)
   {
-    WRITE_UVLC(sei.activeSeqParameterSetId[i], "active_seq_parameter_set_id");
+    WRITE_CODE( sei.activeSeqParameterSetId[i], 4, "active_seq_parameter_set_id" );
   }
 }
+#endif
 
-void SEIWriter::xWriteSEIDecodingUnitInfo(const SEIDecodingUnitInfo& sei, const SPS *sps)
+void SEIWriter::xWriteSEIDecodingUnitInfo(const SEIDecodingUnitInfo& sei, const SEIBufferingPeriod& bp, const uint32_t temporalId)
 {
-  const VUI *vui = sps->getVuiParameters();
   WRITE_UVLC(sei.m_decodingUnitIdx, "decoding_unit_idx");
-  if(vui->getHrdParameters()->getSubPicCpbParamsInPicTimingSEIFlag())
+  if( !bp.m_decodingUnitCpbParamsInPicTimingSeiFlag )
   {
-    WRITE_CODE( sei.m_duSptCpbRemovalDelay, (vui->getHrdParameters()->getDuCpbRemovalDelayLengthMinus1() + 1), "du_spt_cpb_removal_delay_increment");
+    for( int i = temporalId; i < bp.m_bpMaxSubLayers - 1; i ++ )
+    {
+      WRITE_FLAG( sei.m_duiSubLayerDelaysPresentFlag[i], "dui_sub_layer_delays_present_flag[i]" );
+      if( sei.m_duiSubLayerDelaysPresentFlag[i] )
+        WRITE_CODE( sei.m_duSptCpbRemovalDelayIncrement[i], bp.getDuCpbRemovalDelayIncrementLength(), "du_spt_cpb_removal_delay_increment[i]");
+    }
   }
   WRITE_FLAG( sei.m_dpbOutputDuDelayPresentFlag, "dpb_output_du_delay_present_flag");
   if(sei.m_dpbOutputDuDelayPresentFlag)
   {
-    WRITE_CODE(sei.m_picSptDpbOutputDuDelay, vui->getHrdParameters()->getDpbOutputDelayDuLengthMinus1() + 1, "pic_spt_dpb_output_du_delay");
+    WRITE_CODE(sei.m_picSptDpbOutputDuDelay, bp.getDpbOutputDelayDuLength(), "pic_spt_dpb_output_du_delay");
   }
 }
 
-void SEIWriter::xWriteSEIBufferingPeriod(const SEIBufferingPeriod& sei, const SPS *sps)
+void SEIWriter::xWriteSEIBufferingPeriod(const SEIBufferingPeriod& sei)
 {
-  int i, nalOrVcl;
-  const VUI *vui = sps->getVuiParameters();
-  const HRD *hrd = vui->getHrdParameters();
+  WRITE_FLAG( sei.m_bpNalCpbParamsPresentFlag, "bp_nal_hrd_parameters_present_flag");
+  WRITE_FLAG( sei.m_bpVclCpbParamsPresentFlag, "bp_vcl_hrd_parameters_present_flag");
+  CHECK(!sei.m_bpNalCpbParamsPresentFlag && !sei.m_bpVclCpbParamsPresentFlag, "bp_nal_hrd_parameters_present_flag and/or bp_vcl_hrd_parameters_present_flag must be true");
+  CHECK (sei.m_initialCpbRemovalDelayLength < 1, "sei.m_initialCpbRemovalDelayLength must be > 0");
+  WRITE_CODE( sei.m_initialCpbRemovalDelayLength - 1, 5, "initial_cpb_removal_delay_length_minus1" );
+  CHECK (sei.m_cpbRemovalDelayLength < 1, "sei.m_cpbRemovalDelayLength must be > 0");
+  WRITE_CODE( sei.m_cpbRemovalDelayLength - 1,        5, "cpb_removal_delay_length_minus1" );
+  CHECK (sei.m_dpbOutputDelayLength < 1, "sei.m_dpbOutputDelayLength must be > 0");
+  WRITE_CODE( sei.m_dpbOutputDelayLength - 1,         5, "dpb_output_delay_length_minus1" );
+  WRITE_FLAG(sei.m_altCpbParamsPresentFlag, "alt_cpb_params_present_flag");
+  WRITE_FLAG( sei.m_bpDecodingUnitHrdParamsPresentFlag, "bp_decoding_unit_hrd_params_present_flag"  );
+  if( sei.m_bpDecodingUnitHrdParamsPresentFlag )
+  {
+    CHECK (sei.m_duCpbRemovalDelayIncrementLength < 1, "sei.m_duCpbRemovalDelayIncrementLength must be > 0");
+    WRITE_CODE( sei.m_duCpbRemovalDelayIncrementLength - 1, 5, "du_cpb_removal_delay_increment_length_minus1" );
+    CHECK (sei.m_dpbOutputDelayDuLength < 1, "sei.m_dpbOutputDelayDuLength must be > 0");
+    WRITE_CODE( sei.m_dpbOutputDelayDuLength - 1, 5, "dpb_output_delay_du_length_minus1" );
+    WRITE_FLAG( sei.m_decodingUnitCpbParamsInPicTimingSeiFlag, "decoding_unit_cpb_params_in_pic_timing_sei_flag" );
+  }
 
-  WRITE_UVLC( sei.m_bpSeqParameterSetId, "bp_seq_parameter_set_id" );
-  if( !hrd->getSubPicCpbParamsPresentFlag() )
+  WRITE_FLAG( sei.m_concatenationFlag, "concatenation_flag");
+  WRITE_FLAG( sei.m_additionalConcatenationInfoPresentFlag, "additional_concatenation_info_present_flag");
+  if (sei.m_additionalConcatenationInfoPresentFlag)
   {
-    WRITE_FLAG( sei.m_rapCpbParamsPresentFlag, "irap_cpb_params_present_flag" );
+    WRITE_CODE( sei.m_maxInitialRemovalDelayForConcatenation, sei.m_initialCpbRemovalDelayLength, "max_initial_removal_delay_for_concatenation" );
   }
-  if( sei.m_rapCpbParamsPresentFlag )
+
+  CHECK (sei.m_auCpbRemovalDelayDelta < 1, "sei.m_auCpbRemovalDelayDelta must be > 0");
+  WRITE_CODE( sei.m_auCpbRemovalDelayDelta - 1, sei.m_cpbRemovalDelayLength, "au_cpb_removal_delay_delta_minus1" );
+
+  WRITE_FLAG( sei.m_cpbRemovalDelayDeltasPresentFlag, "cpb_removal_delay_deltas_present_flag");
+  if (sei.m_cpbRemovalDelayDeltasPresentFlag)
   {
-    WRITE_CODE( sei.m_cpbDelayOffset, hrd->getCpbRemovalDelayLengthMinus1() + 1, "cpb_delay_offset" );
-    WRITE_CODE( sei.m_dpbDelayOffset, hrd->getDpbOutputDelayLengthMinus1()  + 1, "dpb_delay_offset" );
+    CHECK (sei.m_numCpbRemovalDelayDeltas < 1, "m_numCpbRemovalDelayDeltas must be > 0");
+    WRITE_UVLC( sei.m_numCpbRemovalDelayDeltas - 1, "num_cpb_removal_delay_deltas_minus1" );
+    for( int i = 0; i < sei.m_numCpbRemovalDelayDeltas; i ++ )
+    {
+      WRITE_CODE( sei.m_cpbRemovalDelayDelta[i],        sei.m_cpbRemovalDelayLength, "cpb_removal_delay_delta[i]" );
+    }
+    CHECK (sei.m_bpMaxSubLayers < 1, "bp_max_sub_layers_minus1 must be > 0");
+    WRITE_CODE( sei.m_bpMaxSubLayers - 1,        3, "bp_max_sub_layers_minus1" );
   }
-  WRITE_FLAG( sei.m_concatenationFlag, "concatenation_flag");
-  WRITE_CODE( sei.m_auCpbRemovalDelayDelta - 1, ( hrd->getCpbRemovalDelayLengthMinus1() + 1 ), "au_cpb_removal_delay_delta_minus1" );
-  for( nalOrVcl = 0; nalOrVcl < 2; nalOrVcl ++ )
+  CHECK (sei.m_bpCpbCnt < 1, "sei.m_bpCpbCnt must be > 0");
+  WRITE_UVLC( sei.m_bpCpbCnt - 1, "bp_cpb_cnt_minus1");
+  WRITE_FLAG(sei.m_sublayerInitialCpbRemovalDelayPresentFlag, "sublayer_initial_cpb_removal_delay_present_flag");
+  for (int i = (sei.m_sublayerInitialCpbRemovalDelayPresentFlag ? 0 : sei.m_bpMaxSubLayers - 1); i < sei.m_bpMaxSubLayers; i++)
   {
-    if( ( ( nalOrVcl == 0 ) && ( hrd->getNalHrdParametersPresentFlag() ) ) ||
-        ( ( nalOrVcl == 1 ) && ( hrd->getVclHrdParametersPresentFlag() ) ) )
+    for( int nalOrVcl = 0; nalOrVcl < 2; nalOrVcl ++ )
     {
-      for( i = 0; i < ( hrd->getCpbCntMinus1( 0 ) + 1 ); i ++ )
+      if( ( ( nalOrVcl == 0 ) && ( sei.m_bpNalCpbParamsPresentFlag ) ) ||
+         ( ( nalOrVcl == 1 ) && ( sei.m_bpVclCpbParamsPresentFlag ) ) )
       {
-        WRITE_CODE( sei.m_initialCpbRemovalDelay[i][nalOrVcl],( hrd->getInitialCpbRemovalDelayLengthMinus1() + 1 ) ,           "initial_cpb_removal_delay" );
-        WRITE_CODE( sei.m_initialCpbRemovalDelayOffset[i][nalOrVcl],( hrd->getInitialCpbRemovalDelayLengthMinus1() + 1 ),      "initial_cpb_removal_delay_offset" );
-        if( hrd->getSubPicCpbParamsPresentFlag() || sei.m_rapCpbParamsPresentFlag )
+        for( int j = 0; j < sei.m_bpCpbCnt; j ++ )
         {
-          WRITE_CODE( sei.m_initialAltCpbRemovalDelay[i][nalOrVcl], ( hrd->getInitialCpbRemovalDelayLengthMinus1() + 1 ) ,     "initial_alt_cpb_removal_delay" );
-          WRITE_CODE( sei.m_initialAltCpbRemovalDelayOffset[i][nalOrVcl], ( hrd->getInitialCpbRemovalDelayLengthMinus1() + 1 ),"initial_alt_cpb_removal_delay_offset" );
+          WRITE_CODE( sei.m_initialCpbRemovalDelay[j][i][nalOrVcl],  sei.m_initialCpbRemovalDelayLength,           "initial_cpb_removal_delay[j][i][nalOrVcl]" );
+          WRITE_CODE( sei.m_initialCpbRemovalOffset[j][i][nalOrVcl], sei.m_initialCpbRemovalDelayLength,           "initial_cpb_removal_delay_offset[j][i][nalOrVcl]" );
         }
       }
     }
   }
+  if (sei.m_altCpbParamsPresentFlag)
+  {
+    WRITE_FLAG(sei.m_useAltCpbParamsFlag, "use_alt_cpb_params_flag");
+  }
+
 }
-void SEIWriter::xWriteSEIPictureTiming(const SEIPictureTiming& sei, const SPS *sps)
-{
-  int i;
-  const VUI *vui = sps->getVuiParameters();
-  const HRD *hrd = vui->getHrdParameters();
 
-  if( vui->getFrameFieldInfoPresentFlag() )
+void SEIWriter::xWriteSEIPictureTiming(const SEIPictureTiming& sei, const SEIBufferingPeriod &bp, const uint32_t temporalId)
+{
+  
+  WRITE_CODE( sei.m_auCpbRemovalDelay[bp.m_bpMaxSubLayers - 1] - 1, bp.m_cpbRemovalDelayLength,               "cpb_removal_delay_minus1[bp_max_sub_layers_minus1]" );
+  if( bp.m_altCpbParamsPresentFlag ) 
   {
-    WRITE_CODE( sei.m_picStruct, 4,              "pic_struct" );
-    WRITE_CODE( sei.m_sourceScanType, 2,         "source_scan_type" );
-    WRITE_FLAG( sei.m_duplicateFlag ? 1 : 0,     "duplicate_flag" );
+    WRITE_FLAG( sei.m_cpbAltTimingInfoPresentFlag, "cpb_alt_timing_info_present_flag" );
+    if( sei.m_cpbAltTimingInfoPresentFlag ) 
+    {
+      for( int i = 0; i < bp.m_bpCpbCnt; i++ ) 
+      {
+        WRITE_CODE( sei.m_cpbAltInitialCpbRemovalDelayDelta[i], bp.m_initialCpbRemovalDelayLength, "cpb_alt_initial_cpb_removal_delay_delta[ i ]" );
+        WRITE_CODE( sei.m_cpbAltInitialCpbRemovalOffsetDelta[i], bp.m_initialCpbRemovalDelayLength, "cpb_alt_initial_cpb_removal_offset_delta[ i ]" );
+      }
+      WRITE_CODE( sei.m_cpbDelayOffset, bp.m_initialCpbRemovalDelayLength, "cpb_delay_offset" );
+      WRITE_CODE( sei.m_dpbDelayOffset, bp.m_initialCpbRemovalDelayLength, "dpb_delay_offset" );
+    }
   }
-
-  if( hrd->getCpbDpbDelaysPresentFlag() )
+  for( int i = temporalId; i < bp.m_bpMaxSubLayers - 1; i ++ )
   {
-    WRITE_CODE( sei.m_auCpbRemovalDelay - 1, ( hrd->getCpbRemovalDelayLengthMinus1() + 1 ),                                         "au_cpb_removal_delay_minus1" );
-    WRITE_CODE( sei.m_picDpbOutputDelay, ( hrd->getDpbOutputDelayLengthMinus1() + 1 ),                                          "pic_dpb_output_delay" );
-    if(hrd->getSubPicCpbParamsPresentFlag())
+    WRITE_FLAG( sei.m_ptSubLayerDelaysPresentFlag[i], "pt_sub_layer_delays_present_flag[i]" );
+    if( sei.m_ptSubLayerDelaysPresentFlag[i] )
     {
-      WRITE_CODE(sei.m_picDpbOutputDuDelay, hrd->getDpbOutputDelayDuLengthMinus1()+1, "pic_dpb_output_du_delay" );
+      if (bp.m_cpbRemovalDelayDeltasPresentFlag)
+      {
+        WRITE_FLAG(sei.m_cpbRemovalDelayDeltaEnabledFlag[i], "cpb_removal_delay_delta_enabled_flag[i]");
+      }
+      if( sei.m_cpbRemovalDelayDeltaEnabledFlag[i] )
+      {
+        WRITE_CODE( sei.m_cpbRemovalDelayDeltaIdx[i], ceilLog2(bp.m_numCpbRemovalDelayDeltas),               "cpb_removal_delay_delta_idx[i]" );
+      }
+      else
+      {
+        WRITE_CODE( sei.m_auCpbRemovalDelay[i] - 1, bp.m_cpbRemovalDelayLength,                                "cpb_removal_delay_minus1[i]" );
+      }
     }
-    if( hrd->getSubPicCpbParamsPresentFlag() && hrd->getSubPicCpbParamsInPicTimingSEIFlag() )
+  }
+  WRITE_CODE( sei.m_picDpbOutputDelay,     bp.m_dpbOutputDelayLength,                                          "dpb_output_delay" );
+  if( bp.m_bpDecodingUnitHrdParamsPresentFlag )
+  {
+    WRITE_CODE( sei.m_picDpbOutputDuDelay, bp.m_dpbOutputDelayDuLength, "pic_dpb_output_du_delay" );
+  }
+  if( bp.m_bpDecodingUnitHrdParamsPresentFlag && bp.m_decodingUnitCpbParamsInPicTimingSeiFlag )
+  {
+    WRITE_UVLC( sei.m_numDecodingUnitsMinus1, "num_decoding_units_minus1" );
+    WRITE_FLAG( sei.m_duCommonCpbRemovalDelayFlag, "du_common_cpb_removal_delay_flag" );
+    if( sei.m_duCommonCpbRemovalDelayFlag )
     {
-      WRITE_UVLC( sei.m_numDecodingUnitsMinus1,     "num_decoding_units_minus1" );
-      WRITE_FLAG( sei.m_duCommonCpbRemovalDelayFlag, "du_common_cpb_removal_delay_flag" );
-      if( sei.m_duCommonCpbRemovalDelayFlag )
+      for( int i = temporalId; i < bp.m_bpMaxSubLayers - 1; i ++ )
       {
-        WRITE_CODE( sei.m_duCommonCpbRemovalDelayMinus1, ( hrd->getDuCpbRemovalDelayLengthMinus1() + 1 ),                       "du_common_cpb_removal_delay_minus1" );
+        if( sei.m_ptSubLayerDelaysPresentFlag[i] )
+          WRITE_CODE( sei.m_duCommonCpbRemovalDelayMinus1[i], bp.m_duCpbRemovalDelayIncrementLength, "du_common_cpb_removal_delay_increment_minus1[i]" );
       }
-      for( i = 0; i <= sei.m_numDecodingUnitsMinus1; i ++ )
+    }
+    for( int i = 0; i <= sei.m_numDecodingUnitsMinus1; i ++ )
+    {
+      WRITE_UVLC( sei.m_numNalusInDuMinus1[i], "num_nalus_in_du_minus1[i]" );
+      if( !sei.m_duCommonCpbRemovalDelayFlag && i < sei.m_numDecodingUnitsMinus1 )
       {
-        WRITE_UVLC( sei.m_numNalusInDuMinus1[ i ],  "num_nalus_in_du_minus1");
-        if( ( !sei.m_duCommonCpbRemovalDelayFlag ) && ( i < sei.m_numDecodingUnitsMinus1 ) )
+        for( int j = temporalId; j < bp.m_bpMaxSubLayers - 1; j ++ )
         {
-          WRITE_CODE( sei.m_duCpbRemovalDelayMinus1[ i ], ( hrd->getDuCpbRemovalDelayLengthMinus1() + 1 ),                        "du_cpb_removal_delay_minus1" );
+          if( sei.m_ptSubLayerDelaysPresentFlag[j] )
+            WRITE_CODE( sei.m_duCpbRemovalDelayMinus1[i * bp.m_bpMaxSubLayers + j], bp.m_duCpbRemovalDelayIncrementLength, "du_cpb_removal_delay_increment_minus1[i][j]" );
         }
       }
     }
   }
 }
-void SEIWriter::xWriteSEIRecoveryPoint(const SEIRecoveryPoint& sei)
+
+void SEIWriter::xWriteSEIFrameFieldInfo(const SEIFrameFieldInfo& sei)
 {
-  WRITE_SVLC( sei.m_recoveryPocCnt,    "recovery_poc_cnt"    );
-  WRITE_FLAG( sei.m_exactMatchingFlag, "exact_matching_flag" );
-  WRITE_FLAG( sei.m_brokenLinkFlag,    "broken_link_flag"    );
+  WRITE_FLAG( sei.m_fieldPicFlag ? 1 : 0,                    "field_pic_flag" );
+  if (sei.m_fieldPicFlag)
+  {
+    WRITE_FLAG( sei.m_bottomFieldFlag ? 1 : 0,               "bottom_field_flag" );
+    WRITE_FLAG( sei.m_pairingIndicatedFlag ? 1 : 0,          "pairing_indicated_flag" );
+    if (sei.m_pairingIndicatedFlag)
+    {
+      WRITE_FLAG( sei.m_pairedWithNextFieldFlag ? 1 : 0,     "paired_with_next_field_flag" );
+    }
+  }
+  else
+  {
+    WRITE_FLAG( sei.m_displayFieldsFromFrameFlag ? 1 : 0,     "display_fields_from_frame_flag" );
+    if (sei.m_displayFieldsFromFrameFlag)
+    {
+      WRITE_FLAG( sei.m_topFieldFirstFlag ? 1 : 0,            "top_field_first_flag" );
+    }
+    WRITE_UVLC( sei.m_displayElementalPeriodsMinus1,          "display_elemental_periods_minus1" );
+  }
+  WRITE_CODE( sei.m_sourceScanType, 2,                        "source_scan_type" );
+  WRITE_FLAG( sei.m_duplicateFlag ? 1 : 0,                    "duplicate_flag" );
 }
+
+void SEIWriter::xWriteSEIDependentRAPIndication(const SEIDependentRAPIndication& /*sei*/)
+{
+  // intentionally empty
+}
+
 void SEIWriter::xWriteSEIFramePacking(const SEIFramePacking& sei)
 {
   WRITE_UVLC( sei.m_arrangementId,                  "frame_packing_arrangement_id" );
@@ -377,458 +466,322 @@ void SEIWriter::xWriteSEIFramePacking(const SEIFramePacking& sei)
   WRITE_FLAG( sei.m_upsampledAspectRatio,           "upsampled_aspect_ratio" );
 }
 
-void SEIWriter::xWriteSEISegmentedRectFramePacking(const SEISegmentedRectFramePacking& sei)
-{
-  WRITE_FLAG( sei.m_arrangementCancelFlag,          "segmented_rect_frame_packing_arrangement_cancel_flag" );
-  if( sei.m_arrangementCancelFlag == 0 )
-  {
-    WRITE_CODE( sei.m_contentInterpretationType, 2, "segmented_rect_content_interpretation_type" );
-    WRITE_FLAG( sei.m_arrangementPersistenceFlag,   "segmented_rect_frame_packing_arrangement_persistence" );
-  }
-}
 
-void SEIWriter::xWriteSEIToneMappingInfo(const SEIToneMappingInfo& sei)
+void SEIWriter::xWriteSEIMasteringDisplayColourVolume(const SEIMasteringDisplayColourVolume& sei)
 {
-  int i;
-  WRITE_UVLC( sei.m_toneMapId,                    "tone_map_id" );
-  WRITE_FLAG( sei.m_toneMapCancelFlag,            "tone_map_cancel_flag" );
-  if( !sei.m_toneMapCancelFlag )
-  {
-    WRITE_FLAG( sei.m_toneMapPersistenceFlag,     "tone_map_persistence_flag" );
-    WRITE_CODE( sei.m_codedDataBitDepth,    8,    "coded_data_bit_depth" );
-    WRITE_CODE( sei.m_targetBitDepth,       8,    "target_bit_depth" );
-    WRITE_UVLC( sei.m_modelId,                    "model_id" );
-    switch(sei.m_modelId)
-    {
-    case 0:
-      {
-        WRITE_CODE( sei.m_minValue,  32,        "min_value" );
-        WRITE_CODE( sei.m_maxValue, 32,         "max_value" );
-        break;
-      }
-    case 1:
-      {
-        WRITE_CODE( sei.m_sigmoidMidpoint, 32,  "sigmoid_midpoint" );
-        WRITE_CODE( sei.m_sigmoidWidth,    32,  "sigmoid_width"    );
-        break;
-      }
-    case 2:
-      {
-        uint32_t num = 1u << sei.m_targetBitDepth;
-        for(i = 0; i < num; i++)
-        {
-          WRITE_CODE( sei.m_startOfCodedInterval[i], (( sei.m_codedDataBitDepth + 7 ) >> 3 ) << 3,  "start_of_coded_interval" );
-        }
-        break;
-      }
-    case 3:
-      {
-        WRITE_CODE( sei.m_numPivots, 16,          "num_pivots" );
-        for(i = 0; i < sei.m_numPivots; i++ )
-        {
-          WRITE_CODE( sei.m_codedPivotValue[i], (( sei.m_codedDataBitDepth + 7 ) >> 3 ) << 3,       "coded_pivot_value" );
-          WRITE_CODE( sei.m_targetPivotValue[i], (( sei.m_targetBitDepth + 7 ) >> 3 ) << 3,         "target_pivot_value");
-        }
-        break;
-      }
-    case 4:
-      {
-        WRITE_CODE( sei.m_cameraIsoSpeedIdc,    8,    "camera_iso_speed_idc" );
-        if( sei.m_cameraIsoSpeedIdc == 255) //Extended_ISO
-        {
-          WRITE_CODE( sei.m_cameraIsoSpeedValue,    32,    "camera_iso_speed_value" );
-        }
-        WRITE_CODE( sei.m_exposureIndexIdc,     8,    "exposure_index_idc" );
-        if( sei.m_exposureIndexIdc == 255) //Extended_ISO
-        {
-          WRITE_CODE( sei.m_exposureIndexValue,     32,    "exposure_index_value" );
-        }
-        WRITE_FLAG( sei.m_exposureCompensationValueSignFlag,           "exposure_compensation_value_sign_flag" );
-        WRITE_CODE( sei.m_exposureCompensationValueNumerator,     16,  "exposure_compensation_value_numerator" );
-        WRITE_CODE( sei.m_exposureCompensationValueDenomIdc,      16,  "exposure_compensation_value_denom_idc" );
-        WRITE_CODE( sei.m_refScreenLuminanceWhite,                32,  "ref_screen_luminance_white" );
-        WRITE_CODE( sei.m_extendedRangeWhiteLevel,                32,  "extended_range_white_level" );
-        WRITE_CODE( sei.m_nominalBlackLevelLumaCodeValue,         16,  "nominal_black_level_luma_code_value" );
-        WRITE_CODE( sei.m_nominalWhiteLevelLumaCodeValue,         16,  "nominal_white_level_luma_code_value" );
-        WRITE_CODE( sei.m_extendedWhiteLevelLumaCodeValue,        16,  "extended_white_level_luma_code_value" );
-        break;
-      }
-    default:
-      {
-        THROW("Undefined SEIToneMapModelId");
-        break;
-      }
-    }//switch m_modelId
-  }//if(!sei.m_toneMapCancelFlag)
+  WRITE_CODE( sei.values.primaries[0][0],  16,  "display_primaries_x[0]" );
+  WRITE_CODE( sei.values.primaries[0][1],  16,  "display_primaries_y[0]" );
+
+  WRITE_CODE( sei.values.primaries[1][0],  16,  "display_primaries_x[1]" );
+  WRITE_CODE( sei.values.primaries[1][1],  16,  "display_primaries_y[1]" );
+
+  WRITE_CODE( sei.values.primaries[2][0],  16,  "display_primaries_x[2]" );
+  WRITE_CODE( sei.values.primaries[2][1],  16,  "display_primaries_y[2]" );
+
+  WRITE_CODE( sei.values.whitePoint[0],    16,  "white_point_x" );
+  WRITE_CODE( sei.values.whitePoint[1],    16,  "white_point_y" );
+
+  WRITE_CODE( sei.values.maxLuminance,     32,  "max_display_mastering_luminance" );
+  WRITE_CODE( sei.values.minLuminance,     32,  "min_display_mastering_luminance" );
 }
 
-void SEIWriter::xWriteSEIDisplayOrientation(const SEIDisplayOrientation &sei)
+void SEIWriter::xWriteByteAlign() // pads the current bitstream (m_pcBitIf) to the next multiple of 8 written bits
 {
-  WRITE_FLAG( sei.cancelFlag,           "display_orientation_cancel_flag" );
-  if( !sei.cancelFlag )
+  if( m_pcBitIf->getNumberOfWrittenBits() % 8 != 0) // nothing is written when the bit count is already a multiple of 8
   {
-    WRITE_FLAG( sei.horFlip,                   "hor_flip" );
-    WRITE_FLAG( sei.verFlip,                   "ver_flip" );
-    WRITE_CODE( sei.anticlockwiseRotation, 16, "anticlockwise_rotation" );
-    WRITE_FLAG( sei.persistenceFlag,          "display_orientation_persistence_flag" );
+    WRITE_FLAG( 1, "payload_bit_equal_to_one" ); // a single 1 bit comes first
+    while( m_pcBitIf->getNumberOfWrittenBits() % 8 != 0 ) // then 0 bits until the count is a multiple of 8
+    {
+      WRITE_FLAG( 0, "payload_bit_equal_to_zero" );
+    }
   }
 }
 
-void SEIWriter::xWriteSEITemporalLevel0Index(const SEITemporalLevel0Index &sei)
+#if U0033_ALTERNATIVE_TRANSFER_CHARACTERISTICS_SEI
+void SEIWriter::xWriteSEIAlternativeTransferCharacteristics(const SEIAlternativeTransferCharacteristics& sei)
 {
-  WRITE_CODE( sei.tl0Idx, 8 , "tl0_idx" );
-  WRITE_CODE( sei.rapIdx, 8 , "rap_idx" );
+  WRITE_CODE(sei.m_preferredTransferCharacteristics, 8, "preferred_transfer_characteristics");
 }
+#endif
 
-void SEIWriter::xWriteSEIGradualDecodingRefreshInfo(const SEIGradualDecodingRefreshInfo &sei)
+void SEIWriter::xWriteSEIEquirectangularProjection(const SEIEquirectangularProjection &sei) // serialises the erp_* fields of sei in the order below
 {
-  WRITE_FLAG( sei.m_gdrForegroundFlag, "gdr_foreground_flag");
+  WRITE_FLAG( sei.m_erpCancelFlag, "erp_cancel_flag" );
+  if( !sei.m_erpCancelFlag ) // when the cancel flag is set, it is the only field written
+  {
+    WRITE_FLAG( sei.m_erpPersistenceFlag, "erp_persistence_flag" );
+    WRITE_FLAG( sei.m_erpGuardBandFlag,   "erp_guard_band_flag" );
+    WRITE_CODE( 0, 2, "erp_reserved_zero_2bits" ); // two reserved bits, always written as 0
+    if ( sei.m_erpGuardBandFlag == 1) // guard-band type and widths are written only when the guard-band flag is set
+    {
+      WRITE_CODE( sei.m_erpGuardBandType,       3, "erp_guard_band_type" );  
+      WRITE_CODE( sei.m_erpLeftGuardBandWidth,  8, "erp_left_guard_band_width" );  
+      WRITE_CODE( sei.m_erpRightGuardBandWidth, 8, "erp_right_guard_band_width" );  
+    }
+  }
 }
 
-void SEIWriter::xWriteSEINoDisplay(const SEINoDisplay& /*sei*/)
+void SEIWriter::xWriteSEISphereRotation(const SEISphereRotation &sei) // serialises the sphere_rotation_* fields of sei in the order below
 {
+  WRITE_FLAG( sei.m_sphereRotationCancelFlag, "sphere_rotation_cancel_flag" );
+  if( !sei.m_sphereRotationCancelFlag ) // when the cancel flag is set, it is the only field written
+  {
+    WRITE_FLAG( sei.m_sphereRotationPersistenceFlag,    "sphere_rotation_persistence_flag" );
+    WRITE_CODE( 0,                                   6, "sphere_rotation_reserved_zero_6bits" ); // six reserved bits, always written as 0
+    WRITE_SCODE(sei.m_sphereRotationYaw,            32, "sphere_rotation_yaw" );  // yaw, pitch and roll: 32 bits each via WRITE_SCODE
+    WRITE_SCODE(sei.m_sphereRotationPitch,          32, "sphere_rotation_pitch" );  
+    WRITE_SCODE(sei.m_sphereRotationRoll,           32, "sphere_rotation_roll" );  
+  }
 }
 
-void SEIWriter::xWriteSEISOPDescription(const SEISOPDescription& sei)
+void SEIWriter::xWriteSEIOmniViewport(const SEIOmniViewport &sei)
 {
-  WRITE_UVLC( sei.m_sopSeqParameterSetId,           "sop_seq_parameter_set_id"               );
-  WRITE_UVLC( sei.m_numPicsInSopMinus1,             "num_pics_in_sop_minus1"               );
-  for (uint32_t i = 0; i <= sei.m_numPicsInSopMinus1; i++)
+  WRITE_CODE( sei.m_omniViewportId,     10,    "omni_viewport_id" );
+  WRITE_FLAG( sei.m_omniViewportCancelFlag, "omni_viewport_cancel_flag" );
+  if ( !sei.m_omniViewportCancelFlag )
   {
-    WRITE_CODE( sei.m_sopDescVclNaluType[i], 6, "sop_desc_vcl_nalu_type" );
-    WRITE_CODE( sei.m_sopDescTemporalId[i],  3, "sop_desc_temporal_id" );
-    if (sei.m_sopDescVclNaluType[i] != NAL_UNIT_CODED_SLICE_IDR_W_RADL && sei.m_sopDescVclNaluType[i] != NAL_UNIT_CODED_SLICE_IDR_N_LP)
-    {
-      WRITE_UVLC( sei.m_sopDescStRpsIdx[i],           "sop_desc_st_rps_idx"               );
-    }
-    if (i > 0)
+    WRITE_FLAG( sei.m_omniViewportPersistenceFlag, "omni_viewport_persistence_flag" );
+    const uint32_t numRegions = (uint32_t) sei.m_omniViewportRegions.size();
+    WRITE_CODE( numRegions - 1, 4, "omni_viewport_cnt_minus1" );
+    for(uint32_t region=0; region<numRegions; region++)
     {
-      WRITE_SVLC( sei.m_sopDescPocDelta[i],           "sop_desc_poc_delta"               );
+      const SEIOmniViewport::OmniViewport &viewport=sei.m_omniViewportRegions[region];
+      WRITE_SCODE( viewport.azimuthCentre,     32,  "omni_viewport_azimuth_centre"   );  
+      WRITE_SCODE( viewport.elevationCentre,   32,  "omni_viewport_elevation_centre" );  
+      WRITE_SCODE( viewport.tiltCentre,        32,  "omni_viewport_tilt_center" );  
+      WRITE_CODE( viewport.horRange,           32, "omni_viewport_hor_range[i]" );
+      WRITE_CODE( viewport.verRange,           32, "omni_viewport_ver_range[i]" );
     }
   }
 }
 
-void SEIWriter::xWriteSEIScalableNesting(OutputBitstream& bs, const SEIScalableNesting& sei, const SPS *sps)
+void SEIWriter::xWriteSEIRegionWisePacking(const SEIRegionWisePacking &sei)
 {
-  WRITE_FLAG( sei.m_bitStreamSubsetFlag,             "bitstream_subset_flag"         );
-  WRITE_FLAG( sei.m_nestingOpFlag,                   "nesting_op_flag      "         );
-  if (sei.m_nestingOpFlag)
-  {
-    WRITE_FLAG( sei.m_defaultOpFlag,                 "default_op_flag"               );
-    WRITE_UVLC( sei.m_nestingNumOpsMinus1,           "nesting_num_ops_minus1"        );
-    for (uint32_t i = (sei.m_defaultOpFlag ? 1 : 0); i <= sei.m_nestingNumOpsMinus1; i++)
-    {
-      WRITE_CODE( sei.m_nestingMaxTemporalIdPlus1[i], 3,  "nesting_max_temporal_id_plus1" );
-      WRITE_UVLC( sei.m_nestingOpIdx[i],                  "nesting_op_idx"                );
-    }
-  }
-  else
+  WRITE_FLAG( sei.m_rwpCancelFlag,                                           "rwp_cancel_flag" );
+  if(!sei.m_rwpCancelFlag)
   {
-    WRITE_FLAG( sei.m_allLayersFlag,                      "all_layers_flag"               );
-    if (!sei.m_allLayersFlag)
-    {
-      WRITE_CODE( sei.m_nestingNoOpMaxTemporalIdPlus1, 3, "nesting_no_op_max_temporal_id_plus1" );
-      WRITE_UVLC( sei.m_nestingNumLayersMinus1,           "nesting_num_layers"                  );
-      for (uint32_t i = 0; i <= sei.m_nestingNumLayersMinus1; i++)
+    WRITE_FLAG( sei.m_rwpPersistenceFlag,                                    "rwp_persistence_flag" );
+    WRITE_FLAG( sei.m_constituentPictureMatchingFlag,                        "constituent_picture_matching_flag" );
+    WRITE_CODE( 0, 5,                                                        "rwp_reserved_zero_5bits" );
+    WRITE_CODE( (uint32_t)sei.m_numPackedRegions,                 8,             "num_packed_regions" );
+    WRITE_CODE( (uint32_t)sei.m_projPictureWidth,                 32,            "proj_picture_width" );
+    WRITE_CODE( (uint32_t)sei.m_projPictureHeight,                32,            "proj_picture_height" );
+    WRITE_CODE( (uint32_t)sei.m_packedPictureWidth,               16,            "packed_picture_width" );
+    WRITE_CODE( (uint32_t)sei.m_packedPictureHeight,              16,            "packed_picture_height" );
+    for( int i=0; i < sei.m_numPackedRegions; i++ )
+    { 
+      WRITE_CODE( 0, 4,                                                      "rwp_reserved_zero_4bits" );
+      WRITE_CODE( (uint32_t)sei.m_rwpTransformType[i],            3,             "rwp_tTransform_type" );
+      WRITE_FLAG( sei.m_rwpGuardBandFlag[i],                                 "rwp_guard_band_flag" );
+      WRITE_CODE( (uint32_t)sei.m_projRegionWidth[i],             32,            "proj_region_width" );
+      WRITE_CODE( (uint32_t)sei.m_projRegionHeight[i],            32,            "proj_region_height" );
+      WRITE_CODE( (uint32_t)sei.m_rwpProjRegionTop[i],            32,            "rwp_proj_regionTop" );
+      WRITE_CODE( (uint32_t)sei.m_projRegionLeft[i],              32,            "proj_region_left" );
+      WRITE_CODE( (uint32_t)sei.m_packedRegionWidth[i],           16,            "packed_region_width" );
+      WRITE_CODE( (uint32_t)sei.m_packedRegionHeight[i],          16,            "packed_region_height" );
+      WRITE_CODE( (uint32_t)sei.m_packedRegionTop[i],             16,            "packed_region_top" );
+      WRITE_CODE( (uint32_t)sei.m_packedRegionLeft[i],            16,            "packed_region_left" );
+      if( sei.m_rwpGuardBandFlag[i] )
       {
-        WRITE_CODE( sei.m_nestingLayerId[i], 6,           "nesting_layer_id"              );
+        WRITE_CODE( (uint32_t)sei.m_rwpLeftGuardBandWidth[i],     8,             "rwp_left_guard_band_width");
+        WRITE_CODE( (uint32_t)sei.m_rwpRightGuardBandWidth[i],    8,             "rwp_right_guard_band_width");
+        WRITE_CODE( (uint32_t)sei.m_rwpTopGuardBandHeight[i],     8,             "rwp_top_guard_band_height");
+        WRITE_CODE( (uint32_t)sei. m_rwpBottomGuardBandHeight[i], 8,             "rwp_bottom_guard_band_height");
+        WRITE_FLAG( sei.m_rwpGuardBandNotUsedForPredFlag[i],                 "rwp_guard_band_not_used_forPred_flag" );
+        for( int j=0; j < 4; j++ )
+        {
+          WRITE_CODE( (uint32_t)sei.m_rwpGuardBandType[i*4 + j],  3,             "rwp_guard_band_type");
+        }
+        WRITE_CODE( 0, 3,                                                    "rwp_guard_band_reserved_zero_3bits" );
       }
     }
   }
-
-  // byte alignment
-  while ( m_pcBitIf->getNumberOfWrittenBits() % 8 != 0 )
-  {
-    WRITE_FLAG( 0, "nesting_zero_bit" );
-  }
-
-  // write nested SEI messages
-  writeSEImessages(bs, sei.m_nestedSEIs, sps, true);
 }
 
-#if HEVC_TILES_WPP
-void SEIWriter::xWriteSEITempMotionConstrainedTileSets(const SEITempMotionConstrainedTileSets& sei)
+void SEIWriter::xWriteSEIGeneralizedCubemapProjection(const SEIGeneralizedCubemapProjection &sei)
 {
-  //uint32_t code;
-  WRITE_FLAG((sei.m_mc_all_tiles_exact_sample_value_match_flag ? 1 : 0), "mc_all_tiles_exact_sample_value_match_flag");
-  WRITE_FLAG((sei.m_each_tile_one_tile_set_flag                ? 1 : 0), "each_tile_one_tile_set_flag"               );
-
-  if(!sei.m_each_tile_one_tile_set_flag)
+  WRITE_FLAG( sei.m_gcmpCancelFlag,                           "gcmp_cancel_flag" );
+  if (!sei.m_gcmpCancelFlag)
   {
-    WRITE_FLAG((sei.m_limited_tile_set_display_flag ? 1 : 0), "limited_tile_set_display_flag");
-    WRITE_UVLC((sei.getNumberOfTileSets() - 1),               "num_sets_in_message_minus1"   );
-
-    if(sei.getNumberOfTileSets() > 0)
+    WRITE_FLAG( sei.m_gcmpPersistenceFlag,                    "gcmp_persistence_flag" );
+    WRITE_CODE( sei.m_gcmpPackingType,                     3, "gcmp_packing_type" );  
+    WRITE_CODE( sei.m_gcmpMappingFunctionType,             2, "gcmp_mapping_function_type" );
+    int numFace = sei.m_gcmpPackingType == 4 || sei.m_gcmpPackingType == 5 ? 5 : 6;
+    for (int i = 0; i < numFace; i++)
     {
-      for(int i = 0; i < sei.getNumberOfTileSets(); i++)
+      WRITE_CODE( sei.m_gcmpFaceIndex[i],                  3, "gcmp_face_index" );  
+      WRITE_CODE( sei.m_gcmpFaceRotation[i],               2, "gcmp_face_rotation" );  
+      if (sei.m_gcmpMappingFunctionType == 2)
       {
-        WRITE_UVLC(sei.tileSetData(i).m_mcts_id, "mcts_id");
-
-        if(sei.m_limited_tile_set_display_flag)
-        {
-          WRITE_FLAG((sei.tileSetData(i).m_display_tile_set_flag ? 1 : 0), "display_tile_set_flag");
-        }
-
-        WRITE_UVLC((sei.tileSetData(i).getNumberOfTileRects() - 1), "num_tile_rects_in_set_minus1");
-
-        for(int j = 0; j < sei.tileSetData(i).getNumberOfTileRects(); j++)
-        {
-          WRITE_UVLC(sei.tileSetData(i).topLeftTileIndex    (j), "top_left_tile_index");
-          WRITE_UVLC(sei.tileSetData(i).bottomRightTileIndex(j), "bottom_right_tile_index");
-        }
-
-        if(!sei.m_mc_all_tiles_exact_sample_value_match_flag)
-        {
-          WRITE_FLAG((sei.tileSetData(i).m_exact_sample_value_match_flag ? 1 : 0), "exact_sample_value_match_flag");
-        }
-
-        WRITE_FLAG((sei.tileSetData(i).m_mcts_tier_level_idc_present_flag ? 1 : 0), "mcts_tier_level_idc_present_flag");
-
-        if(sei.tileSetData(i).m_mcts_tier_level_idc_present_flag)
-        {
-          WRITE_FLAG((sei.tileSetData(i).m_mcts_tier_flag ? 1 : 0), "mcts_tier_flag");
-          WRITE_CODE( sei.tileSetData(i).m_mcts_level_idc, 8,       "mcts_level_idc");
-        }
+        WRITE_CODE( sei.m_gcmpFunctionCoeffU[i],           7, "gcmp_function_coeff_u" );  
+        WRITE_FLAG( sei.m_gcmpFunctionUAffectedByVFlag[i],    "gcmp_function_u_affected_by_v_flag" );
+        WRITE_CODE( sei.m_gcmpFunctionCoeffV[i],           7, "gcmp_function_coeff_v" );  
+        WRITE_FLAG( sei.m_gcmpFunctionVAffectedByUFlag[i],    "gcmp_function_v_affected_by_u_flag" );
       }
     }
-  }
-  else
-  {
-    WRITE_FLAG((sei.m_max_mcs_tier_level_idc_present_flag ? 1 : 0), "max_mcs_tier_level_idc_present_flag");
-
-    if(sei.m_max_mcs_tier_level_idc_present_flag)
+    WRITE_FLAG( sei.m_gcmpGuardBandFlag,                      "gcmp_guard_band_flag" );
+    if (sei.m_gcmpGuardBandFlag)
     {
-      WRITE_FLAG((sei.m_max_mcts_tier_flag ? 1 : 0), "max_mcts_tier_flag");
-      WRITE_CODE( sei.m_max_mcts_level_idc, 8,       "max_mcts_level_idc");
+      WRITE_FLAG( sei.m_gcmpGuardBandBoundaryType,            "gcmp_guard_band_boundary_type" );  
+      WRITE_CODE( sei.m_gcmpGuardBandSamplesMinus1,        4, "gcmp_guard_band_samples_minus1" );  
     }
   }
 }
-#endif
 
-void SEIWriter::xWriteSEITimeCode(const SEITimeCode& sei)
+void SEIWriter::xWriteSEISubpictureLevelInfo(const SEISubpicureLevelInfo &sei, const SPS* sps)  // writes the subpicture level info SEI fields in order; sps is only read for the subpicture count
 {
-  WRITE_CODE(sei.numClockTs, 2, "num_clock_ts");
-  for(int i = 0; i < sei.numClockTs; i++)
+  WRITE_CODE( (uint32_t)sei.m_sliSeqParameterSetId, 4,                        "sli_seq_parameter_set_id");
+  CHECK(sei.m_numRefLevels < 1, "SEISubpicureLevelInfo: numRefLevels must be greater than zero");
+  CHECK(sei.m_numRefLevels != (int)sei.m_refLevelIdc.size(), "SEISubpicureLevelInfo: numRefLevels must be equal to the number of levels");
+  if (sei.m_explicitFractionPresentFlag)  // the fraction table is only validated when it is going to be written
+  {
+    CHECK(sei.m_numRefLevels != (int)sei.m_refLevelFraction.size(), "SEISubpicureLevelInfo: numRefLevels must be equal to the number of fractions");
+  }
+  WRITE_CODE( (uint32_t)sei.m_numRefLevels - 1, 3,                            "num_ref_levels_minus1");  // count is coded minus one in a 3-bit field
+  WRITE_FLAG(           sei.m_explicitFractionPresentFlag,                    "explicit_fraction_present_flag");
+
+  for (int i=0; i<sei.m_numRefLevels; i++)  // one level idc per reference level, optionally followed by its fractions
   {
-    const SEITimeSet &currentTimeSet = sei.timeSetArray[i];
-    WRITE_FLAG(currentTimeSet.clockTimeStampFlag, "clock_time_stamp_flag");
-    if(currentTimeSet.clockTimeStampFlag)
+    WRITE_CODE( (uint32_t)sei.m_refLevelIdc[i], 8,                            "ref_level_idc[i]");
+    if (sei.m_explicitFractionPresentFlag)
     {
-      WRITE_FLAG(currentTimeSet.numUnitFieldBasedFlag, "units_field_based_flag");
-      WRITE_CODE(currentTimeSet.countingType, 5, "counting_type");
-      WRITE_FLAG(currentTimeSet.fullTimeStampFlag, "full_timestamp_flag");
-      WRITE_FLAG(currentTimeSet.discontinuityFlag, "discontinuity_flag");
-      WRITE_FLAG(currentTimeSet.cntDroppedFlag, "cnt_dropped_flag");
-      WRITE_CODE(currentTimeSet.numberOfFrames, 9, "n_frames");
-      if(currentTimeSet.fullTimeStampFlag)
-      {
-        WRITE_CODE(currentTimeSet.secondsValue, 6, "seconds_value");
-        WRITE_CODE(currentTimeSet.minutesValue, 6, "minutes_value");
-        WRITE_CODE(currentTimeSet.hoursValue, 5, "hours_value");
-      }
-      else
+      CHECK(sps->getNumSubPics() != (int)sei.m_refLevelFraction[i].size(),    "SEISubpicureLevelInfo: number of fractions differs from number of subpictures");  // sps is dereferenced here without a null check
+      for (int j = 0; j < sps->getNumSubPics(); j++)  // one 8-bit fraction per subpicture
       {
-        WRITE_FLAG(currentTimeSet.secondsFlag, "seconds_flag");
-        if(currentTimeSet.secondsFlag)
-        {
-          WRITE_CODE(currentTimeSet.secondsValue, 6, "seconds_value");
-          WRITE_FLAG(currentTimeSet.minutesFlag, "minutes_flag");
-          if(currentTimeSet.minutesFlag)
-          {
-            WRITE_CODE(currentTimeSet.minutesValue, 6, "minutes_value");
-            WRITE_FLAG(currentTimeSet.hoursFlag, "hours_flag");
-            if(currentTimeSet.hoursFlag)
-            {
-              WRITE_CODE(currentTimeSet.hoursValue, 5, "hours_value");
-            }
-          }
-        }
-      }
-      WRITE_CODE(currentTimeSet.timeOffsetLength, 5, "time_offset_length");
-      if(currentTimeSet.timeOffsetLength > 0)
-      {
-        if(currentTimeSet.timeOffsetValue >= 0)
-        {
-          WRITE_CODE((uint32_t)currentTimeSet.timeOffsetValue, currentTimeSet.timeOffsetLength, "time_offset_value");
-        }
-        else
-        {
-          //  Two's complement conversion
-          uint32_t offsetValue = ~(currentTimeSet.timeOffsetValue) + 1;
-          offsetValue |= (1 << (currentTimeSet.timeOffsetLength-1));
-          WRITE_CODE(offsetValue, currentTimeSet.timeOffsetLength, "time_offset_value");
-        }
+        WRITE_CODE( (uint32_t)sei.m_refLevelFraction[i][j], 8,                "ref_level_fraction_minus1[i][j]");  // NOTE(review): stored value is written unchanged under a "_minus1" name - presumably it already holds the minus-one form; confirm
       }
     }
   }
 }
 
-void SEIWriter::xWriteSEIChromaResamplingFilterHint(const SEIChromaResamplingFilterHint &sei)
+void SEIWriter::xWriteSEISampleAspectRatioInfo(const SEISampleAspectRatioInfo &sei)  // writes the sample aspect ratio info SEI; only the cancel flag is written when it is set
 {
-  WRITE_CODE(sei.m_verChromaFilterIdc, 8, "ver_chroma_filter_idc");
-  WRITE_CODE(sei.m_horChromaFilterIdc, 8, "hor_chroma_filter_idc");
-  WRITE_FLAG(sei.m_verFilteringFieldProcessingFlag, "ver_filtering_field_processing_flag");
-  if(sei.m_verChromaFilterIdc == 1 || sei.m_horChromaFilterIdc == 1)
+  WRITE_FLAG( sei.m_sariCancelFlag,                                           "sari_cancel_flag" );
+  if(!sei.m_sariCancelFlag)
   {
-    WRITE_UVLC(sei.m_targetFormatIdc, "target_format_idc");
-    if(sei.m_verChromaFilterIdc == 1)
-    {
-      const int numVerticalFilter = (int)sei.m_verFilterCoeff.size();
-      WRITE_UVLC(numVerticalFilter, "num_vertical_filters");
-      if(numVerticalFilter > 0)
-      {
-        for(int i = 0; i < numVerticalFilter; i ++)
-        {
-          const int verTapLengthMinus1 = (int) sei.m_verFilterCoeff[i].size() - 1;
-          WRITE_UVLC(verTapLengthMinus1, "ver_tap_length_minus_1");
-          for(int j = 0; j < (verTapLengthMinus1 + 1); j ++)
-          {
-            WRITE_SVLC(sei.m_verFilterCoeff[i][j], "ver_filter_coeff");
-          }
-        }
-      }
-    }
-    if(sei.m_horChromaFilterIdc == 1)
+    WRITE_FLAG( sei.m_sariPersistenceFlag,                                    "sari_persistence_flag" );
+    WRITE_CODE( (uint32_t)sei.m_sariAspectRatioIdc, 8,                        "sari_aspect_ratio_idc");
+    if (sei.m_sariAspectRatioIdc == 255)  // explicit 16-bit width and height follow only for idc value 255
     {
-      const int numHorizontalFilter = (int) sei.m_horFilterCoeff.size();
-      WRITE_UVLC(numHorizontalFilter, "num_horizontal_filters");
-      if(numHorizontalFilter > 0)
-      {
-        for(int i = 0; i < numHorizontalFilter; i ++)
-        {
-          const int horTapLengthMinus1 = (int) sei.m_horFilterCoeff[i].size() - 1;
-          WRITE_UVLC(horTapLengthMinus1, "hor_tap_length_minus_1");
-          for(int j = 0; j < (horTapLengthMinus1 + 1); j ++)
-          {
-            WRITE_SVLC(sei.m_horFilterCoeff[i][j], "hor_filter_coeff");
-          }
-        }
-      }
+      WRITE_CODE( (uint32_t)sei.m_sariSarWidth, 16,                           "sari_sar_width");
+      WRITE_CODE( (uint32_t)sei.m_sariSarHeight, 16,                           "sari_sar_height");
     }
   }
 }
 
-void SEIWriter::xWriteSEIKneeFunctionInfo(const SEIKneeFunctionInfo &sei)
+void SEIWriter::xWriteSEIUserDataRegistered(const SEIUserDataRegistered &sei)  // writes the country code, an optional extension byte, then every payload byte
 {
-  WRITE_UVLC( sei.m_kneeId, "knee_function_id" );
-  WRITE_FLAG( sei.m_kneeCancelFlag, "knee_function_cancel_flag" );
-  if ( !sei.m_kneeCancelFlag )
-  {
-    WRITE_FLAG( sei.m_kneePersistenceFlag, "knee_function_persistence_flag" );
-    WRITE_CODE( (uint32_t)sei.m_kneeInputDrange , 32,  "input_d_range" );
-    WRITE_CODE( (uint32_t)sei.m_kneeInputDispLuminance, 32,  "input_disp_luminance" );
-    WRITE_CODE( (uint32_t)sei.m_kneeOutputDrange, 32,  "output_d_range" );
-    WRITE_CODE( (uint32_t)sei.m_kneeOutputDispLuminance, 32,  "output_disp_luminance" );
-    WRITE_UVLC( sei.m_kneeNumKneePointsMinus1, "num_knee_points_minus1" );
-    for(int i = 0; i <= sei.m_kneeNumKneePointsMinus1; i++ )
-    {
-      WRITE_CODE( (uint32_t)sei.m_kneeInputKneePoint[i], 10,"input_knee_point" );
-      WRITE_CODE( (uint32_t)sei.m_kneeOutputKneePoint[i], 10, "output_knee_point" );
-    }
+  WRITE_CODE((sei.m_ituCountryCode>255) ? 0xff : sei.m_ituCountryCode, 8, "itu_t_t35_country_code");  // first byte saturates at 0xff
+  if (sei.m_ituCountryCode >= 255)  // a stored code of 255 or more also emits an extension byte holding (code - 255)
+  {
+    assert(sei.m_ituCountryCode < 255 + 256);  // the extension is a single byte, so (code - 255) must stay below 256
+    WRITE_CODE(sei.m_ituCountryCode - 255, 8, "itu_t_t35_country_code_extension_byte");
+  }
+  for (uint32_t i = 0; i<sei.m_userData.size(); i++)  // each element of m_userData is written as one 8-bit code
+  {
+    WRITE_CODE(sei.m_userData[i], 8, "itu_t_t35_payload_byte");
   }
 }
 
-void SEIWriter::xWriteSEIColourRemappingInfo(const SEIColourRemappingInfo& sei)
+void SEIWriter::xWriteSEIFilmGrainCharacteristics(const SEIFilmGrainCharacteristics &sei)  // writes the film grain characteristics SEI; only the cancel flag is written when it is set
 {
-  WRITE_UVLC( sei.m_colourRemapId,                             "colour_remap_id" );
-  WRITE_FLAG( sei.m_colourRemapCancelFlag,                     "colour_remap_cancel_flag" );
-  if( !sei.m_colourRemapCancelFlag )
+  WRITE_FLAG(sei.m_filmGrainCharacteristicsCancelFlag, "film_grain_characteristics_cancel_flag");
+  if (!sei.m_filmGrainCharacteristicsCancelFlag)
   {
-    WRITE_FLAG( sei.m_colourRemapPersistenceFlag,              "colour_remap_persistence_flag" );
-    WRITE_FLAG( sei.m_colourRemapVideoSignalInfoPresentFlag,   "colour_remap_video_signal_info_present_flag" );
-    if ( sei.m_colourRemapVideoSignalInfoPresentFlag )
-    {
-      WRITE_FLAG( sei.m_colourRemapFullRangeFlag,              "colour_remap_full_range_flag" );
-      WRITE_CODE( sei.m_colourRemapPrimaries,               8, "colour_remap_primaries" );
-      WRITE_CODE( sei.m_colourRemapTransferFunction,        8, "colour_remap_transfer_function" );
-      WRITE_CODE( sei.m_colourRemapMatrixCoefficients,      8, "colour_remap_matrix_coefficients" );
-    }
-    WRITE_CODE( sei.m_colourRemapInputBitDepth,             8, "colour_remap_input_bit_depth" );
-    WRITE_CODE( sei.m_colourRemapBitDepth,                  8, "colour_remap_bit_depth" );
-    for( int c=0 ; c<3 ; c++ )
+    WRITE_CODE(sei.m_filmGrainModelId, 2, "film_grain_model_id");
+    WRITE_FLAG(sei.m_separateColourDescriptionPresentFlag, "separate_colour_description_present_flag");
+    if (sei.m_separateColourDescriptionPresentFlag)  // the colour description fields are optional
     {
-      WRITE_CODE( sei.m_preLutNumValMinus1[c],              8, "pre_lut_num_val_minus1[c]" );
-      if( sei.m_preLutNumValMinus1[c]>0 )
-      {
-        for( int i=0 ; i<=sei.m_preLutNumValMinus1[c] ; i++ )
-        {
-          WRITE_CODE( sei.m_preLut[c][i].codedValue,  (( sei.m_colourRemapInputBitDepth + 7 ) >> 3 ) << 3, "pre_lut_coded_value[c][i]" );
-          WRITE_CODE( sei.m_preLut[c][i].targetValue, (( sei.m_colourRemapBitDepth      + 7 ) >> 3 ) << 3, "pre_lut_target_value[c][i]" );
-        }
-      }
+      WRITE_CODE(sei.m_filmGrainBitDepthLumaMinus8, 3, "film_grain_bit_depth_luma_minus8");
+      WRITE_CODE(sei.m_filmGrainBitDepthChromaMinus8, 3, "film_grain_bit_depth_chroma_minus8");
+      WRITE_FLAG(sei.m_filmGrainFullRangeFlag, "film_grain_full_range_flag");
+      WRITE_CODE(sei.m_filmGrainColourPrimaries, 8, "film_grain_colour_primaries");
+      WRITE_CODE(sei.m_filmGrainTransferCharacteristics, 8, "film_grain_transfer_characteristics");
+      WRITE_CODE(sei.m_filmGrainMatrixCoeffs, 8, "film_grain_matrix_coeffs");
     }
-    WRITE_FLAG( sei.m_colourRemapMatrixPresentFlag,            "colour_remap_matrix_present_flag" );
-    if( sei.m_colourRemapMatrixPresentFlag )
+    WRITE_CODE(sei.m_blendingModeId, 2, "blending_mode_id");
+    WRITE_CODE(sei.m_log2ScaleFactor, 4, "log2_scale_factor");
+    for (int c = 0; c<3; c++)  // first pass: one presence flag per component
     {
-      WRITE_CODE( sei.m_log2MatrixDenom,                    4, "log2_matrix_denom" );
-      for( int c=0 ; c<3 ; c++ )
-      {
-        for( int i=0 ; i<3 ; i++ )
-        {
-          WRITE_SVLC( sei.m_colourRemapCoeffs[c][i],           "colour_remap_coeffs[c][i]" );
-        }
-      }
+      const SEIFilmGrainCharacteristics::CompModel &cm = sei.m_compModel[c];
+      const uint32_t numIntensityIntervals = (uint32_t)cm.intensityValues.size();
+      const uint32_t numModelValues = cm.numModelValues;
+      WRITE_FLAG(sei.m_compModel[c].presentFlag && numIntensityIntervals>0 && numModelValues>0, "comp_model_present_flag[c]");  // signalled as present only if there are intervals and model values to write
     }
-
-    for( int c=0 ; c<3 ; c++ )
+    for (uint32_t c = 0; c<3; c++)  // second pass: model data for each component, under the same condition as the flag written above
     {
-      WRITE_CODE( sei.m_postLutNumValMinus1[c],             8, "m_postLutNumValMinus1[c]" );
-      if( sei.m_postLutNumValMinus1[c]>0 )
+      const SEIFilmGrainCharacteristics::CompModel &cm = sei.m_compModel[c];
+      const uint32_t numIntensityIntervals = (uint32_t)cm.intensityValues.size();
+      const uint32_t numModelValues = cm.numModelValues;
+      if (cm.presentFlag && numIntensityIntervals>0 && numModelValues>0)
       {
-        for( int i=0 ; i<=sei.m_postLutNumValMinus1[c] ; i++ )
+        assert(numIntensityIntervals <= 256);  // both counts are coded minus one in 8 bits
+        assert(numModelValues <= 256);
+        WRITE_CODE(numIntensityIntervals - 1, 8, "num_intensity_intervals_minus1[c]");
+        WRITE_CODE(numModelValues - 1, 8, "num_model_values_minus1[c]");
+        for (uint32_t interval = 0; interval<numIntensityIntervals; interval++)
         {
-          WRITE_CODE( sei.m_postLut[c][i].codedValue, (( sei.m_colourRemapBitDepth + 7 ) >> 3 ) << 3, "post_lut_coded_value[c][i]" );
-          WRITE_CODE( sei.m_postLut[c][i].targetValue, (( sei.m_colourRemapBitDepth + 7 ) >> 3 ) << 3, "post_lut_target_value[c][i]" );
+          const SEIFilmGrainCharacteristics::CompModelIntensityValues &cmiv = cm.intensityValues[interval];
+          WRITE_CODE(cmiv.intensityIntervalLowerBound, 8, "intensity_interval_lower_bound[c][i]");
+          WRITE_CODE(cmiv.intensityIntervalUpperBound, 8, "intensity_interval_upper_bound[c][i]");
+          assert(cmiv.compModelValue.size() == numModelValues);  // every interval must carry exactly numModelValues values
+          for (uint32_t j = 0; j<cm.numModelValues; j++)
+          {
+            WRITE_SVLC(cmiv.compModelValue[j], "comp_model_value[c][i]");  // NOTE(review): trace label says [c][i] although the value index here is j
+          }
         }
       }
-    }
-  }
+    } // for c
+    WRITE_FLAG(sei.m_filmGrainCharacteristicsPersistenceFlag, "film_grain_characteristics_persistence_flag");  // persistence flag is written last, after all component data
+  } // cancel flag
 }
 
-void SEIWriter::xWriteSEIMasteringDisplayColourVolume(const SEIMasteringDisplayColourVolume& sei)
+void SEIWriter::xWriteSEIContentLightLevelInfo(const SEIContentLightLevelInfo& sei)  // writes the two content light level fields unconditionally
 {
-  WRITE_CODE( sei.values.primaries[0][0],  16,  "display_primaries_x[0]" );
-  WRITE_CODE( sei.values.primaries[0][1],  16,  "display_primaries_y[0]" );
-
-  WRITE_CODE( sei.values.primaries[1][0],  16,  "display_primaries_x[1]" );
-  WRITE_CODE( sei.values.primaries[1][1],  16,  "display_primaries_y[1]" );
-
-  WRITE_CODE( sei.values.primaries[2][0],  16,  "display_primaries_x[2]" );
-  WRITE_CODE( sei.values.primaries[2][1],  16,  "display_primaries_y[2]" );
-
-  WRITE_CODE( sei.values.whitePoint[0],    16,  "white_point_x" );
-  WRITE_CODE( sei.values.whitePoint[1],    16,  "white_point_y" );
-
-  WRITE_CODE( sei.values.maxLuminance,     32,  "max_display_mastering_luminance" );
-  WRITE_CODE( sei.values.minLuminance,     32,  "min_display_mastering_luminance" );
+  WRITE_CODE( sei.m_maxContentLightLevel,    16, "max_content_light_level"     );  // 16-bit field
+  WRITE_CODE( sei.m_maxPicAverageLightLevel, 16, "max_pic_average_light_level" );  // 16-bit field
 }
 
-
-void SEIWriter::xWriteByteAlign()
+void SEIWriter::xWriteSEIAmbientViewingEnvironment(const SEIAmbientViewingEnvironment& sei)  // writes the three ambient viewing environment fields unconditionally
 {
-  if( m_pcBitIf->getNumberOfWrittenBits() % 8 != 0)
-  {
-    WRITE_FLAG( 1, "payload_bit_equal_to_one" );
-    while( m_pcBitIf->getNumberOfWrittenBits() % 8 != 0 )
-    {
-      WRITE_FLAG( 0, "payload_bit_equal_to_zero" );
-    }
-  }
+  WRITE_CODE(sei.m_ambientIlluminance, 32, "ambient_illuminance" );  // 32-bit field
+  WRITE_CODE(sei.m_ambientLightX,      16, "ambient_light_x" );      // 16-bit field
+  WRITE_CODE(sei.m_ambientLightY,      16, "ambient_light_y" );      // 16-bit field
 }
 
-#if U0033_ALTERNATIVE_TRANSFER_CHARACTERISTICS_SEI
-void SEIWriter::xWriteSEIAlternativeTransferCharacteristics(const SEIAlternativeTransferCharacteristics& sei)
+void SEIWriter::xWriteSEIContentColourVolume(const SEIContentColourVolume &sei)  // writes the content colour volume SEI; each optional value is gated by its own presence flag
 {
-  WRITE_CODE(sei.m_preferredTransferCharacteristics, 8, "preferred_transfer_characteristics");
-}
-#endif
+  WRITE_FLAG(sei.m_ccvCancelFlag, "ccv_cancel_flag");
+  if (!sei.m_ccvCancelFlag)  // nothing else is written when the cancel flag is set
+  {
+    WRITE_FLAG(sei.m_ccvPersistenceFlag, "ccv_persistence_flag");
+    WRITE_FLAG(sei.m_ccvPrimariesPresentFlag, "ccv_primaries_present_flag");
+    WRITE_FLAG(sei.m_ccvMinLuminanceValuePresentFlag, "ccv_min_luminance_value_present_flag");
+    WRITE_FLAG(sei.m_ccvMaxLuminanceValuePresentFlag, "ccv_max_luminance_value_present_flag");
+    WRITE_FLAG(sei.m_ccvAvgLuminanceValuePresentFlag, "ccv_avg_luminance_value_present_flag");
 
-void SEIWriter::xWriteSEIGreenMetadataInfo(const SEIGreenMetadataInfo& sei)
-{
-  WRITE_CODE(sei.m_greenMetadataType, 8, "green_metadata_type");
+    if (sei.m_ccvPrimariesPresentFlag == true)
+    {
+      for (int i = 0; i < MAX_NUM_COMPONENT; i++)  // one signed 32-bit x/y pair per component
+      {
+        WRITE_SCODE((int32_t)sei.m_ccvPrimariesX[i], 32, "ccv_primaries_x[i]");
+        WRITE_SCODE((int32_t)sei.m_ccvPrimariesY[i], 32, "ccv_primaries_y[i]");
+      }
+    }
 
-  WRITE_CODE(sei.m_xsdMetricType, 8, "xsd_metric_type");
-  WRITE_CODE(sei.m_xsdMetricValue, 16, "xsd_metric_value");
+    if (sei.m_ccvMinLuminanceValuePresentFlag == true)
+    {
+      WRITE_CODE((uint32_t)sei.m_ccvMinLuminanceValue, 32, "ccv_min_luminance_value");
+    }
+    if (sei.m_ccvMaxLuminanceValuePresentFlag == true)  // must match the max flag written above, otherwise the payload disagrees with its own flags
+    {
+      WRITE_CODE((uint32_t)sei.m_ccvMaxLuminanceValue, 32, "ccv_max_luminance_value");
+    }
+    if (sei.m_ccvAvgLuminanceValuePresentFlag == true)  // must match the avg flag written above
+    {
+      WRITE_CODE((uint32_t)sei.m_ccvAvgLuminanceValue, 32, "ccv_avg_luminance_value");
+    }
+  }
 }
 
 //! \}
diff --git a/source/Lib/EncoderLib/SEIwrite.h b/source/Lib/EncoderLib/SEIwrite.h
index f93a0192968930b97d220f5b6a51a5b733b156eb..77c97f36c60531cc93900e37418e9dab31f32393 100644
--- a/source/Lib/EncoderLib/SEIwrite.h
+++ b/source/Lib/EncoderLib/SEIwrite.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -49,39 +49,35 @@ public:
   SEIWriter() {};
   virtual ~SEIWriter() {};
 
-  void writeSEImessages(OutputBitstream& bs, const SEIMessages &seiList, const SPS *sps, bool isNested);
+  void writeSEImessages(OutputBitstream& bs, const SEIMessages &seiList, const SPS *sps, HRD &hrd, bool isNested, const uint32_t temporalId);
 
 protected:
   void xWriteSEIuserDataUnregistered(const SEIuserDataUnregistered &sei);
-  void xWriteSEIActiveParameterSets(const SEIActiveParameterSets& sei);
-  void xWriteSEIDecodingUnitInfo(const SEIDecodingUnitInfo& sei, const SPS *sps);
+  void xWriteSEIDecodingUnitInfo(const SEIDecodingUnitInfo& sei, const SEIBufferingPeriod& bp, const uint32_t temporalId);
   void xWriteSEIDecodedPictureHash(const SEIDecodedPictureHash& sei);
-  void xWriteSEIBufferingPeriod(const SEIBufferingPeriod& sei, const SPS *sps);
-  void xWriteSEIPictureTiming(const SEIPictureTiming& sei, const SPS *sps);
-  void xWriteSEIRecoveryPoint(const SEIRecoveryPoint& sei);
+  void xWriteSEIBufferingPeriod(const SEIBufferingPeriod& sei);
+  void xWriteSEIPictureTiming(const SEIPictureTiming& sei, const SEIBufferingPeriod& bp, const uint32_t temporalId);
+  void xWriteSEIFrameFieldInfo(const SEIFrameFieldInfo& sei);
+  void xWriteSEIDependentRAPIndication(const SEIDependentRAPIndication& sei);
   void xWriteSEIFramePacking(const SEIFramePacking& sei);
-  void xWriteSEISegmentedRectFramePacking(const SEISegmentedRectFramePacking& sei);
-  void xWriteSEIDisplayOrientation(const SEIDisplayOrientation &sei);
-  void xWriteSEITemporalLevel0Index(const SEITemporalLevel0Index &sei);
-  void xWriteSEIGradualDecodingRefreshInfo(const SEIGradualDecodingRefreshInfo &sei);
-  void xWriteSEINoDisplay(const SEINoDisplay &sei);
-  void xWriteSEIToneMappingInfo(const SEIToneMappingInfo& sei);
-  void xWriteSEISOPDescription(const SEISOPDescription& sei);
-  void xWriteSEIScalableNesting(OutputBitstream& bs, const SEIScalableNesting& sei, const SPS *sps);
-#if HEVC_TILES_WPP
-  void xWriteSEITempMotionConstrainedTileSets(const SEITempMotionConstrainedTileSets& sei);
-#endif
-  void xWriteSEITimeCode(const SEITimeCode& sei);
-  void xWriteSEIChromaResamplingFilterHint(const SEIChromaResamplingFilterHint& sei);
-  void xWriteSEIKneeFunctionInfo(const SEIKneeFunctionInfo &sei);
-  void xWriteSEIColourRemappingInfo(const SEIColourRemappingInfo& sei);
   void xWriteSEIMasteringDisplayColourVolume( const SEIMasteringDisplayColourVolume& sei);
 #if U0033_ALTERNATIVE_TRANSFER_CHARACTERISTICS_SEI
   void xWriteSEIAlternativeTransferCharacteristics(const SEIAlternativeTransferCharacteristics& sei);
 #endif
-  void xWriteSEIGreenMetadataInfo(const SEIGreenMetadataInfo &sei);
+  void xWriteSEIEquirectangularProjection         (const SEIEquirectangularProjection &sei);
+  void xWriteSEISphereRotation                    (const SEISphereRotation &sei);
+  void xWriteSEIOmniViewport                      (const SEIOmniViewport& sei);
+  void xWriteSEIRegionWisePacking                 (const SEIRegionWisePacking &sei);
+  void xWriteSEIGeneralizedCubemapProjection      (const SEIGeneralizedCubemapProjection &sei);
+  void xWriteSEISubpictureLevelInfo               (const SEISubpicureLevelInfo &sei, const SPS* sps);
+  void xWriteSEISampleAspectRatioInfo             (const SEISampleAspectRatioInfo &sei);
 
-  void xWriteSEIpayloadData(OutputBitstream& bs, const SEI& sei, const SPS *sps);
+  void xWriteSEIUserDataRegistered(const SEIUserDataRegistered& sei);
+  void xWriteSEIFilmGrainCharacteristics(const SEIFilmGrainCharacteristics& sei);
+  void xWriteSEIContentLightLevelInfo(const SEIContentLightLevelInfo& sei);
+  void xWriteSEIAmbientViewingEnvironment(const SEIAmbientViewingEnvironment& sei);
+  void xWriteSEIContentColourVolume(const SEIContentColourVolume &sei);
+  void xWriteSEIpayloadData(OutputBitstream& bs, const SEI& sei, const SPS *sps, HRD &hrd, const uint32_t temporalId);
   void xWriteByteAlign();
 };
 
diff --git a/source/Lib/EncoderLib/VLCWriter.cpp b/source/Lib/EncoderLib/VLCWriter.cpp
index ae81af9f9443f6563f4e755aa4395baf944be319..d00d64a66981ec399ad5b9ddf0eda4bdbd0f9406 100644
--- a/source/Lib/EncoderLib/VLCWriter.cpp
+++ b/source/Lib/EncoderLib/VLCWriter.cpp
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2019, ITU/ISO/IEC
+* Copyright (c) 2010-2020, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -50,6 +50,22 @@
 
 #if ENABLE_TRACING
 
+void  VLCWriter::xWriteSCodeTr (int value, uint32_t  length, const char *pSymbolName)  // writes a signed fixed-length code, then traces it when HLS tracing is enabled
+{
+  xWriteSCode (value,length);
+  if( g_HLSTraceEnable )
+  {
+    if( length<10 )  // the two format strings differ only in padding after the descriptor (single- vs double-digit length)
+    {
+      DTRACE( g_trace_ctx, D_HEADER, "%-50s u(%d)  : %d\n", pSymbolName, length, value );  // NOTE(review): prints the descriptor "u(n)" although the code is signed - confirm against the reader-side trace format before changing
+    }
+    else
+    {
+      DTRACE( g_trace_ctx, D_HEADER, "%-50s u(%d) : %d\n", pSymbolName, length, value );
+    }
+  }
+}
+
 void  VLCWriter::xWriteCodeTr (uint32_t value, uint32_t  length, const char *pSymbolName)
 {
   xWriteCode (value,length);
@@ -98,10 +114,16 @@ bool g_HLSTraceEnable = true;
 
 #endif
 
+void VLCWriter::xWriteSCode    ( int code, uint32_t length )  // writes 'code' as a two's-complement field of 'length' bits (1..32)
+{
+  assert ( length > 0 && length<=32 );
+  assert( length==32 || (code>=-(1<<(length-1)) && code<(1<<(length-1))) );  // value must be representable in 'length' signed bits
+  m_pcBitIf->write( length==32 ? uint32_t(code) : ( uint32_t(code)&((1u<<length)-1u) ), length );  // mask is built unsigned: (1<<31)-1 in int overflows when length is 31
+}
 
 void VLCWriter::xWriteCode     ( uint32_t uiCode, uint32_t uiLength )
 {
-  CHECK( uiLength == 0, "Code of lenght '0' not supported" );
+  CHECK( uiLength == 0, "Code of length '0' not supported" );  // a zero-length code is rejected before the write below
   m_pcBitIf->write( uiCode, uiLength );
 }
 
@@ -157,117 +179,235 @@ void AUDWriter::codeAUD(OutputBitstream& bs, const int pictureType)
   xWriteRbspTrailingBits();
 }
 
-void HLSWriter::xCodeShortTermRefPicSet( const ReferencePictureSet* rps, bool calledFromSliceHeader, int idx)
+void HLSWriter::xCodeRefPicList( const ReferencePictureList* rpl, bool isLongTermPresent, uint32_t ltLsbBitsCount, const bool isForbiddenZeroDeltaPoc )  // writes one reference picture list structure: the entry count, then per-entry flags and POC information
 {
-  //int lastBits = getNumberOfWrittenBits();
+  uint32_t numRefPic = rpl->getNumberOfShorttermPictures() + rpl->getNumberOfLongtermPictures() + rpl->getNumberOfInterLayerPictures();  // total of short-term, long-term and inter-layer entries
+  WRITE_UVLC( numRefPic, "num_ref_entries[ listIdx ][ rplsIdx ]" );
 
-  if (idx > 0)
+  if (isLongTermPresent)
   {
-    WRITE_FLAG( rps->getInterRPSPrediction(), "inter_ref_pic_set_prediction_flag" ); // inter_RPS_prediction_flag
+    WRITE_FLAG(rpl->getLtrpInSliceHeaderFlag(), "ltrp_in_slice_header_flag[ listIdx ][ rplsIdx ]");
   }
-  if (rps->getInterRPSPrediction())
+  int prevDelta = MAX_INT;  // placeholder only; overwritten by the first short-term entry before it is read
+  int deltaValue = 0;
+  bool firstSTRP = true;
+  for (int ii = 0; ii < numRefPic; ii++)  // NOTE(review): compares a signed index with the uint32_t count
   {
-    int deltaRPS = rps->getDeltaRPS();
-    if(calledFromSliceHeader)
+    if( rpl->getInterLayerPresentFlag() )
     {
-      WRITE_UVLC( rps->getDeltaRIdxMinus1(), "delta_idx_minus1" ); // delta index of the Reference Picture Set used for prediction minus 1
+      WRITE_FLAG( rpl->isInterLayerRefPic( ii ), "inter_layer_ref_pic_flag[ listIdx ][ rplsIdx ][ i ]" );
+
+      if( rpl->isInterLayerRefPic( ii ) )
+      {
+        CHECK( rpl->getInterLayerRefPicIdx( ii ) < 0, "Wrong inter-layer reference index" );
+        WRITE_UVLC( rpl->getInterLayerRefPicIdx( ii ), "ilrp_idx[ listIdx ][ rplsIdx ][ i ]" );
+      }
     }
 
-    WRITE_CODE( (deltaRPS >=0 ? 0: 1), 1, "delta_rps_sign" ); //delta_rps_sign
-    WRITE_UVLC( abs(deltaRPS) - 1, "abs_delta_rps_minus1"); // absolute delta RPS minus 1
+    if( !rpl->isInterLayerRefPic( ii ) )  // the short-/long-term syntax below is written only for entries that are not inter-layer references
+    {
+    if( isLongTermPresent )
+    {
+      WRITE_FLAG( !rpl->isRefPicLongterm( ii ), "st_ref_pic_flag[ listIdx ][ rplsIdx ][ i ]" );  // flag is the inverse of the long-term query
+    }
 
-    for(int j=0; j < rps->getNumRefIdc(); j++)
+    if (!rpl->isRefPicLongterm(ii))
     {
-      int refIdc = rps->getRefIdc(j);
-      WRITE_CODE( (refIdc==1? 1: 0), 1, "used_by_curr_pic_flag" ); //first bit is "1" if Idc is 1
-      if (refIdc != 1)
+      if (firstSTRP)
+      {
+        firstSTRP = false;
+        deltaValue = prevDelta = rpl->getRefPicIdentifier(ii);  // the first short-term entry is coded directly
+      }
+      else
       {
-        WRITE_CODE( refIdc>>1, 1, "use_delta_flag" ); //second bit is "1" if Idc is 2, "0" otherwise.
+        deltaValue = rpl->getRefPicIdentifier(ii) - prevDelta;  // later short-term entries are coded as the difference to the previous short-term identifier
+        prevDelta = rpl->getRefPicIdentifier(ii);
       }
+      unsigned int absDeltaValue = (deltaValue < 0) ? 0 - deltaValue : deltaValue;
+      if( isForbiddenZeroDeltaPoc )
+      {
+        CHECK( !absDeltaValue, "Zero delta POC is not used without WP" );
+        WRITE_UVLC( absDeltaValue - 1, "abs_delta_poc_st[ listIdx ][ rplsIdx ][ i ]" );  // zero is excluded in this mode, so the magnitude is coded minus one
+      }
+      else
+      WRITE_UVLC(absDeltaValue, "abs_delta_poc_st[ listIdx ][ rplsIdx ][ i ]");  // unbraced else branch: magnitude coded as-is
+      if (absDeltaValue > 0)  // no sign is sent for a zero delta
+        WRITE_FLAG((deltaValue < 0) ? 0 : 1, "strp_entry_sign_flag[ listIdx ][ rplsIdx ][ i ]");  //0  means negative delta POC : 1 means positive
     }
-  }
-  else
-  {
-    WRITE_UVLC( rps->getNumberOfNegativePictures(), "num_negative_pics" );
-    WRITE_UVLC( rps->getNumberOfPositivePictures(), "num_positive_pics" );
-    int prev = 0;
-    for(int j=0 ; j < rps->getNumberOfNegativePictures(); j++)
+    else if (!rpl->getLtrpInSliceHeaderFlag())  // long-term entries carry their POC LSBs here unless the flag says otherwise (presumably they are then sent in the slice header - confirm)
     {
-      WRITE_UVLC( prev-rps->getDeltaPOC(j)-1, "delta_poc_s0_minus1" );
-      prev = rps->getDeltaPOC(j);
-      WRITE_FLAG( rps->getUsed(j), "used_by_curr_pic_s0_flag");
+      WRITE_CODE(rpl->getRefPicIdentifier(ii), ltLsbBitsCount, "poc_lsb_lt[listIdx][rplsIdx][i]");
     }
-    prev = 0;
-    for(int j=rps->getNumberOfNegativePictures(); j < rps->getNumberOfNegativePictures()+rps->getNumberOfPositivePictures(); j++)
-    {
-      WRITE_UVLC( rps->getDeltaPOC(j)-prev-1, "delta_poc_s1_minus1" );
-      prev = rps->getDeltaPOC(j);
-      WRITE_FLAG( rps->getUsed(j), "used_by_curr_pic_s1_flag" );
     }
   }
-
-  //DTRACE( g_trace_ctx, D_RPSINFO, "irps=%d (%2d bits) ", rps->getInterRPSPrediction(), getNumberOfWrittenBits() - lastBits );
-  rps->printDeltaPOC();
 }
 
-void HLSWriter::codePPS( const PPS* pcPPS )
+void HLSWriter::codePPS( const PPS* pcPPS, const SPS* pcSPS )
 {
 #if ENABLE_TRACING
   xTracePPSHeader ();
 #endif
 
   WRITE_UVLC( pcPPS->getPPSId(),                             "pps_pic_parameter_set_id" );
-  WRITE_UVLC( pcPPS->getSPSId(),                             "pps_seq_parameter_set_id" );
-#if HEVC_DEPENDENT_SLICES
-  WRITE_FLAG( pcPPS->getDependentSliceSegmentsEnabledFlag()    ? 1 : 0, "dependent_slice_segments_enabled_flag" );
-#endif
+  WRITE_CODE( pcPPS->getSPSId(), 4,                          "pps_seq_parameter_set_id" );
+
+  WRITE_UVLC( pcPPS->getPicWidthInLumaSamples(), "pic_width_in_luma_samples" );
+  WRITE_UVLC( pcPPS->getPicHeightInLumaSamples(), "pic_height_in_luma_samples" );
+  Window conf = pcPPS->getConformanceWindow();
+
+  WRITE_FLAG( conf.getWindowEnabledFlag(), "conformance_window_flag" );
+  if( conf.getWindowEnabledFlag() )
+  {
+    WRITE_UVLC( conf.getWindowLeftOffset(),   "conf_win_left_offset" );
+    WRITE_UVLC( conf.getWindowRightOffset(),  "conf_win_right_offset" );
+    WRITE_UVLC( conf.getWindowTopOffset(),    "conf_win_top_offset" );
+    WRITE_UVLC( conf.getWindowBottomOffset(), "conf_win_bottom_offset" );
+  }
+  Window scalingWindow = pcPPS->getScalingWindow();
+
+  WRITE_FLAG( scalingWindow.getWindowEnabledFlag(), "scaling_window_flag" );
+  if( scalingWindow.getWindowEnabledFlag() )
+  {
+    WRITE_UVLC( scalingWindow.getWindowLeftOffset(), "scaling_win_left_offset" );
+    WRITE_UVLC( scalingWindow.getWindowRightOffset(), "scaling_win_right_offset" );
+    WRITE_UVLC( scalingWindow.getWindowTopOffset(), "scaling_win_top_offset" );
+    WRITE_UVLC( scalingWindow.getWindowBottomOffset(), "scaling_win_bottom_offset" );
+  }
+
   WRITE_FLAG( pcPPS->getOutputFlagPresentFlag() ? 1 : 0,     "output_flag_present_flag" );
-  WRITE_CODE( pcPPS->getNumExtraSliceHeaderBits(), 3,        "num_extra_slice_header_bits");
+  WRITE_FLAG(pcPPS->getSubPicIdSignallingPresentFlag(), "pps_subpic_id_signalling_present_flag");
+  if( pcPPS->getSubPicIdSignallingPresentFlag() )
+  {
+    WRITE_UVLC( pcPPS->getNumSubPics() - 1, "pps_num_subpics_minus1" );
+
+    WRITE_UVLC( pcPPS->getSubPicIdLen() - 1, "pps_subpic_id_len_minus1" );
+
+    for( int picIdx = 0; picIdx < pcPPS->getNumSubPics( ); picIdx++ )
+    {
+      WRITE_CODE( pcPPS->getSubPicId(picIdx), pcPPS->getSubPicIdLen( ), "pps_subpic_id[i]" );
+    }
+  }
+
+  WRITE_FLAG( pcPPS->getNoPicPartitionFlag( ) ? 1 : 0, "no_pic_partition_flag" );
+  if( !pcPPS->getNoPicPartitionFlag() )
+  {
+    int colIdx, rowIdx;
+
+    // CTU size - required to match size in SPS
+    WRITE_CODE( pcPPS->getLog2CtuSize() - 5, 2, "pps_log2_ctu_size_minus5" );
+    
+    // number of explicit tile columns/rows
+    WRITE_UVLC( pcPPS->getNumExpTileColumns() - 1, "num_exp_tile_columns_minus1" );
+    WRITE_UVLC( pcPPS->getNumExpTileRows() - 1,    "num_exp_tile_rows_minus1" );
+        
+    // tile sizes
+    for( colIdx = 0; colIdx < pcPPS->getNumExpTileColumns(); colIdx++ )
+    {
+      WRITE_UVLC( pcPPS->getTileColumnWidth( colIdx ) - 1, "tile_column_width_minus1[i]" );
+    }
+    for( rowIdx = 0; rowIdx < pcPPS->getNumExpTileRows(); rowIdx++ )
+    {
+      WRITE_UVLC( pcPPS->getTileRowHeight( rowIdx ) - 1, "tile_row_height_minus1[i]" );
+    }
+     
+    // rectangular slice signalling
+    WRITE_FLAG( pcPPS->getRectSliceFlag( ) ? 1 : 0, "rect_slice_flag");
+    if (pcPPS->getRectSliceFlag())
+    {
+      WRITE_FLAG(pcPPS->getSingleSlicePerSubPicFlag( ) ? 1 : 0, "single_slice_per_subpic_flag");
+    }
+    if (pcPPS->getRectSliceFlag() & !(pcPPS->getSingleSlicePerSubPicFlag()))
+    {      
+      WRITE_UVLC( pcPPS->getNumSlicesInPic( ) - 1, "num_slices_in_pic_minus1" );
+      WRITE_FLAG( pcPPS->getTileIdxDeltaPresentFlag( ) ? 1 : 0, "tile_idx_delta_present_flag");
+      
+      // write rectangular slice parameters
+      for( int i = 0; i < pcPPS->getNumSlicesInPic()-1; i++ )
+      {
+        // complete tiles within a single slice
+        WRITE_UVLC( pcPPS->getSliceWidthInTiles( i ) - 1,  "slice_width_in_tiles_minus1[i]" );
+#if JVET_Q0480_RASTER_RECT_SLICES
+        if( pcPPS->getTileIdxDeltaPresentFlag() || ( (pcPPS->getSliceTileIdx( i ) % pcPPS->getNumTileColumns()) == 0 ) )
+        {
+          WRITE_UVLC( pcPPS->getSliceHeightInTiles( i ) - 1, "slice_height_in_tiles_minus1[i]" );
+        }
+#else
+        WRITE_UVLC( pcPPS->getSliceHeightInTiles( i ) - 1, "slice_height_in_tiles_minus1[i]" );
+#endif
+
+        // multiple slices within a single tile special case
+        if( pcPPS->getSliceWidthInTiles( i ) == 1 && pcPPS->getSliceHeightInTiles( i ) == 1 ) 
+        {
+          WRITE_UVLC( pcPPS->getNumSlicesInTile( i ) - 1,  "num_slices_in_tile_minus1[i]" );
+          uint32_t numSlicesInTile = pcPPS->getNumSlicesInTile( i );
+          for( int j = 0; j < numSlicesInTile-1; j++ )
+          {
+            WRITE_UVLC( pcPPS->getSliceHeightInCtu( i ) - 1,  "slice_height_in_ctu_minus1[i]" );
+            i++;
+          }
+        }
+
+        // tile index offset to start of next slice
+        if( i < pcPPS->getNumSlicesInPic()-1 ) 
+        {
+          if( pcPPS->getTileIdxDeltaPresentFlag() ) 
+          {
+            int32_t  tileIdxDelta = pcPPS->getSliceTileIdx( i + 1 ) - pcPPS->getSliceTileIdx( i );
+            WRITE_SVLC( tileIdxDelta,  "tile_idx_delta[i]" );
+          }
+        }
+      }
+    }
+
+    // loop filtering across slice/tile controls
+    WRITE_FLAG( pcPPS->getLoopFilterAcrossTilesEnabledFlag(), "loop_filter_across_tiles_enabled_flag");
+    WRITE_FLAG( pcPPS->getLoopFilterAcrossSlicesEnabledFlag(), "loop_filter_across_slices_enabled_flag");
+  }
+
+  WRITE_FLAG( pcPPS->getEntropyCodingSyncEnabledFlag() ? 1 : 0, "entropy_coding_sync_enabled_flag" );
   WRITE_FLAG( pcPPS->getCabacInitPresentFlag() ? 1 : 0,   "cabac_init_present_flag" );
   WRITE_UVLC( pcPPS->getNumRefIdxL0DefaultActive()-1,     "num_ref_idx_l0_default_active_minus1");
   WRITE_UVLC( pcPPS->getNumRefIdxL1DefaultActive()-1,     "num_ref_idx_l1_default_active_minus1");
+  WRITE_FLAG(pcPPS->getRpl1IdxPresentFlag() ? 1 : 0, "rpl1IdxPresentFlag");
+
 
   WRITE_SVLC( pcPPS->getPicInitQPMinus26(),                  "init_qp_minus26");
-  WRITE_FLAG( pcPPS->getConstrainedIntraPred() ? 1 : 0,      "constrained_intra_pred_flag" );
-  WRITE_FLAG( pcPPS->getUseTransformSkip() ? 1 : 0,  "transform_skip_enabled_flag" );
+  WRITE_UVLC( pcPPS->getLog2MaxTransformSkipBlockSize() - 2, "log2_transform_skip_max_size_minus2");
   WRITE_FLAG( pcPPS->getUseDQP() ? 1 : 0, "cu_qp_delta_enabled_flag" );
-  if ( pcPPS->getUseDQP() )
-  {
-    WRITE_UVLC( pcPPS->getCuQpDeltaSubdiv(), "cu_qp_delta_subdiv" );
-  }
 
   WRITE_SVLC( pcPPS->getQpOffset(COMPONENT_Cb), "pps_cb_qp_offset" );
   WRITE_SVLC( pcPPS->getQpOffset(COMPONENT_Cr), "pps_cr_qp_offset" );
+  if (pcSPS->getJointCbCrEnabledFlag() == false || pcSPS->getChromaFormatIdc() == CHROMA_400)
+  {
+    CHECK(pcPPS->getJointCbCrQpOffsetPresentFlag(), "pps_jcbcr_qp_offset_present_flag should be false");
+  }
+  WRITE_FLAG(pcPPS->getJointCbCrQpOffsetPresentFlag() ? 1 : 0, "pps_joint_cbcr_qp_offset_present_flag");
+  if (pcPPS->getJointCbCrQpOffsetPresentFlag())
+  {
+    WRITE_SVLC(pcPPS->getQpOffset(JOINT_CbCr), "pps_joint_cbcr_qp_offset_value");
+  }
 
   WRITE_FLAG( pcPPS->getSliceChromaQpFlag() ? 1 : 0,          "pps_slice_chroma_qp_offsets_present_flag" );
 
-  WRITE_FLAG( pcPPS->getUseWP() ? 1 : 0,  "weighted_pred_flag" );   // Use of Weighting Prediction (P_SLICE)
-  WRITE_FLAG( pcPPS->getWPBiPred() ? 1 : 0, "weighted_bipred_flag" );  // Use of Weighting Bi-Prediction (B_SLICE)
-  WRITE_FLAG( pcPPS->getTransquantBypassEnabledFlag()  ? 1 : 0, "transquant_bypass_enabled_flag" );
-#if HEVC_TILES_WPP
-  WRITE_FLAG( pcPPS->getTilesEnabledFlag() ? 1 : 0, "tiles_enabled_flag" );
-  WRITE_FLAG( pcPPS->getEntropyCodingSyncEnabledFlag() ? 1 : 0, "entropy_coding_sync_enabled_flag" );
-  if( pcPPS->getTilesEnabledFlag() )
+  WRITE_FLAG(uint32_t(pcPPS->getCuChromaQpOffsetEnabledFlag()),         "cu_chroma_qp_offset_enabled_flag" );
+  if (pcPPS->getCuChromaQpOffsetEnabledFlag())
   {
-    WRITE_UVLC( pcPPS->getNumTileColumnsMinus1(),                                    "num_tile_columns_minus1" );
-    WRITE_UVLC( pcPPS->getNumTileRowsMinus1(),                                       "num_tile_rows_minus1" );
-    WRITE_FLAG( pcPPS->getTileUniformSpacingFlag(),                                  "uniform_spacing_flag" );
-    if( !pcPPS->getTileUniformSpacingFlag() )
+    WRITE_UVLC(pcPPS->getChromaQpOffsetListLen() - 1,                   "chroma_qp_offset_list_len_minus1");
+    /* skip zero index */
+    for (int cuChromaQpOffsetIdx = 0; cuChromaQpOffsetIdx < pcPPS->getChromaQpOffsetListLen(); cuChromaQpOffsetIdx++)
     {
-      for(uint32_t i=0; i<pcPPS->getNumTileColumnsMinus1(); i++)
-      {
-        WRITE_UVLC( pcPPS->getTileColumnWidth(i)-1,                                  "column_width_minus1" );
-      }
-      for(uint32_t i=0; i<pcPPS->getNumTileRowsMinus1(); i++)
+      WRITE_SVLC(pcPPS->getChromaQpOffsetListEntry(cuChromaQpOffsetIdx+1).u.comp.CbOffset,     "cb_qp_offset_list[i]");
+      WRITE_SVLC(pcPPS->getChromaQpOffsetListEntry(cuChromaQpOffsetIdx+1).u.comp.CrOffset,     "cr_qp_offset_list[i]");
+      if (pcPPS->getJointCbCrQpOffsetPresentFlag())
       {
-        WRITE_UVLC( pcPPS->getTileRowHeight(i)-1,                                    "row_height_minus1" );
+        WRITE_SVLC(pcPPS->getChromaQpOffsetListEntry(cuChromaQpOffsetIdx + 1).u.comp.JointCbCrOffset, "joint_cbcr_qp_offset_list[i]");
       }
     }
-    CHECK ((pcPPS->getNumTileColumnsMinus1() + pcPPS->getNumTileRowsMinus1()) == 0, "Invalid tile parameters read");
-    WRITE_FLAG( pcPPS->getLoopFilterAcrossTilesEnabledFlag()?1 : 0,       "loop_filter_across_tiles_enabled_flag");
   }
-#endif
-  WRITE_FLAG( pcPPS->getLoopFilterAcrossSlicesEnabledFlag()?1 : 0,        "pps_loop_filter_across_slices_enabled_flag");
+
+  WRITE_FLAG( pcPPS->getUseWP() ? 1 : 0,  "weighted_pred_flag" );   // Use of Weighting Prediction (P_SLICE)
+  WRITE_FLAG( pcPPS->getWPBiPred() ? 1 : 0, "weighted_bipred_flag" );  // Use of Weighting Bi-Prediction (B_SLICE)
+
   WRITE_FLAG( pcPPS->getDeblockingFilterControlPresentFlag()?1 : 0,       "deblocking_filter_control_present_flag");
   if(pcPPS->getDeblockingFilterControlPresentFlag())
   {
@@ -279,28 +419,23 @@ void HLSWriter::codePPS( const PPS* pcPPS )
       WRITE_SVLC( pcPPS->getDeblockingFilterTcOffsetDiv2(),               "pps_tc_offset_div2" );
     }
   }
-#if HEVC_USE_SCALING_LISTS
-  WRITE_FLAG( pcPPS->getScalingListPresentFlag() ? 1 : 0,                          "pps_scaling_list_data_present_flag" );
-  if( pcPPS->getScalingListPresentFlag() )
-  {
-    codeScalingList( pcPPS->getScalingList() );
+  WRITE_FLAG( pcPPS->getConstantSliceHeaderParamsEnabledFlag(),              "constant_slice_header_params_enabled_flag");
+  if ( pcPPS->getConstantSliceHeaderParamsEnabledFlag() ) {
+    WRITE_CODE( pcPPS->getPPSDepQuantEnabledIdc(), 2,                        "pps_dep_quant_enabled_idc");
+    WRITE_CODE( pcPPS->getPPSRefPicListSPSIdc0(), 2,                         "pps_ref_pic_list_sps_idc[0]");
+    WRITE_CODE( pcPPS->getPPSRefPicListSPSIdc1(), 2,                         "pps_ref_pic_list_sps_idc[1]");
+    WRITE_CODE( pcPPS->getPPSMvdL1ZeroIdc(), 2,                              "pps_mvd_l1_zero_idc");
+    WRITE_CODE( pcPPS->getPPSCollocatedFromL0Idc(), 2,                       "pps_collocated_from_l0_idc");
+    WRITE_UVLC( pcPPS->getPPSSixMinusMaxNumMergeCandPlus1(),                 "pps_six_minus_max_num_merge_cand_plus1");
+    WRITE_UVLC( pcPPS->getPPSMaxNumMergeCandMinusMaxNumTriangleCandPlus1(),  "pps_max_num_merge_cand_minus_max_num_triangle_cand_plus1");
   }
-#endif
-  WRITE_FLAG( pcPPS->getListsModificationPresentFlag(), "lists_modification_present_flag");
-  WRITE_UVLC( pcPPS->getLog2ParallelMergeLevelMinus2(), "log2_parallel_merge_level_minus2");
-  WRITE_FLAG( pcPPS->getSliceHeaderExtensionPresentFlag() ? 1 : 0, "slice_segment_header_extension_present_flag");
 
-  bool pps_extension_present_flag=false;
-  bool pps_extension_flags[NUM_PPS_EXTENSION_FLAGS]={false};
 
-  pps_extension_flags[PPS_EXT__REXT] = pcPPS->getPpsRangeExtension().settingsDifferFromDefaults(pcPPS->getUseTransformSkip());
+  WRITE_FLAG( pcPPS->getPictureHeaderExtensionPresentFlag() ? 1 : 0, "picture_header_extension_present_flag");
+  WRITE_FLAG( pcPPS->getSliceHeaderExtensionPresentFlag() ? 1 : 0, "slice_header_extension_present_flag");
 
-  // Other PPS extension flags checked here.
-
-  for(int i=0; i<NUM_PPS_EXTENSION_FLAGS; i++)
-  {
-    pps_extension_present_flag|=pps_extension_flags[i];
-  }
+  bool pps_extension_present_flag=false;
+  bool pps_extension_flags[NUM_PPS_EXTENSION_FLAGS]={false};
 
   WRITE_FLAG( (pps_extension_present_flag?1:0), "pps_extension_present_flag" );
 
@@ -331,26 +466,9 @@ void HLSWriter::codePPS( const PPS* pcPPS )
         case PPS_EXT__REXT:
         {
           const PPSRExt &ppsRangeExtension = pcPPS->getPpsRangeExtension();
-          if (pcPPS->getUseTransformSkip())
-          {
-            WRITE_UVLC( ppsRangeExtension.getLog2MaxTransformSkipBlockSize()-2,            "log2_max_transform_skip_block_size_minus2");
-          }
 
           WRITE_FLAG((ppsRangeExtension.getCrossComponentPredictionEnabledFlag() ? 1 : 0), "cross_component_prediction_enabled_flag" );
 
-          WRITE_FLAG(uint32_t(ppsRangeExtension.getChromaQpOffsetListEnabledFlag()),           "chroma_qp_offset_list_enabled_flag" );
-          if (ppsRangeExtension.getChromaQpOffsetListEnabledFlag())
-          {
-            WRITE_UVLC(ppsRangeExtension.getCuChromaQpOffsetSubdiv(),                      "cu_chroma_qp_offset_subdiv");
-            WRITE_UVLC(ppsRangeExtension.getChromaQpOffsetListLen() - 1,                   "chroma_qp_offset_list_len_minus1");
-            /* skip zero index */
-            for (int cuChromaQpOffsetIdx = 0; cuChromaQpOffsetIdx < ppsRangeExtension.getChromaQpOffsetListLen(); cuChromaQpOffsetIdx++)
-            {
-              WRITE_SVLC(ppsRangeExtension.getChromaQpOffsetListEntry(cuChromaQpOffsetIdx+1).u.comp.CbOffset,     "cb_qp_offset_list[i]");
-              WRITE_SVLC(ppsRangeExtension.getChromaQpOffsetListEntry(cuChromaQpOffsetIdx+1).u.comp.CrOffset,     "cr_qp_offset_list[i]");
-            }
-          }
-
           WRITE_UVLC( ppsRangeExtension.getLog2SaoOffsetScale(CHANNEL_TYPE_LUMA),           "log2_sao_offset_scale_luma"   );
           WRITE_UVLC( ppsRangeExtension.getLog2SaoOffsetScale(CHANNEL_TYPE_CHROMA),         "log2_sao_offset_scale_chroma" );
         }
@@ -365,43 +483,121 @@ void HLSWriter::codePPS( const PPS* pcPPS )
   xWriteRbspTrailingBits();
 }
 
-void HLSWriter::codeAPS( APS* pcAPS)
+void HLSWriter::codeAPS( APS* pcAPS )  // Writes one APS: id, type, type-specific payload, extension flag, then RBSP trailing bits.
 {
 #if ENABLE_TRACING
   xTraceAPSHeader();
 #endif
 
-  AlfSliceParam param = pcAPS->getAlfAPSParam();
   WRITE_CODE(pcAPS->getAPSId(), 5, "adaptation_parameter_set_id");
+  WRITE_CODE( (int)pcAPS->getAPSType(), 3, "aps_params_type" );  // 3-bit type; selects which payload writer runs below
+
+  if (pcAPS->getAPSType() == ALF_APS)
+  {
+    codeAlfAps(pcAPS);
+  }
+  else if (pcAPS->getAPSType() == LMCS_APS)
+  {
+    codeLmcsAps (pcAPS);
+  }
+  else if( pcAPS->getAPSType() == SCALING_LIST_APS )
+  {
+    codeScalingListAps( pcAPS );
+  }  // any other APS type gets no payload
+  WRITE_FLAG(0, "aps_extension_flag");   // Always written as 0: the spec currently defines no case where this flag is 1; add the extension writing here if one is ever needed
+  xWriteRbspTrailingBits();
+}
+
+void HLSWriter::codeAlfAps( APS* pcAPS )  // Writes the ALF payload of an APS: new-filter flags, then the luma filter set and the chroma alternatives when flagged.
+{
+  AlfParam param = pcAPS->getAlfAPSParam();
+
+  WRITE_FLAG(param.newFilterFlag[CHANNEL_TYPE_LUMA], "alf_luma_new_filter");
+  WRITE_FLAG(param.newFilterFlag[CHANNEL_TYPE_CHROMA], "alf_chroma_new_filter");
+
+  if (param.newFilterFlag[CHANNEL_TYPE_LUMA])
+  {
+#if JVET_Q0249_ALF_CHROMA_CLIPFLAG
+    WRITE_FLAG( param.nonLinearFlag[CHANNEL_TYPE_LUMA], "alf_luma_clip" );
+#else
+    WRITE_FLAG( param.nonLinearFlag[CHANNEL_TYPE_LUMA][0], "alf_luma_clip" );
+#endif
 
-  const int alfChromaIdc = param.enabledFlag[COMPONENT_Cb] * 2 + param.enabledFlag[COMPONENT_Cr];
-  truncatedUnaryEqProb(alfChromaIdc, 3);   // alf_chroma_idc
+    WRITE_UVLC(param.numLumaFilters - 1, "alf_luma_num_filters_signalled_minus1");
+    if (param.numLumaFilters > 1)  // with a single filter no per-class index is written
+    {
+      const int length =  ceilLog2( param.numLumaFilters);  // fixed bit width used for every delta index below
+      for (int i = 0; i < MAX_NUM_ALF_CLASSES; i++)
+      {
+        WRITE_CODE(param.filterCoeffDeltaIdx[i], length, "alf_luma_coeff_delta_idx" );
+      }
+    }
+    alfFilter(param, false, 0);  // luma call; presumably (isChroma=false, altIdx=0) - confirm against alfFilter's signature
 
-  xWriteTruncBinCode(param.numLumaFilters - 1, MAX_NUM_ALF_CLASSES);  //number_of_filters_minus1
-  if (param.numLumaFilters > 1)
+  }
+  if (param.newFilterFlag[CHANNEL_TYPE_CHROMA])
   {
-    for (int i = 0; i < MAX_NUM_ALF_CLASSES; i++)
+#if JVET_Q0249_ALF_CHROMA_CLIPFLAG
+    WRITE_FLAG(param.nonLinearFlag[CHANNEL_TYPE_CHROMA], "alf_nonlinear_enable_flag_chroma");
+#endif
+    if( MAX_NUM_ALF_ALTERNATIVES_CHROMA > 1 )
+      WRITE_UVLC( param.numAlternativesChroma - 1, "alf_chroma_num_alts_minus1" );  // count is written only when more than one alternative is possible
+    for( int altIdx=0; altIdx < param.numAlternativesChroma; ++altIdx )
     {
-      xWriteTruncBinCode((uint32_t)param.filterCoeffDeltaIdx[i], param.numLumaFilters);  //filter_coeff_delta[i]
+#if !JVET_Q0249_ALF_CHROMA_CLIPFLAG
+      WRITE_FLAG( param.nonLinearFlag[CHANNEL_TYPE_CHROMA][altIdx], "alf_nonlinear_enable_flag_chroma" );  // without Q0249 the flag is written once per alternative
+#endif
+      alfFilter(param, true, altIdx);  // one call per chroma alternative
     }
   }
+}
 
-  alfFilter(param, false);
+void HLSWriter::codeLmcsAps( APS* pcAPS )  // Writes the LMCS payload of an APS: bin range, per-bin codeword deltas, then the chrResScalingOffset value.
+{
+  SliceReshapeInfo param = pcAPS->getReshaperAPSInfo();
+  WRITE_UVLC(param.reshaperModelMinBinIdx, "lmcs_min_bin_idx");
+  WRITE_UVLC(PIC_CODE_CW_BINS - 1 - param.reshaperModelMaxBinIdx, "lmcs_delta_max_bin_idx");  // coded as distance from the last bin index
+  assert(param.maxNbitsNeededDeltaCW > 0);  // the precision is written minus 1 on the next line, so it must be at least 1
+  WRITE_UVLC(param.maxNbitsNeededDeltaCW - 1, "lmcs_delta_cw_prec_minus1");
 
-  if (alfChromaIdc)
+  for (int i = param.reshaperModelMinBinIdx; i <= param.reshaperModelMaxBinIdx; i++)
+  {
+    int deltaCW = param.reshaperModelBinCWDelta[i];
+    int signCW = (deltaCW < 0) ? 1 : 0;
+    int absCW = (deltaCW < 0) ? (-deltaCW) : deltaCW;
+    WRITE_CODE(absCW, param.maxNbitsNeededDeltaCW, "lmcs_delta_abs_cw[ i ]");  // magnitude in maxNbitsNeededDeltaCW bits
+    if (absCW > 0)
+    {
+      WRITE_FLAG(signCW, "lmcs_delta_sign_cw_flag[ i ]");  // sign (1 = negative) is written only for non-zero deltas
+    }
+  }
+  int deltaCRS = param.chrResScalingOffset;
+  int signCRS = (deltaCRS < 0) ? 1 : 0;
+  int absCRS = (deltaCRS < 0) ? (-deltaCRS) : deltaCRS;
+  WRITE_CODE(absCRS, 3, "lmcs_delta_crs_val");  // magnitude in 3 bits; sign follows only when non-zero
+  if (absCRS > 0)
   {
-    alfFilter(param, true);
+    WRITE_FLAG(signCRS, "lmcs_delta_crs_val_flag");
   }
-  xWriteRbspTrailingBits();
 }
+
+void HLSWriter::codeScalingListAps( APS* pcAPS )  // Writes the scaling-list payload of an APS by delegating to codeScalingList().
+{
+  ScalingList param = pcAPS->getScalingList();  // held by value in a local before being passed on
+  codeScalingList( param );
+}
+
 void HLSWriter::codeVUI( const VUI *pcVUI, const SPS* pcSPS )
 {
 #if ENABLE_TRACING
   DTRACE( g_trace_ctx, D_HEADER, "----------- vui_parameters -----------\n");
 #endif
+
+
   WRITE_FLAG(pcVUI->getAspectRatioInfoPresentFlag(),            "aspect_ratio_info_present_flag");
   if (pcVUI->getAspectRatioInfoPresentFlag())
   {
+    WRITE_FLAG(pcVUI->getAspectRatioConstantFlag(),             "aspect_ratio_constant_flag");
     WRITE_CODE(pcVUI->getAspectRatioIdc(), 8,                   "aspect_ratio_idc" );
     if (pcVUI->getAspectRatioIdc() == 255)
     {
@@ -409,108 +605,52 @@ void HLSWriter::codeVUI( const VUI *pcVUI, const SPS* pcSPS )
       WRITE_CODE(pcVUI->getSarHeight(), 16,                     "sar_height");
     }
   }
-  WRITE_FLAG(pcVUI->getOverscanInfoPresentFlag(),               "overscan_info_present_flag");
-  if (pcVUI->getOverscanInfoPresentFlag())
-  {
-    WRITE_FLAG(pcVUI->getOverscanAppropriateFlag(),             "overscan_appropriate_flag");
-  }
-  WRITE_FLAG(pcVUI->getVideoSignalTypePresentFlag(),            "video_signal_type_present_flag");
-  if (pcVUI->getVideoSignalTypePresentFlag())
+  WRITE_FLAG(pcVUI->getColourDescriptionPresentFlag(),        "colour_description_present_flag");
+  if (pcVUI->getColourDescriptionPresentFlag())
   {
-    WRITE_CODE(pcVUI->getVideoFormat(), 3,                      "video_format");
-    WRITE_FLAG(pcVUI->getVideoFullRangeFlag(),                  "video_full_range_flag");
-    WRITE_FLAG(pcVUI->getColourDescriptionPresentFlag(),        "colour_description_present_flag");
-    if (pcVUI->getColourDescriptionPresentFlag())
-    {
-      WRITE_CODE(pcVUI->getColourPrimaries(), 8,                "colour_primaries");
-      WRITE_CODE(pcVUI->getTransferCharacteristics(), 8,        "transfer_characteristics");
-      WRITE_CODE(pcVUI->getMatrixCoefficients(), 8,             "matrix_coeffs");
-    }
+    WRITE_CODE(pcVUI->getColourPrimaries(), 8,                "colour_primaries");
+    WRITE_CODE(pcVUI->getTransferCharacteristics(), 8,        "transfer_characteristics");
+    WRITE_CODE(pcVUI->getMatrixCoefficients(), 8,             "matrix_coeffs");
+    WRITE_FLAG(pcVUI->getVideoFullRangeFlag(),                "video_full_range_flag");
   }
-
+  WRITE_FLAG(pcVUI->getFieldSeqFlag(),                          "field_seq_flag");
   WRITE_FLAG(pcVUI->getChromaLocInfoPresentFlag(),              "chroma_loc_info_present_flag");
   if (pcVUI->getChromaLocInfoPresentFlag())
   {
-    WRITE_UVLC(pcVUI->getChromaSampleLocTypeTopField(),         "chroma_sample_loc_type_top_field");
-    WRITE_UVLC(pcVUI->getChromaSampleLocTypeBottomField(),      "chroma_sample_loc_type_bottom_field");
-  }
-
-  WRITE_FLAG(pcVUI->getNeutralChromaIndicationFlag(),           "neutral_chroma_indication_flag");
-  WRITE_FLAG(pcVUI->getFieldSeqFlag(),                          "field_seq_flag");
-  WRITE_FLAG(pcVUI->getFrameFieldInfoPresentFlag(),             "frame_field_info_present_flag");
-
-  Window defaultDisplayWindow = pcVUI->getDefaultDisplayWindow();
-  WRITE_FLAG(defaultDisplayWindow.getWindowEnabledFlag(),       "default_display_window_flag");
-  if( defaultDisplayWindow.getWindowEnabledFlag() )
-  {
-    WRITE_UVLC(defaultDisplayWindow.getWindowLeftOffset()  / SPS::getWinUnitX(pcSPS->getChromaFormatIdc()), "def_disp_win_left_offset");
-    WRITE_UVLC(defaultDisplayWindow.getWindowRightOffset() / SPS::getWinUnitX(pcSPS->getChromaFormatIdc()), "def_disp_win_right_offset");
-    WRITE_UVLC(defaultDisplayWindow.getWindowTopOffset()   / SPS::getWinUnitY(pcSPS->getChromaFormatIdc()), "def_disp_win_top_offset");
-    WRITE_UVLC(defaultDisplayWindow.getWindowBottomOffset()/ SPS::getWinUnitY(pcSPS->getChromaFormatIdc()), "def_disp_win_bottom_offset");
-  }
-  const TimingInfo *timingInfo = pcVUI->getTimingInfo();
-  WRITE_FLAG(timingInfo->getTimingInfoPresentFlag(),          "vui_timing_info_present_flag");
-  if(timingInfo->getTimingInfoPresentFlag())
-  {
-    WRITE_CODE(timingInfo->getNumUnitsInTick(), 32,           "vui_num_units_in_tick");
-    WRITE_CODE(timingInfo->getTimeScale(),      32,           "vui_time_scale");
-    WRITE_FLAG(timingInfo->getPocProportionalToTimingFlag(),  "vui_poc_proportional_to_timing_flag");
-    if(timingInfo->getPocProportionalToTimingFlag())
+    if(pcVUI->getFieldSeqFlag())
     {
-      WRITE_UVLC(timingInfo->getNumTicksPocDiffOneMinus1(),   "vui_num_ticks_poc_diff_one_minus1");
+      WRITE_UVLC(pcVUI->getChromaSampleLocTypeTopField(),         "chroma_sample_loc_type_top_field");
+      WRITE_UVLC(pcVUI->getChromaSampleLocTypeBottomField(),      "chroma_sample_loc_type_bottom_field");
     }
-    WRITE_FLAG(pcVUI->getHrdParametersPresentFlag(),              "vui_hrd_parameters_present_flag");
-    if( pcVUI->getHrdParametersPresentFlag() )
+    else
     {
-      codeHrdParameters(pcVUI->getHrdParameters(), 1, pcSPS->getMaxTLayers() - 1 );
+      WRITE_UVLC(pcVUI->getChromaSampleLocType(),         "chroma_sample_loc_type");
     }
   }
-
-  WRITE_FLAG(pcVUI->getBitstreamRestrictionFlag(),              "bitstream_restriction_flag");
-  if (pcVUI->getBitstreamRestrictionFlag())
+  WRITE_FLAG(pcVUI->getOverscanInfoPresentFlag(),               "overscan_info_present_flag");
+  if (pcVUI->getOverscanInfoPresentFlag())
   {
-#if HEVC_TILES_WPP
-    WRITE_FLAG(pcVUI->getTilesFixedStructureFlag(),             "tiles_fixed_structure_flag");
-#endif
-    WRITE_FLAG(pcVUI->getMotionVectorsOverPicBoundariesFlag(),  "motion_vectors_over_pic_boundaries_flag");
-    WRITE_FLAG(pcVUI->getRestrictedRefPicListsFlag(),           "restricted_ref_pic_lists_flag");
-    WRITE_UVLC(pcVUI->getMinSpatialSegmentationIdc(),           "min_spatial_segmentation_idc");
-    WRITE_UVLC(pcVUI->getMaxBytesPerPicDenom(),                 "max_bytes_per_pic_denom");
-    WRITE_UVLC(pcVUI->getMaxBitsPerMinCuDenom(),                "max_bits_per_min_cu_denom");
-    WRITE_UVLC(pcVUI->getLog2MaxMvLengthHorizontal(),           "log2_max_mv_length_horizontal");
-    WRITE_UVLC(pcVUI->getLog2MaxMvLengthVertical(),             "log2_max_mv_length_vertical");
+    WRITE_FLAG(pcVUI->getOverscanAppropriateFlag(),             "overscan_appropriate_flag");
   }
 }
 
-void HLSWriter::codeHrdParameters( const HRD *hrd, bool commonInfPresentFlag, uint32_t maxNumSubLayersMinus1 )
+void HLSWriter::codeHrdParameters( const HRDParameters *hrd, const uint32_t firstSubLayer, const uint32_t maxNumSubLayersMinus1)
 {
-  if( commonInfPresentFlag )
+  WRITE_FLAG( hrd->getNalHrdParametersPresentFlag() ? 1 : 0 ,  "general_nal_hrd_parameters_present_flag" );
+  WRITE_FLAG( hrd->getVclHrdParametersPresentFlag() ? 1 : 0 ,  "general_vcl_hrd_parameters_present_flag" );
+  WRITE_FLAG( hrd->getGeneralDecodingUnitHrdParamsPresentFlag() ? 1 : 0,  "general_decoding_unit_hrd_params_present_flag" );
+  if( hrd->getGeneralDecodingUnitHrdParamsPresentFlag() )
   {
-    WRITE_FLAG( hrd->getNalHrdParametersPresentFlag() ? 1 : 0 ,  "nal_hrd_parameters_present_flag" );
-    WRITE_FLAG( hrd->getVclHrdParametersPresentFlag() ? 1 : 0 ,  "vcl_hrd_parameters_present_flag" );
-    if( hrd->getNalHrdParametersPresentFlag() || hrd->getVclHrdParametersPresentFlag() )
-    {
-      WRITE_FLAG( hrd->getSubPicCpbParamsPresentFlag() ? 1 : 0,  "sub_pic_hrd_params_present_flag" );
-      if( hrd->getSubPicCpbParamsPresentFlag() )
-      {
-        WRITE_CODE( hrd->getTickDivisorMinus2(), 8,              "tick_divisor_minus2" );
-        WRITE_CODE( hrd->getDuCpbRemovalDelayLengthMinus1(), 5,  "du_cpb_removal_delay_increment_length_minus1" );
-        WRITE_FLAG( hrd->getSubPicCpbParamsInPicTimingSEIFlag() ? 1 : 0, "sub_pic_cpb_params_in_pic_timing_sei_flag" );
-        WRITE_CODE( hrd->getDpbOutputDelayDuLengthMinus1(), 5,   "dpb_output_delay_du_length_minus1"  );
-      }
-      WRITE_CODE( hrd->getBitRateScale(), 4,                     "bit_rate_scale" );
-      WRITE_CODE( hrd->getCpbSizeScale(), 4,                     "cpb_size_scale" );
-      if( hrd->getSubPicCpbParamsPresentFlag() )
-      {
-        WRITE_CODE( hrd->getDuCpbSizeScale(), 4,                "du_cpb_size_scale" );
-      }
-      WRITE_CODE( hrd->getInitialCpbRemovalDelayLengthMinus1(), 5, "initial_cpb_removal_delay_length_minus1" );
-      WRITE_CODE( hrd->getCpbRemovalDelayLengthMinus1(),        5, "au_cpb_removal_delay_length_minus1" );
-      WRITE_CODE( hrd->getDpbOutputDelayLengthMinus1(),         5, "dpb_output_delay_length_minus1" );
-    }
+    WRITE_CODE( hrd->getTickDivisorMinus2(), 8,            "tick_divisor_minus2" );
+  }
+  WRITE_CODE( hrd->getBitRateScale(), 4,                     "bit_rate_scale" );
+  WRITE_CODE( hrd->getCpbSizeScale(), 4,                     "cpb_size_scale" );
+  if( hrd->getGeneralDecodingUnitHrdParamsPresentFlag() )
+  {
+    WRITE_CODE( hrd->getCpbSizeDuScale(), 4,               "cpb_size_du_scale" );
   }
-  int i, j, nalOrVcl;
-  for( i = 0; i <= maxNumSubLayersMinus1; i ++ )
+
+  for( int i = firstSubLayer; i <= maxNumSubLayersMinus1; i ++ )
   {
     WRITE_FLAG( hrd->getFixedPicRateFlag( i ) ? 1 : 0,          "fixed_pic_rate_general_flag");
     bool fixedPixRateWithinCvsFlag = true;
@@ -532,20 +672,15 @@ void HLSWriter::codeHrdParameters( const HRD *hrd, bool commonInfPresentFlag, ui
       WRITE_UVLC( hrd->getCpbCntMinus1( i ),                      "cpb_cnt_minus1");
     }
 
-    for( nalOrVcl = 0; nalOrVcl < 2; nalOrVcl ++ )
+    for( int nalOrVcl = 0; nalOrVcl < 2; nalOrVcl ++ )
     {
       if( ( ( nalOrVcl == 0 ) && ( hrd->getNalHrdParametersPresentFlag() ) ) ||
           ( ( nalOrVcl == 1 ) && ( hrd->getVclHrdParametersPresentFlag() ) ) )
       {
-        for( j = 0; j <= ( hrd->getCpbCntMinus1( i ) ); j ++ )
+        for( int j = 0; j <= ( hrd->getCpbCntMinus1( i ) ); j ++ )
         {
           WRITE_UVLC( hrd->getBitRateValueMinus1( i, j, nalOrVcl ), "bit_rate_value_minus1");
           WRITE_UVLC( hrd->getCpbSizeValueMinus1( i, j, nalOrVcl ), "cpb_size_value_minus1");
-          if( hrd->getSubPicCpbParamsPresentFlag() )
-          {
-            WRITE_UVLC( hrd->getDuCpbSizeValueMinus1( i, j, nalOrVcl ), "cpb_size_du_value_minus1");
-            WRITE_UVLC( hrd->getDuBitRateValueMinus1( i, j, nalOrVcl ), "bit_rate_du_value_minus1");
-          }
           WRITE_FLAG( hrd->getCbrFlag( i, j, nalOrVcl ) ? 1 : 0, "cbr_flag");
         }
       }
@@ -553,91 +688,25 @@ void HLSWriter::codeHrdParameters( const HRD *hrd, bool commonInfPresentFlag, ui
   }
 }
 
-void HLSWriter::codeReshaper(const SliceReshapeInfo& pSliceReshaperInfo, const SPS* pcSPS, const bool isIntra)
-{
-  WRITE_FLAG(pSliceReshaperInfo.getSliceReshapeModelPresentFlag() ? 1 : 0, "tile_group_reshaper_model_present_flag");
-  if (pSliceReshaperInfo.getSliceReshapeModelPresentFlag())
-  {
-    WRITE_UVLC(pSliceReshaperInfo.reshaperModelMinBinIdx, "reshaper_model_min_bin_idx");
-    WRITE_UVLC(PIC_CODE_CW_BINS - 1 - pSliceReshaperInfo.reshaperModelMaxBinIdx, "reshaper_model_delta_max_bin_idx");
-    assert(pSliceReshaperInfo.maxNbitsNeededDeltaCW > 0);
-    WRITE_UVLC(pSliceReshaperInfo.maxNbitsNeededDeltaCW - 1, "reshaper_model_bin_delta_abs_cw_prec_minus1");
-
-    for (int i = pSliceReshaperInfo.reshaperModelMinBinIdx; i <= pSliceReshaperInfo.reshaperModelMaxBinIdx; i++)
-    {
-      int deltaCW = pSliceReshaperInfo.reshaperModelBinCWDelta[i];
-      int signCW = (deltaCW < 0) ? 1 : 0;
-      int absCW = (deltaCW < 0) ? (-deltaCW) : deltaCW;
-      WRITE_CODE(absCW, pSliceReshaperInfo.maxNbitsNeededDeltaCW, "reshaper_model_bin_delta_abs_CW");
-      if (absCW > 0)
-      {
-        WRITE_FLAG(signCW, "reshaper_model_bin_delta_sign_CW_flag");
-      }
-    }
-  }
-
-  WRITE_FLAG(pSliceReshaperInfo.getUseSliceReshaper() ? 1 : 0, "tile_group_reshaper_enable_flag");
-
-  if (!pSliceReshaperInfo.getUseSliceReshaper())
-    return;
-
-  if (!(pcSPS->getUseDualITree() && isIntra))
-    WRITE_FLAG(pSliceReshaperInfo.getSliceReshapeChromaAdj(), "tile_group_reshaper_chroma_residual_scale_flag");
-};
 
 void HLSWriter::codeSPS( const SPS* pcSPS )
 {
 #if ENABLE_TRACING
   xTraceSPSHeader ();
 #endif
-#if HEVC_VPS
-  WRITE_CODE( pcSPS->getVPSId (),          4,       "sps_video_parameter_set_id" );
-#endif
-#if !JVET_M0101_HLS
-  WRITE_UVLC( pcSPS->getSPSId (),                   "sps_seq_parameter_set_id" );
-
-  WRITE_FLAG(pcSPS->getIntraOnlyConstraintFlag() ? 1 : 0, "intra_only_constraint_flag");
-  WRITE_CODE(pcSPS->getMaxBitDepthConstraintIdc(), 4, "max_bitdepth_constraint_idc");
-  WRITE_CODE(pcSPS->getMaxChromaFormatConstraintIdc(), 2, "max_chroma_format_constraint_idc");
-  WRITE_FLAG(pcSPS->getFrameConstraintFlag() ? 1 : 0, "frame_only_constraint_flag");
-  WRITE_FLAG(pcSPS->getNoQtbttDualTreeIntraConstraintFlag() ? 1 : 0, "no_qtbtt_dual_tree_intra_constraint_flag");
-  WRITE_FLAG(pcSPS->getNoSaoConstraintFlag() ? 1 : 0, "no_sao_constraint_flag");
-  WRITE_FLAG(pcSPS->getNoAlfConstraintFlag() ? 1 : 0, "no_alf_constraint_flag");
-  WRITE_FLAG(pcSPS->getNoPcmConstraintFlag() ? 1 : 0, "no_pcm_constraint_flag");
-  WRITE_FLAG(pcSPS->getNoRefWraparoundConstraintFlag() ? 1 : 0, "no_ref_wraparound_constraint_flag");
-  WRITE_FLAG(pcSPS->getNoTemporalMvpConstraintFlag() ? 1 : 0, "no_temporal_mvp_constraint_flag");
-  WRITE_FLAG(pcSPS->getNoSbtmvpConstraintFlag() ? 1 : 0, "no_sbtmvp_constraint_flag");
-  WRITE_FLAG(pcSPS->getNoAmvrConstraintFlag() ? 1 : 0, "no_amvr_constraint_flag");
-  WRITE_FLAG(pcSPS->getNoBdofConstraintFlag() ? 1 : 0, "no_bdof_constraint_flag");
-  WRITE_FLAG(pcSPS->getNoCclmConstraintFlag() ? 1 : 0, "no_cclm_constraint_flag");
-  WRITE_FLAG(pcSPS->getNoMtsConstraintFlag() ? 1 : 0, "no_mts_constraint_flag");
-  WRITE_FLAG(pcSPS->getNoAffineMotionConstraintFlag() ? 1 : 0, "no_affine_motion_constraint_flag");
-  WRITE_FLAG(pcSPS->getNoGbiConstraintFlag() ? 1 : 0, "no_gbi_constraint_flag");
-  WRITE_FLAG(pcSPS->getNoMhIntraConstraintFlag() ? 1 : 0, "no_mh_intra_constraint_flag");
-  WRITE_FLAG(pcSPS->getNoTriangleConstraintFlag() ? 1 : 0, "no_triangle_constraint_flag");
-  WRITE_FLAG(pcSPS->getNoLadfConstraintFlag() ? 1 : 0, "no_ladf_constraint_flag");
-  WRITE_FLAG(pcSPS->getNoCurrPicRefConstraintFlag() ? 1 : 0, "no_curr_pic_ref_constraint_flag");
-  WRITE_FLAG(pcSPS->getNoQpDeltaConstraintFlag() ? 1 : 0, "no_qp_delta_constraint_flag");
-  WRITE_FLAG(pcSPS->getNoDepQuantConstraintFlag() ? 1 : 0, "no_dep_quant_constraint_flag");
-  WRITE_FLAG(pcSPS->getNoSignDataHidingConstraintFlag() ? 1 : 0, "no_sign_data_hiding_constraint_flag");
-
-  CHECK( pcSPS->getMaxTLayers() == 0, "Maximum number of temporal sub-layers is '0'" );
-  WRITE_CODE( pcSPS->getMaxTLayers() - 1,  3,       "sps_max_sub_layers_minus1" );
-
-  WRITE_FLAG( pcSPS->getTemporalIdNestingFlag() ? 1 : 0, "sps_temporal_id_nesting_flag" );
-  codePTL( pcSPS->getPTL(), true, pcSPS->getMaxTLayers() - 1 );
-#else
+  WRITE_CODE( pcSPS->getDecodingParameterSetId (), 4,       "sps_decoding_parameter_set_id" );
+  WRITE_CODE( pcSPS->getVPSId(), 4, "sps_video_parameter_set_id" );
   CHECK(pcSPS->getMaxTLayers() == 0, "Maximum number of temporal sub-layers is '0'");
 
   WRITE_CODE(pcSPS->getMaxTLayers() - 1, 3, "sps_max_sub_layers_minus1");
   WRITE_CODE(0,                          5, "sps_reserved_zero_5bits");
 
   codeProfileTierLevel( pcSPS->getProfileTierLevel(), pcSPS->getMaxTLayers() - 1 );
+  WRITE_FLAG(pcSPS->getGDREnabledFlag(), "gdr_enabled_flag");
 
-  WRITE_UVLC(pcSPS->getSPSId (), "sps_seq_parameter_set_id");
-#endif
+  WRITE_CODE( pcSPS->getSPSId (), 4, "sps_seq_parameter_set_id" );
 
-  WRITE_UVLC( int(pcSPS->getChromaFormatIdc ()),    "chroma_format_idc" );
+  WRITE_CODE(int(pcSPS->getChromaFormatIdc ()), 2, "chroma_format_idc");
 
   const ChromaFormat format                = pcSPS->getChromaFormatIdc();
   if( format == CHROMA_444 )
@@ -645,30 +714,55 @@ void HLSWriter::codeSPS( const SPS* pcSPS )
     WRITE_FLAG( 0,                                  "separate_colour_plane_flag");
   }
 
-  WRITE_UVLC( pcSPS->getPicWidthInLumaSamples (),   "pic_width_in_luma_samples" );
-  WRITE_UVLC( pcSPS->getPicHeightInLumaSamples(),   "pic_height_in_luma_samples" );
-  Window conf = pcSPS->getConformanceWindow();
+  WRITE_FLAG( pcSPS->getRprEnabledFlag(), "ref_pic_resampling_enabled_flag" );
 
-  // KJS: not removing yet
-  WRITE_FLAG( conf.getWindowEnabledFlag(),          "conformance_window_flag" );
-  if (conf.getWindowEnabledFlag())
+  WRITE_UVLC( pcSPS->getMaxPicWidthInLumaSamples(), "pic_width_max_in_luma_samples" );
+  WRITE_UVLC( pcSPS->getMaxPicHeightInLumaSamples(), "pic_height_max_in_luma_samples" );
+  WRITE_CODE(floorLog2(pcSPS->getCTUSize()) - 5, 2, "sps_log2_ctu_size_minus5");
+
+  WRITE_FLAG(pcSPS->getSubPicPresentFlag(), "subpics_present_flag");
+  if(pcSPS->getSubPicPresentFlag())
   {
-    WRITE_UVLC( conf.getWindowLeftOffset()   / SPS::getWinUnitX(pcSPS->getChromaFormatIdc() ), "conf_win_left_offset" );
-    WRITE_UVLC( conf.getWindowRightOffset()  / SPS::getWinUnitX(pcSPS->getChromaFormatIdc() ), "conf_win_right_offset" );
-    WRITE_UVLC( conf.getWindowTopOffset()    / SPS::getWinUnitY(pcSPS->getChromaFormatIdc() ), "conf_win_top_offset" );
-    WRITE_UVLC( conf.getWindowBottomOffset() / SPS::getWinUnitY(pcSPS->getChromaFormatIdc() ), "conf_win_bottom_offset" );
+    WRITE_CODE(pcSPS->getNumSubPics() - 1, 8, "sps_num_subpics_minus1");
+    for (int picIdx = 0; picIdx < pcSPS->getNumSubPics(); picIdx++)
+    {
+      WRITE_CODE( pcSPS->getSubPicCtuTopLeftX(picIdx), std::max(1, ceilLog2((( pcSPS->getMaxPicWidthInLumaSamples()  +  pcSPS->getCTUSize() - 1)  >> floorLog2( pcSPS->getCTUSize())))), "subpic_ctu_top_left_x[ i ]"  );
+      WRITE_CODE( pcSPS->getSubPicCtuTopLeftY(picIdx), std::max(1, ceilLog2((( pcSPS->getMaxPicHeightInLumaSamples() +  pcSPS->getCTUSize() - 1)  >> floorLog2( pcSPS->getCTUSize())))), "subpic_ctu_top_left_y[ i ]"  );
+      WRITE_CODE( pcSPS->getSubPicWidth(picIdx) - 1,   std::max(1, ceilLog2((( pcSPS->getMaxPicWidthInLumaSamples()  +  pcSPS->getCTUSize() - 1)  >> floorLog2( pcSPS->getCTUSize())))), "subpic_width_minus1[ i ]"    );
+      WRITE_CODE( pcSPS->getSubPicHeight(picIdx) - 1,  std::max(1, ceilLog2((( pcSPS->getMaxPicHeightInLumaSamples() +  pcSPS->getCTUSize() - 1)  >> floorLog2( pcSPS->getCTUSize())))), "subpic_height_minus1[ i ]"   );
+      WRITE_FLAG( pcSPS->getSubPicTreatedAsPicFlag(picIdx),  "subpic_treated_as_pic_flag[ i ]" );
+      WRITE_FLAG( pcSPS->getLoopFilterAcrossSubpicEnabledFlag(picIdx),  "loop_filter_across_subpic_enabled_flag[ i ]" );
+    }
   }
 
-  WRITE_UVLC( pcSPS->getBitDepth(CHANNEL_TYPE_LUMA) - 8,                      "bit_depth_luma_minus8" );
+  WRITE_FLAG( pcSPS->getSubPicIdPresentFlag(), "sps_subpic_id_present_flag");
+  if( pcSPS->getSubPicIdPresentFlag() )
+  {
+    WRITE_FLAG(pcSPS->getSubPicIdSignallingPresentFlag(), "sps_subpic_id_signalling_present_flag");
+    if( pcSPS->getSubPicIdSignallingPresentFlag() )
+    {
+      WRITE_UVLC( pcSPS->getSubPicIdLen( ) - 1, "sps_subpic_id_len_minus1" );
+      for( int picIdx = 0; picIdx < pcSPS->getNumSubPics( ); picIdx++ )
+      {
+        WRITE_CODE( pcSPS->getSubPicId(picIdx), pcSPS->getSubPicIdLen( ), "sps_subpic_id[i]" );
+      }
+    }
+  }
 
-  const bool         chromaEnabled         = isChromaEnabled(format);
-  WRITE_UVLC( chromaEnabled ? (pcSPS->getBitDepth(CHANNEL_TYPE_CHROMA) - 8):0,  "bit_depth_chroma_minus8" );
+  WRITE_UVLC( pcSPS->getBitDepth(CHANNEL_TYPE_LUMA) - 8,                      "bit_depth_minus8" );
 
-  WRITE_UVLC( pcSPS->getBitsForPOC()-4,                 "log2_max_pic_order_cnt_lsb_minus4" );
+  WRITE_UVLC( pcSPS->getMinQpPrimeTsMinus4(CHANNEL_TYPE_LUMA),                      "min_qp_prime_ts_minus4" );
+  
+  WRITE_FLAG( pcSPS->getUseWP() ? 1 : 0, "sps_weighted_pred_flag" );   // Use of Weighting Prediction (P_SLICE)
+  WRITE_FLAG( pcSPS->getUseWPBiPred() ? 1 : 0, "sps_weighted_bipred_flag" );  // Use of Weighting Bi-Prediction (B_SLICE)
 
+  WRITE_CODE(pcSPS->getBitsForPOC()-4, 4, "log2_max_pic_order_cnt_lsb_minus4");
   // KJS: Marakech decision: sub-layers added back
   const bool subLayerOrderingInfoPresentFlag = 1;
-  WRITE_FLAG(subLayerOrderingInfoPresentFlag,       "sps_sub_layer_ordering_info_present_flag");
+  if (pcSPS->getMaxTLayers() > 1)
+  {
+    WRITE_FLAG(subLayerOrderingInfoPresentFlag,       "sps_sub_layer_ordering_info_present_flag");
+  }
   for(uint32_t i=0; i <= pcSPS->getMaxTLayers()-1; i++)
   {
     WRITE_UVLC( pcSPS->getMaxDecPicBuffering(i) - 1,       "sps_max_dec_pic_buffering_minus1[i]" );
@@ -680,50 +774,107 @@ void HLSWriter::codeSPS( const SPS* pcSPS )
     }
   }
   CHECK( pcSPS->getMaxCUWidth() != pcSPS->getMaxCUHeight(),                          "Rectangular CTUs not supported" );
-  WRITE_FLAG(pcSPS->getUseDualITree(), "qtbtt_dual_tree_intra_flag");
-  WRITE_UVLC(g_aucLog2[pcSPS->getCTUSize()] - MIN_CU_LOG2, "log2_ctu_size_minus2");
+  WRITE_FLAG(pcSPS->getLongTermRefsPresent() ? 1 : 0, "long_term_ref_pics_flag");
+  WRITE_FLAG( pcSPS->getInterLayerPresentFlag() ? 1 : 0, "inter_layer_ref_pics_present_flag" );
+  WRITE_FLAG(pcSPS->getIDRRefParamListPresent() ? 1 : 0, "sps_idr_rpl_present_flag" );
+  WRITE_FLAG(pcSPS->getRPL1CopyFromRPL0Flag() ? 1 : 0, "rpl1_copy_from_rpl0_flag");
+
+  const RPLList* rplList0 = pcSPS->getRPLList0();
+  const RPLList* rplList1 = pcSPS->getRPLList1();
+
+  //Write candidate for List0
+  uint32_t numberOfRPL = pcSPS->getNumRPL0();
+  WRITE_UVLC(numberOfRPL, "num_ref_pic_lists_in_sps[0]");
+  for (int ii = 0; ii < numberOfRPL; ii++)
+  {
+    const ReferencePictureList* rpl = rplList0->getReferencePictureList(ii);
+    xCodeRefPicList( rpl, pcSPS->getLongTermRefsPresent(), pcSPS->getBitsForPOC(), !pcSPS->getUseWP() && !pcSPS->getUseWPBiPred() );
+  }
+
+  //Write candidate for List1
+  if (!pcSPS->getRPL1CopyFromRPL0Flag())
+  {
+    numberOfRPL = pcSPS->getNumRPL1();
+    WRITE_UVLC(numberOfRPL, "num_ref_pic_lists_in_sps[1]");
+    for (int ii = 0; ii < numberOfRPL; ii++)
+    {
+      const ReferencePictureList* rpl = rplList1->getReferencePictureList(ii);
+      xCodeRefPicList( rpl, pcSPS->getLongTermRefsPresent(), pcSPS->getBitsForPOC(), !pcSPS->getUseWP() && !pcSPS->getUseWPBiPred() );
+    }
+  }
+  if( pcSPS->getChromaFormatIdc() != CHROMA_400 ) 
+  {
+    WRITE_FLAG(pcSPS->getUseDualITree(), "qtbtt_dual_tree_intra_flag");
+  }
   WRITE_UVLC(pcSPS->getLog2MinCodingBlockSize() - 2, "log2_min_luma_coding_block_size_minus2");
   WRITE_FLAG(pcSPS->getSplitConsOverrideEnabledFlag(), "partition_constraints_override_enabled_flag");
-  WRITE_UVLC(g_aucLog2[pcSPS->getMinQTSize(I_SLICE)] - pcSPS->getLog2MinCodingBlockSize(), "sps_log2_diff_min_qt_min_cb_intra_tile_group_luma");
-  WRITE_UVLC(g_aucLog2[pcSPS->getMinQTSize(B_SLICE)] - pcSPS->getLog2MinCodingBlockSize(), "sps_log2_diff_min_qt_min_cb_inter_tile_group");
-  WRITE_UVLC(pcSPS->getMaxBTDepth(), "sps_max_mtt_hierarchy_depth_inter_tile_group");
-  WRITE_UVLC(pcSPS->getMaxBTDepthI(), "sps_max_mtt_hierarchy_depth_intra_tile_group_luma");
-  if (pcSPS->getMaxBTDepthI() != 0)
+  WRITE_UVLC(floorLog2(pcSPS->getMinQTSize(I_SLICE)) - pcSPS->getLog2MinCodingBlockSize(), "sps_log2_diff_min_qt_min_cb_intra_slice_luma");
+  WRITE_UVLC(floorLog2(pcSPS->getMinQTSize(B_SLICE)) - pcSPS->getLog2MinCodingBlockSize(), "sps_log2_diff_min_qt_min_cb_inter_slice");
+  WRITE_UVLC(pcSPS->getMaxMTTHierarchyDepth(), "sps_max_mtt_hierarchy_depth_inter_slice");
+  WRITE_UVLC(pcSPS->getMaxMTTHierarchyDepthI(), "sps_max_mtt_hierarchy_depth_intra_slice_luma");
+  if (pcSPS->getMaxMTTHierarchyDepthI() != 0)
   {
-    WRITE_UVLC(g_aucLog2[pcSPS->getMaxBTSizeI()] - g_aucLog2[pcSPS->getMinQTSize(I_SLICE)], "sps_log2_diff_max_bt_min_qt_intra_tile_group_luma");
-    WRITE_UVLC(g_aucLog2[pcSPS->getMaxTTSizeI()] - g_aucLog2[pcSPS->getMinQTSize(I_SLICE)], "sps_log2_diff_max_tt_min_qt_intra_tile_group_luma");
+    WRITE_UVLC(floorLog2(pcSPS->getMaxBTSizeI()) - floorLog2(pcSPS->getMinQTSize(I_SLICE)), "sps_log2_diff_max_bt_min_qt_intra_slice_luma");
+    WRITE_UVLC(floorLog2(pcSPS->getMaxTTSizeI()) - floorLog2(pcSPS->getMinQTSize(I_SLICE)), "sps_log2_diff_max_tt_min_qt_intra_slice_luma");
   }
-  if (pcSPS->getMaxBTDepth() != 0)
+  if (pcSPS->getMaxMTTHierarchyDepth() != 0)
   {
-    WRITE_UVLC(g_aucLog2[pcSPS->getMaxBTSize()] - g_aucLog2[pcSPS->getMinQTSize(B_SLICE)], "sps_log2_diff_max_bt_min_qt_inter_tile_group");
-    WRITE_UVLC(g_aucLog2[pcSPS->getMaxTTSize()] - g_aucLog2[pcSPS->getMinQTSize(B_SLICE)], "sps_log2_diff_max_tt_min_qt_inter_tile_group");
+    WRITE_UVLC(floorLog2(pcSPS->getMaxBTSize()) - floorLog2(pcSPS->getMinQTSize(B_SLICE)), "sps_log2_diff_max_bt_min_qt_inter_slice");
+    WRITE_UVLC(floorLog2(pcSPS->getMaxTTSize()) - floorLog2(pcSPS->getMinQTSize(B_SLICE)), "sps_log2_diff_max_tt_min_qt_inter_slice");
   }
   if (pcSPS->getUseDualITree())
   {
-    WRITE_UVLC(g_aucLog2[pcSPS->getMinQTSize(I_SLICE, CHANNEL_TYPE_CHROMA)] - pcSPS->getLog2MinCodingBlockSize(), "sps_log2_diff_min_qt_min_cb_intra_tile_group_chroma");
-    WRITE_UVLC(pcSPS->getMaxBTDepthIChroma(), "sps_max_mtt_hierarchy_depth_intra_tile_group_chroma");
-    if (pcSPS->getMaxBTDepthIChroma() != 0)
+    WRITE_UVLC(floorLog2(pcSPS->getMinQTSize(I_SLICE, CHANNEL_TYPE_CHROMA)) - pcSPS->getLog2MinCodingBlockSize(), "sps_log2_diff_min_qt_min_cb_intra_slice_chroma");
+    WRITE_UVLC(pcSPS->getMaxMTTHierarchyDepthIChroma(), "sps_max_mtt_hierarchy_depth_intra_slice_chroma");
+    if (pcSPS->getMaxMTTHierarchyDepthIChroma() != 0)
     {
-      WRITE_UVLC(g_aucLog2[pcSPS->getMaxBTSizeIChroma()] - g_aucLog2[pcSPS->getMinQTSize(I_SLICE, CHANNEL_TYPE_CHROMA)], "sps_log2_diff_max_bt_min_qt_intra_tile_group_chroma");
-      WRITE_UVLC(g_aucLog2[pcSPS->getMaxTTSizeIChroma()] - g_aucLog2[pcSPS->getMinQTSize(I_SLICE, CHANNEL_TYPE_CHROMA)], "sps_log2_diff_max_tt_min_qt_intra_tile_group_chroma");
+      WRITE_UVLC(floorLog2(pcSPS->getMaxBTSizeIChroma()) - floorLog2(pcSPS->getMinQTSize(I_SLICE, CHANNEL_TYPE_CHROMA)), "sps_log2_diff_max_bt_min_qt_intra_slice_chroma");
+      WRITE_UVLC(floorLog2(pcSPS->getMaxTTSizeIChroma()) - floorLog2(pcSPS->getMinQTSize(I_SLICE, CHANNEL_TYPE_CHROMA)), "sps_log2_diff_max_tt_min_qt_intra_slice_chroma");
+    }
+  }
+
+  WRITE_FLAG( (pcSPS->getLog2MaxTbSize() - 5) ? 1 : 0,                       "sps_max_luma_transform_size_64_flag" );
+
+  WRITE_FLAG(pcSPS->getJointCbCrEnabledFlag(), "sps_joint_cbcr_enabled_flag");
+  if (pcSPS->getChromaFormatIdc() != CHROMA_400)
+  {
+    const ChromaQpMappingTable& chromaQpMappingTable = pcSPS->getChromaQpMappingTable();
+    WRITE_FLAG(chromaQpMappingTable.getSameCQPTableForAllChromaFlag(), "same_qp_table_for_chroma");
+    int numQpTables = chromaQpMappingTable.getSameCQPTableForAllChromaFlag() ? 1 : (pcSPS->getJointCbCrEnabledFlag() ? 3 : 2);
+    CHECK(numQpTables != chromaQpMappingTable.getNumQpTables(), " numQpTables does not match at encoder side ");
+    for (int i = 0; i < numQpTables; i++)
+    {
+      WRITE_SVLC(chromaQpMappingTable.getQpTableStartMinus26(i), "qp_table_starts_minus26");
+      WRITE_UVLC(chromaQpMappingTable.getNumPtsInCQPTableMinus1(i), "num_points_in_qp_table_minus1");
+
+      for (int j = 0; j <= chromaQpMappingTable.getNumPtsInCQPTableMinus1(i); j++)
+      {
+        WRITE_UVLC(chromaQpMappingTable.getDeltaQpInValMinus1(i,j),  "delta_qp_in_val_minus1");
+        WRITE_UVLC(chromaQpMappingTable.getDeltaQpOutVal(i, j) ^ chromaQpMappingTable.getDeltaQpInValMinus1(i, j),
+                   "delta_qp_diff_val");
+      }
     }
   }
 
-#if MAX_TB_SIZE_SIGNALLING
-  // KJS: Not in syntax
-  WRITE_UVLC( pcSPS->getLog2MaxTbSize() - 2,                                 "log2_max_luma_transform_block_size_minus2" );
-#endif
   WRITE_FLAG( pcSPS->getSAOEnabledFlag(),                                            "sps_sao_enabled_flag");
   WRITE_FLAG( pcSPS->getALFEnabledFlag(),                                            "sps_alf_enabled_flag" );
 
-  WRITE_FLAG( pcSPS->getPCMEnabledFlag() ? 1 : 0,                                    "sps_pcm_enabled_flag");
-  if( pcSPS->getPCMEnabledFlag() )
+  WRITE_FLAG(pcSPS->getTransformSkipEnabledFlag() ? 1 : 0, "sps_transform_skip_enabled_flag");
+  if (pcSPS->getTransformSkipEnabledFlag())
+  {
+      WRITE_FLAG(pcSPS->getBDPCMEnabled() ? 1 : 0, "sps_bdpcm_enabled_flag");
+      if (pcSPS->getBDPCMEnabled() && pcSPS->getChromaFormatIdc() == CHROMA_444)
+      {
+          WRITE_FLAG(pcSPS->getBDPCMEnabled() == BDPCM_LUMACHROMA ? 1 : 0, "sps_bdpcm_enabled_chroma_flag");
+      }
+      else 
+      {
+        CHECK(pcSPS->getBDPCMEnabled() == BDPCM_LUMACHROMA, "BDPCM for chroma can be used for 444 only.")
+      }
+  }
+  else
   {
-    WRITE_CODE( pcSPS->getPCMBitDepth(CHANNEL_TYPE_LUMA) - 1, 4,                            "pcm_sample_bit_depth_luma_minus1" );
-    WRITE_CODE( chromaEnabled ? (pcSPS->getPCMBitDepth(CHANNEL_TYPE_CHROMA) - 1) : 0, 4,    "pcm_sample_bit_depth_chroma_minus1" );
-    WRITE_UVLC( pcSPS->getPCMLog2MinSize() - 3,                                      "log2_min_pcm_luma_coding_block_size_minus3" );
-    WRITE_UVLC( pcSPS->getPCMLog2MaxSize() - pcSPS->getPCMLog2MinSize(),             "log2_diff_max_min_pcm_luma_coding_block_size" );
-    WRITE_FLAG( pcSPS->getPCMFilterDisableFlag()?1 : 0,                              "pcm_loop_filter_disable_flag");
+    CHECK(pcSPS->getBDPCMEnabled()!=0, "BDPCM cannot be used when transform skip is disabled");
   }
 
   WRITE_FLAG( pcSPS->getWrapAroundEnabledFlag() ? 1 : 0,                              "sps_ref_wraparound_enabled_flag" );
@@ -742,48 +893,70 @@ void HLSWriter::codeSPS( const SPS* pcSPS )
   WRITE_FLAG( pcSPS->getAMVREnabledFlag() ? 1 : 0,                                   "sps_amvr_enabled_flag" );
 
   WRITE_FLAG( pcSPS->getBDOFEnabledFlag() ? 1 : 0,                                   "sps_bdof_enabled_flag" );
-
-  WRITE_FLAG( pcSPS->getAffineAmvrEnabledFlag() ? 1 : 0,                             "sps_affine_amvr_enabled_flag" );
-
-  WRITE_FLAG( pcSPS->getUseDMVR() ? 1 : 0,                                            "sps_dmvr_enable_flag" );
-
-  // KJS: sps_cclm_enabled_flag
-  WRITE_FLAG( pcSPS->getUseLMChroma() ? 1 : 0,                                                 "lm_chroma_enabled_flag" );
-  if ( pcSPS->getUseLMChroma() && pcSPS->getChromaFormatIdc() == CHROMA_420 )
+  if (pcSPS->getBDOFEnabledFlag())
+  {
+    WRITE_FLAG(pcSPS->getBdofControlPresentFlag() ? 1 : 0,                           "sps_bdof_pic_present_flag");
+  }
+  WRITE_FLAG( pcSPS->getUseSMVD() ? 1 : 0,                                            "sps_smvd_enabled_flag" );
+  WRITE_FLAG( pcSPS->getUseDMVR() ? 1 : 0,                                            "sps_dmvr_enabled_flag" );
+  if (pcSPS->getUseDMVR())
+  {
+    WRITE_FLAG(pcSPS->getDmvrControlPresentFlag() ? 1 : 0,                            "sps_dmvr_pic_present_flag");
+  }
+  WRITE_FLAG(pcSPS->getUseMMVD() ? 1 : 0,                                             "sps_mmvd_enabled_flag");
+  WRITE_FLAG( pcSPS->getUseISP() ? 1 : 0,                                             "sps_isp_enabled_flag");
+  WRITE_FLAG( pcSPS->getUseMRL() ? 1 : 0,                                             "sps_mrl_enabled_flag");
+  WRITE_FLAG( pcSPS->getUseMIP() ? 1 : 0,                                             "sps_mip_enabled_flag");
+  if( pcSPS->getChromaFormatIdc() != CHROMA_400) 
+  {
+    WRITE_FLAG( pcSPS->getUseLMChroma() ? 1 : 0,                                      "sps_cclm_enabled_flag");
+  }
+  if( pcSPS->getChromaFormatIdc() == CHROMA_420 )
   {
-    WRITE_FLAG( pcSPS->getCclmCollocatedChromaFlag() ? 1 : 0,                                  "sps_cclm_collocated_chroma_flag" );
+    WRITE_FLAG( pcSPS->getHorCollocatedChromaFlag() ? 1 : 0, "sps_chroma_horizontal_collocated_flag" );
+    WRITE_FLAG( pcSPS->getVerCollocatedChromaFlag() ? 1 : 0, "sps_chroma_vertical_collocated_flag" );
   }
 
-  WRITE_FLAG( pcSPS->getUseMTS() ? 1 : 0,                                                      "mts_enabled_flag" );
+  WRITE_FLAG( pcSPS->getUseMTS() ? 1 : 0,                                                      "sps_mts_enabled_flag" );
   if ( pcSPS->getUseMTS() )
   {
-    WRITE_FLAG( pcSPS->getUseIntraMTS() ? 1 : 0,                                               "mts_intra_enabled_flag" );
-    WRITE_FLAG( pcSPS->getUseInterMTS() ? 1 : 0,                                               "mts_inter_enabled_flag" );
+    WRITE_FLAG( pcSPS->getUseIntraMTS() ? 1 : 0,                                               "sps_explicit_mts_intra_enabled_flag" );
+    WRITE_FLAG( pcSPS->getUseInterMTS() ? 1 : 0,                                               "sps_explicit_mts_inter_enabled_flag" );
   }
-  // KJS: sps_affine_enabled_flag
-  WRITE_FLAG( pcSPS->getUseAffine() ? 1 : 0,                                                   "affine_flag" );
+  WRITE_FLAG( pcSPS->getUseSBT() ? 1 : 0,                                                      "sps_sbt_enabled_flag");
+  WRITE_FLAG( pcSPS->getUseAffine() ? 1 : 0,                                                   "sps_affine_enabled_flag" );
   if ( pcSPS->getUseAffine() )
   {
-    WRITE_FLAG( pcSPS->getUseAffineType() ? 1 : 0,                                             "affine_type_flag" );
+    WRITE_FLAG( pcSPS->getUseAffineType() ? 1 : 0,                                             "sps_affine_type_flag" );
+    WRITE_FLAG( pcSPS->getAffineAmvrEnabledFlag() ? 1 : 0,                                     "sps_affine_amvr_enabled_flag" );
+    WRITE_FLAG( pcSPS->getUsePROF() ? 1 : 0,                                                   "sps_affine_prof_enabled_flag" );
+    if (pcSPS->getUsePROF())
+    {
+      WRITE_FLAG(pcSPS->getProfControlPresentFlag() ? 1 : 0,                                   "sps_prof_pic_present_flag" );
+    }
+  }
+  if (pcSPS->getChromaFormatIdc() == CHROMA_444)
+  {
+    WRITE_FLAG(pcSPS->getUseColorTrans() ? 1 : 0, "sps_act_enabled_flag");
+  }
+  if (pcSPS->getChromaFormatIdc() == CHROMA_444)
+  {
+    WRITE_FLAG(pcSPS->getPLTMode() ? 1 : 0,                                                    "sps_palette_enabled_flag" );
   }
-  WRITE_FLAG( pcSPS->getUseGBi() ? 1 : 0,                                                      "gbi_flag" );
-  WRITE_FLAG(pcSPS->getIBCFlag() ? 1 : 0,                                                      "ibc_flag");
+  WRITE_FLAG( pcSPS->getUseBcw() ? 1 : 0,                                                      "sps_bcw_enabled_flag" );
+  WRITE_FLAG(pcSPS->getIBCFlag() ? 1 : 0,                                                      "sps_ibc_enabled_flag");
 
   // KJS: sps_ciip_enabled_flag
-  WRITE_FLAG( pcSPS->getUseMHIntra() ? 1 : 0,                                                  "mhintra_flag" );
-
-  WRITE_FLAG( pcSPS->getUseTriangle() ? 1: 0,                                                  "triangle_flag" );
+  WRITE_FLAG( pcSPS->getUseCiip() ? 1 : 0,                                                  "sps_ciip_enabled_flag" );
 
-  // KJS: not in draft yet
-  WRITE_FLAG( pcSPS->getDisFracMmvdEnabledFlag() ? 1 : 0,                            "sps_fracmmvd_disabled_flag" );
-  // KJS: not in draft yet
-  WRITE_FLAG( pcSPS->getUseSBT() ? 1 : 0,                                             "sbt_enable_flag");
-  if( pcSPS->getUseSBT() )
+  if ( pcSPS->getUseMMVD() )
   {
-    WRITE_FLAG(pcSPS->getMaxSbtSize() == 64 ? 1 : 0,                                  "max_sbt_size_64_flag");
+    WRITE_FLAG( pcSPS->getFpelMmvdEnabledFlag() ? 1 : 0,                            "sps_fpel_mmvd_enabled_flag" );
   }
-  // KJS: not in draft yet
-  WRITE_FLAG(pcSPS->getUseReshaper() ? 1 : 0, "sps_reshaper_enable_flag");
+  WRITE_FLAG( pcSPS->getUseTriangle() ? 1: 0,                                                  "sps_triangle_enabled_flag" );
+
+  WRITE_FLAG(pcSPS->getUseLmcs() ? 1 : 0, "sps_lmcs_enable_flag");
+  WRITE_FLAG( pcSPS->getUseLFNST() ? 1 : 0,                                                    "sps_lfnst_enabled_flag" );
 
 #if LUMA_ADAPTIVE_DEBLOCKING_FILTER_QP_OFFSET
   WRITE_FLAG( pcSPS->getLadfEnabled() ? 1 : 0,                                                 "sps_ladf_enabled_flag" );
@@ -800,44 +973,43 @@ void HLSWriter::codeSPS( const SPS* pcSPS )
 #endif
 
   // KJS: reference picture sets to be replaced
-  const RPSList* rpsList = pcSPS->getRPSList();
 
-  WRITE_UVLC(rpsList->getNumberOfReferencePictureSets(), "num_short_term_ref_pic_sets" );
-  for(int i=0; i < rpsList->getNumberOfReferencePictureSets(); i++)
-  {
-    const ReferencePictureSet*rps = rpsList->getReferencePictureSet(i);
-    xCodeShortTermRefPicSet( rps,false, i);
-  }
-  WRITE_FLAG( pcSPS->getLongTermRefsPresent() ? 1 : 0,         "long_term_ref_pics_present_flag" );
-  if (pcSPS->getLongTermRefsPresent())
+
+  // KJS: remove scaling lists?
+  WRITE_FLAG( pcSPS->getScalingListFlag() ? 1 : 0,                                   "sps_scaling_list_enabled_flag" );
+
+  WRITE_FLAG( pcSPS->getLoopFilterAcrossVirtualBoundariesDisabledFlag(), "sps_loop_filter_across_virtual_boundaries_disabled_present_flag" );
+  if( pcSPS->getLoopFilterAcrossVirtualBoundariesDisabledFlag() )
   {
-    WRITE_UVLC(pcSPS->getNumLongTermRefPicSPS(), "num_long_term_ref_pics_sps" );
-    for (uint32_t k = 0; k < pcSPS->getNumLongTermRefPicSPS(); k++)
+    WRITE_CODE( pcSPS->getNumVerVirtualBoundaries(), 2, "sps_num_ver_virtual_boundaries");
+    for( unsigned i = 0; i < pcSPS->getNumVerVirtualBoundaries(); i++ )
     {
-      WRITE_CODE( pcSPS->getLtRefPicPocLsbSps(k), pcSPS->getBitsForPOC(), "lt_ref_pic_poc_lsb_sps");
-      WRITE_FLAG( pcSPS->getUsedByCurrPicLtSPSFlag(k), "used_by_curr_pic_lt_sps_flag[i]");
+      WRITE_CODE((pcSPS->getVirtualBoundariesPosX(i)>>3), 13, "sps_virtual_boundaries_pos_x");
+    }
+    WRITE_CODE(pcSPS->getNumHorVirtualBoundaries(), 2, "sps_num_hor_virtual_boundaries");
+    for( unsigned i = 0; i < pcSPS->getNumHorVirtualBoundaries(); i++ )
+    {
+      WRITE_CODE((pcSPS->getVirtualBoundariesPosY(i)>>3), 13, "sps_virtual_boundaries_pos_y");
     }
   }
 
-#if HEVC_USE_INTRA_SMOOTHING_T32 || HEVC_USE_INTRA_SMOOTHING_T64
-  WRITE_FLAG( pcSPS->getUseStrongIntraSmoothing(),             "strong_intra_smoothing_enable_flag" );
-
-#endif
-
-  // KJS: remove scaling lists?
-#if HEVC_USE_SCALING_LISTS
-  WRITE_FLAG( pcSPS->getScalingListFlag() ? 1 : 0,                                   "scaling_list_enabled_flag" );
-  if(pcSPS->getScalingListFlag())
+  const TimingInfo *timingInfo = pcSPS->getTimingInfo();
+  WRITE_FLAG(pcSPS->getHrdParametersPresentFlag(),          "general_hrd_parameters_present_flag");
+    if( pcSPS->getHrdParametersPresentFlag() )
   {
-    WRITE_FLAG( pcSPS->getScalingListPresentFlag() ? 1 : 0,                          "sps_scaling_list_data_present_flag" );
-    if(pcSPS->getScalingListPresentFlag())
+    WRITE_CODE(timingInfo->getNumUnitsInTick(), 32,           "num_units_in_tick");
+    WRITE_CODE(timingInfo->getTimeScale(),      32,           "time_scale");
+    WRITE_FLAG(pcSPS->getSubLayerParametersPresentFlag(), "sub_layer_cpb_parameters_present_flag");
+    if (pcSPS->getSubLayerParametersPresentFlag())
     {
-      codeScalingList( pcSPS->getScalingList() );
+      codeHrdParameters(pcSPS->getHrdParameters(), 0, pcSPS->getMaxTLayers() - 1);
+    }
+    else
+    {
+      codeHrdParameters(pcSPS->getHrdParameters(), pcSPS->getMaxTLayers() - 1, pcSPS->getMaxTLayers() - 1);
     }
   }
-#endif
 
-  // KJS: no VUI defined yet
   WRITE_FLAG( pcSPS->getVuiParametersPresentFlag(),            "vui_parameters_present_flag" );
   if (pcSPS->getVuiParametersPresentFlag())
   {
@@ -907,367 +1079,859 @@ void HLSWriter::codeSPS( const SPS* pcSPS )
   xWriteRbspTrailingBits();
 }
 
-#if HEVC_VPS
-void HLSWriter::codeVPS( const VPS* pcVPS )
+void HLSWriter::codeDPS( const DPS* dps )
 {
 #if ENABLE_TRACING
-  xTraceVPSHeader();
+  xTraceDPSHeader();
 #endif
-  WRITE_CODE( pcVPS->getVPSId(),                    4,        "vps_video_parameter_set_id" );
-  WRITE_FLAG(                                       1,        "vps_base_layer_internal_flag" );
-  WRITE_FLAG(                                       1,        "vps_base_layer_available_flag" );
-  WRITE_CODE( 0,                                    6,        "vps_max_layers_minus1" );
-  WRITE_CODE( pcVPS->getMaxTLayers() - 1,           3,        "vps_max_sub_layers_minus1" );
-  WRITE_FLAG( pcVPS->getTemporalNestingFlag(),                "vps_temporal_id_nesting_flag" );
-  CHECK(pcVPS->getMaxTLayers()<=1&&!pcVPS->getTemporalNestingFlag(), "Invalud parameters");
-  WRITE_CODE( 0xffff,                              16,        "vps_reserved_0xffff_16bits" );
-  codePTL( pcVPS->getPTL(), true, pcVPS->getMaxTLayers() - 1 );
-  const bool subLayerOrderingInfoPresentFlag = 1;
-  WRITE_FLAG(subLayerOrderingInfoPresentFlag,              "vps_sub_layer_ordering_info_present_flag");
-  for(uint32_t i=0; i <= pcVPS->getMaxTLayers()-1; i++)
+  WRITE_CODE( dps->getDecodingParameterSetId(),     4,        "dps_decoding_parameter_set_id" );
+  WRITE_CODE( dps->getMaxSubLayersMinus1(),         3,        "dps_max_sub_layers_minus1" );
+  WRITE_CODE( 0,                                    5,         "dps_reserved_zero_5bits" );
+  uint32_t numPTLs = (uint32_t) dps->getNumPTLs();
+  CHECK (numPTLs<1, "At least one PTL must be available in DPS");
+
+  WRITE_CODE( numPTLs - 1,                          4,         "dps_num_ptls_minus1" );
+
+  for (int i=0; i< numPTLs; i++)
   {
-    WRITE_UVLC( pcVPS->getMaxDecPicBuffering(i) - 1,       "vps_max_dec_pic_buffering_minus1[i]" );
-    WRITE_UVLC( pcVPS->getNumReorderPics(i),               "vps_max_num_reorder_pics[i]" );
-    WRITE_UVLC( pcVPS->getMaxLatencyIncrease(i),           "vps_max_latency_increase_plus1[i]" );
-    if (!subLayerOrderingInfoPresentFlag)
-    {
-      break;
-    }
+    ProfileTierLevel ptl = dps->getProfileTierLevel(i);
+    codeProfileTierLevel( &ptl, dps->getMaxSubLayersMinus1() );
   }
+  WRITE_FLAG( 0,                                              "dps_extension_flag" );
+  xWriteRbspTrailingBits();
+}
 
-  CHECK( pcVPS->getNumHrdParameters() > MAX_VPS_NUM_HRD_PARAMETERS, "Too many HRD parameters" );
-  CHECK( pcVPS->getMaxNuhReservedZeroLayerId() >= MAX_VPS_NUH_RESERVED_ZERO_LAYER_ID_PLUS1, "Invalid parameters read" );
-  WRITE_CODE( pcVPS->getMaxNuhReservedZeroLayerId(), 6,     "vps_max_layer_id" );
-  WRITE_UVLC( pcVPS->getMaxOpSets() - 1,                    "vps_num_layer_sets_minus1" );
-  for( uint32_t opsIdx = 1; opsIdx <= ( pcVPS->getMaxOpSets() - 1 ); opsIdx ++ )
+void HLSWriter::codeVPS(const VPS* pcVPS)
+{
+#if ENABLE_TRACING
+  xTraceVPSHeader();
+#endif
+  WRITE_CODE(pcVPS->getVPSId(), 4, "vps_video_parameter_set_id");
+  WRITE_CODE(pcVPS->getMaxLayers() - 1, 6, "vps_max_layers_minus1");
+  WRITE_CODE(pcVPS->getMaxSubLayers() - 1, 3, "vps_max_sublayers_minus1");
+  if (pcVPS->getMaxLayers() > 1 && pcVPS->getMaxSubLayers() > 1) 
+  {
+    WRITE_FLAG(pcVPS->getAllLayersSameNumSublayersFlag(), "vps_all_layers_same_num_sublayers_flag");
+  }
+  if (pcVPS->getMaxLayers() > 1)
   {
-    // Operation point set
-    for( uint32_t i = 0; i <= pcVPS->getMaxNuhReservedZeroLayerId(); i ++ )
+    WRITE_FLAG(pcVPS->getAllIndependentLayersFlag(), "vps_all_independent_layers_flag");
+  }
+  for (uint32_t i = 0; i < pcVPS->getMaxLayers(); i++)
+  {
+    WRITE_CODE(pcVPS->getLayerId(i), 6, "vps_layer_id");
+    if (i > 0 && !pcVPS->getAllIndependentLayersFlag())
     {
-      // Only applicable for version 1
-      // pcVPS->setLayerIdIncludedFlag( true, opsIdx, i );
-      WRITE_FLAG( pcVPS->getLayerIdIncludedFlag( opsIdx, i ) ? 1 : 0, "layer_id_included_flag[opsIdx][i]" );
+      WRITE_FLAG(pcVPS->getIndependentLayerFlag(i), "vps_independent_layer_flag");
+      if (!pcVPS->getIndependentLayerFlag(i))
+      {
+        for (int j = 0; j < i; j++)
+        {
+          WRITE_FLAG(pcVPS->getDirectRefLayerFlag(i, j), "vps_direct_dependency_flag");
+        }
+      }
     }
   }
-  const TimingInfo *timingInfo = pcVPS->getTimingInfo();
-  WRITE_FLAG(timingInfo->getTimingInfoPresentFlag(),          "vps_timing_info_present_flag");
-  if(timingInfo->getTimingInfoPresentFlag())
+  if( pcVPS->getMaxLayers() > 1 )
   {
-    WRITE_CODE(timingInfo->getNumUnitsInTick(), 32,           "vps_num_units_in_tick");
-    WRITE_CODE(timingInfo->getTimeScale(),      32,           "vps_time_scale");
-    WRITE_FLAG(timingInfo->getPocProportionalToTimingFlag(),  "vps_poc_proportional_to_timing_flag");
-    if(timingInfo->getPocProportionalToTimingFlag())
+    if (pcVPS->getAllIndependentLayersFlag()) 
     {
-      WRITE_UVLC(timingInfo->getNumTicksPocDiffOneMinus1(),   "vps_num_ticks_poc_diff_one_minus1");
+      WRITE_FLAG(pcVPS->getEachLayerIsAnOlsFlag(), "vps_each_layer_is_an_ols_flag");
     }
-    WRITE_UVLC( pcVPS->getNumHrdParameters(),                 "vps_num_hrd_parameters" );
-
-    if( pcVPS->getNumHrdParameters() > 0 )
+    if (!pcVPS->getEachLayerIsAnOlsFlag()) 
     {
-      for( uint32_t i = 0; i < pcVPS->getNumHrdParameters(); i ++ )
+      if (!pcVPS->getAllIndependentLayersFlag()) {
+        WRITE_CODE(pcVPS->getOlsModeIdc(), 2, "vps_ols_mode_idc");
+      }
+      if (pcVPS->getOlsModeIdc() == 2)
       {
-        // Only applicable for version 1
-        WRITE_UVLC( pcVPS->getHrdOpSetIdx( i ),                "hrd_layer_set_idx" );
-        if( i > 0 )
+        WRITE_CODE(pcVPS->getNumOutputLayerSets() - 1, 8, "vps_num_output_layer_sets_minus1");
+        for (uint32_t i = 1; i < pcVPS->getNumOutputLayerSets(); i++)
         {
-          WRITE_FLAG( pcVPS->getCprmsPresentFlag( i ) ? 1 : 0, "cprms_present_flag[i]" );
+          for (uint32_t j = 0; j < pcVPS->getMaxLayers(); j++)
+          {
+            WRITE_FLAG(pcVPS->getOlsOutputLayerFlag(i, j), "vps_ols_output_layer_flag");
+          }
         }
-        codeHrdParameters(pcVPS->getHrdParameters(i), pcVPS->getCprmsPresentFlag( i ), pcVPS->getMaxTLayers() - 1);
       }
     }
   }
-  WRITE_FLAG( 0,                     "vps_extension_flag" );
+  WRITE_FLAG(0, "vps_extension_flag");
 
   //future extensions here..
   xWriteRbspTrailingBits();
 }
-#endif
 
-void HLSWriter::codeSliceHeader         ( Slice* pcSlice )
+void HLSWriter::codePictureHeader( PicHeader* picHeader )
 {
+  const PPS*  pps = NULL;
+  const SPS*  sps = NULL;
+  
 #if ENABLE_TRACING
-  xTraceSliceHeader ();
+  xTracePictureHeader ();
 #endif
 
-  CodingStructure& cs = *pcSlice->getPic()->cs;
-  const ChromaFormat format                = pcSlice->getSPS()->getChromaFormatIdc();
-  const uint32_t         numberValidComponents = getNumberValidComponents(format);
-  const bool         chromaEnabled         = isChromaEnabled(format);
+  CodingStructure& cs = *picHeader->getPic()->cs;
 
-  //calculate number of bits required for slice address
-  int maxSliceSegmentAddress = cs.pcv->sizeInCtus;
-  int bitsSliceSegmentAddress = 0;
-  while(maxSliceSegmentAddress>(1<<bitsSliceSegmentAddress))
+  WRITE_FLAG(picHeader->getNonReferencePictureFlag(), "non_reference_picture_flag");
+  WRITE_FLAG(picHeader->getGdrPicFlag(), "gdr_pic_flag");
+  WRITE_FLAG(picHeader->getNoOutputOfPriorPicsFlag(), "no_output_of_prior_pics_flag");
+  if( picHeader->getGdrPicFlag() ) 
   {
-    bitsSliceSegmentAddress++;
+    WRITE_UVLC(picHeader->getRecoveryPocCnt(), "recovery_poc_cnt");
   }
-#if HEVC_DEPENDENT_SLICES
-  const int ctuTsAddress = pcSlice->getSliceSegmentCurStartCtuTsAddr();
-#else
-  const int ctuTsAddress = pcSlice->getSliceCurStartCtuTsAddr();
-#endif
-
-  //write slice address
-#if HEVC_TILES_WPP
-  const int sliceSegmentRsAddress = pcSlice->getPic()->tileMap->getCtuTsToRsAddrMap(ctuTsAddress);
-#else
-  const int sliceSegmentRsAddress = ctuTsAddress;
-#endif
-
-  WRITE_FLAG( sliceSegmentRsAddress==0, "first_slice_segment_in_pic_flag" );
-  if ( pcSlice->getRapPicFlag() )
+  else 
   {
-    WRITE_FLAG( pcSlice->getNoOutputPriorPicsFlag() ? 1 : 0, "no_output_of_prior_pics_flag" );
+    picHeader->setRecoveryPocCnt( 0 );
   }
-  WRITE_UVLC( pcSlice->getPPS()->getPPSId(), "slice_pic_parameter_set_id" );
-#if HEVC_DEPENDENT_SLICES
-  if ( pcSlice->getPPS()->getDependentSliceSegmentsEnabledFlag() && (sliceSegmentRsAddress!=0) )
+  
+  // parameter sets
+  WRITE_UVLC(picHeader->getPPSId(), "ph_pic_parameter_set_id");
+  pps = cs.slice->getPPS();
+  CHECK(pps==0, "Invalid PPS");  
+  sps = cs.slice->getSPS();
+  CHECK(sps==0, "Invalid SPS");
+  
+  // sub-picture IDs
+  if( sps->getSubPicIdPresentFlag() ) 
   {
-    WRITE_FLAG( pcSlice->getDependentSliceSegmentFlag() ? 1 : 0, "dependent_slice_segment_flag" );
+    if( sps->getSubPicIdSignallingPresentFlag() ) 
+    {
+      for( int picIdx = 0; picIdx < sps->getNumSubPics( ); picIdx++ )
+      {
+        picHeader->setSubPicId( picIdx, sps->getSubPicId( picIdx ) );
+      }
+    }
+    else 
+    {
+      WRITE_FLAG(picHeader->getSubPicIdSignallingPresentFlag(), "ph_subpic_id_signalling_present_flag");
+      if( picHeader->getSubPicIdSignallingPresentFlag() )
+      {
+        WRITE_UVLC( picHeader->getSubPicIdLen() - 1, "ph_subpic_id_len_minus1" );
+        for( int picIdx = 0; picIdx < sps->getNumSubPics( ); picIdx++ )
+        {
+          WRITE_CODE(picHeader->getSubPicId(picIdx), picHeader->getSubPicIdLen( ), "ph_subpic_id[i]" );
+        }
+      }
+      else 
+      {
+        for( int picIdx = 0; picIdx < pps->getNumSubPics( ); picIdx++ )
+        {
+          picHeader->setSubPicId( picIdx, pps->getSubPicId( picIdx ) );
+        }
+      }
+    }
   }
-#endif
-  if(sliceSegmentRsAddress>0)
+  else 
   {
-    WRITE_CODE( sliceSegmentRsAddress, bitsSliceSegmentAddress, "slice_segment_address" );
+    for( int picIdx = 0; picIdx < sps->getNumSubPics( ); picIdx++ )
+    {
+      picHeader->setSubPicId( picIdx, picIdx );
+    }
   }
-#if HEVC_DEPENDENT_SLICES
-  if( !pcSlice->getDependentSliceSegmentFlag() )
+
+  // virtual boundaries
+  if( !sps->getLoopFilterAcrossVirtualBoundariesDisabledFlag() )
   {
-#endif
-    for( int i = 0; i < pcSlice->getPPS()->getNumExtraSliceHeaderBits(); i++ )
+    WRITE_FLAG( picHeader->getLoopFilterAcrossVirtualBoundariesDisabledFlag(), "ph_loop_filter_across_virtual_boundaries_disabled_present_flag" );
+    if( picHeader->getLoopFilterAcrossVirtualBoundariesDisabledFlag() )
     {
-      WRITE_FLAG( 0, "slice_reserved_flag[]" );
+      WRITE_CODE(picHeader->getNumVerVirtualBoundaries(), 2, "ph_num_ver_virtual_boundaries");
+      for( unsigned i = 0; i < picHeader->getNumVerVirtualBoundaries(); i++ )
+      {
+        WRITE_CODE(picHeader->getVirtualBoundariesPosX(i) >> 3, 13, "ph_virtual_boundaries_pos_x");
+      }
+      WRITE_CODE(picHeader->getNumHorVirtualBoundaries(), 2, "ph_num_hor_virtual_boundaries");
+      for( unsigned i = 0; i < picHeader->getNumHorVirtualBoundaries(); i++ )
+      {
+        WRITE_CODE(picHeader->getVirtualBoundariesPosY(i)>>3, 13, "ph_virtual_boundaries_pos_y");
+      }
     }
-
-    WRITE_UVLC( pcSlice->getSliceType(), "slice_type" );
-
-    if( pcSlice->getPPS()->getOutputFlagPresentFlag() )
+    else
     {
-      WRITE_FLAG( pcSlice->getPicOutputFlag() ? 1 : 0, "pic_output_flag" );
+      picHeader->setLoopFilterAcrossVirtualBoundariesDisabledFlag( 0 );
+      picHeader->setNumVerVirtualBoundaries( 0 );
+      picHeader->setNumHorVirtualBoundaries( 0 );
     }
-
-    int pocBits = pcSlice->getSPS()->getBitsForPOC();
-    int pocMask = (1 << pocBits) - 1;
-    WRITE_CODE(pcSlice->getPOC() & pocMask, pocBits, "slice_pic_order_cnt_lsb");
-    if( !pcSlice->getIdrPicFlag() )
+  }
+  else
+  {
+    picHeader->setLoopFilterAcrossVirtualBoundariesDisabledFlag( sps->getLoopFilterAcrossVirtualBoundariesDisabledFlag() );
+    picHeader->setNumVerVirtualBoundaries( sps->getNumVerVirtualBoundaries() );
+    picHeader->setNumHorVirtualBoundaries( sps->getNumHorVirtualBoundaries() );
+    for( unsigned i = 0; i < 3; i++ ) 
     {
-      const ReferencePictureSet* rps = pcSlice->getRPS();
+      picHeader->setVirtualBoundariesPosX( sps->getVirtualBoundariesPosX(i), i );
+      picHeader->setVirtualBoundariesPosY( sps->getVirtualBoundariesPosY(i), i );
+    }
+  }
+  
+  // 4:4:4 colour plane ID
+  if( sps->getSeparateColourPlaneFlag() )
+  {
+    WRITE_CODE( picHeader->getColourPlaneId(), 2, "colour_plane_id" );
+  }
+  else 
+  {
+    picHeader->setColourPlaneId( 0 );
+  }
+  
+  // picture output flag
+  if( pps->getOutputFlagPresentFlag() )
+  {
+    WRITE_FLAG( picHeader->getPicOutputFlag(), "pic_output_flag" );
+  }
+  else 
+  {
+    picHeader->setPicOutputFlag(true);
+  }
 
-      // check for bitstream restriction stating that:
-      // If the current picture is a BLA or CRA picture, the value of NumPocTotalCurr shall be equal to 0.
-      // Ideally this process should not be repeated for each slice in a picture
-      if( pcSlice->isIRAP() )
+  // reference picture lists
+  WRITE_FLAG( picHeader->getPicRplPresentFlag(), "pic_rpl_present_flag" ); 
+  if( picHeader->getPicRplPresentFlag() )
+  {
+    // List0 and List1
+    for(int listIdx = 0; listIdx < 2; listIdx++) 
+    {                 
+      // copy L1 index from L0 index
+      if (listIdx == 1 && !pps->getRpl1IdxPresentFlag())
       {
-        for( int picIdx = 0; picIdx < rps->getNumberOfPictures(); picIdx++ )
+        picHeader->setRPL1idx(picHeader->getRPL0idx());
+      }      
+      // RPL in picture header or SPS
+      else if (sps->getNumRPL(listIdx) > 0)
+      {
+        if (!pps->getPPSRefPicListSPSIdc(listIdx))
+        {
+          WRITE_FLAG(picHeader->getRPLIdx(listIdx) != -1 ? 1 : 0, "pic_rpl_sps_flag[i]");
+        }
+        else if (pps->getPPSRefPicListSPSIdc( listIdx ) == 1)
         {
-          CHECK( rps->getUsed( picIdx ), "Picture should not be used" );
+          picHeader->setRPLIdx( listIdx, -1);
         }
       }
-
-      if( pcSlice->getRPSidx() < 0 )
+      else 
       {
-        WRITE_FLAG( 0, "short_term_ref_pic_set_sps_flag" );
-        xCodeShortTermRefPicSet( rps, true, pcSlice->getSPS()->getRPSList()->getNumberOfReferencePictureSets() );
+          picHeader->setRPLIdx( listIdx, -1 );
       }
-      else
+
+      // use list from SPS
+      if (picHeader->getRPLIdx(listIdx) != -1)
       {
-        WRITE_FLAG( 1, "short_term_ref_pic_set_sps_flag" );
-        int numBits = 0;
-        while( ( 1 << numBits ) < pcSlice->getSPS()->getRPSList()->getNumberOfReferencePictureSets() )
+        if (listIdx == 1 && !pps->getRpl1IdxPresentFlag())
         {
-          numBits++;
         }
-        if( numBits > 0 )
+        else if (sps->getNumRPL( listIdx ) > 1)
         {
-          WRITE_CODE( pcSlice->getRPSidx(), numBits, "short_term_ref_pic_set_idx" );
+          int numBits = ceilLog2(sps->getNumRPL( listIdx ));
+          WRITE_CODE(picHeader->getRPLIdx(listIdx), numBits, "pic_rpl_idx[i]");
         }
+        else
+        {
+          picHeader->setRPLIdx( listIdx, 0 );
+        }
+        picHeader->setRPL( listIdx, sps->getRPLList( listIdx )->getReferencePictureList(picHeader->getRPLIdx(listIdx)));
+      }
+      // explicit RPL in picture header
+      else
+      {
+        xCodeRefPicList( picHeader->getRPL(listIdx), sps->getLongTermRefsPresent(), sps->getBitsForPOC(), !sps->getUseWP() && !sps->getUseWPBiPred() );
       }
-      if( pcSlice->getSPS()->getLongTermRefsPresent() )
+
+      // POC MSB cycle signalling for LTRP
+      if (picHeader->getRPL(listIdx)->getNumberOfLongtermPictures())
       {
-        int numLtrpInSH = rps->getNumberOfLongtermPictures();
-        int ltrpInSPS[MAX_NUM_REF_PICS];
-        int numLtrpInSPS = 0;
-        uint32_t ltrpIndex;
-        int counter = 0;
-        // WARNING: The following code only works only if a matching long-term RPS is
-        //          found in the SPS for ALL long-term pictures
-        //          The problem is that the SPS coded long-term pictures are moved to the
-        //          beginning of the list which causes a mismatch when no reference picture
-        //          list reordering is used
-        //          NB: Long-term coding is currently not supported in general by the HM encoder
-        for( int k = rps->getNumberOfPictures() - 1; k > rps->getNumberOfPictures() - rps->getNumberOfLongtermPictures() - 1; k-- )
+        for (int i = 0; i < picHeader->getRPL(listIdx)->getNumberOfLongtermPictures() + picHeader->getRPL(listIdx)->getNumberOfShorttermPictures(); i++)
         {
-          if( xFindMatchingLTRP( pcSlice, &ltrpIndex, rps->getPOC( k ), rps->getUsed( k ) ) )
-          {
-            ltrpInSPS[numLtrpInSPS] = ltrpIndex;
-            numLtrpInSPS++;
-          }
-          else
+          if (picHeader->getRPL(listIdx)->isRefPicLongterm(i))
           {
-            counter++;
+            if (picHeader->getRPL(listIdx)->getLtrpInSliceHeaderFlag())
+            { 
+              WRITE_CODE(picHeader->getRPL(listIdx)->getRefPicIdentifier(i), sps->getBitsForPOC(),
+                         "pic_poc_lsb_lt[listIdx][rplsIdx][j]");
+            }
+            WRITE_FLAG(picHeader->getLocalRPL(listIdx)->getDeltaPocMSBPresentFlag(i) ? 1 : 0, "pic_delta_poc_msb_present_flag[i][j]");
+            if (picHeader->getLocalRPL(listIdx)->getDeltaPocMSBPresentFlag(i))
+            {
+              WRITE_UVLC(picHeader->getLocalRPL(listIdx)->getDeltaPocMSBCycleLT(i), "pic_delta_poc_msb_cycle_lt[i][j]");
+            }
           }
         }
-        numLtrpInSH -= numLtrpInSPS;
-        // check that either all long-term pictures are coded in SPS or in slice header (no mixing)
-        CHECK( numLtrpInSH != 0 && numLtrpInSPS != 0, "Long term picture not coded" );
+      }
+    }
+  }
+
+  // partitioning constraint overrides
+  if (sps->getSplitConsOverrideEnabledFlag())
+  {
+    WRITE_FLAG(picHeader->getSplitConsOverrideFlag(), "partition_constraints_override_flag");
+    if (picHeader->getSplitConsOverrideFlag())
+    {
+      WRITE_UVLC(floorLog2(picHeader->getMinQTSize(I_SLICE)) - sps->getLog2MinCodingBlockSize(), "pic_log2_diff_min_qt_min_cb_intra_slice_luma");
+      WRITE_UVLC(floorLog2(picHeader->getMinQTSize(P_SLICE)) - sps->getLog2MinCodingBlockSize(), "pic_log2_diff_min_qt_min_cb_inter_slice");
+      WRITE_UVLC(picHeader->getMaxMTTHierarchyDepth(P_SLICE),  "pic_max_mtt_hierarchy_depth_inter_slice");
+      WRITE_UVLC(picHeader->getMaxMTTHierarchyDepth(I_SLICE), "pic_max_mtt_hierarchy_depth_intra_slice_luma");
+      if (picHeader->getMaxMTTHierarchyDepth(I_SLICE) != 0)
+      {
+        WRITE_UVLC(floorLog2(picHeader->getMaxBTSize(I_SLICE)) - floorLog2(picHeader->getMinQTSize(I_SLICE)), "pic_log2_diff_max_bt_min_qt_intra_slice_luma");
+        WRITE_UVLC(floorLog2(picHeader->getMaxTTSize(I_SLICE)) - floorLog2(picHeader->getMinQTSize(I_SLICE)), "pic_log2_diff_max_tt_min_qt_intra_slice_luma");
+      }
+      if (picHeader->getMaxMTTHierarchyDepth(P_SLICE) != 0)
+      {
+        WRITE_UVLC(floorLog2(picHeader->getMaxBTSize(P_SLICE)) - floorLog2(picHeader->getMinQTSize(P_SLICE)), "pic_log2_diff_max_bt_min_qt_inter_slice");
+        WRITE_UVLC(floorLog2(picHeader->getMaxTTSize(P_SLICE)) - floorLog2(picHeader->getMinQTSize(P_SLICE)), "pic_log2_diff_max_tt_min_qt_inter_slice");
+      }
+      if (sps->getUseDualITree())
+      {
+        WRITE_UVLC(floorLog2(picHeader->getMinQTSize(I_SLICE, CHANNEL_TYPE_CHROMA)) - sps->getLog2MinCodingBlockSize(), "pic_log2_diff_min_qt_min_cb_intra_slice_chroma");
+        WRITE_UVLC(picHeader->getMaxMTTHierarchyDepth(I_SLICE, CHANNEL_TYPE_CHROMA), "pic_max_mtt_hierarchy_depth_intra_slice_chroma");
+        if (picHeader->getMaxMTTHierarchyDepth(I_SLICE, CHANNEL_TYPE_CHROMA) != 0)
+        {
+          WRITE_UVLC(floorLog2(picHeader->getMaxBTSize(I_SLICE, CHANNEL_TYPE_CHROMA)) - floorLog2(picHeader->getMinQTSize(I_SLICE, CHANNEL_TYPE_CHROMA)), "pic_log2_diff_max_bt_min_qt_intra_slice_chroma");
+          WRITE_UVLC(floorLog2(picHeader->getMaxTTSize(I_SLICE, CHANNEL_TYPE_CHROMA)) - floorLog2(picHeader->getMinQTSize(I_SLICE, CHANNEL_TYPE_CHROMA)), "pic_log2_diff_max_tt_min_qt_intra_slice_chroma");
+        }
+      }
+    }
+  }
+  else 
+  {
+    picHeader->setSplitConsOverrideFlag(0);
+  }
+
+  // inherit constraint values from SPS
+  if (!sps->getSplitConsOverrideEnabledFlag() || !picHeader->getSplitConsOverrideFlag()) 
+  {
+      picHeader->setMinQTSizes(sps->getMinQTSizes());
+      picHeader->setMaxMTTHierarchyDepths(sps->getMaxMTTHierarchyDepths());
+      picHeader->setMaxBTSizes(sps->getMaxBTSizes());
+      picHeader->setMaxTTSizes(sps->getMaxTTSizes());
+  }
+
+  // delta QP and chroma QP offset subdivision levels
+  if (pps->getUseDQP())
+  {
+    WRITE_UVLC( picHeader->getCuQpDeltaSubdivIntra(), "pic_cu_qp_delta_subdiv_intra_slice" );
+    WRITE_UVLC( picHeader->getCuQpDeltaSubdivInter(), "pic_cu_qp_delta_subdiv_inter_slice" );
+  }
+  else 
+  {
+    picHeader->setCuQpDeltaSubdivIntra( 0 );
+    picHeader->setCuQpDeltaSubdivInter( 0 );
+  }
+  if (pps->getCuChromaQpOffsetEnabledFlag())
+  {
+    WRITE_UVLC( picHeader->getCuChromaQpOffsetSubdivIntra(), "pic_cu_chroma_qp_offset_subdiv_intra_slice" );
+    WRITE_UVLC( picHeader->getCuChromaQpOffsetSubdivInter(), "pic_cu_chroma_qp_offset_subdiv_inter_slice" );
+  }
+  else 
+  {
+    picHeader->setCuChromaQpOffsetSubdivIntra( 0 );
+    picHeader->setCuChromaQpOffsetSubdivInter( 0 );
+  }
+  
+  // temporal motion vector prediction
+  if (sps->getSPSTemporalMVPEnabledFlag())
+  {
+    WRITE_FLAG( picHeader->getEnableTMVPFlag(), "pic_temporal_mvp_enabled_flag" );
+  }
+  else
+  {
+    picHeader->setEnableTMVPFlag(false);
+  }
+
+  // mvd L1 zero flag
+  if (!pps->getPPSMvdL1ZeroIdc())
+  {
+    WRITE_FLAG(picHeader->getMvdL1ZeroFlag(), "pic_mvd_l1_zero_flag");
+  }
+  else
+  {
+    picHeader->setMvdL1ZeroFlag( pps->getPPSMvdL1ZeroIdc() - 1 );
+  }
+   
+  // merge candidate list size
+  if (!pps->getPPSSixMinusMaxNumMergeCandPlus1())
+  {
+    CHECK(picHeader->getMaxNumMergeCand() > MRG_MAX_NUM_CANDS, "More merge candidates signalled than supported");
+    WRITE_UVLC(MRG_MAX_NUM_CANDS - picHeader->getMaxNumMergeCand(), "pic_six_minus_max_num_merge_cand");
+  }
+  else
+  {
+    picHeader->setMaxNumMergeCand(MRG_MAX_NUM_CANDS - (pps->getPPSSixMinusMaxNumMergeCandPlus1() - 1));
+  }
+
+  // subblock merge candidate list size
+  if ( sps->getUseAffine() )
+  {
+    CHECK( picHeader->getMaxNumAffineMergeCand() > AFFINE_MRG_MAX_NUM_CANDS, "More affine merge candidates signalled than supported" );
+    WRITE_UVLC(AFFINE_MRG_MAX_NUM_CANDS - picHeader->getMaxNumAffineMergeCand(), "pic_five_minus_max_num_subblock_merge_cand");
+  }
+  else
+  {
+    picHeader->setMaxNumAffineMergeCand( sps->getSBTMVPEnabledFlag() && picHeader->getEnableTMVPFlag() );
+  }
 
-        int bitsForLtrpInSPS = 0;
-        while( pcSlice->getSPS()->getNumLongTermRefPicSPS() > ( 1 << bitsForLtrpInSPS ) )
+  // full-pel MMVD flag
+  if (sps->getFpelMmvdEnabledFlag())
+  {
+    WRITE_FLAG( picHeader->getDisFracMMVD(), "pic_fpel_mmvd_enabled_flag" );
+  }
+  else
+  {
+    picHeader->setDisFracMMVD(false);
+  }
+  
+  // picture level BDOF disable flags
+  if (sps->getBdofControlPresentFlag())
+  {
+    WRITE_FLAG(picHeader->getDisBdofFlag(), "pic_disable_bdof_flag");
+  }
+  else
+  {
+    picHeader->setDisBdofFlag(0);
+  }
+
+  // picture level DMVR disable flags
+  if (sps->getDmvrControlPresentFlag())
+  {
+    WRITE_FLAG(picHeader->getDisDmvrFlag(), "pic_disable_dmvr_flag");
+  }
+  else
+  {
+    picHeader->setDisDmvrFlag(0);
+  }
+
+  // picture level PROF disable flags
+  if (sps->getProfControlPresentFlag())
+  {
+    WRITE_FLAG(picHeader->getDisProfFlag(), "pic_disable_prof_flag");
+  }
+  else
+  {
+    picHeader->setDisProfFlag(0);
+  }
+
+  // triangle merge candidate list size
+  if (sps->getUseTriangle() && picHeader->getMaxNumMergeCand() >= 2)
+  {
+    if (!pps->getPPSMaxNumMergeCandMinusMaxNumTriangleCandPlus1())
+    {
+      CHECK(picHeader->getMaxNumMergeCand() < picHeader->getMaxNumTriangleCand(), "Incorrrect max number of triangle candidates!");
+      WRITE_UVLC(picHeader->getMaxNumMergeCand() - picHeader->getMaxNumTriangleCand(), "pic_max_num_merge_cand_minus_max_num_triangle_cand");
+    }
+    else
+    {
+      picHeader->setMaxNumTriangleCand((uint32_t)(picHeader->getMaxNumMergeCand() - (pps->getPPSMaxNumMergeCandMinusMaxNumTriangleCandPlus1() - 1)));
+    }    
+  }
+
+  // ibc merge candidate list size
+  if (sps->getIBCFlag())
+  {
+    CHECK( picHeader->getMaxNumIBCMergeCand() > IBC_MRG_MAX_NUM_CANDS, "More IBC merge candidates signalled than supported" );
+    WRITE_UVLC(IBC_MRG_MAX_NUM_CANDS - picHeader->getMaxNumIBCMergeCand(), "pic_six_minus_max_num_ibc_merge_cand");
+  }
+
+  // joint Cb/Cr sign flag
+  if (sps->getJointCbCrEnabledFlag())
+  {
+    WRITE_FLAG( picHeader->getJointCbCrSignFlag(), "pic_joint_cbcr_sign_flag" );
+  }
+  else
+  {
+    picHeader->setJointCbCrSignFlag(false);
+  }
+
+  // sao enable flags
+  if(sps->getSAOEnabledFlag())
+  {
+    WRITE_FLAG(picHeader->getSaoEnabledPresentFlag(), "pic_sao_enabled_present_flag");
+    if (picHeader->getSaoEnabledPresentFlag())
+    {    
+      WRITE_FLAG(picHeader->getSaoEnabledFlag(CHANNEL_TYPE_LUMA), "slice_sao_luma_flag");  
+      if (sps->getChromaFormatIdc() != CHROMA_400)
+      {
+        WRITE_FLAG(picHeader->getSaoEnabledFlag(CHANNEL_TYPE_CHROMA), "slice_sao_chroma_flag");
+      }
+    }
+    else 
+    {
+      picHeader->setSaoEnabledFlag(CHANNEL_TYPE_LUMA,   true);
+      picHeader->setSaoEnabledFlag(CHANNEL_TYPE_CHROMA, true);
+    }
+  }
+  else 
+  {
+    picHeader->setSaoEnabledFlag(CHANNEL_TYPE_LUMA,   false);
+    picHeader->setSaoEnabledFlag(CHANNEL_TYPE_CHROMA, false);
+  }
+  
+  // alf enable flags and aps IDs
+  if( sps->getALFEnabledFlag() )
+  {
+    WRITE_FLAG(picHeader->getAlfEnabledPresentFlag(), "pic_alf_enabled_present_flag");
+    if (picHeader->getAlfEnabledPresentFlag()) 
+    {
+      WRITE_FLAG(picHeader->getAlfEnabledFlag(COMPONENT_Y), "pic_alf_enabled_flag");
+      if (picHeader->getAlfEnabledFlag(COMPONENT_Y))
+      {
+        WRITE_CODE(picHeader->getNumAlfAps(), 3, "pic_num_alf_aps_ids_luma");
+        const std::vector<int>&   apsId = picHeader->getAlfAPSs();
+        for (int i = 0; i < picHeader->getNumAlfAps(); i++)
         {
-          bitsForLtrpInSPS++;
+          WRITE_CODE(apsId[i], 3, "pic_alf_aps_id_luma");
         }
-        if( pcSlice->getSPS()->getNumLongTermRefPicSPS() > 0 )
+        
+        const int alfChromaIdc = picHeader->getAlfEnabledFlag(COMPONENT_Cb) + picHeader->getAlfEnabledFlag(COMPONENT_Cr) * 2 ;
+        if (sps->getChromaFormatIdc() != CHROMA_400)
         {
-          WRITE_UVLC( numLtrpInSPS, "num_long_term_sps" );
+          WRITE_CODE(alfChromaIdc, 2, "pic_alf_chroma_idc");
         }
-        WRITE_UVLC( numLtrpInSH, "num_long_term_pics" );
-        // Note that the LSBs of the LT ref. pic. POCs must be sorted before.
-        // Not sorted here because LT ref indices will be used in setRefPicList()
-        int prevDeltaMSB = 0, prevLSB = 0;
-        int offset = rps->getNumberOfNegativePictures() + rps->getNumberOfPositivePictures();
-        counter = 0;
-        // Warning: If some pictures are moved to ltrpInSPS, i is referring to a wrong index
-        //          (mapping would be required)
-        for( int i = rps->getNumberOfPictures() - 1; i > offset - 1; i--, counter++ )
+        if (alfChromaIdc)
         {
-          if( counter < numLtrpInSPS )
-          {
-            if( bitsForLtrpInSPS > 0 )
-            {
-              WRITE_CODE( ltrpInSPS[counter], bitsForLtrpInSPS, "lt_idx_sps[i]" );
-            }
-          }
-          else
-          {
-            WRITE_CODE( rps->getPocLSBLT( i ), pcSlice->getSPS()->getBitsForPOC(), "poc_lsb_lt" );
-            WRITE_FLAG( rps->getUsed( i ), "used_by_curr_pic_lt_flag" );
-          }
-          WRITE_FLAG( rps->getDeltaPocMSBPresentFlag( i ), "delta_poc_msb_present_flag" );
-
-          if( rps->getDeltaPocMSBPresentFlag( i ) )
-          {
-            bool deltaFlag = false;
-            //  First LTRP from SPS                 ||  First LTRP from SH                              || curr LSB            != prev LSB
-            if( ( i == rps->getNumberOfPictures() - 1 ) || ( i == rps->getNumberOfPictures() - 1 - numLtrpInSPS ) || ( rps->getPocLSBLT( i ) != prevLSB ) )
-            {
-              deltaFlag = true;
-            }
-            if( deltaFlag )
-            {
-              WRITE_UVLC( rps->getDeltaPocMSBCycleLT( i ), "delta_poc_msb_cycle_lt[i]" );
-            }
-            else
-            {
-              int differenceInDeltaMSB = rps->getDeltaPocMSBCycleLT( i ) - prevDeltaMSB;
-              CHECK( differenceInDeltaMSB < 0, "Negative diff. delta MSB" );
-              WRITE_UVLC( differenceInDeltaMSB, "delta_poc_msb_cycle_lt[i]" );
-            }
-            prevLSB = rps->getPocLSBLT( i );
-            prevDeltaMSB = rps->getDeltaPocMSBCycleLT( i );
-          }
+          WRITE_CODE(picHeader->getAlfApsIdChroma(), 3, "pic_alf_aps_id_chroma");
         }
       }
-      if( pcSlice->getSPS()->getSPSTemporalMVPEnabledFlag() )
+    }
+    else 
+    {
+      picHeader->setAlfEnabledFlag(COMPONENT_Y,  true);
+      picHeader->setAlfEnabledFlag(COMPONENT_Cb, true);
+      picHeader->setAlfEnabledFlag(COMPONENT_Cr, true);
+    }
+  }
+  else 
+  {
+    picHeader->setAlfEnabledFlag(COMPONENT_Y,  false);
+    picHeader->setAlfEnabledFlag(COMPONENT_Cb, false);
+    picHeader->setAlfEnabledFlag(COMPONENT_Cr, false);
+  }
+
+  // dependent quantization
+  if (!pps->getPPSDepQuantEnabledIdc())
+  {
+    WRITE_FLAG(picHeader->getDepQuantEnabledFlag(), "pic_dep_quant_enabled_flag");
+  }
+  else
+  {
+    picHeader->setDepQuantEnabledFlag( pps->getPPSDepQuantEnabledIdc() - 1 );
+  }
+
+  // sign data hiding
+  if( !picHeader->getDepQuantEnabledFlag() )
+  {
+    WRITE_FLAG( picHeader->getSignDataHidingEnabledFlag(), "pic_sign_data_hiding_enabled_flag" );
+  }
+  else
+  {
+    picHeader->setSignDataHidingEnabledFlag(false);
+  }
+
+  // deblocking filter controls
+  if (pps->getDeblockingFilterControlPresentFlag())
+  {
+    if(pps->getDeblockingFilterOverrideEnabledFlag())
+    {    
+      WRITE_FLAG ( picHeader->getDeblockingFilterOverridePresentFlag(), "pic_deblocking_filter_override_present_flag" );    
+      if( picHeader->getDeblockingFilterOverridePresentFlag() ) 
       {
-        WRITE_FLAG( pcSlice->getEnableTMVPFlag() ? 1 : 0, "slice_temporal_mvp_enabled_flag" );
+        WRITE_FLAG ( picHeader->getDeblockingFilterOverrideFlag(), "pic_deblocking_filter_override_flag" );
       }
+      else
+      {    
+        picHeader->setDeblockingFilterOverrideFlag(false);
+      }
+    }
+    else
+    {
+      picHeader->setDeblockingFilterOverridePresentFlag(false);
+      picHeader->setDeblockingFilterOverrideFlag(false);
     }
-    if( pcSlice->getSPS()->getSAOEnabledFlag() )
+
+    if(picHeader->getDeblockingFilterOverrideFlag())
     {
-      WRITE_FLAG( pcSlice->getSaoEnabledFlag( CHANNEL_TYPE_LUMA ), "slice_sao_luma_flag" );
-      if( chromaEnabled )
+      WRITE_FLAG ( picHeader->getDeblockingFilterDisable(), "pic_deblocking_filter_disabled_flag" );
+      if(!picHeader->getDeblockingFilterDisable())
       {
-        WRITE_FLAG( pcSlice->getSaoEnabledFlag( CHANNEL_TYPE_CHROMA ), "slice_sao_chroma_flag" );
+        WRITE_SVLC( picHeader->getDeblockingFilterBetaOffsetDiv2(), "pic_beta_offset_div2" );
+        WRITE_SVLC( picHeader->getDeblockingFilterTcOffsetDiv2(), "pic_tc_offset_div2" );
       }
     }
+    else
+    {
+      picHeader->setDeblockingFilterDisable       ( pps->getPPSDeblockingFilterDisabledFlag() );
+      picHeader->setDeblockingFilterBetaOffsetDiv2( pps->getDeblockingFilterBetaOffsetDiv2() );
+      picHeader->setDeblockingFilterTcOffsetDiv2  ( pps->getDeblockingFilterTcOffsetDiv2() );
+    }
+  }
+  else
+  {
+    picHeader->setDeblockingFilterDisable       ( false );
+    picHeader->setDeblockingFilterBetaOffsetDiv2( 0 );
+    picHeader->setDeblockingFilterTcOffsetDiv2  ( 0 );
+  }
 
-    if( pcSlice->getSPS()->getALFEnabledFlag() )
+  // luma mapping / chroma scaling controls
+  if (sps->getUseLmcs())
+  {
+    WRITE_FLAG(picHeader->getLmcsEnabledFlag(), "pic_lmcs_enabled_flag");
+    if (picHeader->getLmcsEnabledFlag())
     {
-      const int alfEnabled = pcSlice->getAPS()->getAlfAPSParam().enabledFlag[COMPONENT_Y] ? 1 : 0;
-      WRITE_FLAG( alfEnabled, "tile_group_alf_enabled_flag");
-      if (alfEnabled)
+      WRITE_CODE(picHeader->getLmcsAPSId(), 2, "pic_lmcs_aps_id");
+      if (sps->getChromaFormatIdc() != CHROMA_400)
       {
-        WRITE_CODE(pcSlice->getAPSId(), 5, "tile_group_aps_id");
+        WRITE_FLAG(picHeader->getLmcsChromaResidualScaleFlag(), "pic_chroma_residual_scale_flag");
       }
+      else
+      {
+        picHeader->setLmcsChromaResidualScaleFlag(false);
+      }
+    }
+  }
+  else
+  {
+    picHeader->setLmcsEnabledFlag(false);
+    picHeader->setLmcsChromaResidualScaleFlag(false);
+  }
+
+  // quantization scaling lists
+  if( sps->getScalingListFlag() )
+  {
+    WRITE_FLAG( picHeader->getScalingListPresentFlag(), "pic_scaling_list_present_flag" );
+    if( picHeader->getScalingListPresentFlag() )
+    {
+      WRITE_CODE( picHeader->getScalingListAPSId(), 3, "pic_scaling_list_aps_id" );
+    }
+  }
+  else 
+  {
+    picHeader->setScalingListPresentFlag( false );
+  }
+
+  // picture header extension
+  if(pps->getPictureHeaderExtensionPresentFlag())
+  {
+    WRITE_UVLC(0,"pic_segment_header_extension_length");
+  }
+  
+  xWriteRbspTrailingBits();
+}
+
+void HLSWriter::codeSliceHeader         ( Slice* pcSlice )
+{
+#if ENABLE_TRACING
+  xTraceSliceHeader ();
+#endif
+
+  CodingStructure& cs = *pcSlice->getPic()->cs;
+  const PicHeader *picHeader = cs.picHeader;
+  const ChromaFormat format                = pcSlice->getSPS()->getChromaFormatIdc();
+  const uint32_t         numberValidComponents = getNumberValidComponents(format);
+  const bool         chromaEnabled         = isChromaEnabled(format);
+
+  int pocBits = pcSlice->getSPS()->getBitsForPOC();
+  int pocMask = (1 << pocBits) - 1;
+  WRITE_CODE(pcSlice->getPOC() & pocMask, pocBits, "slice_pic_order_cnt_lsb");
+  
+
+  if (pcSlice->getSPS()->getSubPicPresentFlag())
+  {
+    uint32_t bitsSubPicId;
+    if (pcSlice->getSPS()->getSubPicIdSignallingPresentFlag())
+    {
+      bitsSubPicId = pcSlice->getSPS()->getSubPicIdLen();
     }
+    else if (picHeader->getSubPicIdSignallingPresentFlag())
+    {
+      bitsSubPicId = picHeader->getSubPicIdLen();
+    }
+    else if (pcSlice->getPPS()->getSubPicIdSignallingPresentFlag())
+    {
+      bitsSubPicId = pcSlice->getPPS()->getSubPicIdLen();
+    }
+    else
+    {
+      bitsSubPicId = ceilLog2(pcSlice->getSPS()->getNumSubPics());
+    }
+    WRITE_CODE(pcSlice->getSliceSubPicId(), bitsSubPicId, "slice_subpic_id");
+  }
+
+  // raster scan slices
+  if( pcSlice->getPPS()->getRectSliceFlag() == 0 ) 
+  {
+    // slice address is the raster scan tile index of first tile in slice
+    if( pcSlice->getPPS()->getNumTiles() > 1 ) 
+    {      
+      int bitsSliceAddress = ceilLog2(pcSlice->getPPS()->getNumTiles());
+      WRITE_CODE( pcSlice->getSliceID(), bitsSliceAddress, "slice_address");
+      WRITE_UVLC( pcSlice->getNumTilesInSlice() - 1, "num_tiles_in_slice_minus1");
+    }
+  }
+  // rectangular slices
+  else 
+  {
+    // slice address is the index of the slice within the current sub-picture
+    if( pcSlice->getPPS()->getNumSlicesInPic() > 1 ) 
+    {
+      int bitsSliceAddress = ceilLog2(pcSlice->getPPS()->getNumSlicesInPic());  // change to NumSlicesInSubPic when available
+      WRITE_CODE( pcSlice->getSliceID(), bitsSliceAddress, "slice_address");
+    }
+  }
+
+
+    WRITE_UVLC( pcSlice->getSliceType(), "slice_type" );
+
 
-    //check if numrefidxes match the defaults. If not, override
 
-    if( !pcSlice->isIntra() )
+    if( !picHeader->getPicRplPresentFlag() && (!pcSlice->getIdrPicFlag() || pcSlice->getSPS()->getIDRRefParamListPresent()) )
     {
-      bool overrideFlag = ( pcSlice->getNumRefIdx( REF_PIC_LIST_0 ) != pcSlice->getPPS()->getNumRefIdxL0DefaultActive() || ( pcSlice->isInterB() && pcSlice->getNumRefIdx( REF_PIC_LIST_1 ) != pcSlice->getPPS()->getNumRefIdxL1DefaultActive() ) );
-      WRITE_FLAG( overrideFlag ? 1 : 0, "num_ref_idx_active_override_flag" );
-      if( overrideFlag )
+      //Write L0 related syntax elements
+      if (pcSlice->getSPS()->getNumRPL0() > 0)
       {
-        WRITE_UVLC( pcSlice->getNumRefIdx( REF_PIC_LIST_0 ) - 1, "num_ref_idx_l0_active_minus1" );
-        if( pcSlice->isInterB() )
+        if (!pcSlice->getPPS()->getPPSRefPicListSPSIdc0())
         {
-          WRITE_UVLC( pcSlice->getNumRefIdx( REF_PIC_LIST_1 ) - 1, "num_ref_idx_l1_active_minus1" );
+          WRITE_FLAG(pcSlice->getRPL0idx() != -1 ? 1 : 0, "ref_pic_list_sps_flag[0]");
         }
-        else
+      }
+      if (pcSlice->getRPL0idx() != -1)
+      {
+        if (pcSlice->getSPS()->getNumRPL0() > 1)
         {
-          pcSlice->setNumRefIdx( REF_PIC_LIST_1, 0 );
+          int numBits = 0;
+          while ((1 << numBits) < pcSlice->getSPS()->getNumRPL0())
+          {
+            numBits++;
+          }
+          WRITE_CODE(pcSlice->getRPL0idx(), numBits, "ref_pic_list_idx[0]");
         }
       }
-    }
-    else
-    {
-      pcSlice->setNumRefIdx( REF_PIC_LIST_0, 0 );
-      pcSlice->setNumRefIdx( REF_PIC_LIST_1, 0 );
-    }
-
-    if( pcSlice->getPPS()->getListsModificationPresentFlag() && pcSlice->getNumRpsCurrTempList() > 1 )
-    {
-      RefPicListModification* refPicListModification = pcSlice->getRefPicListModification();
-      if( !pcSlice->isIntra() )
+      else
+      {  //write local RPL0
+        xCodeRefPicList( pcSlice->getRPL0(), pcSlice->getSPS()->getLongTermRefsPresent(), pcSlice->getSPS()->getBitsForPOC(), !pcSlice->getSPS()->getUseWP() && !pcSlice->getSPS()->getUseWPBiPred() );
+      }
+      // POC MSB cycle signalling for LTRP (list 0)
+      if (pcSlice->getRPL0()->getNumberOfLongtermPictures())
       {
-        WRITE_FLAG( pcSlice->getRefPicListModification()->getRefPicListModificationFlagL0() ? 1 : 0, "ref_pic_list_modification_flag_l0" );
-        if( pcSlice->getRefPicListModification()->getRefPicListModificationFlagL0() )
+        for (int i = 0; i < pcSlice->getRPL0()->getNumberOfLongtermPictures() + pcSlice->getRPL0()->getNumberOfShorttermPictures(); i++)
         {
-          int numRpsCurrTempList0 = pcSlice->getNumRpsCurrTempList();
-          if( numRpsCurrTempList0 > 1 )
+          if (pcSlice->getRPL0()->isRefPicLongterm(i))
           {
-            int length = 1;
-            numRpsCurrTempList0--;
-            while( numRpsCurrTempList0 >>= 1 )
+            if (pcSlice->getRPL0()->getLtrpInSliceHeaderFlag())
+            {
+              WRITE_CODE(pcSlice->getRPL0()->getRefPicIdentifier(i), pcSlice->getSPS()->getBitsForPOC(),
+                         "slice_poc_lsb_lt[listIdx][rplsIdx][j]");
+            }
+            WRITE_FLAG(pcSlice->getLocalRPL0()->getDeltaPocMSBPresentFlag(i) ? 1 : 0, "delta_poc_msb_present_flag[i][j]");
+            if (pcSlice->getLocalRPL0()->getDeltaPocMSBPresentFlag(i))
             {
-              length++;
+              WRITE_UVLC(pcSlice->getLocalRPL0()->getDeltaPocMSBCycleLT(i), "delta_poc_msb_cycle_lt[i][j]");
             }
-            for( int i = 0; i < pcSlice->getNumRefIdx( REF_PIC_LIST_0 ); i++ )
+          }
+        }
+      }
+
+      //Write L1 related syntax elements
+      if (!pcSlice->getPPS()->getRpl1IdxPresentFlag())
+      {
+        CHECK(pcSlice->getRPL1idx() != pcSlice->getRPL0idx(), "RPL1Idx is not signalled but it is not the same as RPL0Idx");
+        if (pcSlice->getRPL1idx() == -1)
+        {  //write local RPL1
+          xCodeRefPicList( pcSlice->getRPL1(), pcSlice->getSPS()->getLongTermRefsPresent(), pcSlice->getSPS()->getBitsForPOC(), !pcSlice->getSPS()->getUseWP() && !pcSlice->getSPS()->getUseWPBiPred() );
+        }
+      }
+      else
+      {
+        if (pcSlice->getSPS()->getNumRPL1() > 0)
+        {
+        if (!pcSlice->getPPS()->getPPSRefPicListSPSIdc1())
+        {
+          WRITE_FLAG(pcSlice->getRPL1idx() != -1 ? 1 : 0, "ref_pic_list_sps_flag[1]");
+        }
+        }
+        if (pcSlice->getRPL1idx() != -1)
+        {
+          if (pcSlice->getSPS()->getNumRPL1() > 1)
+          {
+            int numBits = 0;
+            while ((1 << numBits) < pcSlice->getSPS()->getNumRPL1())
             {
-              WRITE_CODE( refPicListModification->getRefPicSetIdxL0( i ), length, "list_entry_l0" );
+              numBits++;
             }
+            WRITE_CODE(pcSlice->getRPL1idx(), numBits, "ref_pic_list_idx[1]");
           }
         }
+        else
+        {  //write local RPL1
+          xCodeRefPicList( pcSlice->getRPL1(), pcSlice->getSPS()->getLongTermRefsPresent(), pcSlice->getSPS()->getBitsForPOC(), !pcSlice->getSPS()->getUseWP() && !pcSlice->getSPS()->getUseWPBiPred() );
+        }
       }
-      if( pcSlice->isInterB() )
+      // Signal the POC MSB cycle information for the long-term reference pictures (LTRP) of RPL1
+      if (pcSlice->getRPL1()->getNumberOfLongtermPictures())
       {
-        WRITE_FLAG( pcSlice->getRefPicListModification()->getRefPicListModificationFlagL1() ? 1 : 0, "ref_pic_list_modification_flag_l1" );
-        if( pcSlice->getRefPicListModification()->getRefPicListModificationFlagL1() )
+        for (int i = 0; i < pcSlice->getRPL1()->getNumberOfLongtermPictures() + pcSlice->getRPL1()->getNumberOfShorttermPictures(); i++)
         {
-          int numRpsCurrTempList1 = pcSlice->getNumRpsCurrTempList();
-          if( numRpsCurrTempList1 > 1 )
+          if (pcSlice->getRPL1()->isRefPicLongterm(i))
           {
-            int length = 1;
-            numRpsCurrTempList1--;
-            while( numRpsCurrTempList1 >>= 1 )
+            if (pcSlice->getRPL1()->getLtrpInSliceHeaderFlag())
             {
-              length++;
+              WRITE_CODE(pcSlice->getRPL1()->getRefPicIdentifier(i), pcSlice->getSPS()->getBitsForPOC(),
+                         "slice_poc_lsb_lt[listIdx][rplsIdx][j]");
             }
-            for( int i = 0; i < pcSlice->getNumRefIdx( REF_PIC_LIST_1 ); i++ )
+            WRITE_FLAG(pcSlice->getLocalRPL1()->getDeltaPocMSBPresentFlag(i) ? 1 : 0, "delta_poc_msb_present_flag[i][j]");
+            if (pcSlice->getLocalRPL1()->getDeltaPocMSBPresentFlag(i))
             {
-              WRITE_CODE( refPicListModification->getRefPicSetIdxL1( i ), length, "list_entry_l1" );
+              WRITE_UVLC(pcSlice->getLocalRPL1()->getDeltaPocMSBCycleLT(i), "delta_poc_msb_cycle_lt[i][j]");
             }
           }
         }
       }
     }
 
-    if( pcSlice->isInterB() )
+    if( picHeader->getPicRplPresentFlag() || !pcSlice->getIdrPicFlag() || pcSlice->getSPS()->getIDRRefParamListPresent() )
     {
-      WRITE_FLAG( pcSlice->getMvdL1ZeroFlag() ? 1 : 0, "mvd_l1_zero_flag" );
+      // Check whether the active reference index counts equal the defaults; if they do not, signal an override
+
+      if ((!pcSlice->isIntra() && pcSlice->getRPL0()->getNumRefEntries() > 1) ||
+          (pcSlice->isInterB() && pcSlice->getRPL1()->getNumRefEntries() > 1) )
+      {
+        int defaultL0 = std::min<int>(pcSlice->getRPL0()->getNumRefEntries(), pcSlice->getPPS()->getNumRefIdxL0DefaultActive());
+        int defaultL1 = pcSlice->isInterB() ? std::min<int>(pcSlice->getRPL1()->getNumRefEntries(), pcSlice->getPPS()->getNumRefIdxL1DefaultActive()) : 0;
+        bool overrideFlag = ( pcSlice->getNumRefIdx( REF_PIC_LIST_0 ) != defaultL0 || ( pcSlice->isInterB() && pcSlice->getNumRefIdx( REF_PIC_LIST_1 ) != defaultL1 ) );
+        WRITE_FLAG( overrideFlag ? 1 : 0, "num_ref_idx_active_override_flag" );
+        if( overrideFlag )
+        {
+          if(pcSlice->getRPL0()->getNumRefEntries() > 1)
+          {
+            WRITE_UVLC( pcSlice->getNumRefIdx( REF_PIC_LIST_0 ) - 1, "num_ref_idx_l0_active_minus1" );
+          }
+          else
+          {
+            pcSlice->setNumRefIdx( REF_PIC_LIST_0, 1);
+          }
+
+          if( pcSlice->isInterB() && pcSlice->getRPL1()->getNumRefEntries() > 1)
+          {
+            WRITE_UVLC( pcSlice->getNumRefIdx( REF_PIC_LIST_1 ) - 1, "num_ref_idx_l1_active_minus1" );
+          }
+          else
+          {
+            pcSlice->setNumRefIdx( REF_PIC_LIST_1, pcSlice->isInterB() ? 1 : 0);
+          }
+        }
+        else
+        {
+          pcSlice->setNumRefIdx( REF_PIC_LIST_0, defaultL0 );
+          pcSlice->setNumRefIdx( REF_PIC_LIST_1, defaultL1 );
+        }
+      }
+      else
+      {
+        pcSlice->setNumRefIdx( REF_PIC_LIST_0, pcSlice->isIntra() ? 0 : 1 );
+        pcSlice->setNumRefIdx( REF_PIC_LIST_1, pcSlice->isInterB() ? 1 : 0 );
+      }
     }
 
+
     if( !pcSlice->isIntra() )
     {
       if( !pcSlice->isIntra() && pcSlice->getPPS()->getCabacInitPresentFlag() )
@@ -1280,11 +1944,14 @@ void HLSWriter::codeSliceHeader         ( Slice* pcSlice )
       }
     }
 
-    if( pcSlice->getEnableTMVPFlag() )
+    if( pcSlice->getPicHeader()->getEnableTMVPFlag() )
     {
       if( pcSlice->getSliceType() == B_SLICE )
       {
-        WRITE_FLAG( pcSlice->getColFromL0Flag(), "collocated_from_l0_flag" );
+        if (!pcSlice->getPPS()->getPPSCollocatedFromL0Idc())
+        {
+          WRITE_FLAG( pcSlice->getColFromL0Flag(), "collocated_from_l0_flag" );
+        }
       }
 
       if( pcSlice->getSliceType() != I_SLICE &&
@@ -1294,81 +1961,13 @@ void HLSWriter::codeSliceHeader         ( Slice* pcSlice )
         WRITE_UVLC( pcSlice->getColRefIdx(), "collocated_ref_idx" );
       }
     }
+
     if( ( pcSlice->getPPS()->getUseWP() && pcSlice->getSliceType() == P_SLICE ) || ( pcSlice->getPPS()->getWPBiPred() && pcSlice->getSliceType() == B_SLICE ) )
     {
       xCodePredWeightTable( pcSlice );
     }
-    WRITE_FLAG( pcSlice->getDepQuantEnabledFlag() ? 1 : 0, "dep_quant_enabled_flag" );
-#if HEVC_USE_SIGN_HIDING
-    if( !pcSlice->getDepQuantEnabledFlag() )
-    {
-      WRITE_FLAG( pcSlice->getSignDataHidingEnabledFlag() ? 1 : 0, "sign_data_hiding_enabled_flag" );
-    }
-    else
-    {
-      CHECK( pcSlice->getSignDataHidingEnabledFlag(), "sign data hiding not supported when dependent quantization is enabled" );
-    }
-#endif
-    if (
-      pcSlice->getSPS()->getSplitConsOverrideEnabledFlag()
-      )
-    {
-      WRITE_FLAG(pcSlice->getSplitConsOverrideFlag() ? 1 : 0, "partition_constrainst_override_flag");
-      if (pcSlice->getSplitConsOverrideFlag())
-      {
-        WRITE_UVLC(g_aucLog2[pcSlice->getMinQTSize()] - pcSlice->getSPS()->getLog2MinCodingBlockSize(), "log2_diff_min_qt_min_cb");
-        WRITE_UVLC(pcSlice->getMaxBTDepth(), "max_bt_depth");
-        if (pcSlice->getMaxBTDepth() != 0)
-        {
-          CHECK(pcSlice->getMaxBTSize() < pcSlice->getMinQTSize(), "maxBtSize is smaller than minQtSize");
-          WRITE_UVLC(g_aucLog2[pcSlice->getMaxBTSize()] - g_aucLog2[pcSlice->getMinQTSize()], "log2_diff_max_bt_min_qt");
-          CHECK(pcSlice->getMaxTTSize() < pcSlice->getMinQTSize(), "maxTtSize is smaller than minQtSize");
-          WRITE_UVLC(g_aucLog2[pcSlice->getMaxTTSize()] - g_aucLog2[pcSlice->getMinQTSize()], "log2_diff_max_tt_min_qt");
-        }
-        if (
-          pcSlice->isIntra() && pcSlice->getSPS()->getUseDualITree()
-          )
-        {
-          WRITE_UVLC(g_aucLog2[pcSlice->getMinQTSizeIChroma()] - pcSlice->getSPS()->getLog2MinCodingBlockSize(), "log2_diff_min_qt_min_cb_chroma");
-          WRITE_UVLC(pcSlice->getMaxBTDepthIChroma(), "max_mtt_hierarchy_depth_chroma");
-          if (pcSlice->getMaxBTDepthIChroma() != 0)
-          {
-            CHECK(pcSlice->getMaxBTSizeIChroma() < pcSlice->getMinQTSizeIChroma(), "maxBtSizeC is smaller than minQtSizeC");
-            WRITE_UVLC(g_aucLog2[pcSlice->getMaxBTSizeIChroma()] - g_aucLog2[pcSlice->getMinQTSizeIChroma()], "log2_diff_max_bt_min_qt_chroma");
-            CHECK(pcSlice->getMaxTTSizeIChroma() < pcSlice->getMinQTSizeIChroma(), "maxTtSizeC is smaller than minQtSizeC");
-            WRITE_UVLC(g_aucLog2[pcSlice->getMaxTTSizeIChroma()] - g_aucLog2[pcSlice->getMinQTSizeIChroma()], "log2_diff_max_tt_min_qt_chroma");
-          }
-        }
-      }
-    }
-    if (!cs.slice->isIntra() || cs.slice->getSPS()->getIBCFlag())
-    {
-      CHECK(pcSlice->getMaxNumMergeCand() > MRG_MAX_NUM_CANDS, "More merge candidates signalled than supported");
-      WRITE_UVLC(MRG_MAX_NUM_CANDS - pcSlice->getMaxNumMergeCand(), "six_minus_max_num_merge_cand");
-    }
-    if( !pcSlice->isIntra() )
-    {
 
-      if ( pcSlice->getSPS()->getSBTMVPEnabledFlag() && !pcSlice->getSPS()->getUseAffine() ) // ATMVP only
-      {
-        CHECK( pcSlice->getMaxNumAffineMergeCand() != 1, "Sub-block merge can number should be 1" );
-      }
-      else
-      if ( !pcSlice->getSPS()->getSBTMVPEnabledFlag() && !pcSlice->getSPS()->getUseAffine() ) // both off
-      {
-        CHECK( pcSlice->getMaxNumAffineMergeCand() != 0, "Sub-block merge can number should be 0" );
-      }
-      else
-      if ( pcSlice->getSPS()->getUseAffine() )
-      {
-        CHECK( pcSlice->getMaxNumAffineMergeCand() > AFFINE_MRG_MAX_NUM_CANDS, "More affine merge candidates signalled than supported" );
-        WRITE_UVLC( AFFINE_MRG_MAX_NUM_CANDS - pcSlice->getMaxNumAffineMergeCand(), "five_minus_max_num_affine_merge_cand" );
-      }
-      if ( pcSlice->getSPS()->getDisFracMmvdEnabledFlag() )
-      {
-        WRITE_FLAG( pcSlice->getDisFracMMVD(), "tile_group_fracmmvd_disabled_flag" );
-      }
-    }
+
     int iCode = pcSlice->getSliceQp() - ( pcSlice->getPPS()->getPicInitQPMinus26() + 26 );
     WRITE_SVLC( iCode, "slice_qp_delta" );
     if (pcSlice->getPPS()->getSliceChromaQpFlag())
@@ -1380,21 +1979,64 @@ void HLSWriter::codeSliceHeader         ( Slice* pcSlice )
       if (numberValidComponents > COMPONENT_Cr)
       {
         WRITE_SVLC( pcSlice->getSliceChromaQpDelta(COMPONENT_Cr), "slice_cr_qp_offset" );
+        if (pcSlice->getSPS()->getJointCbCrEnabledFlag())
+        {
+          WRITE_SVLC( pcSlice->getSliceChromaQpDelta(JOINT_CbCr), "slice_joint_cbcr_qp_offset");
+        }
       }
       CHECK(numberValidComponents < COMPONENT_Cr+1, "Too many valid components");
     }
 
-    if (pcSlice->getPPS()->getPpsRangeExtension().getChromaQpOffsetListEnabledFlag())
+    if (pcSlice->getPPS()->getCuChromaQpOffsetEnabledFlag())
     {
       WRITE_FLAG(pcSlice->getUseChromaQpAdj(), "cu_chroma_qp_offset_enabled_flag");
     }
 
+    if( pcSlice->getSPS()->getSAOEnabledFlag() && !picHeader->getSaoEnabledPresentFlag() )
+    {
+      WRITE_FLAG( pcSlice->getSaoEnabledFlag( CHANNEL_TYPE_LUMA ), "slice_sao_luma_flag" );
+      if( chromaEnabled )
+      {
+        WRITE_FLAG( pcSlice->getSaoEnabledFlag( CHANNEL_TYPE_CHROMA ), "slice_sao_chroma_flag" );
+      }
+    }
+
+    if( pcSlice->getSPS()->getALFEnabledFlag() && !picHeader->getAlfEnabledPresentFlag() )
+    {
+      const int alfEnabled = pcSlice->getTileGroupAlfEnabledFlag(COMPONENT_Y);
+      WRITE_FLAG(alfEnabled, "slice_alf_enabled_flag");
+
+      if (alfEnabled)
+      {
+        WRITE_CODE(pcSlice->getTileGroupNumAps(), 3, "slice_num_alf_aps_ids_luma");
+        const std::vector<int>&   apsId = pcSlice->getTileGroupApsIdLuma();
+        for (int i = 0; i < pcSlice->getTileGroupNumAps(); i++)
+        {
+          WRITE_CODE(apsId[i], 3, "slice_alf_aps_id_luma");
+        }
+
+        const int alfChromaIdc = pcSlice->getTileGroupAlfEnabledFlag(COMPONENT_Cb) + pcSlice->getTileGroupAlfEnabledFlag(COMPONENT_Cr) * 2 ;
+        if (chromaEnabled)
+        {
+          WRITE_CODE(alfChromaIdc, 2, "slice_alf_chroma_idc");
+        }
+        if (alfChromaIdc)
+        {
+          WRITE_CODE(pcSlice->getTileGroupApsIdChroma(), 3, "slice_alf_aps_id_chroma");
+        }
+      }
+    }
+
     if (pcSlice->getPPS()->getDeblockingFilterControlPresentFlag())
     {
-      if (pcSlice->getPPS()->getDeblockingFilterOverrideEnabledFlag() )
+      if (pcSlice->getPPS()->getDeblockingFilterOverrideEnabledFlag() && !picHeader->getDeblockingFilterOverridePresentFlag())
       {
         WRITE_FLAG(pcSlice->getDeblockingFilterOverrideFlag(), "deblocking_filter_override_flag");
       }
+      else
+      {
+        pcSlice->setDeblockingFilterOverrideFlag(0);
+      }
       if (pcSlice->getDeblockingFilterOverrideFlag())
       {
         WRITE_FLAG(pcSlice->getDeblockingFilterDisable(), "slice_deblocking_filter_disabled_flag");
@@ -1404,23 +2046,19 @@ void HLSWriter::codeSliceHeader         ( Slice* pcSlice )
           WRITE_SVLC (pcSlice->getDeblockingFilterTcOffsetDiv2(),   "slice_tc_offset_div2");
         }
       }
+      else
+      {
+        pcSlice->setDeblockingFilterDisable       ( picHeader->getDeblockingFilterDisable() );
+        pcSlice->setDeblockingFilterBetaOffsetDiv2( picHeader->getDeblockingFilterBetaOffsetDiv2() );
+        pcSlice->setDeblockingFilterTcOffsetDiv2  ( picHeader->getDeblockingFilterTcOffsetDiv2() );
+      }
     }
-
-    bool isSAOEnabled = pcSlice->getSPS()->getSAOEnabledFlag() && (pcSlice->getSaoEnabledFlag(CHANNEL_TYPE_LUMA) || (chromaEnabled && pcSlice->getSaoEnabledFlag(CHANNEL_TYPE_CHROMA)));
-    bool isDBFEnabled = (!pcSlice->getDeblockingFilterDisable());
-
-    if(pcSlice->getPPS()->getLoopFilterAcrossSlicesEnabledFlag() && ( isSAOEnabled || isDBFEnabled ))
-    {
-      WRITE_FLAG(pcSlice->getLFCrossSliceBoundaryFlag()?1:0, "slice_loop_filter_across_slices_enabled_flag");
-    }
-
-    if (pcSlice->getSPS()->getUseReshaper())
+    else
     {
-      codeReshaper(pcSlice->getReshapeInfo(), pcSlice->getSPS(), pcSlice->isIntra());
+      pcSlice->setDeblockingFilterDisable       ( false );
+      pcSlice->setDeblockingFilterBetaOffsetDiv2( 0 );
+      pcSlice->setDeblockingFilterTcOffsetDiv2  ( 0 );
     }
-#if HEVC_DEPENDENT_SLICES
-  }
-#endif
 
   if(pcSlice->getPPS()->getSliceHeaderExtensionPresentFlag())
   {
@@ -1429,7 +2067,6 @@ void HLSWriter::codeSliceHeader         ( Slice* pcSlice )
 
 }
 
-#if JVET_M0101_HLS
 void  HLSWriter::codeConstraintInfo  ( const ConstraintInfo* cinfo )
 {
   WRITE_FLAG(cinfo->getProgressiveSourceFlag(),   "general_progressive_source_flag"         );
@@ -1442,25 +2079,39 @@ void  HLSWriter::codeConstraintInfo  ( const ConstraintInfo* cinfo )
   WRITE_CODE(cinfo->getMaxChromaFormatConstraintIdc(), 2, "max_chroma_format_constraint_idc" );
 
   WRITE_FLAG(cinfo->getNoQtbttDualTreeIntraConstraintFlag() ? 1 : 0, "no_qtbtt_dual_tree_intra_constraint_flag");
+  WRITE_FLAG(cinfo->getNoPartitionConstraintsOverrideConstraintFlag() ? 1 : 0, "no_partition_constraints_override_constraint_flag");
   WRITE_FLAG(cinfo->getNoSaoConstraintFlag() ? 1 : 0, "no_sao_constraint_flag");
   WRITE_FLAG(cinfo->getNoAlfConstraintFlag() ? 1 : 0, "no_alf_constraint_flag");
-  WRITE_FLAG(cinfo->getNoPcmConstraintFlag() ? 1 : 0, "no_pcm_constraint_flag");
+  WRITE_FLAG(cinfo->getNoJointCbCrConstraintFlag() ? 1 : 0, "no_joint_cbcr_constraint_flag");
   WRITE_FLAG(cinfo->getNoRefWraparoundConstraintFlag() ? 1 : 0, "no_ref_wraparound_constraint_flag");
   WRITE_FLAG(cinfo->getNoTemporalMvpConstraintFlag() ? 1 : 0, "no_temporal_mvp_constraint_flag");
   WRITE_FLAG(cinfo->getNoSbtmvpConstraintFlag() ? 1 : 0, "no_sbtmvp_constraint_flag");
   WRITE_FLAG(cinfo->getNoAmvrConstraintFlag() ? 1 : 0, "no_amvr_constraint_flag");
   WRITE_FLAG(cinfo->getNoBdofConstraintFlag() ? 1 : 0, "no_bdof_constraint_flag");
+  WRITE_FLAG(cinfo->getNoDmvrConstraintFlag() ? 1 : 0, "no_dmvr_constraint_flag");
   WRITE_FLAG(cinfo->getNoCclmConstraintFlag() ? 1 : 0, "no_cclm_constraint_flag");
   WRITE_FLAG(cinfo->getNoMtsConstraintFlag() ? 1 : 0, "no_mts_constraint_flag");
+  WRITE_FLAG(cinfo->getNoSbtConstraintFlag() ? 1 : 0, "no_sbt_constraint_flag");
   WRITE_FLAG(cinfo->getNoAffineMotionConstraintFlag() ? 1 : 0, "no_affine_motion_constraint_flag");
-  WRITE_FLAG(cinfo->getNoGbiConstraintFlag() ? 1 : 0, "no_gbi_constraint_flag");
-  WRITE_FLAG(cinfo->getNoMhIntraConstraintFlag() ? 1 : 0, "no_mh_intra_constraint_flag");
+  WRITE_FLAG(cinfo->getNoBcwConstraintFlag() ? 1 : 0, "no_bcw_constraint_flag");
+  WRITE_FLAG(cinfo->getNoIbcConstraintFlag() ? 1 : 0, "no_ibc_constraint_flag");
+  WRITE_FLAG(cinfo->getNoCiipConstraintFlag() ? 1 : 0, "no_ciip_constraint_flag");
+  WRITE_FLAG(cinfo->getNoFPelMmvdConstraintFlag() ? 1 : 0, "no_fpel_mmvd_constraint_flag");
   WRITE_FLAG(cinfo->getNoTriangleConstraintFlag() ? 1 : 0, "no_triangle_constraint_flag");
   WRITE_FLAG(cinfo->getNoLadfConstraintFlag() ? 1 : 0, "no_ladf_constraint_flag");
-  WRITE_FLAG(cinfo->getNoCurrPicRefConstraintFlag() ? 1 : 0, "no_curr_pic_ref_constraint_flag");
+  WRITE_FLAG(cinfo->getNoTransformSkipConstraintFlag() ? 1 : 0, "no_transform_skip_constraint_flag");
+  WRITE_FLAG(cinfo->getNoBDPCMConstraintFlag() ? 1 : 0, "no_bdpcm_constraint_flag");
   WRITE_FLAG(cinfo->getNoQpDeltaConstraintFlag() ? 1 : 0, "no_qp_delta_constraint_flag");
   WRITE_FLAG(cinfo->getNoDepQuantConstraintFlag() ? 1 : 0, "no_dep_quant_constraint_flag");
   WRITE_FLAG(cinfo->getNoSignDataHidingConstraintFlag() ? 1 : 0, "no_sign_data_hiding_constraint_flag");
+  WRITE_FLAG(cinfo->getNoTrailConstraintFlag() ? 1 : 0, "no_trail_constraint_flag");
+  WRITE_FLAG(cinfo->getNoStsaConstraintFlag() ? 1 : 0, "no_stsa_constraint_flag");
+  WRITE_FLAG(cinfo->getNoRaslConstraintFlag() ? 1 : 0, "no_rasl_constraint_flag");
+  WRITE_FLAG(cinfo->getNoRadlConstraintFlag() ? 1 : 0, "no_radl_constraint_flag");
+  WRITE_FLAG(cinfo->getNoIdrConstraintFlag() ? 1 : 0, "no_idr_constraint_flag");
+  WRITE_FLAG(cinfo->getNoCraConstraintFlag() ? 1 : 0, "no_cra_constraint_flag");
+  WRITE_FLAG(cinfo->getNoGdrConstraintFlag() ? 1 : 0, "no_gdr_constraint_flag");
+  WRITE_FLAG(cinfo->getNoApsConstraintFlag() ? 1 : 0, "no_aps_constraint_flag");
 }
 
 
@@ -1469,9 +2120,16 @@ void  HLSWriter::codeProfileTierLevel    ( const ProfileTierLevel* ptl, int maxN
   WRITE_CODE( int(ptl->getProfileIdc()), 7 ,   "general_profile_idc"                     );
   WRITE_FLAG( ptl->getTierFlag()==Level::HIGH, "general_tier_flag"                       );
 
-  codeConstraintInfo(ptl->getConstraintInfo());
+  codeConstraintInfo( ptl->getConstraintInfo() );
+
+  WRITE_CODE( int( ptl->getLevelIdc() ), 8, "general_level_idc" );
+
+  WRITE_CODE(ptl->getNumSubProfile(), 8, "num_sub_profiles");
+  for (int i = 0; i < ptl->getNumSubProfile(); i++)
+  {
+    WRITE_CODE(ptl->getSubProfileIdc(i) , 32, "general_sub_profile_idc[i]");
+  }
 
-  WRITE_CODE( int(ptl->getLevelIdc()), 8 ,     "general_level_idc"                     );
 
   for (int i = 0; i < maxNumSubLayersMinus1; i++)
   {
@@ -1493,92 +2151,7 @@ void  HLSWriter::codeProfileTierLevel    ( const ProfileTierLevel* ptl, int maxN
 
 }
 
-#else
-void HLSWriter::codePTL( const PTL* pcPTL, bool profilePresentFlag, int maxNumSubLayersMinus1)
-{
-  if(profilePresentFlag)
-  {
-    codeProfileTier(pcPTL->getGeneralPTL(), false);    // general_...
-  }
-  WRITE_CODE( int(pcPTL->getGeneralPTL()->getLevelIdc()), 8, "general_level_idc" );
-
-  for (int i = 0; i < maxNumSubLayersMinus1; i++)
-  {
-    WRITE_FLAG( pcPTL->getSubLayerProfilePresentFlag(i), "sub_layer_profile_present_flag[i]" );
-    WRITE_FLAG( pcPTL->getSubLayerLevelPresentFlag(i),   "sub_layer_level_present_flag[i]" );
-  }
-
-  if (maxNumSubLayersMinus1 > 0)
-  {
-    for (int i = maxNumSubLayersMinus1; i < 8; i++)
-    {
-      WRITE_CODE(0, 2, "reserved_zero_2bits");
-    }
-  }
-
-  for(int i = 0; i < maxNumSubLayersMinus1; i++)
-  {
-    if( pcPTL->getSubLayerProfilePresentFlag(i) )
-    {
-      codeProfileTier(pcPTL->getSubLayerPTL(i), true);  // sub_layer_...
-    }
-    if( pcPTL->getSubLayerLevelPresentFlag(i) )
-    {
-      WRITE_CODE( int(pcPTL->getSubLayerPTL(i)->getLevelIdc()), 8, "sub_layer_level_idc[i]" );
-    }
-  }
-}
-
-#if ENABLE_TRACING || RExt__DECODER_DEBUG_BIT_STATISTICS
-void HLSWriter::codeProfileTier( const ProfileTierLevel* ptl, const bool bIsSubLayer )
-#define PTL_TRACE_TEXT(txt) bIsSubLayer?("sub_layer_" txt) : ("general_" txt)
-#else
-void HLSWriter::codeProfileTier( const ProfileTierLevel* ptl, const bool /*bIsSubLayer*/ )
-#define PTL_TRACE_TEXT(txt) txt
-#endif
-{
-  WRITE_CODE( ptl->getProfileSpace(), 2 ,      PTL_TRACE_TEXT("profile_space"                   ));
-  WRITE_FLAG( ptl->getTierFlag()==Level::HIGH, PTL_TRACE_TEXT("tier_flag"                       ));
-  WRITE_CODE( int(ptl->getProfileIdc()), 5 ,   PTL_TRACE_TEXT("profile_idc"                     ));
-  for(int j = 0; j < 32; j++)
-  {
-    WRITE_FLAG( ptl->getProfileCompatibilityFlag(j), PTL_TRACE_TEXT("profile_compatibility_flag[][j]" ));
-  }
-
-  WRITE_FLAG(ptl->getProgressiveSourceFlag(),   PTL_TRACE_TEXT("progressive_source_flag"         ));
-  WRITE_FLAG(ptl->getInterlacedSourceFlag(),    PTL_TRACE_TEXT("interlaced_source_flag"          ));
-  WRITE_FLAG(ptl->getNonPackedConstraintFlag(), PTL_TRACE_TEXT("non_packed_constraint_flag"      ));
-  WRITE_FLAG(ptl->getFrameOnlyConstraintFlag(), PTL_TRACE_TEXT("frame_only_constraint_flag"      ));
-
-  if (ptl->getProfileIdc() == Profile::MAINREXT || ptl->getProfileIdc() == Profile::HIGHTHROUGHPUTREXT )
-  {
-    const uint32_t         bitDepthConstraint=ptl->getBitDepthConstraint();
-    WRITE_FLAG(bitDepthConstraint<=12,          PTL_TRACE_TEXT("max_12bit_constraint_flag"       ));
-    WRITE_FLAG(bitDepthConstraint<=10,          PTL_TRACE_TEXT("max_10bit_constraint_flag"       ));
-    WRITE_FLAG(bitDepthConstraint<= 8,          PTL_TRACE_TEXT("max_8bit_constraint_flag"        ));
-    const ChromaFormat chromaFmtConstraint=ptl->getChromaFormatConstraint();
-    WRITE_FLAG(chromaFmtConstraint==CHROMA_422||chromaFmtConstraint==CHROMA_420||chromaFmtConstraint==CHROMA_400, PTL_TRACE_TEXT("max_422chroma_constraint_flag" ));
-    WRITE_FLAG(chromaFmtConstraint==CHROMA_420||chromaFmtConstraint==CHROMA_400,                                  PTL_TRACE_TEXT("max_420chroma_constraint_flag" ));
-    WRITE_FLAG(chromaFmtConstraint==CHROMA_400,                                                                   PTL_TRACE_TEXT("max_monochrome_constraint_flag"));
-    WRITE_FLAG(ptl->getIntraConstraintFlag(),          PTL_TRACE_TEXT("intra_constraint_flag"           ));
-    WRITE_FLAG(ptl->getOnePictureOnlyConstraintFlag(), PTL_TRACE_TEXT("one_picture_only_constraint_flag"));
-    WRITE_FLAG(ptl->getLowerBitRateConstraintFlag(),   PTL_TRACE_TEXT("lower_bit_rate_constraint_flag"  ));
-    WRITE_CODE(0 , 16, PTL_TRACE_TEXT("reserved_zero_34bits[0..15]"     ));
-    WRITE_CODE(0 , 16, PTL_TRACE_TEXT("reserved_zero_34bits[16..31]"    ));
-    WRITE_CODE(0 ,  2, PTL_TRACE_TEXT("reserved_zero_34bits[32..33]"    ));
-  }
-  else
-  {
-    WRITE_CODE(0x0000 , 16, PTL_TRACE_TEXT("reserved_zero_43bits[0..15]"     ));
-    WRITE_CODE(0x0000 , 16, PTL_TRACE_TEXT("reserved_zero_43bits[16..31]"    ));
-    WRITE_CODE(0x000  , 11, PTL_TRACE_TEXT("reserved_zero_43bits[32..42]"    ));
-  }
-  WRITE_FLAG(false,   PTL_TRACE_TEXT("reserved_zero_bit" ));
-#undef PTL_TRACE_TEXT
-}
-#endif
 
-#if HEVC_TILES_WPP
 /**
 * Write tiles and wavefront substreams sizes for the slice header (entry points).
 *
@@ -1586,7 +2159,8 @@ void HLSWriter::codeProfileTier( const ProfileTierLevel* ptl, const bool /*bIsSu
 */
 void  HLSWriter::codeTilesWPPEntryPoint( Slice* pSlice )
 {
-  if (!pSlice->getPPS()->getTilesEnabledFlag() && !pSlice->getPPS()->getEntropyCodingSyncEnabledFlag())
+  pSlice->setNumEntryPoints( pSlice->getPPS() );
+  if( pSlice->getNumEntryPoints() == 0 )
   {
     return;
   }
@@ -1605,10 +2179,9 @@ void  HLSWriter::codeTilesWPPEntryPoint( Slice* pSlice )
   while (maxOffset >= (1u << (offsetLenMinus1 + 1)))
   {
     offsetLenMinus1++;
-    CHECK(offsetLenMinus1 + 1 >= 32, "Invalid offset lenght minus 1");
+    CHECK(offsetLenMinus1 + 1 >= 32, "Invalid offset length minus 1");
   }
 
-  WRITE_UVLC(pSlice->getNumberOfSubstreamSizes(), "num_entry_point_offsets");
   if (pSlice->getNumberOfSubstreamSizes()>0)
   {
     WRITE_UVLC(offsetLenMinus1, "offset_len_minus1");
@@ -1619,7 +2192,6 @@ void  HLSWriter::codeTilesWPPEntryPoint( Slice* pSlice )
     }
   }
 }
-#endif
 
 
 // ====================================================================================================================
@@ -1708,37 +2280,29 @@ void HLSWriter::xCodePredWeightTable( Slice* pcSlice )
   }
 }
 
-#if HEVC_USE_SCALING_LISTS
 /** code quantization matrix
 *  \param scalingList quantization matrix information
 */
 void HLSWriter::codeScalingList( const ScalingList &scalingList )
 {
   //for each size
-  for(uint32_t sizeId = SCALING_LIST_FIRST_CODED; sizeId <= SCALING_LIST_LAST_CODED; sizeId++)
+  WRITE_FLAG(scalingList.getDisableScalingMatrixForLfnstBlks(), "scaling_matrix_for_lfnst_disabled_flag"); // written once, ahead of the per-list loop
+  for (uint32_t scalingListId = 0; scalingListId < 28; scalingListId++) // one iteration per scaling list id, 0..27
   {
-    const int predListStep = (sizeId == SCALING_LIST_32x32? (SCALING_LIST_NUM/NUMBER_OF_PREDICTION_MODES) : 1); // if 32x32, skip over chroma entries.
-
-    for(uint32_t listId = 0; listId < SCALING_LIST_NUM; listId+=predListStep)
+    bool scalingListCopyModeFlag = scalingList.getScalingListCopyModeFlag(scalingListId);
+    WRITE_FLAG(scalingListCopyModeFlag, "scaling_list_copy_mode_flag"); //copy mode
+    if (!scalingListCopyModeFlag)// not copy mode: the predictor mode flag is signalled as well
     {
-      bool scalingListPredModeFlag = scalingList.getScalingListPredModeFlag(sizeId, listId);
-      WRITE_FLAG( scalingListPredModeFlag, "scaling_list_pred_mode_flag" );
-      if(!scalingListPredModeFlag)// Copy Mode
-      {
-        if (sizeId == SCALING_LIST_32x32)
-        {
-          // adjust the code, to cope with the missing chroma entries
-          WRITE_UVLC( ((int)listId - (int)scalingList.getRefMatrixId (sizeId,listId)) / (SCALING_LIST_NUM/NUMBER_OF_PREDICTION_MODES), "scaling_list_pred_matrix_id_delta");
-        }
-        else
-        {
-          WRITE_UVLC( (int)listId - (int)scalingList.getRefMatrixId (sizeId,listId), "scaling_list_pred_matrix_id_delta");
-        }
-      }
-      else// DPCM Mode
-      {
-        xCodeScalingList(&scalingList, sizeId, listId);
-      }
+      WRITE_FLAG(scalingList.getScalingListPreditorModeFlag(scalingListId), "scaling_list_predictor_mode_flag");
+    }
+    if ((scalingListCopyModeFlag || scalingList.getScalingListPreditorModeFlag(scalingListId)) && scalingListId!= SCALING_LIST_1D_START_2x2 && scalingListId != SCALING_LIST_1D_START_4x4 && scalingListId != SCALING_LIST_1D_START_8x8) // copy or predictor mode only; skipped for the three SCALING_LIST_1D_START_{2x2,4x4,8x8} ids
+    {
+      WRITE_UVLC((int)scalingListId - (int)scalingList.getRefMatrixId(scalingListId), "scaling_list_pred_matrix_id_delta"); // current id minus the reference matrix id
+    }
+    if (!scalingListCopyModeFlag)
+    {
+      // DPCM-code the coefficients; the last argument tells xCodeScalingList whether to code them as residuals against a predictor
+      xCodeScalingList(&scalingList, scalingListId, scalingList.getScalingListPreditorModeFlag(scalingListId));
     }
   }
   return;
@@ -1748,35 +2312,55 @@ void HLSWriter::codeScalingList( const ScalingList &scalingList )
 * \param sizeId      size index
 * \param listId      list index
 */
-void HLSWriter::xCodeScalingList(const ScalingList* scalingList, uint32_t sizeId, uint32_t listId)
+void HLSWriter::xCodeScalingList(const ScalingList* scalingList, uint32_t scalingListId, bool isPredictor) // isPredictor: code residuals against a reference list instead of the raw coefficients
 {
-  int coefNum = std::min( MAX_MATRIX_COEF_NUM, ( int ) g_scalingListSize[sizeId] );
-  uint32_t* scan = g_scanOrder[SCAN_UNGROUPED][SCAN_DIAG][gp_sizeIdxInfo->idxFrom( 1 << ( sizeId == SCALING_LIST_FIRST_CODED ? 2 : 3 ) )][gp_sizeIdxInfo->idxFrom( 1 << ( sizeId == SCALING_LIST_FIRST_CODED ? 2 : 3 ) )];
-  int nextCoef = SCALING_LIST_START_VALUE;
+  int matrixSize = (scalingListId < SCALING_LIST_1D_START_4x4) ? 2 : ((scalingListId < SCALING_LIST_1D_START_8x8) ? 4 : 8); // coded matrix is 2x2, 4x4 or 8x8 depending on the id range
+  int coefNum = matrixSize * matrixSize; // at most 64 coefficients
+  ScanElement *scan = g_scanOrder[SCAN_UNGROUPED][SCAN_DIAG][gp_sizeIdxInfo->idxFrom(matrixSize)][gp_sizeIdxInfo->idxFrom(matrixSize)]; // SCAN_DIAG scan table for a matrixSize x matrixSize block
+  int nextCoef = (isPredictor) ? 0 : SCALING_LIST_START_VALUE; // DPCM start value; 0 when residuals are coded
+
   int data;
-  const int *src = scalingList->getScalingListAddress(sizeId, listId);
-  if( sizeId > SCALING_LIST_8x8 )
+  const int *src = scalingList->getScalingListAddress(scalingListId); // coefficients of the list being coded
+  int PredListId = scalingList->getRefMatrixId(scalingListId); // id of the reference list
+  const int *srcPred = (isPredictor) ? ((scalingListId==PredListId) ? scalingList->getScalingListDefaultAddress(scalingListId) : scalingList->getScalingListAddress(PredListId)) : NULL; // reference coefficients; the default list is used when the list refers to itself
+  int deltasrc[65] = { 0 }; // [0..coefNum-1]: per-coefficient residuals in scan order, [64]: DC residual
+
+  if (isPredictor)
   {
-    WRITE_SVLC( scalingList->getScalingListDC(sizeId,listId) - 8, "scaling_list_dc_coef_minus8");
-    nextCoef = scalingList->getScalingListDC(sizeId,listId);
+    if (scalingListId >= SCALING_LIST_1D_START_16x16) // only these ids carry a DC value
+    {
+      deltasrc[64] = scalingList->getScalingListDC(scalingListId) - ((PredListId >= SCALING_LIST_1D_START_16x16) ? ((scalingListId == PredListId) ? 16 : scalingList->getScalingListDC(PredListId)) : srcPred[scan[0].idx]); // DC residual; the reference DC is 16 for a self-reference, else the reference list's DC, else its first scanned coefficient
+    }
+    for (int i = 0; i < coefNum; i++)
+    {
+      deltasrc[i] = (src[scan[i].idx] - srcPred[scan[i].idx]); // residual against the reference list, stored in scan order
+    }
   }
-  for(int i=0;i<coefNum;i++)
+  if (scalingListId >= SCALING_LIST_1D_START_16x16) // the DC value is written first
   {
-    data = src[scan[i]] - nextCoef;
-    nextCoef = src[scan[i]];
-    if(data > 127)
+    if (isPredictor)
     {
-      data = data - 256;
+      data = deltasrc[64];
+      nextCoef = deltasrc[64]; // the DC residual seeds the DPCM chain
     }
-    if(data < -128)
+    else
     {
-      data = data + 256;
+      data = scalingList->getScalingListDC(scalingListId) - nextCoef;
+      nextCoef = scalingList->getScalingListDC(scalingListId); // the DC value seeds the DPCM chain
     }
-
-    WRITE_SVLC( data,  "scaling_list_delta_coef");
+    data = ((data + 128) & 255) - 128; // wrap the delta into [-128, 127]
+    WRITE_SVLC((int8_t)data, "scaling_list_dc_coef");
+  }
+  for(int i=0;i<coefNum;i++)
+  {
+    if (scalingListId >= SCALING_LIST_1D_START_64x64 && scan[i].x >= 4 && scan[i].y >= 4) // for these ids, positions with x >= 4 and y >= 4 are not written
+      continue;
+    data = (isPredictor) ? (deltasrc[i] - nextCoef) : (src[scan[i].idx] - nextCoef); // difference to the previously coded value
+    nextCoef = (isPredictor) ? deltasrc[i] : src[scan[i].idx];
+    data = ((data + 128) & 255) - 128; // wrap the delta into [-128, 127]
+    WRITE_SVLC((int8_t)data, "scaling_list_delta_coef");
   }
 }
-#endif
 
 bool HLSWriter::xFindMatchingLTRP(Slice* pcSlice, uint32_t *ltrpsIndex, int ltrpPOC, bool usedFlag)
 {
@@ -1793,174 +2377,62 @@ bool HLSWriter::xFindMatchingLTRP(Slice* pcSlice, uint32_t *ltrpsIndex, int ltrp
   return false;
 }
 
-
-void HLSWriter::alfGolombEncode( int coeff, int k )
+void HLSWriter::alfGolombEncode( int coeff, int k, const bool signed_coeff )
 {
-  int symbol = abs( coeff );
-
-  int m = (int)pow( 2.0, k );
-  int q = symbol / m;
-
-  for( int i = 0; i < q; i++ )
+  unsigned int symbol = abs( coeff );
+  while ( symbol >= (unsigned int)( 1 << k ) )
   {
-    xWriteFlag( 1 );
+    symbol -= 1 << k;
+    k++;
+    WRITE_FLAG( 0, "alf_coeff_abs_prefix" );
   }
-  xWriteFlag( 0 );
-  // write one zero
+  WRITE_FLAG( 1, "alf_coeff_abs_prefix" );
 
-  for( int i = 0; i < k; i++ )
+  if ( k > 0 )
   {
-    xWriteFlag( symbol & 0x01 );
-    symbol >>= 1;
+    WRITE_CODE( symbol, k, "alf_coeff_abs_suffix" );
   }
-
-  if( coeff != 0 )
+  if ( signed_coeff && coeff != 0 )
   {
-    int sign = ( coeff > 0 ) ? 1 : 0;
-    xWriteFlag( sign );
+    WRITE_FLAG( (coeff < 0) ? 1 : 0, "alf_coeff_sign" );
   }
 }
 
-void HLSWriter::alfFilter( const AlfSliceParam& alfSliceParam, const bool isChroma )
+void HLSWriter::alfFilter( const AlfParam& alfParam, const bool isChroma, const int altIdx )
 {
-  if( !isChroma )
-  {
-    WRITE_FLAG( alfSliceParam.alfLumaCoeffDeltaFlag, "alf_luma_coeff_delta_flag" );
-    if( !alfSliceParam.alfLumaCoeffDeltaFlag )
-    {
-      if( alfSliceParam.numLumaFilters > 1 )
-      {
-        WRITE_FLAG( alfSliceParam.alfLumaCoeffDeltaPredictionFlag, "alf_luma_coeff_delta_prediction_flag" );
-      }
-    }
-  }
-
-  static int bitsCoeffScan[EncAdaptiveLoopFilter::m_MAX_SCAN_VAL][EncAdaptiveLoopFilter::m_MAX_EXP_GOLOMB];
-  memset( bitsCoeffScan, 0, sizeof( bitsCoeffScan ) );
-  AlfFilterShape alfShape( isChroma ? 5 : 7 );
-  const int maxGolombIdx = AdaptiveLoopFilter::getMaxGolombIdx( alfShape.filterType );
-  const short* coeff = isChroma ? alfSliceParam.chromaCoeff : alfSliceParam.lumaCoeff;
-  const int numFilters = isChroma ? 1 : alfSliceParam.numLumaFilters;
+  AlfFilterShape alfShape(isChroma ? 5 : 7);
+  const short* coeff = isChroma ? alfParam.chromaCoeff[altIdx] : alfParam.lumaCoeff;
+  const short* clipp = isChroma ? alfParam.chromaClipp[altIdx] : alfParam.lumaClipp;
+  const int numFilters = isChroma ? 1 : alfParam.numLumaFilters;
 
   // vlc for all
-  for( int ind = 0; ind < numFilters; ++ind )
-  {
-    if( isChroma || !alfSliceParam.alfLumaCoeffDeltaFlag || alfSliceParam.alfLumaCoeffFlag[ind] )
-    {
-      for( int i = 0; i < alfShape.numCoeff - 1; i++ )
-      {
-        int coeffVal = abs( coeff[ind * MAX_NUM_ALF_LUMA_COEFF + i] );
-
-        for( int k = 1; k < 15; k++ )
-        {
-          bitsCoeffScan[alfShape.golombIdx[i]][k] += EncAdaptiveLoopFilter::lengthGolomb( coeffVal, k );
-        }
-      }
-    }
-  }
-
-  static int kMinTab[MAX_NUM_ALF_COEFF];
-  int kMin = EncAdaptiveLoopFilter::getGolombKMin( alfShape, numFilters, kMinTab, bitsCoeffScan );
-
-  // Golomb parameters
-  WRITE_UVLC( kMin - 1,  isChroma ? "alf_chroma_min_eg_order_minus1" : "alf_luma_min_eg_order_minus1" );
-
-  for( int idx = 0; idx < maxGolombIdx; idx++ )
-  {
-    bool golombOrderIncreaseFlag = ( kMinTab[idx] != kMin ) ? true : false;
-    CHECK( !( kMinTab[idx] <= kMin + 1 ), "ALF Golomb parameter not consistent" );
-    WRITE_FLAG( golombOrderIncreaseFlag, isChroma ? "alf_chroma_eg_order_increase_flag"  : "alf_luma_eg_order_increase_flag" );
-    kMin = kMinTab[idx];
-  }
-
-  if( !isChroma )
-  {
-    if( alfSliceParam.alfLumaCoeffDeltaFlag )
-    {
-      for( int ind = 0; ind < numFilters; ++ind )
-      {
-        WRITE_FLAG( alfSliceParam.alfLumaCoeffFlag[ind], "alf_luma_coeff_flag[i]" );
-      }
-    }
-  }
 
   // Filter coefficients
   for( int ind = 0; ind < numFilters; ++ind )
   {
-    if( !isChroma && !alfSliceParam.alfLumaCoeffFlag[ind] && alfSliceParam.alfLumaCoeffDeltaFlag )
-    {
-      continue;
-    }
 
     for( int i = 0; i < alfShape.numCoeff - 1; i++ )
     {
-      alfGolombEncode( coeff[ind* MAX_NUM_ALF_LUMA_COEFF + i], kMinTab[alfShape.golombIdx[i]] );  // alf_coeff_chroma[i], alf_coeff_luma_delta[i][j]
+      alfGolombEncode( coeff[ind* MAX_NUM_ALF_LUMA_COEFF + i], 3 );  // alf_coeff_chroma[i], alf_coeff_luma_delta[i][j]
     }
   }
-}
 
-void HLSWriter::xWriteTruncBinCode( uint32_t uiSymbol, const int uiMaxSymbol )
-{
-  int uiThresh;
-  if( uiMaxSymbol > 256 )
+  // Clipping values coding
+#if JVET_Q0249_ALF_CHROMA_CLIPFLAG
+  if( alfParam.nonLinearFlag[isChroma] )
+#else
+  if( alfParam.nonLinearFlag[isChroma][altIdx] )
+#endif
   {
-    int uiThreshVal = 1 << 8;
-    uiThresh = 8;
-    while( uiThreshVal <= uiMaxSymbol )
+    for (int ind = 0; ind < numFilters; ++ind)
     {
-      uiThresh++;
-      uiThreshVal <<= 1;
+      for (int i = 0; i < alfShape.numCoeff - 1; i++)
+      {
+        WRITE_CODE(clipp[ind* MAX_NUM_ALF_LUMA_COEFF + i], 2, "alf_clipping_index");
+      }
     }
-    uiThresh--;
-  }
-  else
-  {
-    uiThresh = g_tbMax[uiMaxSymbol];
-  }
-
-  int uiVal = 1 << uiThresh;
-  assert( uiVal <= uiMaxSymbol );
-  assert( ( uiVal << 1 ) > uiMaxSymbol );
-  assert( uiSymbol < uiMaxSymbol );
-  int b = uiMaxSymbol - uiVal;
-  assert( b < uiVal );
-  if( uiSymbol < uiVal - b )
-  {
-    xWriteCode( uiSymbol, uiThresh );
-  }
-  else
-  {
-    uiSymbol += uiVal - b;
-    assert( uiSymbol < ( uiVal << 1 ) );
-    assert( ( uiSymbol >> 1 ) >= uiVal - b );
-    xWriteCode( uiSymbol, uiThresh + 1 );
   }
 }
 
-void HLSWriter::truncatedUnaryEqProb( int symbol, const int maxSymbol )
-{
-  if( maxSymbol == 0 )
-  {
-    return;
-  }
-
-  bool codeLast = ( maxSymbol > symbol );
-  int bins = 0;
-  int numBins = 0;
-
-  while( symbol-- )
-  {
-    bins <<= 1;
-    bins++;
-    numBins++;
-  }
-  if( codeLast )
-  {
-    bins <<= 1;
-    numBins++;
-  }
-  CHECK( !( numBins <= 32 ), "Unspecified error" );
-  xWriteCode( bins, numBins );
-}
 
 //! \}
diff --git a/source/Lib/EncoderLib/VLCWriter.h b/source/Lib/EncoderLib/VLCWriter.h
index 2ec729bde447451bb29916fcabeaaa337a05bb17..7816710363be97eb40bad3bca4f212ca57290756 100644
--- a/source/Lib/EncoderLib/VLCWriter.h
+++ b/source/Lib/EncoderLib/VLCWriter.h
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2019, ITU/ISO/IEC
+* Copyright (c) 2010-2020, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -49,6 +49,7 @@
 
 #if ENABLE_TRACING
 
+#define WRITE_SCODE( value, length, name)   xWriteSCodeTr ( value, length, name )
 #define WRITE_CODE( value, length, name)    xWriteCodeTr ( value, length, name )
 #define WRITE_UVLC( value,         name)    xWriteUvlcTr ( value,         name )
 #define WRITE_SVLC( value,         name)    xWriteSvlcTr ( value,         name )
@@ -56,7 +57,7 @@
 
 extern bool g_HLSTraceEnable;
 #else
-
+#define WRITE_SCODE( value, length, name)    xWriteSCode ( value, length )
 #define WRITE_CODE( value, length, name)     xWriteCode ( value, length )
 #define WRITE_UVLC( value,         name)     xWriteUvlc ( value )
 #define WRITE_SVLC( value,         name)     xWriteSvlc ( value )
@@ -76,21 +77,20 @@ protected:
   virtual ~VLCWriter() {}
 
   void  setBitstream          ( OutputBitstream* p )  { m_pcBitIf = p;  }
-
+  void  xWriteSCode           ( int  code,  uint32_t length );
   void  xWriteCode            ( uint32_t uiCode, uint32_t uiLength );
   void  xWriteUvlc            ( uint32_t uiCode );
   void  xWriteSvlc            ( int  iCode   );
   void  xWriteFlag            ( uint32_t uiCode );
 #if ENABLE_TRACING
+  void  xWriteSCodeTr         ( int value,  uint32_t  length, const char *pSymbolName);
   void  xWriteCodeTr          ( uint32_t value, uint32_t  length, const char *pSymbolName);
   void  xWriteUvlcTr          ( uint32_t value,               const char *pSymbolName);
   void  xWriteSvlcTr          ( int  value,               const char *pSymbolName);
   void  xWriteFlagTr          ( uint32_t value,               const char *pSymbolName);
 #endif
   void  xWriteRbspTrailingBits();
-#if JVET_M0101_HLS
   bool isByteAligned()      { return (m_pcBitIf->getNumBitsUntilByteAligned() == 0); } ;
-#endif
 };
 
 
@@ -113,46 +113,35 @@ public:
   virtual ~HLSWriter() {}
 
 private:
-  void xCodeShortTermRefPicSet  ( const ReferencePictureSet* pcRPS, bool calledFromSliceHeader, int idx );
+  void xCodeRefPicList( const ReferencePictureList* rpl, bool isLongTermPresent, uint32_t ltLsbBitsCount, const bool isForbiddenZeroDeltaPoc );
   bool xFindMatchingLTRP        ( Slice* pcSlice, uint32_t *ltrpsIndex, int ltrpPOC, bool usedFlag );
   void xCodePredWeightTable     ( Slice* pcSlice );
-#if HEVC_USE_SCALING_LISTS
-  void xCodeScalingList         ( const ScalingList* scalingList, uint32_t sizeId, uint32_t listId);
-#endif
+  void xCodeScalingList         ( const ScalingList* scalingList, uint32_t scalinListId, bool isPredictor);
 public:
   void  setBitstream            ( OutputBitstream* p )  { m_pcBitIf = p;  }
   uint32_t  getNumberOfWrittenBits  ()                      { return m_pcBitIf->getNumberOfWrittenBits();  }
   void  codeVUI                 ( const VUI *pcVUI, const SPS* pcSPS );
   void  codeSPS                 ( const SPS* pcSPS );
-  void  codePPS                 ( const PPS* pcPPS );
-  void  codeAPS                 ( APS* pcAPS);
-#if HEVC_VPS
+  void  codePPS                 ( const PPS* pcPPS, const SPS* pcSPS );
+  void  codeAPS                 ( APS* pcAPS );
+  void  codeAlfAps              ( APS* pcAPS );
+  void  codeLmcsAps             ( APS* pcAPS );
+  void  codeScalingListAps      ( APS* pcAPS );
   void  codeVPS                 ( const VPS* pcVPS );
-#endif
+  void  codeDPS                 ( const DPS* dps );
+  void  codePictureHeader       ( PicHeader* picHeader );
   void  codeSliceHeader         ( Slice* pcSlice );
-#if !JVET_M0101_HLS
-  void  codePTL                 ( const PTL* pcPTL, bool profilePresentFlag, int maxNumSubLayersMinus1);
-  void  codeProfileTier         ( const ProfileTierLevel* ptl, const bool bIsSubLayer );
-#else
   void  codeConstraintInfo      ( const ConstraintInfo* cinfo );
   void  codeProfileTierLevel    ( const ProfileTierLevel* ptl, int maxNumSubLayersMinus1 );
-#endif
-  void  codeHrdParameters       ( const HRD *hrd, bool commonInfPresentFlag, uint32_t maxNumSubLayersMinus1 );
-#if HEVC_TILES_WPP
+  void  codeHrdParameters       ( const HRDParameters *hrd, const uint32_t firstSubLayer, const uint32_t maxNumSubLayersMinus1);
+
   void  codeTilesWPPEntryPoint  ( Slice* pSlice );
-#endif
-#if HEVC_USE_SCALING_LISTS
   void  codeScalingList         ( const ScalingList &scalingList );
-#endif
 
-  void alfFilter( const AlfSliceParam& alfSliceParam, const bool isChroma );
+  void alfFilter( const AlfParam& alfParam, const bool isChroma, const int altIdx );
 
 private:
-  void xWriteTruncBinCode( uint32_t uiSymbol, const int uiMaxSymbol );
-  void alfGolombEncode( const int coeff, const int k );
-  void truncatedUnaryEqProb( int symbol, int maxSymbol );
-
-  void  codeReshaper            ( const SliceReshapeInfo& pSliceReshaperInfo, const SPS* pcSPS, const bool isIntra);
+  void alfGolombEncode( const int coeff, const int k, const bool signed_coeff=true );
 };
 
 //! \}
diff --git a/source/Lib/EncoderLib/WeightPredAnalysis.cpp b/source/Lib/EncoderLib/WeightPredAnalysis.cpp
index d6e28387a28e4388ec1bf4b5d05f636b0114e2a3..5117c79d98d9a4e5cc9b05c7c79e576a55bc439f 100644
--- a/source/Lib/EncoderLib/WeightPredAnalysis.cpp
+++ b/source/Lib/EncoderLib/WeightPredAnalysis.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/EncoderLib/WeightPredAnalysis.h b/source/Lib/EncoderLib/WeightPredAnalysis.h
index ca3a1e95e909cbf6bc5509fbdd0a4a8f4410239c..76c91be5f30d5966dfef874927d97c1ce8f50a9c 100644
--- a/source/Lib/EncoderLib/WeightPredAnalysis.h
+++ b/source/Lib/EncoderLib/WeightPredAnalysis.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/Utilities/ColourRemapping.cpp b/source/Lib/Utilities/ColourRemapping.cpp
index 480f7ab2b261e4d8ce198bcf92a1c08599fc32c1..0466dc6caa282580ce7393d3a66a81a4b4a465f8 100644
--- a/source/Lib/Utilities/ColourRemapping.cpp
+++ b/source/Lib/Utilities/ColourRemapping.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -40,6 +40,8 @@
 #include <stdio.h>
 #include <fcntl.h>
 
+#if HEVC_SEI
+
 #include "ColourRemapping.h"
 #include "DecoderLib/AnnexBread.h"
 #include "DecoderLib/NALread.h"
@@ -413,3 +415,5 @@ void applyColourRemapping(const PelUnitBuf& pic, SEIColourRemappingInfo& criSEI,
 }
 
 //! \}
+#endif
+
diff --git a/source/Lib/Utilities/ColourRemapping.h b/source/Lib/Utilities/ColourRemapping.h
index f090191448477fa9835cc619c4c74bd6d69f0753..41901ebeba8d6fd43ff21f080803b9ab44c2043d 100644
--- a/source/Lib/Utilities/ColourRemapping.h
+++ b/source/Lib/Utilities/ColourRemapping.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -42,6 +42,8 @@
 #pragma once
 #endif // _MSC_VER > 1000
 
+#if HEVC_SEI
+
 #include "CommonLib/Picture.h"
 #include "CommonLib/SEI.h"
 #include <fstream>
@@ -71,3 +73,4 @@ public:
 
 #endif
 
+#endif
diff --git a/source/Lib/Utilities/VideoIOYuv.cpp b/source/Lib/Utilities/VideoIOYuv.cpp
index e6808bf71094ee8fca22b48db7e7fb104b6a80f1..720d06212579459767a676b6174502cee7cb3caa 100644
--- a/source/Lib/Utilities/VideoIOYuv.cpp
+++ b/source/Lib/Utilities/VideoIOYuv.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -453,7 +453,7 @@ static bool verifyPlane(Pel* dst,
  * @param fileBitDepth component bit depth in file
  * @return true for success, false in case of error
  */
-static bool writePlane(ostream& fd, const Pel* src,
+static bool writePlane( uint32_t orgWidth, uint32_t orgHeight, ostream& fd, const Pel* src,
                        const bool is16bit,
                        const uint32_t stride_src,
                        uint32_t width444, uint32_t height444,
@@ -471,7 +471,10 @@ static bool writePlane(ostream& fd, const Pel* src,
   const uint32_t width_file  = width444  >> csx_file;
   const uint32_t height_file = height444 >> csy_file;
   const bool     writePYUV   = (packedYUVOutputMode > 0) && (fileBitDepth == 10 || fileBitDepth == 12) && ((width_file & (1 + (fileBitDepth & 3))) == 0);
-  const uint32_t stride_file = writePYUV ? (width444 * fileBitDepth) >> (csx_file + 3) : (width444 * (is16bit ? 2 : 1)) >> csx_file;
+
+  CHECK( writePYUV, "Not supported" );
+  CHECK( csx_file != csx_src, "Not supported" );
+  const uint32_t stride_file = writePYUV ? ( orgWidth * fileBitDepth ) >> ( csx_file + 3 ) : ( orgWidth * ( is16bit ? 2 : 1 ) ) >> csx_file;
 
   std::vector<uint8_t> bufVec(stride_file);
   uint8_t *buf=&(bufVec[0]);
@@ -669,6 +672,41 @@ static bool writePlane(ostream& fd, const Pel* src,
         pSrcBuf += srcbuf_stride;
       }
     }
+
+    // here height444 and orgHeight are luma heights
+    for( uint32_t y444 = height444; y444 < orgHeight; y444++ )
+    {
+      if( ( y444 & mask_y_file ) == 0 ) // if this is chroma, determine whether to skip every other row
+      {
+
+        if( !is16bit )
+        {
+          for( uint32_t x = 0; x < ( orgWidth >> csx_file ); x++ )
+          {
+            buf[x] = 0;
+          }
+        }
+        else
+        {
+          for( uint32_t x = 0; x < ( orgWidth >> csx_file ); x++ )
+          {
+            buf[2 * x] = 0;
+            buf[2 * x + 1] = 0;
+          }
+        }
+        fd.write( reinterpret_cast<const char*>( buf ), stride_file );
+        if( fd.eof() || fd.fail() )
+        {
+          return false;
+        }
+      }
+
+      if( ( y444 & mask_y_src ) == 0 )
+      {
+        pSrcBuf += srcbuf_stride;
+      }
+    }
+
   }
   return true;
 }
@@ -903,6 +941,9 @@ bool VideoIOYuv::read ( PelUnitBuf& pic, PelUnitBuf& picOrg, const InputColourSp
 #else
   ColourSpaceConvert( picOrg, pic, ipcsc, true);
 #endif
+
+  picOrg.copyFrom(pic);
+
   return true;
 }
 
@@ -919,7 +960,8 @@ bool VideoIOYuv::read ( PelUnitBuf& pic, PelUnitBuf& picOrg, const InputColourSp
  * @param format           chroma format
  * @return true for success, false in case of error
  */
-bool VideoIOYuv::write( const CPelUnitBuf& pic,
+ // here orgWidth and orgHeight are for luma
+bool VideoIOYuv::write( uint32_t orgWidth, uint32_t orgHeight, const CPelUnitBuf& pic,
                         const InputColourSpaceConversion ipCSC,
                         const bool bPackedYUVOutputMode,
                         int confLeft, int confRight, int confTop, int confBottom, ChromaFormat format, const bool bClipToRec709 )
@@ -993,7 +1035,7 @@ bool VideoIOYuv::write( const CPelUnitBuf& pic,
     const uint32_t    csy         = ::getComponentScaleY(compID, format);
     const CPelBuf     area        = picO.get(compID);
     const int         planeOffset = (confLeft >> csx) + (confTop >> csy) * area.stride;
-    if (!writePlane (m_cHandle, area.bufAt (0, 0) + planeOffset, is16bit, area.stride,
+    if( !writePlane( orgWidth, orgHeight, m_cHandle, area.bufAt( 0, 0 ) + planeOffset, is16bit, area.stride,
                      width444, height444, compID, picO.chromaFormat, format, m_fileBitdepth[ch],
                      bPackedYUVOutputMode ? 1 : 0))
     {
@@ -1086,9 +1128,9 @@ bool VideoIOYuv::write( const CPelUnitBuf& picTop, const CPelUnitBuf& picBottom,
     const uint32_t    width444   = areaTopY.width  - (confLeft + confRight);
     const uint32_t    height444  = areaTopY.height - (confTop + confBottom);
 
-    CHECK(areaTop.width  == areaBottom.width , "Incompatible formats");
-    CHECK(areaTop.height == areaBottom.height, "Incompatible formats");
-    CHECK(areaTop.stride == areaBottom.stride, "Incompatible formats");
+    CHECK(areaTop.width  != areaBottom.width , "Incompatible formats");
+    CHECK(areaTop.height != areaBottom.height, "Incompatible formats");
+    CHECK(areaTop.stride != areaBottom.stride, "Incompatible formats");
 
     if ((width444 == 0) || (height444 == 0))
     {
@@ -1177,4 +1219,86 @@ void VideoIOYuv::ColourSpaceConvert(const CPelUnitBuf &src, PelUnitBuf &dest, co
   }
 }
 
+// Writes one picture to the YUV file. With outputChoice == 0, or when the picture already has the SPS maximum
+// size, it is written as is using the PPS conformance window. Otherwise outputChoice == 2 first rescales it to
+// the SPS maximum size, while any other non-zero value writes it unscaled but passes the SPS maximum size to
+// write() as orgWidth/orgHeight. Returns the result of write().
+// NOTE(review): 'format' is never forwarded; every write() call passes NUM_CHROMA_FORMAT - confirm this is intended.
+bool VideoIOYuv::writeUpscaledPicture( const SPS& sps, const PPS& pps, const CPelUnitBuf& pic, const InputColourSpaceConversion ipCSC, const bool bPackedYUVOutputMode, int outputChoice, ChromaFormat format, const bool bClipToRec709 )
+{
+  ChromaFormat chromaFormatIDC = sps.getChromaFormatIdc();
+  bool ret = false;
+
+  static Window confFullResolution;             // function-local statics: kept from one call to the next
+  static Window afterScaleWindowFullResolution;
+
+  // decoder does not have information about upscaled picture scaling and conformance windows, store this information when full resolution picture is encountered
+  if( sps.getMaxPicWidthInLumaSamples() == pps.getPicWidthInLumaSamples() && sps.getMaxPicHeightInLumaSamples() == pps.getPicHeightInLumaSamples() )
+  {
+    afterScaleWindowFullResolution = pps.getScalingWindow();
+    confFullResolution = pps.getConformanceWindow(); // supplies the crop offsets for the rescaled output below
+  }
+
+  if( outputChoice && ( sps.getMaxPicWidthInLumaSamples() != pic.get( COMPONENT_Y ).width || sps.getMaxPicHeightInLumaSamples() != pic.get( COMPONENT_Y ).height ) )
+  {
+    if( outputChoice == 2 )
+    {
+      PelStorage upscaledPic;
+      upscaledPic.create( chromaFormatIDC, Area( Position(), Size( sps.getMaxPicWidthInLumaSamples(), sps.getMaxPicHeightInLumaSamples() ) ) );
+
+#if JVET_Q0487_SCALING_WINDOW_ISSUES
+      int curPicWidth = sps.getMaxPicWidthInLumaSamples()   - SPS::getWinUnitX( sps.getChromaFormatIdc() ) * ( afterScaleWindowFullResolution.getWindowLeftOffset() + afterScaleWindowFullResolution.getWindowRightOffset() );
+      int curPicHeight = sps.getMaxPicHeightInLumaSamples() - SPS::getWinUnitY( sps.getChromaFormatIdc() ) * ( afterScaleWindowFullResolution.getWindowTopOffset()  + afterScaleWindowFullResolution.getWindowBottomOffset() );
+#else
+      int curPicWidth = sps.getMaxPicWidthInLumaSamples() - afterScaleWindowFullResolution.getWindowLeftOffset() - afterScaleWindowFullResolution.getWindowRightOffset();
+      int curPicHeight = sps.getMaxPicHeightInLumaSamples() - afterScaleWindowFullResolution.getWindowTopOffset() - afterScaleWindowFullResolution.getWindowBottomOffset();
+#endif
+
+      const Window& beforeScalingWindow = pps.getScalingWindow();
+#if JVET_Q0487_SCALING_WINDOW_ISSUES
+      int refPicWidth = pps.getPicWidthInLumaSamples()   - SPS::getWinUnitX( sps.getChromaFormatIdc() ) * ( beforeScalingWindow.getWindowLeftOffset() + beforeScalingWindow.getWindowRightOffset() );
+      int refPicHeight = pps.getPicHeightInLumaSamples() - SPS::getWinUnitY( sps.getChromaFormatIdc() ) * ( beforeScalingWindow.getWindowTopOffset()  + beforeScalingWindow.getWindowBottomOffset() );
+#else
+      int refPicWidth = pps.getPicWidthInLumaSamples() - beforeScalingWindow.getWindowLeftOffset() - beforeScalingWindow.getWindowRightOffset();
+      int refPicHeight = pps.getPicHeightInLumaSamples() - beforeScalingWindow.getWindowTopOffset() - beforeScalingWindow.getWindowBottomOffset();
+#endif
+
+      int xScale = ( ( refPicWidth << SCALE_RATIO_BITS ) + ( curPicWidth >> 1 ) ) / curPicWidth;
+      int yScale = ( ( refPicHeight << SCALE_RATIO_BITS ) + ( curPicHeight >> 1 ) ) / curPicHeight;
+
+      Picture::rescalePicture( std::pair<int, int>( xScale, yScale ), pic, pps.getScalingWindow(), upscaledPic, afterScaleWindowFullResolution, chromaFormatIDC, sps.getBitDepths(), false, false, sps.getHorCollocatedChromaFlag(), sps.getVerCollocatedChromaFlag() );
 
+      ret = write( sps.getMaxPicWidthInLumaSamples(), sps.getMaxPicHeightInLumaSamples(), upscaledPic,
+        ipCSC, bPackedYUVOutputMode,
+        confFullResolution.getWindowLeftOffset() * SPS::getWinUnitX( chromaFormatIDC ),
+        confFullResolution.getWindowRightOffset() * SPS::getWinUnitX( chromaFormatIDC ),
+        confFullResolution.getWindowTopOffset() * SPS::getWinUnitY( chromaFormatIDC ),
+        confFullResolution.getWindowBottomOffset() * SPS::getWinUnitY( chromaFormatIDC ),
+        NUM_CHROMA_FORMAT, bClipToRec709 );
+    }
+    else
+    {
+      const Window &conf = pps.getConformanceWindow();
+      ret = write( sps.getMaxPicWidthInLumaSamples(), sps.getMaxPicHeightInLumaSamples(), pic,
+        ipCSC, bPackedYUVOutputMode,
+        conf.getWindowLeftOffset() * SPS::getWinUnitX( chromaFormatIDC ),
+        conf.getWindowRightOffset() * SPS::getWinUnitX( chromaFormatIDC ),
+        conf.getWindowTopOffset() * SPS::getWinUnitY( chromaFormatIDC ),
+        conf.getWindowBottomOffset() * SPS::getWinUnitY( chromaFormatIDC ),
+        NUM_CHROMA_FORMAT, bClipToRec709 );
+    }
+  }
+  else
+  {
+    const Window &conf = pps.getConformanceWindow();
+    ret = write( pic.get( COMPONENT_Y ).width, pic.get( COMPONENT_Y ).height, pic,
+      ipCSC, bPackedYUVOutputMode,
+      conf.getWindowLeftOffset() * SPS::getWinUnitX( chromaFormatIDC ),
+      conf.getWindowRightOffset() * SPS::getWinUnitX( chromaFormatIDC ),
+      conf.getWindowTopOffset() * SPS::getWinUnitY( chromaFormatIDC ),
+      conf.getWindowBottomOffset() * SPS::getWinUnitY( chromaFormatIDC ),
+      NUM_CHROMA_FORMAT, bClipToRec709 );
+  }
+
+  return ret;
+}
diff --git a/source/Lib/Utilities/VideoIOYuv.h b/source/Lib/Utilities/VideoIOYuv.h
index 150ecced5da91190f2a3d8b2b5dfa4ab45d84f86..27504973ee54b0f2248b64a08d54927e89ce78e8 100644
--- a/source/Lib/Utilities/VideoIOYuv.h
+++ b/source/Lib/Utilities/VideoIOYuv.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -50,6 +50,9 @@ using namespace std;
 // Class definition
 // ====================================================================================================================
 
+#include "CommonLib/Slice.h"
+#include "CommonLib/Picture.h"
+
 /// YUV file I/O class
 class VideoIOYuv
 {
@@ -77,7 +80,7 @@ public:
   bool  read ( PelUnitBuf& pic, PelUnitBuf& picOrg, const InputColourSpaceConversion ipcsc, int aiPad[2], ChromaFormat fileFormat=NUM_CHROMA_FORMAT, const bool bClipToRec709=false );     ///< read one frame with padding parameter
 
   // If fileFormat=NUM_CHROMA_FORMAT, use the format defined by pPicYuv
-  bool  write( const CPelUnitBuf& pic,
+  bool  write( uint32_t orgWidth, uint32_t orgHeight, const CPelUnitBuf& pic,
                const InputColourSpaceConversion ipCSC,
                const bool bPackedYUVOutputMode,
                int confLeft = 0, int confRight = 0, int confTop = 0, int confBottom = 0, ChromaFormat format = NUM_CHROMA_FORMAT, const bool bClipToRec709 = false ); ///< write one YUV frame with padding parameter
@@ -92,7 +95,10 @@ public:
 
   bool  isEof ();                                           ///< check for end-of-file
   bool  isFail();                                           ///< check for failure
+  bool  isOpen() { return m_cHandle.is_open(); }
 
+  bool  writeUpscaledPicture( const SPS& sps, const PPS& pps, const CPelUnitBuf& pic,
+    const InputColourSpaceConversion ipCSC, const bool bPackedYUVOutputMode, int outputChoice = 0, ChromaFormat format = NUM_CHROMA_FORMAT, const bool bClipToRec709 = false ); ///< write one upscaled YUV frame
 
 };
 
diff --git a/source/Lib/Utilities/program_options_lite.cpp b/source/Lib/Utilities/program_options_lite.cpp
index 0c4bba0502cc08c2caa01b4cd61f66554dfe30ab..859c59117d4d8cbb09ff21397a9b00e514bad74f 100644
--- a/source/Lib/Utilities/program_options_lite.cpp
+++ b/source/Lib/Utilities/program_options_lite.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -96,8 +96,22 @@ namespace df
         }
         else
         {
+#if JVET_O0549_ENCODER_ONLY_FILTER_POL
+          if (opt_name.size() > 0 && opt_name.back() == '*')
+          {
+            string prefix_name = opt_name.substr(0, opt_name.size() - 1);
+            names->opt_prefix.push_back(prefix_name);
+            opt_prefix_map[prefix_name].push_back(names);
+          }
+          else
+          {
+            names->opt_long.push_back(opt_name);
+            opt_long_map[opt_name].push_back(names);
+          }
+#else
           names->opt_long.push_back(opt_name);
           opt_long_map[opt_name].push_back(names);
+#endif
         }
         opt_start += opt_end + 1;
       }
@@ -150,6 +164,12 @@ namespace df
       {
         out << "--" << entry.opt_long.front();
       }
+#if JVET_O0549_ENCODER_ONLY_FILTER_POL
+      else if (!entry.opt_prefix.empty())
+      {
+      out << "--" << entry.opt_prefix.front() << "*";
+      }
+#endif
     }
 
     /* format the help text */
@@ -271,6 +291,9 @@ namespace df
     bool OptionWriter::storePair(bool allow_long, bool allow_short, const string& name, const string& value)
     {
       bool found = false;
+#if JVET_O0549_ENCODER_ONLY_FILTER_POL
+      std::string val = value;
+#endif
       Options::NamesMap::iterator opt_it;
       if (allow_long)
       {
@@ -290,15 +313,34 @@ namespace df
           found = true;
         }
       }
-
+#if JVET_O0549_ENCODER_ONLY_FILTER_POL
+      bool allow_prefix = allow_long;
+      if (allow_prefix && !found)
+      {
+        for (opt_it = opts.opt_prefix_map.begin(); opt_it != opts.opt_prefix_map.end(); opt_it++)
+        {
+          std::string name_prefix = name.substr(0, opt_it->first.size());
+          if (name_prefix == opt_it->first)
+          {
+            // prepend value matching *
+            val = name.substr(name_prefix.size()) + std::string(" ") + val;
+            found = true;
+            break;
+          }
+        }
+      }
+#endif
       if (!found)
       {
         error_reporter.error(where())
           << "Unknown option `" << name << "' (value:`" << value << "')\n";
         return false;
       }
-
+#if JVET_O0549_ENCODER_ONLY_FILTER_POL
+      setOptions((*opt_it).second, val, error_reporter);
+#else
       setOptions((*opt_it).second, value, error_reporter);
+#endif
       return true;
     }
 
diff --git a/source/Lib/Utilities/program_options_lite.h b/source/Lib/Utilities/program_options_lite.h
index 2ce2bd26ed80c6066ec93401034513b2b4b71b4a..6fc3dd33789460bd2dd28733cf73851f333de439 100644
--- a/source/Lib/Utilities/program_options_lite.h
+++ b/source/Lib/Utilities/program_options_lite.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -36,6 +36,8 @@
 #include <list>
 #include <map>
 
+#define JVET_O0549_ENCODER_ONLY_FILTER_POL 1 // JVET-O0549: Encoder-only GOP-based temporal filter. Program Options Lite related changes.
+
 #ifndef __PROGRAM_OPTIONS_LITE__
 #define __PROGRAM_OPTIONS_LITE__
 
@@ -196,6 +198,7 @@ namespace df
         }
         std::list<std::string> opt_long;
         std::list<std::string> opt_short;
+        std::list<std::string> opt_prefix;
         OptionBase* opt;
       };
 
@@ -207,6 +210,7 @@ namespace df
       typedef std::map<std::string, NamesPtrList> NamesMap;
       NamesMap opt_long_map;
       NamesMap opt_short_map;
+      NamesMap opt_prefix_map;
     };
 
     /* Class with templated overloaded operator(), for use by Options::addOptions() */
@@ -228,7 +232,61 @@ namespace df
         parent.addOption(new Option<T>(name, storage, default_val, desc));
         return *this;
       }
+      /** Add uiMaxNum options built from printf-style templates: name and desc are each
+       *  formatted with the index k (passed twice, so up to two integer fields), and
+       *  option k is created over storage[k] with default_val as its default. */
+      template<typename T>
+      OptionSpecific&
+        operator()(const std::string& name, T* storage, T default_val, unsigned uiMaxNum, const std::string& desc = "")
+      {
+        std::string cNameBuffer;
+        std::string cDescriptionBuffer;
+
+        for (unsigned int uiK = 0; uiK < uiMaxNum; uiK++)
+        {
+          // re-create zero-filled buffers each round: template + two expanded indices + NUL
+          cNameBuffer.assign(name.size() + 21, '\0');
+          cDescriptionBuffer.assign(desc.size() + 21, '\0');
 
+          // bounded write into the strings' own storage; writing through c_str() is undefined
+          snprintf(&cNameBuffer[0], cNameBuffer.size(), name.c_str(), uiK, uiK);
+          snprintf(&cDescriptionBuffer[0], cDescriptionBuffer.size(), desc.c_str(), uiK, uiK);
+          // snprintf always NUL-terminates, so trim both strings to the formatted text
+          cNameBuffer.resize(cNameBuffer.find_first_of('\0'));
+          cDescriptionBuffer.resize(cDescriptionBuffer.find_first_of('\0'));
+
+          parent.addOption(new Option<T>(cNameBuffer, (storage[uiK]), default_val, cDescriptionBuffer));
+        }
+
+        return *this;
+      }
+
+      /** Add uiMaxNum options built from printf-style templates: name and desc are each
+       *  formatted with the index k (passed twice, so up to two integer fields), and
+       *  option k is created over *storage[k] with default_val as its default. */
+      template<typename T>
+      OptionSpecific&
+        operator()(const std::string& name, T** storage, T default_val, unsigned uiMaxNum, const std::string& desc = "")
+      {
+        std::string cNameBuffer;
+        std::string cDescriptionBuffer;
+        for (unsigned int uiK = 0; uiK < uiMaxNum; uiK++)
+        {
+          // re-create zero-filled buffers each round: template + two expanded indices + NUL
+          cNameBuffer.assign(name.size() + 21, '\0');
+          cDescriptionBuffer.assign(desc.size() + 21, '\0');
+
+          // bounded write into the strings' own storage; writing through c_str() is undefined
+          snprintf(&cNameBuffer[0], cNameBuffer.size(), name.c_str(), uiK, uiK);
+          snprintf(&cDescriptionBuffer[0], cDescriptionBuffer.size(), desc.c_str(), uiK, uiK);
+          // snprintf always NUL-terminates, so trim both strings to the formatted text
+          cNameBuffer.resize(cNameBuffer.find_first_of('\0'));
+          cDescriptionBuffer.resize(cDescriptionBuffer.find_first_of('\0'));
+          parent.addOption(new Option<T>(cNameBuffer, *(storage[uiK]), default_val, cDescriptionBuffer));
+        }
+
+        return *this;
+      }
       /**
        * Add option described by name to the parent Options list,
        *   with desc as an optional help description
diff --git a/source/Lib/libmd5/MD5.h b/source/Lib/libmd5/MD5.h
index 25835bde110b88d353aca3c62499fb1f15f9163e..d41a07424199208d2c75fcdb1e634a955594ee4f 100644
--- a/source/Lib/libmd5/MD5.h
+++ b/source/Lib/libmd5/MD5.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/libmd5/libmd5.h b/source/Lib/libmd5/libmd5.h
index 3859b049215fd4344d5c760723dda5515482de71..4554c73d5d0235a30a7188719b199af46d5a159f 100644
--- a/source/Lib/libmd5/libmd5.h
+++ b/source/Lib/libmd5/libmd5.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2019, ITU/ISO/IEC
+ * Copyright (c) 2010-2020, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without